//
// Copyright (c) 2011, 2023, Oracle and/or its affiliates. All rights reserved.
// Copyright (c) 2012, 2023 SAP SE. All rights reserved.
// DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
//
// This code is free software; you can redistribute it and/or modify it
// under the terms of the GNU General Public License version 2 only, as
// published by the Free Software Foundation.
//
// This code is distributed in the hope that it will be useful, but WITHOUT
// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
// FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
// version 2 for more details (a copy is included in the LICENSE file that
// accompanied this code).
//
// You should have received a copy of the GNU General Public License version
// 2 along with this work; if not, write to the Free Software Foundation,
// Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
//
// Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
// or visit www.oracle.com if you need additional information or have any
// questions.
//
//

//
// PPC64 Architecture Description File
//

//----------REGISTER DEFINITION BLOCK------------------------------------------
// This information is used by the matcher and the register allocator to
// describe individual registers and classes of registers within the target
// architecture.
register %{
//----------Architecture Description Register Definitions----------------------
// General Registers
// "reg_def"  name (register save type, C convention save type,
//                  ideal register type, encoding);
//
// Register Save Types:
//
//   NS  = No-Save:     The register allocator assumes that these registers
//                      can be used without saving upon entry to the method, &
//                      that they do not need to be saved at call sites.
//
//   SOC = Save-On-Call: The register allocator assumes that these registers
//                      can be used without saving upon entry to the method,
//                      but that they must be saved at call sites.
//                      These are called "volatiles" on ppc.
//
//   SOE = Save-On-Entry: The register allocator assumes that these registers
//                      must be saved before using them upon entry to the
//                      method, but they do not need to be saved at call
//                      sites.
//                      These are called "nonvolatiles" on ppc.
//
//   AS  = Always-Save:   The register allocator assumes that these registers
//                      must be saved before using them upon entry to the
//                      method, & that they must be saved at call sites.
//
// Ideal Register Type is used to determine how to save & restore a
// register. Op_RegI will get spilled with LoadI/StoreI, Op_RegP will get
// spilled with LoadP/StoreP. If the register supports both, use Op_RegI.
//
// The encoding number is the actual bit-pattern placed into the opcodes.
//
// PPC64 register definitions, based on the 64-bit PowerPC ELF ABI
// Supplement Version 1.7 as of 2003-10-29.
//
// For each 64-bit register we must define two registers: the register
// itself, e.g. R3, and a corresponding virtual (32-bit) 'half',
// e.g. R3_H, which is needed by the allocator but is not used
// for stores, loads, etc.
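//
// As an illustrative reading of this format: the definition
// 'reg_def R3 (SOC, SOC, Op_RegI, 3, R3->as_VMReg())' below declares R3 as
// save-on-call (volatile) under both conventions, spilled via LoadI/StoreI,
// with hardware encoding 3; R3_H is the virtual high half the allocator needs.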

// ----------------------------
// Integer/Long Registers
// ----------------------------

  // PPC64 has 32 64-bit integer registers.

  // types: v = volatile, nv = non-volatile, s = system
  reg_def R0   ( SOC, SOC, Op_RegI,  0, R0->as_VMReg()         );  // v   used in prologs
  reg_def R0_H ( SOC, SOC, Op_RegI, 99, R0->as_VMReg()->next() );
  reg_def R1   ( NS,  NS,  Op_RegI,  1, R1->as_VMReg()         );  // s   SP
  reg_def R1_H ( NS,  NS,  Op_RegI, 99, R1->as_VMReg()->next() );
  reg_def R2   ( SOC, SOC, Op_RegI,  2, R2->as_VMReg()         );  // v   TOC
  reg_def R2_H ( SOC, SOC, Op_RegI, 99, R2->as_VMReg()->next() );
  reg_def R3   ( SOC, SOC, Op_RegI,  3, R3->as_VMReg()         );  // v   iarg1 & iret
  reg_def R3_H ( SOC, SOC, Op_RegI, 99, R3->as_VMReg()->next() );
  reg_def R4   ( SOC, SOC, Op_RegI,  4, R4->as_VMReg()         );  // v   iarg2
  reg_def R4_H ( SOC, SOC, Op_RegI, 99, R4->as_VMReg()->next() );
  reg_def R5   ( SOC, SOC, Op_RegI,  5, R5->as_VMReg()         );  // v   iarg3
  reg_def R5_H ( SOC, SOC, Op_RegI, 99, R5->as_VMReg()->next() );
  reg_def R6   ( SOC, SOC, Op_RegI,  6, R6->as_VMReg()         );  // v   iarg4
  reg_def R6_H ( SOC, SOC, Op_RegI, 99, R6->as_VMReg()->next() );
  reg_def R7   ( SOC, SOC, Op_RegI,  7, R7->as_VMReg()         );  // v   iarg5
  reg_def R7_H ( SOC, SOC, Op_RegI, 99, R7->as_VMReg()->next() );
  reg_def R8   ( SOC, SOC, Op_RegI,  8, R8->as_VMReg()         );  // v   iarg6
  reg_def R8_H ( SOC, SOC, Op_RegI, 99, R8->as_VMReg()->next() );
  reg_def R9   ( SOC, SOC, Op_RegI,  9, R9->as_VMReg()         );  // v   iarg7
  reg_def R9_H ( SOC, SOC, Op_RegI, 99, R9->as_VMReg()->next() );
  reg_def R10  ( SOC, SOC, Op_RegI, 10, R10->as_VMReg()        );  // v   iarg8
  reg_def R10_H( SOC, SOC, Op_RegI, 99, R10->as_VMReg()->next());
  reg_def R11  ( SOC, SOC, Op_RegI, 11, R11->as_VMReg()        );  // v   ENV / scratch
  reg_def R11_H( SOC, SOC, Op_RegI, 99, R11->as_VMReg()->next());
  reg_def R12  ( SOC, SOC, Op_RegI, 12, R12->as_VMReg()        );  // v   scratch
  reg_def R12_H( SOC, SOC, Op_RegI, 99, R12->as_VMReg()->next());
  reg_def R13  ( NS,  NS,  Op_RegI, 13, R13->as_VMReg()        );  // s   system thread id
  reg_def R13_H( NS,  NS,  Op_RegI, 99, R13->as_VMReg()->next());
  reg_def R14  ( SOC, SOE, Op_RegI, 14, R14->as_VMReg()        );  // nv
  reg_def R14_H( SOC, SOE, Op_RegI, 99, R14->as_VMReg()->next());
  reg_def R15  ( SOC, SOE, Op_RegI, 15, R15->as_VMReg()        );  // nv
  reg_def R15_H( SOC, SOE, Op_RegI, 99, R15->as_VMReg()->next());
  reg_def R16  ( SOC, SOE, Op_RegI, 16, R16->as_VMReg()        );  // nv
  reg_def R16_H( SOC, SOE, Op_RegI, 99, R16->as_VMReg()->next());
  reg_def R17  ( SOC, SOE, Op_RegI, 17, R17->as_VMReg()        );  // nv
  reg_def R17_H( SOC, SOE, Op_RegI, 99, R17->as_VMReg()->next());
  reg_def R18  ( SOC, SOE, Op_RegI, 18, R18->as_VMReg()        );  // nv
  reg_def R18_H( SOC, SOE, Op_RegI, 99, R18->as_VMReg()->next());
  reg_def R19  ( SOC, SOE, Op_RegI, 19, R19->as_VMReg()        );  // nv
  reg_def R19_H( SOC, SOE, Op_RegI, 99, R19->as_VMReg()->next());
  reg_def R20  ( SOC, SOE, Op_RegI, 20, R20->as_VMReg()        );  // nv
  reg_def R20_H( SOC, SOE, Op_RegI, 99, R20->as_VMReg()->next());
  reg_def R21  ( SOC, SOE, Op_RegI, 21, R21->as_VMReg()        );  // nv
  reg_def R21_H( SOC, SOE, Op_RegI, 99, R21->as_VMReg()->next());
  reg_def R22  ( SOC, SOE, Op_RegI, 22, R22->as_VMReg()        );  // nv
  reg_def R22_H( SOC, SOE, Op_RegI, 99, R22->as_VMReg()->next());
  reg_def R23  ( SOC, SOE, Op_RegI, 23, R23->as_VMReg()        );  // nv
  reg_def R23_H( SOC, SOE, Op_RegI, 99, R23->as_VMReg()->next());
  reg_def R24  ( SOC, SOE, Op_RegI, 24, R24->as_VMReg()        );  // nv
  reg_def R24_H( SOC, SOE, Op_RegI, 99, R24->as_VMReg()->next());
  reg_def R25  ( SOC, SOE, Op_RegI, 25, R25->as_VMReg()        );  // nv
  reg_def R25_H( SOC, SOE, Op_RegI, 99, R25->as_VMReg()->next());
  reg_def R26  ( SOC, SOE, Op_RegI, 26, R26->as_VMReg()        );  // nv
  reg_def R26_H( SOC, SOE, Op_RegI, 99, R26->as_VMReg()->next());
  reg_def R27  ( SOC, SOE, Op_RegI, 27, R27->as_VMReg()        );  // nv
  reg_def R27_H( SOC, SOE, Op_RegI, 99, R27->as_VMReg()->next());
  reg_def R28  ( SOC, SOE, Op_RegI, 28, R28->as_VMReg()        );  // nv
  reg_def R28_H( SOC, SOE, Op_RegI, 99, R28->as_VMReg()->next());
  reg_def R29  ( SOC, SOE, Op_RegI, 29, R29->as_VMReg()        );  // nv
  reg_def R29_H( SOC, SOE, Op_RegI, 99, R29->as_VMReg()->next());
  reg_def R30  ( SOC, SOE, Op_RegI, 30, R30->as_VMReg()        );  // nv
  reg_def R30_H( SOC, SOE, Op_RegI, 99, R30->as_VMReg()->next());
  reg_def R31  ( SOC, SOE, Op_RegI, 31, R31->as_VMReg()        );  // nv
  reg_def R31_H( SOC, SOE, Op_RegI, 99, R31->as_VMReg()->next());


// ----------------------------
// Float/Double Registers
// ----------------------------

  // Double Registers
  // The rules of ADL require that double registers be defined in pairs.
  // Each pair must be two 32-bit values, but not necessarily a pair of
  // single float registers. In each pair, ADLC-assigned register numbers
  // must be adjacent, with the lower number even. Finally, when the
  // CPU stores such a register pair to memory, the word associated with
  // the lower ADLC-assigned number must be stored to the lower address.

  // PPC64 has 32 64-bit floating-point registers. Each can store a single
  // or double precision floating-point value.

  // types: v = volatile, nv = non-volatile, s = system
  reg_def F0   ( SOC, SOC, Op_RegF,  0, F0->as_VMReg()         );  // v   scratch
  reg_def F0_H ( SOC, SOC, Op_RegF, 99, F0->as_VMReg()->next() );
  reg_def F1   ( SOC, SOC, Op_RegF,  1, F1->as_VMReg()         );  // v   farg1 & fret
  reg_def F1_H ( SOC, SOC, Op_RegF, 99, F1->as_VMReg()->next() );
  reg_def F2   ( SOC, SOC, Op_RegF,  2, F2->as_VMReg()         );  // v   farg2
  reg_def F2_H ( SOC, SOC, Op_RegF, 99, F2->as_VMReg()->next() );
  reg_def F3   ( SOC, SOC, Op_RegF,  3, F3->as_VMReg()         );  // v   farg3
  reg_def F3_H ( SOC, SOC, Op_RegF, 99, F3->as_VMReg()->next() );
  reg_def F4   ( SOC, SOC, Op_RegF,  4, F4->as_VMReg()         );  // v   farg4
  reg_def F4_H ( SOC, SOC, Op_RegF, 99, F4->as_VMReg()->next() );
  reg_def F5   ( SOC, SOC, Op_RegF,  5, F5->as_VMReg()         );  // v   farg5
  reg_def F5_H ( SOC, SOC, Op_RegF, 99, F5->as_VMReg()->next() );
  reg_def F6   ( SOC, SOC, Op_RegF,  6, F6->as_VMReg()         );  // v   farg6
  reg_def F6_H ( SOC, SOC, Op_RegF, 99, F6->as_VMReg()->next() );
  reg_def F7   ( SOC, SOC, Op_RegF,  7, F7->as_VMReg()         );  // v   farg7
  reg_def F7_H ( SOC, SOC, Op_RegF, 99, F7->as_VMReg()->next() );
  reg_def F8   ( SOC, SOC, Op_RegF,  8, F8->as_VMReg()         );  // v   farg8
  reg_def F8_H ( SOC, SOC, Op_RegF, 99, F8->as_VMReg()->next() );
  reg_def F9   ( SOC, SOC, Op_RegF,  9, F9->as_VMReg()         );  // v   farg9
  reg_def F9_H ( SOC, SOC, Op_RegF, 99, F9->as_VMReg()->next() );
  reg_def F10  ( SOC, SOC, Op_RegF, 10, F10->as_VMReg()        );  // v   farg10
  reg_def F10_H( SOC, SOC, Op_RegF, 99, F10->as_VMReg()->next());
  reg_def F11  ( SOC, SOC, Op_RegF, 11, F11->as_VMReg()        );  // v   farg11
  reg_def F11_H( SOC, SOC, Op_RegF, 99, F11->as_VMReg()->next());
  reg_def F12  ( SOC, SOC, Op_RegF, 12, F12->as_VMReg()        );  // v   farg12
  reg_def F12_H( SOC, SOC, Op_RegF, 99, F12->as_VMReg()->next());
  reg_def F13  ( SOC, SOC, Op_RegF, 13, F13->as_VMReg()        );  // v   farg13
  reg_def F13_H( SOC, SOC, Op_RegF, 99, F13->as_VMReg()->next());
  reg_def F14  ( SOC, SOE, Op_RegF, 14, F14->as_VMReg()        );  // nv
  reg_def F14_H( SOC, SOE, Op_RegF, 99, F14->as_VMReg()->next());
  reg_def F15  ( SOC, SOE, Op_RegF, 15, F15->as_VMReg()        );  // nv
  reg_def F15_H( SOC, SOE, Op_RegF, 99, F15->as_VMReg()->next());
  reg_def F16  ( SOC, SOE, Op_RegF, 16, F16->as_VMReg()        );  // nv
  reg_def F16_H( SOC, SOE, Op_RegF, 99, F16->as_VMReg()->next());
  reg_def F17  ( SOC, SOE, Op_RegF, 17, F17->as_VMReg()        );  // nv
  reg_def F17_H( SOC, SOE, Op_RegF, 99, F17->as_VMReg()->next());
  reg_def F18  ( SOC, SOE, Op_RegF, 18, F18->as_VMReg()        );  // nv
  reg_def F18_H( SOC, SOE, Op_RegF, 99, F18->as_VMReg()->next());
  reg_def F19  ( SOC, SOE, Op_RegF, 19, F19->as_VMReg()        );  // nv
  reg_def F19_H( SOC, SOE, Op_RegF, 99, F19->as_VMReg()->next());
  reg_def F20  ( SOC, SOE, Op_RegF, 20, F20->as_VMReg()        );  // nv
  reg_def F20_H( SOC, SOE, Op_RegF, 99, F20->as_VMReg()->next());
  reg_def F21  ( SOC, SOE, Op_RegF, 21, F21->as_VMReg()        );  // nv
  reg_def F21_H( SOC, SOE, Op_RegF, 99, F21->as_VMReg()->next());
  reg_def F22  ( SOC, SOE, Op_RegF, 22, F22->as_VMReg()        );  // nv
  reg_def F22_H( SOC, SOE, Op_RegF, 99, F22->as_VMReg()->next());
  reg_def F23  ( SOC, SOE, Op_RegF, 23, F23->as_VMReg()        );  // nv
  reg_def F23_H( SOC, SOE, Op_RegF, 99, F23->as_VMReg()->next());
  reg_def F24  ( SOC, SOE, Op_RegF, 24, F24->as_VMReg()        );  // nv
  reg_def F24_H( SOC, SOE, Op_RegF, 99, F24->as_VMReg()->next());
  reg_def F25  ( SOC, SOE, Op_RegF, 25, F25->as_VMReg()        );  // nv
  reg_def F25_H( SOC, SOE, Op_RegF, 99, F25->as_VMReg()->next());
  reg_def F26  ( SOC, SOE, Op_RegF, 26, F26->as_VMReg()        );  // nv
  reg_def F26_H( SOC, SOE, Op_RegF, 99, F26->as_VMReg()->next());
  reg_def F27  ( SOC, SOE, Op_RegF, 27, F27->as_VMReg()        );  // nv
  reg_def F27_H( SOC, SOE, Op_RegF, 99, F27->as_VMReg()->next());
  reg_def F28  ( SOC, SOE, Op_RegF, 28, F28->as_VMReg()        );  // nv
  reg_def F28_H( SOC, SOE, Op_RegF, 99, F28->as_VMReg()->next());
  reg_def F29  ( SOC, SOE, Op_RegF, 29, F29->as_VMReg()        );  // nv
  reg_def F29_H( SOC, SOE, Op_RegF, 99, F29->as_VMReg()->next());
  reg_def F30  ( SOC, SOE, Op_RegF, 30, F30->as_VMReg()        );  // nv
  reg_def F30_H( SOC, SOE, Op_RegF, 99, F30->as_VMReg()->next());
  reg_def F31  ( SOC, SOE, Op_RegF, 31, F31->as_VMReg()        );  // nv
  reg_def F31_H( SOC, SOE, Op_RegF, 99, F31->as_VMReg()->next());

// ----------------------------
// Special Registers
// ----------------------------

// Condition Codes Flag Registers

  // PPC64 has 8 condition code "registers" which are all contained
  // in the CR register.

  // types: v = volatile, nv = non-volatile, s = system
  reg_def CCR0(SOC, SOC, Op_RegFlags, 0, CCR0->as_VMReg());  // v
  reg_def CCR1(SOC, SOC, Op_RegFlags, 1, CCR1->as_VMReg());  // v
  reg_def CCR2(SOC, SOC, Op_RegFlags, 2, CCR2->as_VMReg());  // nv
  reg_def CCR3(SOC, SOC, Op_RegFlags, 3, CCR3->as_VMReg());  // nv
  reg_def CCR4(SOC, SOC, Op_RegFlags, 4, CCR4->as_VMReg());  // nv
  reg_def CCR5(SOC, SOC, Op_RegFlags, 5, CCR5->as_VMReg());  // v
  reg_def CCR6(SOC, SOC, Op_RegFlags, 6, CCR6->as_VMReg());  // v
  reg_def CCR7(SOC, SOC, Op_RegFlags, 7, CCR7->as_VMReg());  // v

  // Special registers of PPC64

  reg_def SR_XER(    SOC, SOC, Op_RegP, 0, SR_XER->as_VMReg());     // v
  reg_def SR_LR(     SOC, SOC, Op_RegP, 1, SR_LR->as_VMReg());      // v
  reg_def SR_CTR(    SOC, SOC, Op_RegP, 2, SR_CTR->as_VMReg());     // v
  reg_def SR_VRSAVE( SOC, SOC, Op_RegP, 3, SR_VRSAVE->as_VMReg());  // v
  reg_def SR_SPEFSCR(SOC, SOC, Op_RegP, 4, SR_SPEFSCR->as_VMReg()); // v
  reg_def SR_PPR(    SOC, SOC, Op_RegP, 5, SR_PPR->as_VMReg());     // v

// ----------------------------
// Vector-Scalar Registers
// ----------------------------
  // 1st 32 VSRs are aliases for the FPRs which are already defined above.
  reg_def VSR0 ( SOC, SOC, Op_VecX, 0, VMRegImpl::Bad());
  reg_def VSR1 ( SOC, SOC, Op_VecX, 1, VMRegImpl::Bad());
  reg_def VSR2 ( SOC, SOC, Op_VecX, 2, VMRegImpl::Bad());
  reg_def VSR3 ( SOC, SOC, Op_VecX, 3, VMRegImpl::Bad());
  reg_def VSR4 ( SOC, SOC, Op_VecX, 4, VMRegImpl::Bad());
  reg_def VSR5 ( SOC, SOC, Op_VecX, 5, VMRegImpl::Bad());
  reg_def VSR6 ( SOC, SOC, Op_VecX, 6, VMRegImpl::Bad());
  reg_def VSR7 ( SOC, SOC, Op_VecX, 7, VMRegImpl::Bad());
  reg_def VSR8 ( SOC, SOC, Op_VecX, 8, VMRegImpl::Bad());
  reg_def VSR9 ( SOC, SOC, Op_VecX, 9, VMRegImpl::Bad());
  reg_def VSR10 ( SOC, SOC, Op_VecX, 10, VMRegImpl::Bad());
  reg_def VSR11 ( SOC, SOC, Op_VecX, 11, VMRegImpl::Bad());
  reg_def VSR12 ( SOC, SOC, Op_VecX, 12, VMRegImpl::Bad());
  reg_def VSR13 ( SOC, SOC, Op_VecX, 13, VMRegImpl::Bad());
  reg_def VSR14 ( SOC, SOE, Op_VecX, 14, VMRegImpl::Bad());
  reg_def VSR15 ( SOC, SOE, Op_VecX, 15, VMRegImpl::Bad());
  reg_def VSR16 ( SOC, SOE, Op_VecX, 16, VMRegImpl::Bad());
  reg_def VSR17 ( SOC, SOE, Op_VecX, 17, VMRegImpl::Bad());
  reg_def VSR18 ( SOC, SOE, Op_VecX, 18, VMRegImpl::Bad());
  reg_def VSR19 ( SOC, SOE, Op_VecX, 19, VMRegImpl::Bad());
  reg_def VSR20 ( SOC, SOE, Op_VecX, 20, VMRegImpl::Bad());
  reg_def VSR21 ( SOC, SOE, Op_VecX, 21, VMRegImpl::Bad());
  reg_def VSR22 ( SOC, SOE, Op_VecX, 22, VMRegImpl::Bad());
  reg_def VSR23 ( SOC, SOE, Op_VecX, 23, VMRegImpl::Bad());
  reg_def VSR24 ( SOC, SOE, Op_VecX, 24, VMRegImpl::Bad());
  reg_def VSR25 ( SOC, SOE, Op_VecX, 25, VMRegImpl::Bad());
  reg_def VSR26 ( SOC, SOE, Op_VecX, 26, VMRegImpl::Bad());
  reg_def VSR27 ( SOC, SOE, Op_VecX, 27, VMRegImpl::Bad());
  reg_def VSR28 ( SOC, SOE, Op_VecX, 28, VMRegImpl::Bad());
  reg_def VSR29 ( SOC, SOE, Op_VecX, 29, VMRegImpl::Bad());
  reg_def VSR30 ( SOC, SOE, Op_VecX, 30, VMRegImpl::Bad());
  reg_def VSR31 ( SOC, SOE, Op_VecX, 31, VMRegImpl::Bad());
  // 2nd 32 VSRs are aliases for the VRs which are only defined here.
  reg_def VSR32 ( SOC, SOC, Op_VecX, 32, VSR32->as_VMReg());
  reg_def VSR33 ( SOC, SOC, Op_VecX, 33, VSR33->as_VMReg());
  reg_def VSR34 ( SOC, SOC, Op_VecX, 34, VSR34->as_VMReg());
  reg_def VSR35 ( SOC, SOC, Op_VecX, 35, VSR35->as_VMReg());
  reg_def VSR36 ( SOC, SOC, Op_VecX, 36, VSR36->as_VMReg());
  reg_def VSR37 ( SOC, SOC, Op_VecX, 37, VSR37->as_VMReg());
  reg_def VSR38 ( SOC, SOC, Op_VecX, 38, VSR38->as_VMReg());
  reg_def VSR39 ( SOC, SOC, Op_VecX, 39, VSR39->as_VMReg());
  reg_def VSR40 ( SOC, SOC, Op_VecX, 40, VSR40->as_VMReg());
  reg_def VSR41 ( SOC, SOC, Op_VecX, 41, VSR41->as_VMReg());
  reg_def VSR42 ( SOC, SOC, Op_VecX, 42, VSR42->as_VMReg());
  reg_def VSR43 ( SOC, SOC, Op_VecX, 43, VSR43->as_VMReg());
  reg_def VSR44 ( SOC, SOC, Op_VecX, 44, VSR44->as_VMReg());
  reg_def VSR45 ( SOC, SOC, Op_VecX, 45, VSR45->as_VMReg());
  reg_def VSR46 ( SOC, SOC, Op_VecX, 46, VSR46->as_VMReg());
  reg_def VSR47 ( SOC, SOC, Op_VecX, 47, VSR47->as_VMReg());
  reg_def VSR48 ( SOC, SOC, Op_VecX, 48, VSR48->as_VMReg());
  reg_def VSR49 ( SOC, SOC, Op_VecX, 49, VSR49->as_VMReg());
  reg_def VSR50 ( SOC, SOC, Op_VecX, 50, VSR50->as_VMReg());
  reg_def VSR51 ( SOC, SOC, Op_VecX, 51, VSR51->as_VMReg());
  reg_def VSR52 ( SOC, SOE, Op_VecX, 52, VSR52->as_VMReg());
  reg_def VSR53 ( SOC, SOE, Op_VecX, 53, VSR53->as_VMReg());
  reg_def VSR54 ( SOC, SOE, Op_VecX, 54, VSR54->as_VMReg());
  reg_def VSR55 ( SOC, SOE, Op_VecX, 55, VSR55->as_VMReg());
  reg_def VSR56 ( SOC, SOE, Op_VecX, 56, VSR56->as_VMReg());
  reg_def VSR57 ( SOC, SOE, Op_VecX, 57, VSR57->as_VMReg());
  reg_def VSR58 ( SOC, SOE, Op_VecX, 58, VSR58->as_VMReg());
  reg_def VSR59 ( SOC, SOE, Op_VecX, 59, VSR59->as_VMReg());
  reg_def VSR60 ( SOC, SOE, Op_VecX, 60, VSR60->as_VMReg());
  reg_def VSR61 ( SOC, SOE, Op_VecX, 61, VSR61->as_VMReg());
  reg_def VSR62 ( SOC, SOE, Op_VecX, 62, VSR62->as_VMReg());
  reg_def VSR63 ( SOC, SOE, Op_VecX, 63, VSR63->as_VMReg());

// ----------------------------
// Specify priority of register selection within phases of register
// allocation. Highest priority is first. A useful heuristic is to
// give registers a low priority when they are required by machine
// instructions, like EAX and EDX on I486, and choose no-save registers
// before save-on-call, & save-on-call before save-on-entry. Registers
// which participate in fixed calling sequences should come last.
// Registers which are used as pairs must fall on an even boundary.

// It's worth about 1% on SPEC geomean to get this right.

// Chunk0, chunk1, and chunk2 form the MachRegisterNumbers enumeration
// in adGlobals_ppc.hpp which defines the <register>_num values, e.g.
// R3_num. Therefore, R3_num may not be (and in reality is not)
// the same as R3->encoding()! Furthermore, we cannot make any
// assumptions on ordering, e.g. R3_num may be less than R2_num.
// Additionally, the function
//   static enum RC rc_class(OptoReg::Name reg )
// maps a given <register>_num value to its chunk type (except for flags)
// and its current implementation relies on chunk0 and chunk1 having a
// size of 64 each.

// If you change this allocation class, please have a look at the
// default values for the parameters RoundRobinIntegerRegIntervalStart
// and RoundRobinFloatRegIntervalStart

alloc_class chunk0 (
  // Chunk0 contains *all* 64 integer register halves.

  // "non-volatile" registers
  R14, R14_H,
  R15, R15_H,
  R17, R17_H,
  R18, R18_H,
  R19, R19_H,
  R20, R20_H,
  R21, R21_H,
  R22, R22_H,
  R23, R23_H,
  R24, R24_H,
  R25, R25_H,
  R26, R26_H,
  R27, R27_H,
  R28, R28_H,
  R29, R29_H,
  R30, R30_H,
  R31, R31_H,

  // scratch/special registers
  R11, R11_H,
  R12, R12_H,

  // argument registers
  R10, R10_H,
  R9,  R9_H,
  R8,  R8_H,
  R7,  R7_H,
  R6,  R6_H,
  R5,  R5_H,
  R4,  R4_H,
  R3,  R3_H,

  // special registers, not available for allocation
  R16, R16_H,     // R16_thread
  R13, R13_H,     // system thread id
  R2,  R2_H,      // may be used for TOC
  R1,  R1_H,      // SP
  R0,  R0_H       // R0 (scratch)
);

// If you change this allocation class, please have a look at the
// default values for the parameters RoundRobinIntegerRegIntervalStart
// and RoundRobinFloatRegIntervalStart

alloc_class chunk1 (
  // Chunk1 contains *all* 64 floating-point register halves.

  // scratch register
  F0,  F0_H,

  // argument registers
  F13, F13_H,
  F12, F12_H,
  F11, F11_H,
  F10, F10_H,
  F9,  F9_H,
  F8,  F8_H,
  F7,  F7_H,
  F6,  F6_H,
  F5,  F5_H,
  F4,  F4_H,
  F3,  F3_H,
  F2,  F2_H,
  F1,  F1_H,

  // non-volatile registers
  F14, F14_H,
  F15, F15_H,
  F16, F16_H,
  F17, F17_H,
  F18, F18_H,
  F19, F19_H,
  F20, F20_H,
  F21, F21_H,
  F22, F22_H,
  F23, F23_H,
  F24, F24_H,
  F25, F25_H,
  F26, F26_H,
  F27, F27_H,
  F28, F28_H,
  F29, F29_H,
  F30, F30_H,
  F31, F31_H
);

alloc_class chunk2 (
  // Chunk2 contains *all* 8 condition code registers.

  CCR0,
  CCR1,
  CCR2,
  CCR3,
  CCR4,
  CCR5,
  CCR6,
  CCR7
);

alloc_class chunk3 (
  VSR0,
  VSR1,
  VSR2,
  VSR3,
  VSR4,
  VSR5,
  VSR6,
  VSR7,
  VSR8,
  VSR9,
  VSR10,
  VSR11,
  VSR12,
  VSR13,
  VSR14,
  VSR15,
  VSR16,
  VSR17,
  VSR18,
  VSR19,
  VSR20,
  VSR21,
  VSR22,
  VSR23,
  VSR24,
  VSR25,
  VSR26,
  VSR27,
  VSR28,
  VSR29,
  VSR30,
  VSR31,
  VSR32,
  VSR33,
  VSR34,
  VSR35,
  VSR36,
  VSR37,
  VSR38,
  VSR39,
  VSR40,
  VSR41,
  VSR42,
  VSR43,
  VSR44,
  VSR45,
  VSR46,
  VSR47,
  VSR48,
  VSR49,
  VSR50,
  VSR51,
  VSR52,
  VSR53,
  VSR54,
  VSR55,
  VSR56,
  VSR57,
  VSR58,
  VSR59,
  VSR60,
  VSR61,
  VSR62,
  VSR63
);

alloc_class chunk4 (
  // special registers
  // These registers are not allocated, but used for nodes generated by postalloc expand.
  SR_XER,
  SR_LR,
  SR_CTR,
  SR_VRSAVE,
  SR_SPEFSCR,
  SR_PPR
);

//-------Architecture Description Register Classes-----------------------

// Several register classes are automatically defined based upon
// information in this architecture description.

// 1) reg_class inline_cache_reg           ( as defined in frame section )
// 2) reg_class stack_slots( /* one chunk of stack-based "registers" */ )
//

// ----------------------------
// 32 Bit Register Classes
// ----------------------------

// We specify registers twice, once as read/write, and once read-only.
// We use the read-only registers for source operands. With this, we
// can include preset read only registers in this class, as a hard-coded
// '0'-register. (We used to simulate this on ppc.)

// 32 bit registers that can be read and written i.e. these registers
// can be dest (or src) of normal instructions.
reg_class bits32_reg_rw(
/*R0*/              // R0
/*R1*/              // SP
  R2,               // TOC
  R3,
  R4,
  R5,
  R6,
  R7,
  R8,
  R9,
  R10,
  R11,
  R12,
/*R13*/             // system thread id
  R14,
  R15,
/*R16*/             // R16_thread
  R17,
  R18,
  R19,
  R20,
  R21,
  R22,
  R23,
  R24,
  R25,
  R26,
  R27,
  R28,
/*R29,*/             // global TOC
  R30,
  R31
);

// 32 bit registers that can only be read i.e. these registers can
// only be src of all instructions.
reg_class bits32_reg_ro(
/*R0*/              // R0
/*R1*/              // SP
  R2,               // TOC
  R3,
  R4,
  R5,
  R6,
  R7,
  R8,
  R9,
  R10,
  R11,
  R12,
/*R13*/             // system thread id
  R14,
  R15,
/*R16*/             // R16_thread
  R17,
  R18,
  R19,
  R20,
  R21,
  R22,
  R23,
  R24,
  R25,
  R26,
  R27,
  R28,
/*R29,*/
  R30,
  R31
);

reg_class rscratch1_bits32_reg(R11);
reg_class rscratch2_bits32_reg(R12);
reg_class rarg1_bits32_reg(R3);
reg_class rarg2_bits32_reg(R4);
reg_class rarg3_bits32_reg(R5);
reg_class rarg4_bits32_reg(R6);

// ----------------------------
// 64 Bit Register Classes
// ----------------------------
// 64-bit build means 64-bit pointers means hi/lo pairs

reg_class rscratch1_bits64_reg(R11_H, R11);
reg_class rscratch2_bits64_reg(R12_H, R12);
reg_class rarg1_bits64_reg(R3_H, R3);
reg_class rarg2_bits64_reg(R4_H, R4);
reg_class rarg3_bits64_reg(R5_H, R5);
reg_class rarg4_bits64_reg(R6_H, R6);
// Thread register, 'written' by tlsLoadP, see there.
reg_class thread_bits64_reg(R16_H, R16);

reg_class r19_bits64_reg(R19_H, R19);

// 64 bit registers that can be read and written i.e. these registers
// can be dest (or src) of normal instructions.
reg_class bits64_reg_rw(
/*R0_H,  R0*/     // R0
/*R1_H,  R1*/     // SP
  R2_H,  R2,      // TOC
  R3_H,  R3,
  R4_H,  R4,
  R5_H,  R5,
  R6_H,  R6,
  R7_H,  R7,
  R8_H,  R8,
  R9_H,  R9,
  R10_H, R10,
  R11_H, R11,
  R12_H, R12,
/*R13_H, R13*/   // system thread id
  R14_H, R14,
  R15_H, R15,
/*R16_H, R16*/   // R16_thread
  R17_H, R17,
  R18_H, R18,
  R19_H, R19,
  R20_H, R20,
  R21_H, R21,
  R22_H, R22,
  R23_H, R23,
  R24_H, R24,
  R25_H, R25,
  R26_H, R26,
  R27_H, R27,
  R28_H, R28,
/*R29_H, R29,*/
  R30_H, R30,
  R31_H, R31
);

// 64 bit registers used excluding r2, r11 and r12
// Used to hold the TOC to avoid collisions with expanded LeafCall which uses
// r2, r11 and r12 internally.
reg_class bits64_reg_leaf_call(
/*R0_H,  R0*/     // R0
/*R1_H,  R1*/     // SP
/*R2_H,  R2*/     // TOC
  R3_H,  R3,
  R4_H,  R4,
  R5_H,  R5,
  R6_H,  R6,
  R7_H,  R7,
  R8_H,  R8,
  R9_H,  R9,
  R10_H, R10,
/*R11_H, R11*/
/*R12_H, R12*/
/*R13_H, R13*/   // system thread id
  R14_H, R14,
  R15_H, R15,
/*R16_H, R16*/   // R16_thread
  R17_H, R17,
  R18_H, R18,
  R19_H, R19,
  R20_H, R20,
  R21_H, R21,
  R22_H, R22,
  R23_H, R23,
  R24_H, R24,
  R25_H, R25,
  R26_H, R26,
  R27_H, R27,
  R28_H, R28,
/*R29_H, R29,*/
  R30_H, R30,
  R31_H, R31
);

// Used to hold the TOC to avoid collisions with expanded DynamicCall
// which uses r19 as inline cache internally and expanded LeafCall which uses
// r2, r11 and r12 internally.
reg_class bits64_constant_table_base(
/*R0_H,  R0*/     // R0
/*R1_H,  R1*/     // SP
/*R2_H,  R2*/     // TOC
  R3_H,  R3,
  R4_H,  R4,
  R5_H,  R5,
  R6_H,  R6,
  R7_H,  R7,
  R8_H,  R8,
  R9_H,  R9,
  R10_H, R10,
/*R11_H, R11*/
/*R12_H, R12*/
/*R13_H, R13*/   // system thread id
  R14_H, R14,
  R15_H, R15,
/*R16_H, R16*/   // R16_thread
  R17_H, R17,
  R18_H, R18,
/*R19_H, R19*/
  R20_H, R20,
  R21_H, R21,
  R22_H, R22,
  R23_H, R23,
  R24_H, R24,
  R25_H, R25,
  R26_H, R26,
  R27_H, R27,
  R28_H, R28,
/*R29_H, R29,*/
  R30_H, R30,
  R31_H, R31
);

// 64 bit registers that can only be read i.e. these registers can
// only be src of all instructions.
reg_class bits64_reg_ro(
/*R0_H,  R0*/     // R0
  R1_H,  R1,
  R2_H,  R2,       // TOC
  R3_H,  R3,
  R4_H,  R4,
  R5_H,  R5,
  R6_H,  R6,
  R7_H,  R7,
  R8_H,  R8,
  R9_H,  R9,
  R10_H, R10,
  R11_H, R11,
  R12_H, R12,
/*R13_H, R13*/   // system thread id
  R14_H, R14,
  R15_H, R15,
  R16_H, R16,    // R16_thread
  R17_H, R17,
  R18_H, R18,
  R19_H, R19,
  R20_H, R20,
  R21_H, R21,
  R22_H, R22,
  R23_H, R23,
  R24_H, R24,
  R25_H, R25,
  R26_H, R26,
  R27_H, R27,
  R28_H, R28,
/*R29_H, R29,*/ // TODO: let allocator handle TOC!!
  R30_H, R30,
  R31_H, R31
);


// ----------------------------
// Special Class for Condition Code Flags Register

reg_class int_flags(
/*CCR0*/             // scratch
/*CCR1*/             // scratch
/*CCR2*/             // nv!
/*CCR3*/             // nv!
/*CCR4*/             // nv!
  CCR5,
  CCR6,
  CCR7
);

reg_class int_flags_ro(
  CCR0,
  CCR1,
  CCR2,
  CCR3,
  CCR4,
  CCR5,
  CCR6,
  CCR7
);

reg_class int_flags_CR0(CCR0);
reg_class int_flags_CR1(CCR1);
reg_class int_flags_CR6(CCR6);
reg_class ctr_reg(SR_CTR);

// ----------------------------
// Float Register Classes
// ----------------------------

reg_class flt_reg(
  F0,
  F1,
  F2,
  F3,
  F4,
  F5,
  F6,
  F7,
  F8,
  F9,
  F10,
  F11,
  F12,
  F13,
  F14,              // nv!
  F15,              // nv!
  F16,              // nv!
  F17,              // nv!
  F18,              // nv!
  F19,              // nv!
  F20,              // nv!
  F21,              // nv!
  F22,              // nv!
  F23,              // nv!
  F24,              // nv!
  F25,              // nv!
  F26,              // nv!
  F27,              // nv!
  F28,              // nv!
  F29,              // nv!
  F30,              // nv!
  F31               // nv!
);

// Double precision float registers have virtual `high halves' that
// are needed by the allocator.
reg_class dbl_reg(
  F0,  F0_H,
  F1,  F1_H,
  F2,  F2_H,
  F3,  F3_H,
  F4,  F4_H,
  F5,  F5_H,
  F6,  F6_H,
  F7,  F7_H,
  F8,  F8_H,
  F9,  F9_H,
  F10, F10_H,
  F11, F11_H,
  F12, F12_H,
  F13, F13_H,
  F14, F14_H,    // nv!
  F15, F15_H,    // nv!
  F16, F16_H,    // nv!
  F17, F17_H,    // nv!
  F18, F18_H,    // nv!
  F19, F19_H,    // nv!
  F20, F20_H,    // nv!
  F21, F21_H,    // nv!
  F22, F22_H,    // nv!
  F23, F23_H,    // nv!
  F24, F24_H,    // nv!
  F25, F25_H,    // nv!
  F26, F26_H,    // nv!
  F27, F27_H,    // nv!
  F28, F28_H,    // nv!
  F29, F29_H,    // nv!
  F30, F30_H,    // nv!
  F31, F31_H     // nv!
);

// ----------------------------
// Vector-Scalar Register Class
// ----------------------------

reg_class vs_reg(
  // Attention: Only these ones are saved & restored at safepoint by RegisterSaver.
  VSR32,
  VSR33,
  VSR34,
  VSR35,
  VSR36,
  VSR37,
  VSR38,
  VSR39,
  VSR40,
  VSR41,
  VSR42,
  VSR43,
  VSR44,
  VSR45,
  VSR46,
  VSR47,
  VSR48,
  VSR49,
  VSR50,
  VSR51
  // VSR52-VSR63 // nv!
);

 %}

//----------DEFINITION BLOCK---------------------------------------------------
// Define name --> value mappings to inform the ADLC of an integer valued name
// Current support includes integer values in the range [0, 0x7FFFFFFF]
// Format:
//        int_def  <name>         ( <int_value>, <expression>);
// Generated Code in ad_<arch>.hpp
//        #define  <name>   (<expression>)
//        // value == <int_value>
// Generated code in ad_<arch>.cpp adlc_verification()
//        assert( <name> == <int_value>, "Expect (<expression>) to equal <int_value>");
//
definitions %{
  // The default cost (of an ALU instruction).
  int_def DEFAULT_COST_LOW        (     30,      30);
  int_def DEFAULT_COST            (    100,     100);
  int_def HUGE_COST               (1000000, 1000000);

  // Memory refs
  int_def MEMORY_REF_COST_LOW     (    200, DEFAULT_COST * 2);
  int_def MEMORY_REF_COST         (    300, DEFAULT_COST * 3);

  // Branches are even more expensive.
  int_def BRANCH_COST             (    900, DEFAULT_COST * 9);
  int_def CALL_COST               (   1300, DEFAULT_COST * 13);
%}
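
// For example, following the format described above, the BRANCH_COST entry
// results in '#define BRANCH_COST (DEFAULT_COST * 9)' in the generated
// ad_<arch>.hpp and an 'assert(BRANCH_COST == 900, ...)' check in
// adlc_verification().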


//----------SOURCE BLOCK-------------------------------------------------------
// This is a block of C++ code which provides values, functions, and
// definitions necessary in the rest of the architecture description.
source_hpp %{
  // Header information of the source block.
  // Method declarations/definitions which are used outside
  // the ad-scope can conveniently be defined here.
  //
  // To keep related declarations/definitions/uses close together,
  // we switch between source %{ }% and source_hpp %{ }% freely as needed.

#include "opto/convertnode.hpp"

  // Returns true if Node n is followed by a MemBar node that
  // will do an acquire. If so, this node must not do the acquire
  // operation.
  bool followed_by_acquire(const Node *n);
%}

source %{

#include "opto/c2_CodeStubs.hpp"
#include "oops/klass.inline.hpp"

void PhaseOutput::pd_perform_mach_node_analysis() {
}

int MachNode::pd_alignment_required() const {
  return 1;
}

int MachNode::compute_padding(int current_offset) const {
  return 0;
}

// Should the matcher clone input 'm' of node 'n'?
bool Matcher::pd_clone_node(Node* n, Node* m, Matcher::MStack& mstack) {
  return false;
}

// Should the Matcher clone shifts on addressing modes, expecting them
// to be subsumed into complex addressing expressions or compute them
// into registers?
bool Matcher::pd_clone_address_expressions(AddPNode* m, Matcher::MStack& mstack, VectorSet& address_visited) {
  return clone_base_plus_offset_address(m, mstack, address_visited);
}

// Optimize load-acquire.
//
// Check if acquire is unnecessary due to following operation that does
// acquire anyways.
// Walk the pattern:
//
//      n: Load.acq
//           |
//      MemBarAcquire
//       |         |
//  Proj(ctrl)  Proj(mem)
//       |         |
//   MemBarRelease/Volatile
//
bool followed_by_acquire(const Node *load) {
  assert(load->is_Load(), "So far implemented only for loads.");

  // Find MemBarAcquire.
  const Node *mba = NULL;
  for (DUIterator_Fast imax, i = load->fast_outs(imax); i < imax; i++) {
    const Node *out = load->fast_out(i);
    if (out->Opcode() == Op_MemBarAcquire) {
      if (out->in(0) == load) continue; // Skip control edge, membar should be found via precedence edge.
      mba = out;
      break;
    }
  }
  if (!mba) return false;

  // Find following MemBar node.
  //
  // The following node must be reachable by control AND memory
  // edge to assure no other operations are in between the two nodes.
  //
  // So first get the Proj node, mem_proj, to use it to iterate forward.
  Node *mem_proj = NULL;
  for (DUIterator_Fast imax, i = mba->fast_outs(imax); i < imax; i++) {
    mem_proj = mba->fast_out(i);      // Runs out of bounds and asserts if Proj not found.
    assert(mem_proj->is_Proj(), "only projections here");
    ProjNode *proj = mem_proj->as_Proj();
    if (proj->_con == TypeFunc::Memory &&
        !Compile::current()->node_arena()->contains(mem_proj)) // Unmatched old-space only
      break;
  }
  assert(mem_proj->as_Proj()->_con == TypeFunc::Memory, "Graph broken");

  // Search the MemBar behind the Proj. If there are other memory operations
  // behind the Proj, we lose.
  for (DUIterator_Fast jmax, j = mem_proj->fast_outs(jmax); j < jmax; j++) {
    Node *x = mem_proj->fast_out(j);
    // Proj might have an edge to a store or load node which precedes the membar.
    if (x->is_Mem()) return false;

    // On PPC64 release and volatile are implemented by an instruction
    // that also has acquire semantics. I.e. there is no need for an
    // acquire before these.
    int xop = x->Opcode();
    if (xop == Op_MemBarRelease || xop == Op_MemBarVolatile) {
      // Make sure we're not missing Call/Phi/MergeMem by checking
      // control edges. The control edge must directly lead back
      // to the MemBarAcquire
      Node *ctrl_proj = x->in(0);
      if (ctrl_proj->is_Proj() && ctrl_proj->in(0) == mba) {
        return true;
      }
    }
  }

  return false;
}

#define __ _masm.

// Tertiary op of a LoadP or StoreP encoding.
#define REGP_OP true

// ****************************************************************************

// REQUIRED FUNCTIONALITY

// !!!!! Special hack to get all types of calls to specify the byte offset
//       from the start of the call to the point where the return address
//       will point.

// PPC port: Removed use of lazy constant construct.

int MachCallStaticJavaNode::ret_addr_offset() {
  // It's only a single branch-and-link instruction.
  return 4;
}

int MachCallDynamicJavaNode::ret_addr_offset() {
  // Offset is 4 with postalloc expanded calls (bl is one instruction). We use
  // postalloc expanded calls if we use inline caches and do not update method data.
  if (UseInlineCaches) return 4;

  int vtable_index = this->_vtable_index;
  if (vtable_index < 0) {
    // Must be invalid_vtable_index, not nonvirtual_vtable_index.
    assert(vtable_index == Method::invalid_vtable_index, "correct sentinel value");
    return 12;
  } else {
    return 24 + MacroAssembler::instr_size_for_decode_klass_not_null();
  }
}

int MachCallRuntimeNode::ret_addr_offset() {
  if (rule() == CallRuntimeDirect_rule) {
    // CallRuntimeDirectNode uses call_c.
#if defined(ABI_ELFv2)
    return 28;
#else
    return 40;
#endif
  }
  assert(rule() == CallLeafDirect_rule, "unexpected node with rule %u", rule());
  // CallLeafDirectNode uses bl.
  return 4;
}

//=============================================================================

// condition code conversions

static int cc_to_boint(int cc) {
  return Assembler::bcondCRbiIs0 | (cc & 8);
}

static int cc_to_inverse_boint(int cc) {
  return Assembler::bcondCRbiIs0 | (8-(cc & 8));
}

static int cc_to_biint(int cc, int flags_reg) {
  return (flags_reg << 2) | (cc & 3);
}
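
// Illustrative note: cc_to_biint() computes the condition-register bit index
// as 4 * <CR field number> + (cc & 3); e.g. bit 2 of CCR0 yields 2, while the
// same bit of CCR6 yields 4 * 6 + 2 = 26. cc_to_boint()/cc_to_inverse_boint()
// use bit 3 of cc to select the branch-on-condition encoding relative to
// Assembler::bcondCRbiIs0 (or its inverse).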

//=============================================================================

// Compute padding required for nodes which need alignment. The padding
// is the number of bytes (not instructions) which will be inserted before
// the instruction. The padding must match the size of a NOP instruction.

// Add nop if a prefixed (two-word) instruction is going to cross a 64-byte boundary.
// (See Section 1.6 of Power ISA Version 3.1)
static int compute_prefix_padding(int current_offset) {
  assert(PowerArchitecturePPC64 >= 10 && (CodeEntryAlignment & 63) == 0,
         "Code buffer must be aligned to a multiple of 64 bytes");
  if (is_aligned(current_offset + BytesPerInstWord, 64)) {
    return BytesPerInstWord;
  }
  return 0;
}
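
// Example: with the code buffer aligned to 64 bytes, a prefixed (8-byte)
// instruction starting at offset 60 would have its second word at offset 64,
// i.e. it would cross the boundary. compute_prefix_padding(60) therefore
// returns BytesPerInstWord, so a nop shifts the instruction to offset 64.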

int loadConI32Node::compute_padding(int current_offset) const {
  return compute_prefix_padding(current_offset);
}

int loadConL34Node::compute_padding(int current_offset) const {
  return compute_prefix_padding(current_offset);
}

int addI_reg_imm32Node::compute_padding(int current_offset) const {
  return compute_prefix_padding(current_offset);
}

int addL_reg_imm34Node::compute_padding(int current_offset) const {
  return compute_prefix_padding(current_offset);
}

int addP_reg_imm34Node::compute_padding(int current_offset) const {
  return compute_prefix_padding(current_offset);
}

int cmprb_Whitespace_reg_reg_prefixedNode::compute_padding(int current_offset) const {
  return compute_prefix_padding(current_offset);
}


//=============================================================================

// Emit an interrupt that is caught by the debugger (for debugging compiler).
void emit_break(CodeBuffer &cbuf) {
  C2_MacroAssembler _masm(&cbuf);
  __ illtrap();
}

#ifndef PRODUCT
void MachBreakpointNode::format(PhaseRegAlloc *ra_, outputStream *st) const {
  st->print("BREAKPOINT");
}
#endif

void MachBreakpointNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
  emit_break(cbuf);
}

uint MachBreakpointNode::size(PhaseRegAlloc *ra_) const {
  return MachNode::size(ra_);
}

//=============================================================================

void emit_nop(CodeBuffer &cbuf) {
  C2_MacroAssembler _masm(&cbuf);
  __ nop();
}

static inline void emit_long(CodeBuffer &cbuf, int value) {
  *((int*)(cbuf.insts_end())) = value;
  cbuf.set_insts_end(cbuf.insts_end() + BytesPerInstWord);
}

//=============================================================================

%} // interrupt source
source_hpp %{ // Header information of the source block.

//--------------------------------------------------------------
//---<  Used for optimization in Compile::Shorten_branches  >---
//--------------------------------------------------------------

class C2_MacroAssembler;

class CallStubImpl {

 public:

  // Emit call stub, compiled java to interpreter.
  static void emit_trampoline_stub(C2_MacroAssembler &_masm, int destination_toc_offset, int insts_call_instruction_offset);

  // Size of call trampoline stub.
  // This doesn't need to be accurate to the byte, but it
  // must be larger than or equal to the real size of the stub.
  static uint size_call_trampoline() {
    return MacroAssembler::trampoline_stub_size;
  }

  // number of relocations needed by a call trampoline stub
  static uint reloc_call_trampoline() {
    return 5;
  }

};

%} // end source_hpp

source %{

// Emit a trampoline stub for a call to a target which is too far away.
//
// code sequences:
//
// call-site:
//   branch-and-link to <destination> or <trampoline stub>
//
// Related trampoline stub for this call-site in the stub section:
//   load the call target from the constant pool
//   branch via CTR (LR/link still points to the call-site above)

void CallStubImpl::emit_trampoline_stub(C2_MacroAssembler &_masm, int destination_toc_offset, int insts_call_instruction_offset) {
  address stub = __ emit_trampoline_stub(destination_toc_offset, insts_call_instruction_offset);
  if (stub == NULL) {
    ciEnv::current()->record_out_of_memory_failure();
  }
}

//=============================================================================

// Emit an inline branch-and-link call and a related trampoline stub.
//
// code sequences:
//
// call-site:
//   branch-and-link to <destination> or <trampoline stub>
//
// Related trampoline stub for this call-site in the stub section:
//   load the call target from the constant pool
//   branch via CTR (LR/link still points to the call-site above)
//

typedef struct {
  int insts_call_instruction_offset;
  int ret_addr_offset;
} EmitCallOffsets;

// Emit a branch-and-link instruction that branches to a trampoline.
// - Remember the offset of the branch-and-link instruction.
// - Add a relocation at the branch-and-link instruction.
// - Emit a branch-and-link.
// - Remember the return pc offset.
EmitCallOffsets emit_call_with_trampoline_stub(C2_MacroAssembler &_masm, address entry_point, relocInfo::relocType rtype) {
  EmitCallOffsets offsets = { -1, -1 };
  const int start_offset = __ offset();
  offsets.insts_call_instruction_offset = __ offset();

  // No entry point given, use the current pc.
  if (entry_point == NULL) entry_point = __ pc();

  // Put the entry point as a constant into the constant pool.
  const address entry_point_toc_addr   = __ address_constant(entry_point, RelocationHolder::none);
  if (entry_point_toc_addr == NULL) {
    ciEnv::current()->record_out_of_memory_failure();
    return offsets;
  }
  const int     entry_point_toc_offset = __ offset_to_method_toc(entry_point_toc_addr);

  // Emit the trampoline stub which will be related to the branch-and-link below.
  CallStubImpl::emit_trampoline_stub(_masm, entry_point_toc_offset, offsets.insts_call_instruction_offset);
  if (ciEnv::current()->failing()) { return offsets; } // Code cache may be full.
  __ relocate(rtype);

  // Note: At this point we do not have the address of the trampoline
  // stub, and the entry point might be too far away for bl, so __ pc()
  // serves as dummy and the bl will be patched later.
  __ bl((address) __ pc());

  offsets.ret_addr_offset = __ offset() - start_offset;

  return offsets;
}

//=============================================================================

// Factory for creating loadConL* nodes for large/small constant pool.

static inline jlong replicate_immF(float con) {
  // Replicate float con 2 times and pack into vector.
  int val = *((int*)&con);
  jlong lval = val;
  lval = (lval << 32) | (lval & 0xFFFFFFFFl);
  return lval;
}
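
// For instance, replicate_immF(1.0f) takes the raw float bits 0x3f800000 and
// returns 0x3f8000003f800000, i.e. the same 32-bit pattern packed into both
// halves of the 64-bit result.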

//=============================================================================

const RegMask& MachConstantBaseNode::_out_RegMask = BITS64_CONSTANT_TABLE_BASE_mask();
int ConstantTable::calculate_table_base_offset() const {
  return 0;  // absolute addressing, no offset
}

bool MachConstantBaseNode::requires_postalloc_expand() const { return true; }
void MachConstantBaseNode::postalloc_expand(GrowableArray <Node *> *nodes, PhaseRegAlloc *ra_) {
  iRegPdstOper *op_dst = new iRegPdstOper();
  MachNode *m1 = new loadToc_hiNode();
  MachNode *m2 = new loadToc_loNode();

  m1->add_req(NULL);
  m2->add_req(NULL, m1);
  m1->_opnds[0] = op_dst;
  m2->_opnds[0] = op_dst;
  m2->_opnds[1] = op_dst;
  ra_->set_pair(m1->_idx, ra_->get_reg_second(this), ra_->get_reg_first(this));
  ra_->set_pair(m2->_idx, ra_->get_reg_second(this), ra_->get_reg_first(this));
  nodes->push(m1);
  nodes->push(m2);
}

void MachConstantBaseNode::emit(CodeBuffer& cbuf, PhaseRegAlloc* ra_) const {
  // Is postalloc expanded.
  ShouldNotReachHere();
}

uint MachConstantBaseNode::size(PhaseRegAlloc* ra_) const {
  return 0;
}

#ifndef PRODUCT
void MachConstantBaseNode::format(PhaseRegAlloc* ra_, outputStream* st) const {
  st->print("-- \t// MachConstantBaseNode (empty encoding)");
}
#endif

//=============================================================================

#ifndef PRODUCT
void MachPrologNode::format(PhaseRegAlloc *ra_, outputStream *st) const {
  Compile* C = ra_->C;
  const long framesize = C->output()->frame_slots() << LogBytesPerInt;

  st->print("PROLOG\n\t");
  if (C->output()->need_stack_bang(framesize)) {
    st->print("stack_overflow_check\n\t");
  }

  if (!false /* TODO: PPC port C->is_frameless_method()*/) {
    st->print("save return pc\n\t");
    st->print("push frame %ld\n\t", -framesize);
  }

  if (C->stub_function() == NULL) {
    st->print("nmethod entry barrier\n\t");
  }
}
#endif

void MachPrologNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
  Compile* C = ra_->C;
  C2_MacroAssembler _masm(&cbuf);

  const long framesize = C->output()->frame_size_in_bytes();
  assert(framesize % (2 * wordSize) == 0, "must preserve 2*wordSize alignment");

  const bool method_is_frameless      = false /* TODO: PPC port C->is_frameless_method()*/;

  const Register return_pc            = R20; // Must match return_addr() in frame section.
  const Register callers_sp           = R21;
  const Register push_frame_temp      = R22;
  const Register toc_temp             = R23;
  assert_different_registers(R11, return_pc, callers_sp, push_frame_temp, toc_temp);

  if (method_is_frameless) {
    // Add nop at beginning of all frameless methods to prevent any
    // oop instructions from getting overwritten by make_not_entrant
    // (patching attempt would fail).
    __ nop();
  } else {
    // Get return pc.
    __ mflr(return_pc);
  }

  if (C->clinit_barrier_on_entry()) {
    assert(!C->method()->holder()->is_not_initialized(), "initialization should have been started");

    Label L_skip_barrier;
    Register klass = toc_temp;

    // Notify OOP recorder (don't need the relocation)
    AddressLiteral md = __ constant_metadata_address(C->method()->holder()->constant_encoding());
    __ load_const_optimized(klass, md.value(), R0);
    __ clinit_barrier(klass, R16_thread, &L_skip_barrier /*L_fast_path*/);

    __ load_const_optimized(klass, SharedRuntime::get_handle_wrong_method_stub(), R0);
    __ mtctr(klass);
    __ bctr();

    __ bind(L_skip_barrier);
  }

  // Calls to C2R adapters often do not accept exceptional returns.
  // We require that their callers must bang for them. But be
  // careful, because some VM calls (such as call site linkage) can
  // use several kilobytes of stack. But the stack safety zone should
  // account for that. See bugs 4446381, 4468289, 4497237.

 1456   int bangsize = C->output()->bang_size_in_bytes();
 1457   assert(bangsize >= framesize || bangsize <= 0, "stack bang size incorrect");
 1458   if (C->output()->need_stack_bang(bangsize)) {
 1459     // Unfortunately we cannot use the function provided in
 1460     // assembler.cpp as we have to emulate the pipes. So I had to
 1461     // insert the code of generate_stack_overflow_check(), see
 1462     // assembler.cpp for some illuminative comments.
 1463     const int page_size = os::vm_page_size();
 1464     int bang_end = StackOverflow::stack_shadow_zone_size();
 1465 
 1466     // This is how far the previous frame's stack banging extended.
 1467     const int bang_end_safe = bang_end;
 1468 
 1469     if (bangsize > page_size) {
 1470       bang_end += bangsize;
 1471     }
 1472 
 1473     int bang_offset = bang_end_safe;
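    // Illustrative example (hypothetical values): with a 4K page size and a
    // 24K shadow zone, a frame with bangsize <= 4K results in a single access
    // at SP - 24K; with bangsize = 12K the loop below touches SP - 24K, -28K,
    // -32K and -36K, i.e. one access per page beyond the area the caller has
    // already banged.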
 1474 
 1475     while (bang_offset <= bang_end) {
 1476       // Need at least one stack bang at end of shadow zone.
 1477 
      // Again, this is copied code, this time from bang_stack_with_offset
      // in assembler_ppc.cpp - see there for comments.
 1480 
 1481       // Stack grows down, caller passes positive offset.
 1482       assert(bang_offset > 0, "must bang with positive offset");
 1483 
 1484       long stdoffset = -bang_offset;
 1485 
 1486       if (Assembler::is_simm(stdoffset, 16)) {
 1487         // Signed 16 bit offset, a simple std is ok.
 1488         if (UseLoadInstructionsForStackBangingPPC64) {
 1489           __ ld(R0,  (int)(signed short)stdoffset, R1_SP);
 1490         } else {
 1491           __ std(R0, (int)(signed short)stdoffset, R1_SP);
 1492         }
 1493       } else if (Assembler::is_simm(stdoffset, 31)) {
 1494         // Use largeoffset calculations for addis & ld/std.
 1495         const int hi = MacroAssembler::largeoffset_si16_si16_hi(stdoffset);
 1496         const int lo = MacroAssembler::largeoffset_si16_si16_lo(stdoffset);
 1497 
 1498         Register tmp = R11;
 1499         __ addis(tmp, R1_SP, hi);
 1500         if (UseLoadInstructionsForStackBangingPPC64) {
 1501           __ ld(R0, lo, tmp);
 1502         } else {
 1503           __ std(R0, lo, tmp);
 1504         }
 1505       } else {
 1506         ShouldNotReachHere();
 1507       }
 1508 
 1509       bang_offset += page_size;
 1510     }
 1511     // R11 trashed
 1512   } // C->output()->need_stack_bang(framesize)
 1513 
 1514   unsigned int bytes = (unsigned int)framesize;
 1515   long offset = Assembler::align_addr(bytes, frame::alignment_in_bytes);
 1516   ciMethod *currMethod = C->method();
 1517 
 1518   if (!method_is_frameless) {
    // Get caller's sp.
 1520     __ mr(callers_sp, R1_SP);
 1521 
 1522     // Push method's frame, modifies SP.
 1523     assert(Assembler::is_uimm(framesize, 32U), "wrong type");
 1524     // The ABI is already accounted for in 'framesize' via the
 1525     // 'out_preserve' area.
 1526     Register tmp = push_frame_temp;
    // Inlined code of push_frame((unsigned int)framesize, push_frame_temp).
 1528     if (Assembler::is_simm(-offset, 16)) {
 1529       __ stdu(R1_SP, -offset, R1_SP);
 1530     } else {
 1531       long x = -offset;
      // Inlined load_const(tmp, -offset).
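      // The five instructions below build the 64-bit value -offset from 16-bit
      // pieces. Illustrative decomposition (hypothetical value), assuming
      // offset = 0x80000 (a 512 KB frame), i.e. x = -offset = 0xFFFFFFFFFFF80000:
      //   lis  tmp, 0xFFFF       -> tmp = 0xFFFFFFFFFFFF0000  (bits 48..63, sign-extended)
      //   ori  tmp, tmp, 0xFFFF  -> tmp = 0xFFFFFFFFFFFFFFFF  (bits 32..47)
      //   sldi tmp, tmp, 32      -> tmp = 0xFFFFFFFF00000000
      //   oris tmp, tmp, 0xFFF8  -> tmp = 0xFFFFFFFFFFF80000  (bits 16..31)
      //   ori  tmp, tmp, 0x0000  -> tmp = 0xFFFFFFFFFFF80000  (bits 0..15) = -offset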
 1533       __ lis( tmp, (int)((signed short)(((x >> 32) & 0xffff0000) >> 16)));
 1534       __ ori( tmp, tmp, ((x >> 32) & 0x0000ffff));
 1535       __ sldi(tmp, tmp, 32);
 1536       __ oris(tmp, tmp, (x & 0xffff0000) >> 16);
 1537       __ ori( tmp, tmp, (x & 0x0000ffff));
 1538 
 1539       __ stdux(R1_SP, R1_SP, tmp);
 1540     }
 1541   }
 1542 #if 0 // TODO: PPC port
 1543   // For testing large constant pools, emit a lot of constants to constant pool.
 1544   // "Randomize" const_size.
 1545   if (ConstantsALot) {
 1546     const int num_consts = const_size();
 1547     for (int i = 0; i < num_consts; i++) {
 1548       __ long_constant(0xB0B5B00BBABE);
 1549     }
 1550   }
 1551 #endif
 1552   if (!method_is_frameless) {
 1553     // Save return pc.
 1554     __ std(return_pc, _abi0(lr), callers_sp);
 1555   }
 1556 
 1557   if (C->stub_function() == NULL) {
 1558     BarrierSetAssembler* bs = BarrierSet::barrier_set()->barrier_set_assembler();
 1559     bs->nmethod_entry_barrier(&_masm, push_frame_temp);
 1560   }
 1561 
 1562   C->output()->set_frame_complete(cbuf.insts_size());
 1563 }
 1564 
 1565 uint MachPrologNode::size(PhaseRegAlloc *ra_) const {
  // Variable size. Determine dynamically.
 1567   return MachNode::size(ra_);
 1568 }
 1569 
 1570 int MachPrologNode::reloc() const {
 1571   // Return number of relocatable values contained in this instruction.
 1572   return 1; // 1 reloc entry for load_const(toc).
 1573 }
 1574 
 1575 //=============================================================================
 1576 
 1577 #ifndef PRODUCT
 1578 void MachEpilogNode::format(PhaseRegAlloc *ra_, outputStream *st) const {
 1579   Compile* C = ra_->C;
 1580 
 1581   st->print("EPILOG\n\t");
 1582   st->print("restore return pc\n\t");
 1583   st->print("pop frame\n\t");
 1584 
 1585   if (do_polling() && C->is_method_compilation()) {
 1586     st->print("safepoint poll\n\t");
 1587   }
 1588 }
 1589 #endif
 1590 
 1591 void MachEpilogNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
 1592   Compile* C = ra_->C;
 1593   C2_MacroAssembler _masm(&cbuf);
 1594 
 1595   const long framesize = ((long)C->output()->frame_slots()) << LogBytesPerInt;
 1596   assert(framesize >= 0, "negative frame-size?");
 1597 
 1598   const bool method_needs_polling = do_polling() && C->is_method_compilation();
 1599   const bool method_is_frameless  = false /* TODO: PPC port C->is_frameless_method()*/;
 1600   const Register return_pc        = R31;  // Must survive C-call to enable_stack_reserved_zone().
 1601   const Register temp             = R12;
 1602 
 1603   if (!method_is_frameless) {
    // Restore return pc relative to caller's sp.
 1605     __ ld(return_pc, ((int)framesize) + _abi0(lr), R1_SP);
 1606     // Move return pc to LR.
 1607     __ mtlr(return_pc);
 1608     // Pop frame (fixed frame-size).
 1609     __ addi(R1_SP, R1_SP, (int)framesize);
 1610   }
 1611 
 1612   if (StackReservedPages > 0 && C->has_reserved_stack_access()) {
 1613     __ reserved_stack_check(return_pc);
 1614   }
 1615 
 1616   if (method_needs_polling) {
 1617     Label dummy_label;
 1618     Label* code_stub = &dummy_label;
 1619     if (!UseSIGTRAP && !C->output()->in_scratch_emit_size()) {
 1620       C2SafepointPollStub* stub = new (C->comp_arena()) C2SafepointPollStub(__ offset());
 1621       C->output()->add_stub(stub);
 1622       code_stub = &stub->entry();
 1623       __ relocate(relocInfo::poll_return_type);
 1624     }
 1625     __ safepoint_poll(*code_stub, temp, true /* at_return */, true /* in_nmethod */);
 1626   }
 1627 }
 1628 
 1629 uint MachEpilogNode::size(PhaseRegAlloc *ra_) const {
 1630   // Variable size. Determine dynamically.
 1631   return MachNode::size(ra_);
 1632 }
 1633 
 1634 int MachEpilogNode::reloc() const {
 1635   // Return number of relocatable values contained in this instruction.
 1636   return 1; // 1 for load_from_polling_page.
 1637 }
 1638 
 1639 const Pipeline * MachEpilogNode::pipeline() const {
 1640   return MachNode::pipeline_class();
 1641 }
 1642 
 1643 // =============================================================================
 1644 
 1645 // Figure out which register class each belongs in: rc_int, rc_float, rc_vs or
 1646 // rc_stack.
 1647 enum RC { rc_bad, rc_int, rc_float, rc_vs, rc_stack };
 1648 
 1649 static enum RC rc_class(OptoReg::Name reg) {
 1650   // Return the register class for the given register. The given register
 1651   // reg is a <register>_num value, which is an index into the MachRegisterNumbers
 1652   // enumeration in adGlobals_ppc.hpp.
 1653 
 1654   if (reg == OptoReg::Bad) return rc_bad;
 1655 
 1656   // We have 64 integer register halves, starting at index 0.
 1657   if (reg < 64) return rc_int;
 1658 
 1659   // We have 64 floating-point register halves, starting at index 64.
 1660   if (reg < 64+64) return rc_float;
 1661 
 1662   // We have 64 vector-scalar registers, starting at index 128.
 1663   if (reg < 64+64+64) return rc_vs;
 1664 
 1665   // Between float regs & stack are the flags regs.
 1666   assert(OptoReg::is_stack(reg) || reg < 64+64+64, "blow up if spilling flags");
 1667 
 1668   return rc_stack;
 1669 }
 1670 
 1671 static int ld_st_helper(CodeBuffer *cbuf, const char *op_str, uint opcode, int reg, int offset,
 1672                         bool do_print, Compile* C, outputStream *st) {
 1673 
 1674   assert(opcode == Assembler::LD_OPCODE   ||
 1675          opcode == Assembler::STD_OPCODE  ||
 1676          opcode == Assembler::LWZ_OPCODE  ||
 1677          opcode == Assembler::STW_OPCODE  ||
 1678          opcode == Assembler::LFD_OPCODE  ||
 1679          opcode == Assembler::STFD_OPCODE ||
 1680          opcode == Assembler::LFS_OPCODE  ||
 1681          opcode == Assembler::STFS_OPCODE,
 1682          "opcode not supported");
 1683 
 1684   if (cbuf) {
 1685     int d =
 1686       (Assembler::LD_OPCODE == opcode || Assembler::STD_OPCODE == opcode) ?
 1687         Assembler::ds(offset+0 /* TODO: PPC port C->frame_slots_sp_bias_in_bytes()*/)
 1688       : Assembler::d1(offset+0 /* TODO: PPC port C->frame_slots_sp_bias_in_bytes()*/); // Makes no difference in opt build.
 1689     emit_long(*cbuf, opcode | Assembler::rt(Matcher::_regEncode[reg]) | d | Assembler::ra(R1_SP));
 1690   }
 1691 #ifndef PRODUCT
 1692   else if (do_print) {
 1693     st->print("%-7s %s, [R1_SP + #%d+%d] \t// spill copy",
 1694               op_str,
 1695               Matcher::regName[reg],
 1696               offset, 0 /* TODO: PPC port C->frame_slots_sp_bias_in_bytes()*/);
 1697   }
 1698 #endif
 1699   return 4; // size
 1700 }
 1701 
 1702 uint MachSpillCopyNode::implementation(CodeBuffer *cbuf, PhaseRegAlloc *ra_, bool do_size, outputStream *st) const {
 1703   Compile* C = ra_->C;
 1704 
 1705   // Get registers to move.
 1706   OptoReg::Name src_hi = ra_->get_reg_second(in(1));
 1707   OptoReg::Name src_lo = ra_->get_reg_first(in(1));
 1708   OptoReg::Name dst_hi = ra_->get_reg_second(this);
 1709   OptoReg::Name dst_lo = ra_->get_reg_first(this);
 1710 
 1711   enum RC src_hi_rc = rc_class(src_hi);
 1712   enum RC src_lo_rc = rc_class(src_lo);
 1713   enum RC dst_hi_rc = rc_class(dst_hi);
 1714   enum RC dst_lo_rc = rc_class(dst_lo);
 1715 
 1716   assert(src_lo != OptoReg::Bad && dst_lo != OptoReg::Bad, "must move at least 1 register");
 1717   if (src_hi != OptoReg::Bad)
 1718     assert((src_lo&1)==0 && src_lo+1==src_hi &&
 1719            (dst_lo&1)==0 && dst_lo+1==dst_hi,
 1720            "expected aligned-adjacent pairs");
 1721   // Generate spill code!
 1722   int size = 0;
 1723 
 1724   if (src_lo == dst_lo && src_hi == dst_hi)
 1725     return size;            // Self copy, no move.
 1726 
 1727   if (bottom_type()->isa_vect() != NULL && ideal_reg() == Op_VecX) {
 1728     // Memory->Memory Spill.
 1729     if (src_lo_rc == rc_stack && dst_lo_rc == rc_stack) {
 1730       int src_offset = ra_->reg2offset(src_lo);
 1731       int dst_offset = ra_->reg2offset(dst_lo);
 1732       if (cbuf) {
 1733         C2_MacroAssembler _masm(cbuf);
 1734         __ ld(R0, src_offset, R1_SP);
 1735         __ std(R0, dst_offset, R1_SP);
 1736         __ ld(R0, src_offset+8, R1_SP);
 1737         __ std(R0, dst_offset+8, R1_SP);
 1738       }
 1739       size += 16;
 1740     }
 1741     // VectorSRegister->Memory Spill.
 1742     else if (src_lo_rc == rc_vs && dst_lo_rc == rc_stack) {
 1743       VectorSRegister Rsrc = as_VectorSRegister(Matcher::_regEncode[src_lo]);
 1744       int dst_offset = ra_->reg2offset(dst_lo);
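      // stxvd2x/lxvd2x are X-form (register + register) instructions without a
      // displacement field, so the effective address is computed into R0 first.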
 1745       if (cbuf) {
 1746         C2_MacroAssembler _masm(cbuf);
 1747         __ addi(R0, R1_SP, dst_offset);
 1748         __ stxvd2x(Rsrc, R0);
 1749       }
 1750       size += 8;
 1751     }
 1752     // Memory->VectorSRegister Spill.
 1753     else if (src_lo_rc == rc_stack && dst_lo_rc == rc_vs) {
 1754       VectorSRegister Rdst = as_VectorSRegister(Matcher::_regEncode[dst_lo]);
 1755       int src_offset = ra_->reg2offset(src_lo);
 1756       if (cbuf) {
 1757         C2_MacroAssembler _masm(cbuf);
 1758         __ addi(R0, R1_SP, src_offset);
 1759         __ lxvd2x(Rdst, R0);
 1760       }
 1761       size += 8;
 1762     }
 1763     // VectorSRegister->VectorSRegister.
 1764     else if (src_lo_rc == rc_vs && dst_lo_rc == rc_vs) {
 1765       VectorSRegister Rsrc = as_VectorSRegister(Matcher::_regEncode[src_lo]);
 1766       VectorSRegister Rdst = as_VectorSRegister(Matcher::_regEncode[dst_lo]);
 1767       if (cbuf) {
 1768         C2_MacroAssembler _masm(cbuf);
 1769         __ xxlor(Rdst, Rsrc, Rsrc);
 1770       }
 1771       size += 4;
 1772     }
 1773     else {
 1774       ShouldNotReachHere(); // No VSR spill.
 1775     }
 1776     return size;
 1777   }
 1778 
 1779   // --------------------------------------
 1780   // Memory->Memory Spill. Use R0 to hold the value.
 1781   if (src_lo_rc == rc_stack && dst_lo_rc == rc_stack) {
 1782     int src_offset = ra_->reg2offset(src_lo);
 1783     int dst_offset = ra_->reg2offset(dst_lo);
 1784     if (src_hi != OptoReg::Bad) {
 1785       assert(src_hi_rc==rc_stack && dst_hi_rc==rc_stack,
 1786              "expected same type of move for high parts");
 1787       size += ld_st_helper(cbuf, "LD  ", Assembler::LD_OPCODE,  R0_num, src_offset, !do_size, C, st);
 1788       if (!cbuf && !do_size) st->print("\n\t");
 1789       size += ld_st_helper(cbuf, "STD ", Assembler::STD_OPCODE, R0_num, dst_offset, !do_size, C, st);
 1790     } else {
 1791       size += ld_st_helper(cbuf, "LWZ ", Assembler::LWZ_OPCODE, R0_num, src_offset, !do_size, C, st);
 1792       if (!cbuf && !do_size) st->print("\n\t");
 1793       size += ld_st_helper(cbuf, "STW ", Assembler::STW_OPCODE, R0_num, dst_offset, !do_size, C, st);
 1794     }
 1795     return size;
 1796   }
 1797 
 1798   // --------------------------------------
 1799   // Check for float->int copy; requires a trip through memory.
 1800   if (src_lo_rc == rc_float && dst_lo_rc == rc_int) {
 1801     Unimplemented();
 1802   }
 1803 
 1804   // --------------------------------------
 1805   // Check for integer reg-reg copy.
 1806   if (src_lo_rc == rc_int && dst_lo_rc == rc_int) {
 1807       Register Rsrc = as_Register(Matcher::_regEncode[src_lo]);
 1808       Register Rdst = as_Register(Matcher::_regEncode[dst_lo]);
 1809       size = (Rsrc != Rdst) ? 4 : 0;
 1810 
 1811       if (cbuf) {
 1812         C2_MacroAssembler _masm(cbuf);
 1813         if (size) {
 1814           __ mr(Rdst, Rsrc);
 1815         }
 1816       }
 1817 #ifndef PRODUCT
 1818       else if (!do_size) {
 1819         if (size) {
 1820           st->print("%-7s %s, %s \t// spill copy", "MR", Matcher::regName[dst_lo], Matcher::regName[src_lo]);
 1821         } else {
 1822           st->print("%-7s %s, %s \t// spill copy", "MR-NOP", Matcher::regName[dst_lo], Matcher::regName[src_lo]);
 1823         }
 1824       }
 1825 #endif
 1826       return size;
 1827   }
 1828 
 1829   // Check for integer store.
 1830   if (src_lo_rc == rc_int && dst_lo_rc == rc_stack) {
 1831     int dst_offset = ra_->reg2offset(dst_lo);
 1832     if (src_hi != OptoReg::Bad) {
 1833       assert(src_hi_rc==rc_int && dst_hi_rc==rc_stack,
 1834              "expected same type of move for high parts");
 1835       size += ld_st_helper(cbuf, "STD ", Assembler::STD_OPCODE, src_lo, dst_offset, !do_size, C, st);
 1836     } else {
 1837       size += ld_st_helper(cbuf, "STW ", Assembler::STW_OPCODE, src_lo, dst_offset, !do_size, C, st);
 1838     }
 1839     return size;
 1840   }
 1841 
 1842   // Check for integer load.
 1843   if (dst_lo_rc == rc_int && src_lo_rc == rc_stack) {
 1844     int src_offset = ra_->reg2offset(src_lo);
 1845     if (src_hi != OptoReg::Bad) {
 1846       assert(dst_hi_rc==rc_int && src_hi_rc==rc_stack,
 1847              "expected same type of move for high parts");
 1848       size += ld_st_helper(cbuf, "LD  ", Assembler::LD_OPCODE, dst_lo, src_offset, !do_size, C, st);
 1849     } else {
 1850       size += ld_st_helper(cbuf, "LWZ ", Assembler::LWZ_OPCODE, dst_lo, src_offset, !do_size, C, st);
 1851     }
 1852     return size;
 1853   }
 1854 
 1855   // Check for float reg-reg copy.
 1856   if (src_lo_rc == rc_float && dst_lo_rc == rc_float) {
 1857     if (cbuf) {
 1858       C2_MacroAssembler _masm(cbuf);
 1859       FloatRegister Rsrc = as_FloatRegister(Matcher::_regEncode[src_lo]);
 1860       FloatRegister Rdst = as_FloatRegister(Matcher::_regEncode[dst_lo]);
 1861       __ fmr(Rdst, Rsrc);
 1862     }
 1863 #ifndef PRODUCT
 1864     else if (!do_size) {
 1865       st->print("%-7s %s, %s \t// spill copy", "FMR", Matcher::regName[dst_lo], Matcher::regName[src_lo]);
 1866     }
 1867 #endif
 1868     return 4;
 1869   }
 1870 
 1871   // Check for float store.
 1872   if (src_lo_rc == rc_float && dst_lo_rc == rc_stack) {
 1873     int dst_offset = ra_->reg2offset(dst_lo);
 1874     if (src_hi != OptoReg::Bad) {
 1875       assert(src_hi_rc==rc_float && dst_hi_rc==rc_stack,
 1876              "expected same type of move for high parts");
 1877       size += ld_st_helper(cbuf, "STFD", Assembler::STFD_OPCODE, src_lo, dst_offset, !do_size, C, st);
 1878     } else {
 1879       size += ld_st_helper(cbuf, "STFS", Assembler::STFS_OPCODE, src_lo, dst_offset, !do_size, C, st);
 1880     }
 1881     return size;
 1882   }
 1883 
 1884   // Check for float load.
 1885   if (dst_lo_rc == rc_float && src_lo_rc == rc_stack) {
 1886     int src_offset = ra_->reg2offset(src_lo);
 1887     if (src_hi != OptoReg::Bad) {
 1888       assert(dst_hi_rc==rc_float && src_hi_rc==rc_stack,
 1889              "expected same type of move for high parts");
 1890       size += ld_st_helper(cbuf, "LFD ", Assembler::LFD_OPCODE, dst_lo, src_offset, !do_size, C, st);
 1891     } else {
 1892       size += ld_st_helper(cbuf, "LFS ", Assembler::LFS_OPCODE, dst_lo, src_offset, !do_size, C, st);
 1893     }
 1894     return size;
 1895   }
 1896 
 1897   // --------------------------------------------------------------------
 1898   // Check for hi bits still needing moving. Only happens for misaligned
 1899   // arguments to native calls.
 1900   if (src_hi == dst_hi)
 1901     return size;               // Self copy; no move.
 1902 
 1903   assert(src_hi_rc != rc_bad && dst_hi_rc != rc_bad, "src_hi & dst_hi cannot be Bad");
 1904   ShouldNotReachHere(); // Unimplemented
 1905   return 0;
 1906 }
 1907 
 1908 #ifndef PRODUCT
 1909 void MachSpillCopyNode::format(PhaseRegAlloc *ra_, outputStream *st) const {
 1910   if (!ra_)
 1911     st->print("N%d = SpillCopy(N%d)", _idx, in(1)->_idx);
 1912   else
 1913     implementation(NULL, ra_, false, st);
 1914 }
 1915 #endif
 1916 
 1917 void MachSpillCopyNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
 1918   implementation(&cbuf, ra_, false, NULL);
 1919 }
 1920 
 1921 uint MachSpillCopyNode::size(PhaseRegAlloc *ra_) const {
 1922   return implementation(NULL, ra_, true, NULL);
 1923 }
 1924 
 1925 #ifndef PRODUCT
 1926 void MachNopNode::format(PhaseRegAlloc *ra_, outputStream *st) const {
 1927   st->print("NOP \t// %d nops to pad for loops or prefixed instructions.", _count);
 1928 }
 1929 #endif
 1930 
 1931 void MachNopNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *) const {
 1932   C2_MacroAssembler _masm(&cbuf);
 1933   // _count contains the number of nops needed for padding.
 1934   for (int i = 0; i < _count; i++) {
 1935     __ nop();
 1936   }
 1937 }
 1938 
 1939 uint MachNopNode::size(PhaseRegAlloc *ra_) const {
 1940   return _count * 4;
 1941 }
 1942 
 1943 #ifndef PRODUCT
 1944 void BoxLockNode::format(PhaseRegAlloc *ra_, outputStream *st) const {
 1945   int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
 1946   char reg_str[128];
 1947   ra_->dump_register(this, reg_str, sizeof(reg_str));
 1948   st->print("ADDI    %s, SP, %d \t// box node", reg_str, offset);
 1949 }
 1950 #endif
 1951 
 1952 void BoxLockNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
 1953   C2_MacroAssembler _masm(&cbuf);
 1954 
 1955   int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
 1956   int reg    = ra_->get_encode(this);
 1957 
 1958   if (Assembler::is_simm(offset, 16)) {
 1959     __ addi(as_Register(reg), R1, offset);
 1960   } else {
 1961     ShouldNotReachHere();
 1962   }
 1963 }
 1964 
 1965 uint BoxLockNode::size(PhaseRegAlloc *ra_) const {
 1966   // BoxLockNode is not a MachNode, so we can't just call MachNode::size(ra_).
 1967   return 4;
 1968 }
 1969 
 1970 #ifndef PRODUCT
 1971 void MachUEPNode::format(PhaseRegAlloc *ra_, outputStream *st) const {
 1972   st->print_cr("---- MachUEPNode ----");
 1973   st->print_cr("...");
 1974 }
 1975 #endif
 1976 
 1977 void MachUEPNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
 1978   // This is the unverified entry point.
 1979   C2_MacroAssembler _masm(&cbuf);
 1980 
 1981   // Inline_cache contains a klass.
 1982   Register ic_klass       = as_Register(Matcher::inline_cache_reg_encode());
 1983   Register receiver_klass = R12_scratch2;  // tmp
 1984 
 1985   assert_different_registers(ic_klass, receiver_klass, R11_scratch1, R3_ARG1);
 1986   assert(R11_scratch1 == R11, "need prologue scratch register");
 1987 
 1988   // Check for NULL argument if we don't have implicit null checks.
 1989   if (!ImplicitNullChecks || !os::zero_page_read_protected()) {
 1990     if (TrapBasedNullChecks) {
 1991       __ trap_null_check(R3_ARG1);
 1992     } else {
 1993       Label valid;
 1994       __ cmpdi(CCR0, R3_ARG1, 0);
 1995       __ bne_predict_taken(CCR0, valid);
 1996       // We have a null argument, branch to ic_miss_stub.
 1997       __ b64_patchable((address)SharedRuntime::get_ic_miss_stub(),
 1998                            relocInfo::runtime_call_type);
 1999       __ bind(valid);
 2000     }
 2001   }
 2002   // Assume argument is not NULL, load klass from receiver.
 2003   __ load_klass(receiver_klass, R3_ARG1);
 2004 
 2005   if (TrapBasedICMissChecks) {
 2006     __ trap_ic_miss_check(receiver_klass, ic_klass);
 2007   } else {
 2008     Label valid;
 2009     __ cmpd(CCR0, receiver_klass, ic_klass);
 2010     __ beq_predict_taken(CCR0, valid);
 2011     // We have an unexpected klass, branch to ic_miss_stub.
 2012     __ b64_patchable((address)SharedRuntime::get_ic_miss_stub(),
 2013                          relocInfo::runtime_call_type);
 2014     __ bind(valid);
 2015   }
 2016 
 2017   // Argument is valid and klass is as expected, continue.
 2018 }
 2019 
 2020 uint MachUEPNode::size(PhaseRegAlloc *ra_) const {
 2021   // Variable size. Determine dynamically.
 2022   return MachNode::size(ra_);
 2023 }
 2024 
 2025 //=============================================================================
 2026 
 2027 %} // interrupt source
 2028 
 2029 source_hpp %{ // Header information of the source block.
 2030 
 2031 class HandlerImpl {
 2032 
 2033  public:
 2034 
 2035   static int emit_exception_handler(CodeBuffer &cbuf);
 2036   static int emit_deopt_handler(CodeBuffer& cbuf);
 2037 
 2038   static uint size_exception_handler() {
 2039     // The exception_handler is a b64_patchable.
 2040     return MacroAssembler::b64_patchable_size;
 2041   }
 2042 
 2043   static uint size_deopt_handler() {
 2044     // The deopt_handler is a bl64_patchable.
 2045     return MacroAssembler::bl64_patchable_size;
 2046   }
 2047 
 2048 };
 2049 
 2050 class Node::PD {
 2051 public:
 2052   enum NodeFlags {
 2053     _last_flag = Node::_last_flag
 2054   };
 2055 };
 2056 
 2057 %} // end source_hpp
 2058 
 2059 source %{
 2060 
 2061 int HandlerImpl::emit_exception_handler(CodeBuffer &cbuf) {
 2062   C2_MacroAssembler _masm(&cbuf);
 2063 
 2064   address base = __ start_a_stub(size_exception_handler());
 2065   if (base == nullptr) {
 2066     ciEnv::current()->record_failure("CodeCache is full");
 2067     return 0;  // CodeBuffer::expand failed
 2068   }
 2069 
 2070   int offset = __ offset();
 2071   __ b64_patchable((address)OptoRuntime::exception_blob()->content_begin(),
 2072                        relocInfo::runtime_call_type);
 2073   assert(__ offset() - offset == (int)size_exception_handler(), "must be fixed size");
 2074   __ end_a_stub();
 2075 
 2076   return offset;
 2077 }
 2078 
 2079 // The deopt_handler is like the exception handler, but it calls to
 2080 // the deoptimization blob instead of jumping to the exception blob.
 2081 int HandlerImpl::emit_deopt_handler(CodeBuffer& cbuf) {
 2082   C2_MacroAssembler _masm(&cbuf);
 2083 
 2084   address base = __ start_a_stub(size_deopt_handler());
 2085   if (base == nullptr) {
 2086     ciEnv::current()->record_failure("CodeCache is full");
 2087     return 0;  // CodeBuffer::expand failed
 2088   }
 2089 
 2090   int offset = __ offset();
 2091   __ bl64_patchable((address)SharedRuntime::deopt_blob()->unpack(),
 2092                         relocInfo::runtime_call_type);
 2093   assert(__ offset() - offset == (int) size_deopt_handler(), "must be fixed size");
 2094   __ end_a_stub();
 2095 
 2096   return offset;
 2097 }
 2098 
 2099 //=============================================================================
 2100 
 2101 // Use a frame slots bias for frameless methods if accessing the stack.
 2102 static int frame_slots_bias(int reg_enc, PhaseRegAlloc* ra_) {
 2103   if (as_Register(reg_enc) == R1_SP) {
 2104     return 0; // TODO: PPC port ra_->C->frame_slots_sp_bias_in_bytes();
 2105   }
 2106   return 0;
 2107 }
 2108 
 2109 const bool Matcher::match_rule_supported(int opcode) {
 2110   if (!has_match_rule(opcode)) {
 2111     return false; // no match rule present
 2112   }
 2113 
 2114   switch (opcode) {
 2115     case Op_SqrtD:
 2116       return VM_Version::has_fsqrt();
 2117     case Op_RoundDoubleMode:
 2118       return VM_Version::has_vsx();
 2119     case Op_CountLeadingZerosI:
 2120     case Op_CountLeadingZerosL:
 2121       return UseCountLeadingZerosInstructionsPPC64;
 2122     case Op_CountTrailingZerosI:
 2123     case Op_CountTrailingZerosL:
 2124       return (UseCountLeadingZerosInstructionsPPC64 || UseCountTrailingZerosInstructionsPPC64);
 2125     case Op_PopCountI:
 2126     case Op_PopCountL:
 2127       return (UsePopCountInstruction && VM_Version::has_popcntw());
 2128 
 2129     case Op_AddVB:
 2130     case Op_AddVS:
 2131     case Op_AddVI:
 2132     case Op_AddVF:
 2133     case Op_AddVD:
 2134     case Op_SubVB:
 2135     case Op_SubVS:
 2136     case Op_SubVI:
 2137     case Op_SubVF:
 2138     case Op_SubVD:
 2139     case Op_MulVS:
 2140     case Op_MulVF:
 2141     case Op_MulVD:
 2142     case Op_DivVF:
 2143     case Op_DivVD:
 2144     case Op_AbsVF:
 2145     case Op_AbsVD:
 2146     case Op_NegVF:
 2147     case Op_NegVD:
 2148     case Op_SqrtVF:
 2149     case Op_SqrtVD:
 2150     case Op_AddVL:
 2151     case Op_SubVL:
 2152     case Op_MulVI:
 2153     case Op_RoundDoubleModeV:
 2154       return SuperwordUseVSX;
 2155     case Op_PopCountVI:
 2156       return (SuperwordUseVSX && UsePopCountInstruction);
 2157     case Op_FmaVF:
 2158     case Op_FmaVD:
 2159       return (SuperwordUseVSX && UseFMA);
 2160 
 2161     case Op_Digit:
 2162       return vmIntrinsics::is_intrinsic_available(vmIntrinsics::_isDigit);
 2163     case Op_LowerCase:
 2164       return vmIntrinsics::is_intrinsic_available(vmIntrinsics::_isLowerCase);
 2165     case Op_UpperCase:
 2166       return vmIntrinsics::is_intrinsic_available(vmIntrinsics::_isUpperCase);
 2167     case Op_Whitespace:
 2168       return vmIntrinsics::is_intrinsic_available(vmIntrinsics::_isWhitespace);
 2169 
 2170     case Op_CacheWB:
 2171     case Op_CacheWBPreSync:
 2172     case Op_CacheWBPostSync:
 2173       return VM_Version::supports_data_cache_line_flush();
 2174   }
 2175 
 2176   return true; // Per default match rules are supported.
 2177 }
 2178 
 2179 const bool Matcher::match_rule_supported_superword(int opcode, int vlen, BasicType bt) {
 2180   return match_rule_supported_vector(opcode, vlen, bt);
 2181 }
 2182 
 2183 const bool Matcher::match_rule_supported_vector(int opcode, int vlen, BasicType bt) {
 2184   if (!match_rule_supported(opcode) || !vector_size_supported(bt, vlen)) {
 2185     return false;
 2186   }
 2187   return true; // Per default match rules are supported.
 2188 }
 2189 
 2190 const bool Matcher::match_rule_supported_vector_masked(int opcode, int vlen, BasicType bt) {
 2191   return false;
 2192 }
 2193 
 2194 const bool Matcher::vector_needs_partial_operations(Node* node, const TypeVect* vt) {
 2195   return false;
 2196 }
 2197 
 2198 const RegMask* Matcher::predicate_reg_mask(void) {
 2199   return NULL;
 2200 }
 2201 
 2202 const TypeVectMask* Matcher::predicate_reg_type(const Type* elemTy, int length) {
 2203   return NULL;
 2204 }
 2205 
 2206 // Vector calling convention not yet implemented.
 2207 const bool Matcher::supports_vector_calling_convention(void) {
 2208   return false;
 2209 }
 2210 
 2211 OptoRegPair Matcher::vector_return_value(uint ideal_reg) {
 2212   Unimplemented();
 2213   return OptoRegPair(0, 0);
 2214 }
 2215 
 2216 // Vector width in bytes.
 2217 const int Matcher::vector_width_in_bytes(BasicType bt) {
 2218   if (SuperwordUseVSX) {
 2219     assert(MaxVectorSize == 16, "");
 2220     return 16;
 2221   } else {
 2222     assert(MaxVectorSize == 8, "");
 2223     return 8;
 2224   }
 2225 }
 2226 
 2227 // Vector ideal reg.
 2228 const uint Matcher::vector_ideal_reg(int size) {
 2229   if (SuperwordUseVSX) {
 2230     assert(MaxVectorSize == 16 && size == 16, "");
 2231     return Op_VecX;
 2232   } else {
 2233     assert(MaxVectorSize == 8 && size == 8, "");
 2234     return Op_RegL;
 2235   }
 2236 }
 2237 
 2238 // Limits on vector size (number of elements) loaded into vector.
 2239 const int Matcher::max_vector_size(const BasicType bt) {
 2240   assert(is_java_primitive(bt), "only primitive type vectors");
 2241   return vector_width_in_bytes(bt)/type2aelembytes(bt);
 2242 }
 2243 
 2244 const int Matcher::min_vector_size(const BasicType bt) {
 2245   return max_vector_size(bt); // Same as max.
 2246 }
 2247 
 2248 const int Matcher::superword_max_vector_size(const BasicType bt) {
 2249   return Matcher::max_vector_size(bt);
 2250 }
 2251 
 2252 const int Matcher::scalable_vector_reg_size(const BasicType bt) {
 2253   return -1;
 2254 }
 2255 
 2256 // RETURNS: whether this branch offset is short enough that a short
 2257 // branch can be used.
 2258 //
 2259 // If the platform does not provide any short branch variants, then
 2260 // this method should return `false' for offset 0.
 2261 //
// `Compile::Fill_buffer' will decide on the basis of this information
// whether to do the pass `Compile::Shorten_branches' at all.
//
// And `Compile::Shorten_branches' will decide on the basis of this
// information whether to replace particular branch sites by short
 2267 // ones.
 2268 bool Matcher::is_short_branch_offset(int rule, int br_size, int offset) {
 2269   // Is the offset within the range of a ppc64 pc relative branch?
 2270   bool b;
 2271 
 2272   const int safety_zone = 3 * BytesPerInstWord;
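  // Note on the bit count below: a conditional branch encodes a 14-bit word
  // displacement (instruction bits 16..29) that is concatenated with two zero
  // bits, so 29 - 16 + 1 + 2 = 16 and the byte offset (padded by the safety
  // zone) must fit into a signed 16-bit value, i.e. +/- 32 KB.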
 2273   b = Assembler::is_simm((offset<0 ? offset-safety_zone : offset+safety_zone),
 2274                          29 - 16 + 1 + 2);
 2275   return b;
 2276 }
 2277 
 2278 /* TODO: PPC port
 2279 // Make a new machine dependent decode node (with its operands).
 2280 MachTypeNode *Matcher::make_decode_node() {
 2281   assert(CompressedOops::base() == NULL && CompressedOops::shift() == 0,
 2282          "This method is only implemented for unscaled cOops mode so far");
 2283   MachTypeNode *decode = new decodeN_unscaledNode();
 2284   decode->set_opnd_array(0, new iRegPdstOper());
 2285   decode->set_opnd_array(1, new iRegNsrcOper());
 2286   return decode;
 2287 }
 2288 */
 2289 
 2290 MachOper* Matcher::pd_specialize_generic_vector_operand(MachOper* original_opnd, uint ideal_reg, bool is_temp) {
 2291   ShouldNotReachHere(); // generic vector operands not supported
 2292   return NULL;
 2293 }
 2294 
 2295 bool Matcher::is_reg2reg_move(MachNode* m) {
 2296   ShouldNotReachHere();  // generic vector operands not supported
 2297   return false;
 2298 }
 2299 
 2300 bool Matcher::is_generic_vector(MachOper* opnd)  {
 2301   ShouldNotReachHere();  // generic vector operands not supported
 2302   return false;
 2303 }
 2304 
 2305 // Constants for c2c and c calling conventions.
 2306 
 2307 const MachRegisterNumbers iarg_reg[8] = {
 2308   R3_num, R4_num, R5_num, R6_num,
 2309   R7_num, R8_num, R9_num, R10_num
 2310 };
 2311 
 2312 const MachRegisterNumbers farg_reg[13] = {
 2313   F1_num, F2_num, F3_num, F4_num,
 2314   F5_num, F6_num, F7_num, F8_num,
 2315   F9_num, F10_num, F11_num, F12_num,
 2316   F13_num
 2317 };
 2318 
 2319 const MachRegisterNumbers vsarg_reg[64] = {
 2320   VSR0_num, VSR1_num, VSR2_num, VSR3_num,
 2321   VSR4_num, VSR5_num, VSR6_num, VSR7_num,
 2322   VSR8_num, VSR9_num, VSR10_num, VSR11_num,
 2323   VSR12_num, VSR13_num, VSR14_num, VSR15_num,
 2324   VSR16_num, VSR17_num, VSR18_num, VSR19_num,
 2325   VSR20_num, VSR21_num, VSR22_num, VSR23_num,
  VSR24_num, VSR25_num, VSR26_num, VSR27_num,
 2327   VSR28_num, VSR29_num, VSR30_num, VSR31_num,
 2328   VSR32_num, VSR33_num, VSR34_num, VSR35_num,
 2329   VSR36_num, VSR37_num, VSR38_num, VSR39_num,
 2330   VSR40_num, VSR41_num, VSR42_num, VSR43_num,
 2331   VSR44_num, VSR45_num, VSR46_num, VSR47_num,
 2332   VSR48_num, VSR49_num, VSR50_num, VSR51_num,
 2333   VSR52_num, VSR53_num, VSR54_num, VSR55_num,
 2334   VSR56_num, VSR57_num, VSR58_num, VSR59_num,
 2335   VSR60_num, VSR61_num, VSR62_num, VSR63_num
 2336 };
 2337 
 2338 const int num_iarg_registers = sizeof(iarg_reg) / sizeof(iarg_reg[0]);
 2339 
 2340 const int num_farg_registers = sizeof(farg_reg) / sizeof(farg_reg[0]);
 2341 
 2342 const int num_vsarg_registers = sizeof(vsarg_reg) / sizeof(vsarg_reg[0]);
 2343 
 2344 // Return whether or not this register is ever used as an argument. This
 2345 // function is used on startup to build the trampoline stubs in generateOptoStub.
 2346 // Registers not mentioned will be killed by the VM call in the trampoline, and
// arguments in those registers will not be available to the callee.
 2348 bool Matcher::can_be_java_arg(int reg) {
 2349   // We return true for all registers contained in iarg_reg[] and
 2350   // farg_reg[] and their virtual halves.
 2351   // We must include the virtual halves in order to get STDs and LDs
 2352   // instead of STWs and LWs in the trampoline stubs.
 2353 
 2354   if (   reg == R3_num  || reg == R3_H_num
 2355       || reg == R4_num  || reg == R4_H_num
 2356       || reg == R5_num  || reg == R5_H_num
 2357       || reg == R6_num  || reg == R6_H_num
 2358       || reg == R7_num  || reg == R7_H_num
 2359       || reg == R8_num  || reg == R8_H_num
 2360       || reg == R9_num  || reg == R9_H_num
 2361       || reg == R10_num || reg == R10_H_num)
 2362     return true;
 2363 
 2364   if (   reg == F1_num  || reg == F1_H_num
 2365       || reg == F2_num  || reg == F2_H_num
 2366       || reg == F3_num  || reg == F3_H_num
 2367       || reg == F4_num  || reg == F4_H_num
 2368       || reg == F5_num  || reg == F5_H_num
 2369       || reg == F6_num  || reg == F6_H_num
 2370       || reg == F7_num  || reg == F7_H_num
 2371       || reg == F8_num  || reg == F8_H_num
 2372       || reg == F9_num  || reg == F9_H_num
 2373       || reg == F10_num || reg == F10_H_num
 2374       || reg == F11_num || reg == F11_H_num
 2375       || reg == F12_num || reg == F12_H_num
 2376       || reg == F13_num || reg == F13_H_num)
 2377     return true;
 2378 
 2379   return false;
 2380 }
 2381 
 2382 bool Matcher::is_spillable_arg(int reg) {
 2383   return can_be_java_arg(reg);
 2384 }
 2385 
 2386 uint Matcher::int_pressure_limit()
 2387 {
 2388   return (INTPRESSURE == -1) ? 26 : INTPRESSURE;
 2389 }
 2390 
 2391 uint Matcher::float_pressure_limit()
 2392 {
 2393   return (FLOATPRESSURE == -1) ? 28 : FLOATPRESSURE;
 2394 }
 2395 
 2396 bool Matcher::use_asm_for_ldiv_by_con(jlong divisor) {
 2397   return false;
 2398 }
 2399 
 2400 // Register for DIVI projection of divmodI.
 2401 RegMask Matcher::divI_proj_mask() {
 2402   ShouldNotReachHere();
 2403   return RegMask();
 2404 }
 2405 
 2406 // Register for MODI projection of divmodI.
 2407 RegMask Matcher::modI_proj_mask() {
 2408   ShouldNotReachHere();
 2409   return RegMask();
 2410 }
 2411 
 2412 // Register for DIVL projection of divmodL.
 2413 RegMask Matcher::divL_proj_mask() {
 2414   ShouldNotReachHere();
 2415   return RegMask();
 2416 }
 2417 
 2418 // Register for MODL projection of divmodL.
 2419 RegMask Matcher::modL_proj_mask() {
 2420   ShouldNotReachHere();
 2421   return RegMask();
 2422 }
 2423 
 2424 const RegMask Matcher::method_handle_invoke_SP_save_mask() {
 2425   return RegMask();
 2426 }
 2427 
 2428 %}
 2429 
 2430 //----------ENCODING BLOCK-----------------------------------------------------
 2431 // This block specifies the encoding classes used by the compiler to output
 2432 // byte streams. Encoding classes are parameterized macros used by
 2433 // Machine Instruction Nodes in order to generate the bit encoding of the
 2434 // instruction. Operands specify their base encoding interface with the
// interface keyword. Four interfaces are currently supported:
// REG_INTER, CONST_INTER, MEMORY_INTER, & COND_INTER. REG_INTER causes an
 2437 // operand to generate a function which returns its register number when
 2438 // queried. CONST_INTER causes an operand to generate a function which
 2439 // returns the value of the constant when queried. MEMORY_INTER causes an
 2440 // operand to generate four functions which return the Base Register, the
 2441 // Index Register, the Scale Value, and the Offset Value of the operand when
 2442 // queried. COND_INTER causes an operand to generate six functions which
 2443 // return the encoding code (ie - encoding bits for the instruction)
 2444 // associated with each basic boolean condition for a conditional instruction.
 2445 //
 2446 // Instructions specify two basic values for encoding. Again, a function
 2447 // is available to check if the constant displacement is an oop. They use the
 2448 // ins_encode keyword to specify their encoding classes (which must be
 2449 // a sequence of enc_class names, and their parameters, specified in
 2450 // the encoding block), and they use the
 2451 // opcode keyword to specify, in order, their primary, secondary, and
 2452 // tertiary opcode. Only the opcode sections which a particular instruction
 2453 // needs for encoding need to be specified.
 2454 encode %{
 2455   enc_class enc_unimplemented %{
 2456     C2_MacroAssembler _masm(&cbuf);
 2457     __ unimplemented("Unimplemented mach node encoding in AD file.", 13);
 2458   %}
 2459 
 2460   enc_class enc_untested %{
 2461 #ifdef ASSERT
 2462     C2_MacroAssembler _masm(&cbuf);
 2463     __ untested("Untested mach node encoding in AD file.");
#endif
 2466   %}
 2467 
 2468   enc_class enc_lbz(iRegIdst dst, memory mem) %{
 2469     C2_MacroAssembler _masm(&cbuf);
 2470     int Idisp = $mem$$disp + frame_slots_bias($mem$$base, ra_);
 2471     __ lbz($dst$$Register, Idisp, $mem$$base$$Register);
 2472   %}
 2473 
 2474   // Load acquire.
 2475   enc_class enc_lbz_ac(iRegIdst dst, memory mem) %{
 2476     C2_MacroAssembler _masm(&cbuf);
 2477     int Idisp = $mem$$disp + frame_slots_bias($mem$$base, ra_);
 2478     __ lbz($dst$$Register, Idisp, $mem$$base$$Register);
 2479     __ twi_0($dst$$Register);
 2480     __ isync();
 2481   %}
 2482 
 2483   enc_class enc_lhz(iRegIdst dst, memory mem) %{
 2484 
 2485     C2_MacroAssembler _masm(&cbuf);
 2486     int Idisp = $mem$$disp + frame_slots_bias($mem$$base, ra_);
 2487     __ lhz($dst$$Register, Idisp, $mem$$base$$Register);
 2488   %}
 2489 
 2490   // Load acquire.
 2491   enc_class enc_lhz_ac(iRegIdst dst, memory mem) %{
 2492 
 2493     C2_MacroAssembler _masm(&cbuf);
 2494     int Idisp = $mem$$disp + frame_slots_bias($mem$$base, ra_);
 2495     __ lhz($dst$$Register, Idisp, $mem$$base$$Register);
 2496     __ twi_0($dst$$Register);
 2497     __ isync();
 2498   %}
 2499 
 2500   enc_class enc_lwz(iRegIdst dst, memory mem) %{
 2501 
 2502     C2_MacroAssembler _masm(&cbuf);
 2503     int Idisp = $mem$$disp + frame_slots_bias($mem$$base, ra_);
 2504     __ lwz($dst$$Register, Idisp, $mem$$base$$Register);
 2505   %}
 2506 
 2507   // Load acquire.
 2508   enc_class enc_lwz_ac(iRegIdst dst, memory mem) %{
 2509 
 2510     C2_MacroAssembler _masm(&cbuf);
 2511     int Idisp = $mem$$disp + frame_slots_bias($mem$$base, ra_);
 2512     __ lwz($dst$$Register, Idisp, $mem$$base$$Register);
 2513     __ twi_0($dst$$Register);
 2514     __ isync();
 2515   %}
 2516 
 2517   enc_class enc_ld(iRegLdst dst, memoryAlg4 mem) %{
 2518     C2_MacroAssembler _masm(&cbuf);
 2519     int Idisp = $mem$$disp + frame_slots_bias($mem$$base, ra_);
 2520     // Operand 'ds' requires 4-alignment.
 2521     assert((Idisp & 0x3) == 0, "unaligned offset");
 2522     __ ld($dst$$Register, Idisp, $mem$$base$$Register);
 2523   %}
 2524 
 2525   // Load acquire.
 2526   enc_class enc_ld_ac(iRegLdst dst, memoryAlg4 mem) %{
 2527     C2_MacroAssembler _masm(&cbuf);
 2528     int Idisp = $mem$$disp + frame_slots_bias($mem$$base, ra_);
 2529     // Operand 'ds' requires 4-alignment.
 2530     assert((Idisp & 0x3) == 0, "unaligned offset");
 2531     __ ld($dst$$Register, Idisp, $mem$$base$$Register);
 2532     __ twi_0($dst$$Register);
 2533     __ isync();
 2534   %}
 2535 
 2536   enc_class enc_lfd(RegF dst, memory mem) %{
 2537     C2_MacroAssembler _masm(&cbuf);
 2538     int Idisp = $mem$$disp + frame_slots_bias($mem$$base, ra_);
 2539     __ lfd($dst$$FloatRegister, Idisp, $mem$$base$$Register);
 2540   %}
 2541 
 2542   enc_class enc_load_long_constL(iRegLdst dst, immL src, iRegLdst toc) %{
 2543 
 2544     C2_MacroAssembler _masm(&cbuf);
 2545     int toc_offset = 0;
 2546 
 2547     address const_toc_addr;
 2548     // Create a non-oop constant, no relocation needed.
 2549     // If it is an IC, it has a virtual_call_Relocation.
 2550     const_toc_addr = __ long_constant((jlong)$src$$constant);
 2551     if (const_toc_addr == NULL) {
 2552       ciEnv::current()->record_out_of_memory_failure();
 2553       return;
 2554     }
 2555 
 2556     // Get the constant's TOC offset.
 2557     toc_offset = __ offset_to_method_toc(const_toc_addr);
 2558 
 2559     // Keep the current instruction offset in mind.
 2560     ((loadConLNode*)this)->_cbuf_insts_offset = __ offset();
 2561 
 2562     __ ld($dst$$Register, toc_offset, $toc$$Register);
 2563   %}
 2564 
 2565   enc_class enc_load_long_constL_hi(iRegLdst dst, iRegLdst toc, immL src) %{
 2566 
 2567     C2_MacroAssembler _masm(&cbuf);
 2568 
 2569     if (!ra_->C->output()->in_scratch_emit_size()) {
 2570       address const_toc_addr;
 2571       // Create a non-oop constant, no relocation needed.
 2572       // If it is an IC, it has a virtual_call_Relocation.
 2573       const_toc_addr = __ long_constant((jlong)$src$$constant);
 2574       if (const_toc_addr == NULL) {
 2575         ciEnv::current()->record_out_of_memory_failure();
 2576         return;
 2577       }
 2578 
 2579       // Get the constant's TOC offset.
 2580       const int toc_offset = __ offset_to_method_toc(const_toc_addr);
 2581       // Store the toc offset of the constant.
 2582       ((loadConL_hiNode*)this)->_const_toc_offset = toc_offset;
 2583 
 2584       // Also keep the current instruction offset in mind.
 2585       ((loadConL_hiNode*)this)->_cbuf_insts_offset = __ offset();
 2586     }
 2587 
 2588     __ addis($dst$$Register, $toc$$Register, MacroAssembler::largeoffset_si16_si16_hi(_const_toc_offset));
 2589   %}
 2590 
 2591 %} // encode
 2592 
 2593 source %{
 2594 
 2595 typedef struct {
 2596   loadConL_hiNode *_large_hi;
 2597   loadConL_loNode *_large_lo;
 2598   loadConLNode    *_small;
 2599   MachNode        *_last;
 2600 } loadConLNodesTuple;
 2601 
 2602 loadConLNodesTuple loadConLNodesTuple_create(PhaseRegAlloc *ra_, Node *toc, immLOper *immSrc,
 2603                                              OptoReg::Name reg_second, OptoReg::Name reg_first) {
 2604   loadConLNodesTuple nodes;
 2605 
 2606   const bool large_constant_pool = true; // TODO: PPC port C->cfg()->_consts_size > 4000;
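  // With a large constant pool the constant's TOC offset may not fit into the
  // signed 16-bit displacement of a single ld, so the access is split into an
  // addis with the high half of the offset (see largeoffset_si16_si16_hi)
  // followed by an ld that adds the low half.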
 2607   if (large_constant_pool) {
 2608     // Create new nodes.
 2609     loadConL_hiNode *m1 = new loadConL_hiNode();
 2610     loadConL_loNode *m2 = new loadConL_loNode();
 2611 
 2612     // inputs for new nodes
 2613     m1->add_req(NULL, toc);
 2614     m2->add_req(NULL, m1);
 2615 
 2616     // operands for new nodes
 2617     m1->_opnds[0] = new iRegLdstOper(); // dst
 2618     m1->_opnds[1] = immSrc;             // src
 2619     m1->_opnds[2] = new iRegPdstOper(); // toc
 2620     m2->_opnds[0] = new iRegLdstOper(); // dst
 2621     m2->_opnds[1] = immSrc;             // src
 2622     m2->_opnds[2] = new iRegLdstOper(); // base
 2623 
 2624     // Initialize ins_attrib TOC fields.
 2625     m1->_const_toc_offset = -1;
 2626     m2->_const_toc_offset_hi_node = m1;
 2627 
 2628     // Initialize ins_attrib instruction offset.
 2629     m1->_cbuf_insts_offset = -1;
 2630 
 2631     // register allocation for new nodes
 2632     ra_->set_pair(m1->_idx, reg_second, reg_first);
 2633     ra_->set_pair(m2->_idx, reg_second, reg_first);
 2634 
 2635     // Create result.
 2636     nodes._large_hi = m1;
 2637     nodes._large_lo = m2;
 2638     nodes._small = NULL;
 2639     nodes._last = nodes._large_lo;
 2640     assert(m2->bottom_type()->isa_long(), "must be long");
 2641   } else {
 2642     loadConLNode *m2 = new loadConLNode();
 2643 
 2644     // inputs for new nodes
 2645     m2->add_req(NULL, toc);
 2646 
 2647     // operands for new nodes
 2648     m2->_opnds[0] = new iRegLdstOper(); // dst
 2649     m2->_opnds[1] = immSrc;             // src
 2650     m2->_opnds[2] = new iRegPdstOper(); // toc
 2651 
 2652     // Initialize ins_attrib instruction offset.
 2653     m2->_cbuf_insts_offset = -1;
 2654 
 2655     // register allocation for new nodes
 2656     ra_->set_pair(m2->_idx, reg_second, reg_first);
 2657 
 2658     // Create result.
 2659     nodes._large_hi = NULL;
 2660     nodes._large_lo = NULL;
 2661     nodes._small = m2;
 2662     nodes._last = nodes._small;
 2663     assert(m2->bottom_type()->isa_long(), "must be long");
 2664   }
 2665 
 2666   return nodes;
 2667 }
 2668 
 2669 typedef struct {
 2670   loadConL_hiNode *_large_hi;
 2671   loadConL_loNode *_large_lo;
 2672   mtvsrdNode      *_moved;
 2673   xxspltdNode     *_replicated;
 2674   loadConLNode    *_small;
 2675   MachNode        *_last;
 2676 } loadConLReplicatedNodesTuple;
 2677 
 2678 loadConLReplicatedNodesTuple loadConLReplicatedNodesTuple_create(Compile *C, PhaseRegAlloc *ra_, Node *toc, immLOper *immSrc,
 2679                                                  vecXOper *dst, immI_0Oper *zero,
 2680                                                  OptoReg::Name reg_second, OptoReg::Name reg_first,
 2681                                                  OptoReg::Name reg_vec_second, OptoReg::Name reg_vec_first) {
 2682   loadConLReplicatedNodesTuple nodes;
 2683 
 2684   const bool large_constant_pool = true; // TODO: PPC port C->cfg()->_consts_size > 4000;
 2685   if (large_constant_pool) {
 2686     // Create new nodes.
 2687     loadConL_hiNode *m1 = new  loadConL_hiNode();
 2688     loadConL_loNode *m2 = new  loadConL_loNode();
 2689     mtvsrdNode *m3 = new  mtvsrdNode();
 2690     xxspltdNode *m4 = new  xxspltdNode();
 2691 
 2692     // inputs for new nodes
 2693     m1->add_req(NULL, toc);
 2694     m2->add_req(NULL, m1);
 2695     m3->add_req(NULL, m2);
 2696     m4->add_req(NULL, m3);
 2697 
 2698     // operands for new nodes
 2699     m1->_opnds[0] = new  iRegLdstOper(); // dst
 2700     m1->_opnds[1] = immSrc;              // src
 2701     m1->_opnds[2] = new  iRegPdstOper(); // toc
 2702 
 2703     m2->_opnds[0] = new  iRegLdstOper(); // dst
 2704     m2->_opnds[1] = immSrc;              // src
 2705     m2->_opnds[2] = new  iRegLdstOper(); // base
 2706 
 2707     m3->_opnds[0] = new  vecXOper();     // dst
 2708     m3->_opnds[1] = new  iRegLdstOper(); // src
 2709 
 2710     m4->_opnds[0] = new  vecXOper();     // dst
 2711     m4->_opnds[1] = new  vecXOper();     // src
 2712     m4->_opnds[2] = zero;
 2713 
 2714     // Initialize ins_attrib TOC fields.
 2715     m1->_const_toc_offset = -1;
 2716     m2->_const_toc_offset_hi_node = m1;
 2717 
 2718     // Initialize ins_attrib instruction offset.
 2719     m1->_cbuf_insts_offset = -1;
 2720 
 2721     // register allocation for new nodes
 2722     ra_->set_pair(m1->_idx, reg_second, reg_first);
 2723     ra_->set_pair(m2->_idx, reg_second, reg_first);
 2724     ra_->set1(m3->_idx, reg_second);
 2725     ra_->set2(m3->_idx, reg_vec_first);
 2726     ra_->set_pair(m4->_idx, reg_vec_second, reg_vec_first);
 2727 
 2728     // Create result.
 2729     nodes._large_hi = m1;
 2730     nodes._large_lo = m2;
 2731     nodes._moved = m3;
 2732     nodes._replicated = m4;
 2733     nodes._small = NULL;
 2734     nodes._last = nodes._replicated;
 2735     assert(m2->bottom_type()->isa_long(), "must be long");
 2736   } else {
 2737     loadConLNode *m2 = new  loadConLNode();
 2738     mtvsrdNode *m3 = new  mtvsrdNode();
 2739     xxspltdNode *m4 = new  xxspltdNode();
 2740 
 2741     // inputs for new nodes
 2742     m2->add_req(NULL, toc);
 2743 
 2744     // operands for new nodes
 2745     m2->_opnds[0] = new  iRegLdstOper(); // dst
 2746     m2->_opnds[1] = immSrc;              // src
 2747     m2->_opnds[2] = new  iRegPdstOper(); // toc
 2748 
 2749     m3->_opnds[0] = new  vecXOper();     // dst
 2750     m3->_opnds[1] = new  iRegLdstOper(); // src
 2751 
 2752     m4->_opnds[0] = new  vecXOper();     // dst
 2753     m4->_opnds[1] = new  vecXOper();     // src
 2754     m4->_opnds[2] = zero;
 2755 
 2756     // Initialize ins_attrib instruction offset.
 2757     m2->_cbuf_insts_offset = -1;
 2758     ra_->set1(m3->_idx, reg_second);
 2759     ra_->set2(m3->_idx, reg_vec_first);
 2760     ra_->set_pair(m4->_idx, reg_vec_second, reg_vec_first);
 2761 
 2762     // register allocation for new nodes
 2763     ra_->set_pair(m2->_idx, reg_second, reg_first);
 2764 
 2765     // Create result.
 2766     nodes._large_hi = NULL;
 2767     nodes._large_lo = NULL;
 2768     nodes._small = m2;
 2769     nodes._moved = m3;
 2770     nodes._replicated = m4;
 2771     nodes._last = nodes._replicated;
 2772     assert(m2->bottom_type()->isa_long(), "must be long");
 2773   }
 2774 
 2775   return nodes;
 2776 }
 2777 
 2778 %} // source
 2779 
 2780 encode %{
 2781   // Postalloc expand emitter for loading a long constant from the method's TOC.
  // Enc_class needed as constanttablebase is not supported by postalloc
 2783   // expand.
 2784   enc_class postalloc_expand_load_long_constant(iRegLdst dst, immL src, iRegLdst toc) %{
 2785     // Create new nodes.
 2786     loadConLNodesTuple loadConLNodes =
 2787       loadConLNodesTuple_create(ra_, n_toc, op_src,
 2788                                 ra_->get_reg_second(this), ra_->get_reg_first(this));
 2789 
 2790     // Push new nodes.
 2791     if (loadConLNodes._large_hi) nodes->push(loadConLNodes._large_hi);
 2792     if (loadConLNodes._last)     nodes->push(loadConLNodes._last);
 2793 
 2794     // some asserts
 2795     assert(nodes->length() >= 1, "must have created at least 1 node");
 2796     assert(loadConLNodes._last->bottom_type()->isa_long(), "must be long");
 2797   %}
 2798 
 2799   enc_class enc_load_long_constP(iRegLdst dst, immP src, iRegLdst toc) %{
 2800 
 2801     C2_MacroAssembler _masm(&cbuf);
 2802     int toc_offset = 0;
 2803 
 2804     intptr_t val = $src$$constant;
 2805     relocInfo::relocType constant_reloc = $src->constant_reloc();  // src
 2806     address const_toc_addr;
 2807     RelocationHolder r; // Initializes type to none.
 2808     if (constant_reloc == relocInfo::oop_type) {
 2809       // Create an oop constant and a corresponding relocation.
 2810       AddressLiteral a = __ constant_oop_address((jobject)val);
 2811       const_toc_addr = __ address_constant((address)a.value(), RelocationHolder::none);
 2812       r = a.rspec();
 2813     } else if (constant_reloc == relocInfo::metadata_type) {
 2814       // Notify OOP recorder (don't need the relocation)
 2815       AddressLiteral a = __ constant_metadata_address((Metadata *)val);
 2816       const_toc_addr = __ address_constant((address)a.value(), RelocationHolder::none);
 2817     } else {
 2818       // Create a non-oop constant, no relocation needed.
 2819       const_toc_addr = __ long_constant((jlong)$src$$constant);
 2820     }
 2821 
 2822     if (const_toc_addr == NULL) {
 2823       ciEnv::current()->record_out_of_memory_failure();
 2824       return;
 2825     }
 2826     __ relocate(r); // If set above.
 2827     // Get the constant's TOC offset.
 2828     toc_offset = __ offset_to_method_toc(const_toc_addr);
 2829 
 2830     __ ld($dst$$Register, toc_offset, $toc$$Register);
 2831   %}
 2832 
 2833   enc_class enc_load_long_constP_hi(iRegLdst dst, immP src, iRegLdst toc) %{
 2834 
 2835     C2_MacroAssembler _masm(&cbuf);
 2836     if (!ra_->C->output()->in_scratch_emit_size()) {
 2837       intptr_t val = $src$$constant;
 2838       relocInfo::relocType constant_reloc = $src->constant_reloc();  // src
 2839       address const_toc_addr;
 2840       RelocationHolder r; // Initializes type to none.
 2841       if (constant_reloc == relocInfo::oop_type) {
 2842         // Create an oop constant and a corresponding relocation.
 2843         AddressLiteral a = __ constant_oop_address((jobject)val);
 2844         const_toc_addr = __ address_constant((address)a.value(), RelocationHolder::none);
 2845         r = a.rspec();
 2846       } else if (constant_reloc == relocInfo::metadata_type) {
 2847         // Notify OOP recorder (don't need the relocation)
 2848         AddressLiteral a = __ constant_metadata_address((Metadata *)val);
 2849         const_toc_addr = __ address_constant((address)a.value(), RelocationHolder::none);
 2850       } else {  // non-oop pointers, e.g. card mark base, heap top
 2851         // Create a non-oop constant, no relocation needed.
 2852         const_toc_addr = __ long_constant((jlong)$src$$constant);
 2853       }
 2854 
 2855       if (const_toc_addr == NULL) {
 2856         ciEnv::current()->record_out_of_memory_failure();
 2857         return;
 2858       }
 2859       __ relocate(r); // If set above.
 2860       // Get the constant's TOC offset.
 2861       const int toc_offset = __ offset_to_method_toc(const_toc_addr);
 2862       // Store the toc offset of the constant.
 2863       ((loadConP_hiNode*)this)->_const_toc_offset = toc_offset;
 2864     }
 2865 
 2866     __ addis($dst$$Register, $toc$$Register, MacroAssembler::largeoffset_si16_si16_hi(_const_toc_offset));
 2867   %}
 2868 
 2869   // Postalloc expand emitter for loading a ptr constant from the method's TOC.
  // Enc_class needed as constanttablebase is not supported by postalloc
 2871   // expand.
 2872   enc_class postalloc_expand_load_ptr_constant(iRegPdst dst, immP src, iRegLdst toc) %{
 2873     const bool large_constant_pool = true; // TODO: PPC port C->cfg()->_consts_size > 4000;
 2874     if (large_constant_pool) {
 2875       // Create new nodes.
 2876       loadConP_hiNode *m1 = new loadConP_hiNode();
 2877       loadConP_loNode *m2 = new loadConP_loNode();
 2878 
 2879       // inputs for new nodes
 2880       m1->add_req(NULL, n_toc);
 2881       m2->add_req(NULL, m1);
 2882 
 2883       // operands for new nodes
 2884       m1->_opnds[0] = new iRegPdstOper(); // dst
 2885       m1->_opnds[1] = op_src;             // src
 2886       m1->_opnds[2] = new iRegPdstOper(); // toc
 2887       m2->_opnds[0] = new iRegPdstOper(); // dst
 2888       m2->_opnds[1] = op_src;             // src
 2889       m2->_opnds[2] = new iRegLdstOper(); // base
 2890 
 2891       // Initialize ins_attrib TOC fields.
 2892       m1->_const_toc_offset = -1;
 2893       m2->_const_toc_offset_hi_node = m1;
 2894 
 2895       // Register allocation for new nodes.
 2896       ra_->set_pair(m1->_idx, ra_->get_reg_second(this), ra_->get_reg_first(this));
 2897       ra_->set_pair(m2->_idx, ra_->get_reg_second(this), ra_->get_reg_first(this));
 2898 
 2899       nodes->push(m1);
 2900       nodes->push(m2);
 2901       assert(m2->bottom_type()->isa_ptr(), "must be ptr");
 2902     } else {
 2903       loadConPNode *m2 = new loadConPNode();
 2904 
 2905       // inputs for new nodes
 2906       m2->add_req(NULL, n_toc);
 2907 
 2908       // operands for new nodes
 2909       m2->_opnds[0] = new iRegPdstOper(); // dst
 2910       m2->_opnds[1] = op_src;             // src
 2911       m2->_opnds[2] = new iRegPdstOper(); // toc
 2912 
 2913       // Register allocation for new nodes.
 2914       ra_->set_pair(m2->_idx, ra_->get_reg_second(this), ra_->get_reg_first(this));
 2915 
 2916       nodes->push(m2);
 2917       assert(m2->bottom_type()->isa_ptr(), "must be ptr");
 2918     }
 2919   %}
 2920 
  // Enc_class needed as constanttablebase is not supported by postalloc
 2922   // expand.
 2923   enc_class postalloc_expand_load_float_constant(regF dst, immF src, iRegLdst toc) %{
 2924     bool large_constant_pool = true; // TODO: PPC port C->cfg()->_consts_size > 4000;
 2925 
 2926     MachNode *m2;
 2927     if (large_constant_pool) {
 2928       m2 = new loadConFCompNode();
 2929     } else {
 2930       m2 = new loadConFNode();
 2931     }
 2932     // inputs for new nodes
 2933     m2->add_req(NULL, n_toc);
 2934 
 2935     // operands for new nodes
 2936     m2->_opnds[0] = op_dst;
 2937     m2->_opnds[1] = op_src;
 2938     m2->_opnds[2] = new iRegPdstOper(); // constanttablebase
 2939 
 2940     // register allocation for new nodes
 2941     ra_->set_pair(m2->_idx, ra_->get_reg_second(this), ra_->get_reg_first(this));
 2942     nodes->push(m2);
 2943   %}
 2944 
 2945   // Enc_class needed as constanttablebase is not supported by postalloc
 2946   // expand.
 2947   enc_class postalloc_expand_load_double_constant(regD dst, immD src, iRegLdst toc) %{
 2948     bool large_constant_pool = true; // TODO: PPC port C->cfg()->_consts_size > 4000;
 2949 
 2950     MachNode *m2;
 2951     if (large_constant_pool) {
 2952       m2 = new loadConDCompNode();
 2953     } else {
 2954       m2 = new loadConDNode();
 2955     }
 2956     // inputs for new nodes
 2957     m2->add_req(NULL, n_toc);
 2958 
 2959     // operands for new nodes
 2960     m2->_opnds[0] = op_dst;
 2961     m2->_opnds[1] = op_src;
 2962     m2->_opnds[2] = new iRegPdstOper(); // constanttablebase
 2963 
 2964     // register allocation for new nodes
 2965     ra_->set_pair(m2->_idx, ra_->get_reg_second(this), ra_->get_reg_first(this));
 2966     nodes->push(m2);
 2967   %}
 2968 
 2969   enc_class enc_stw(iRegIsrc src, memory mem) %{
 2970     C2_MacroAssembler _masm(&cbuf);
 2971     int Idisp = $mem$$disp + frame_slots_bias($mem$$base, ra_);
 2972     __ stw($src$$Register, Idisp, $mem$$base$$Register);
 2973   %}
 2974 
 2975   enc_class enc_std(iRegIsrc src, memoryAlg4 mem) %{
 2976     C2_MacroAssembler _masm(&cbuf);
 2977     int Idisp = $mem$$disp + frame_slots_bias($mem$$base, ra_);
 2978     // Operand 'ds' requires 4-alignment.
 2979     assert((Idisp & 0x3) == 0, "unaligned offset");
 2980     __ std($src$$Register, Idisp, $mem$$base$$Register);
 2981   %}
 2982 
 2983   enc_class enc_stfs(RegF src, memory mem) %{
 2984     C2_MacroAssembler _masm(&cbuf);
 2985     int Idisp = $mem$$disp + frame_slots_bias($mem$$base, ra_);
 2986     __ stfs($src$$FloatRegister, Idisp, $mem$$base$$Register);
 2987   %}
 2988 
 2989   enc_class enc_stfd(RegF src, memory mem) %{
 2990     C2_MacroAssembler _masm(&cbuf);
 2991     int Idisp = $mem$$disp + frame_slots_bias($mem$$base, ra_);
 2992     __ stfd($src$$FloatRegister, Idisp, $mem$$base$$Register);
 2993   %}
 2994 
 2995   enc_class postalloc_expand_encode_oop(iRegNdst dst, iRegPdst src, flagsReg crx) %{
 2996 
 2997     if (VM_Version::has_isel()) {
 2998       // use isel instruction with Power 7
 2999       cmpP_reg_imm16Node *n_compare  = new cmpP_reg_imm16Node();
 3000       encodeP_subNode    *n_sub_base = new encodeP_subNode();
 3001       encodeP_shiftNode  *n_shift    = new encodeP_shiftNode();
 3002       cond_set_0_oopNode *n_cond_set = new cond_set_0_oopNode();
 3003 
 3004       n_compare->add_req(n_region, n_src);
 3005       n_compare->_opnds[0] = op_crx;
 3006       n_compare->_opnds[1] = op_src;
 3007       n_compare->_opnds[2] = new immL16Oper(0);
 3008 
 3009       n_sub_base->add_req(n_region, n_src);
 3010       n_sub_base->_opnds[0] = op_dst;
 3011       n_sub_base->_opnds[1] = op_src;
 3012       n_sub_base->_bottom_type = _bottom_type;
 3013 
 3014       n_shift->add_req(n_region, n_sub_base);
 3015       n_shift->_opnds[0] = op_dst;
 3016       n_shift->_opnds[1] = op_dst;
 3017       n_shift->_bottom_type = _bottom_type;
 3018 
 3019       n_cond_set->add_req(n_region, n_compare, n_shift);
 3020       n_cond_set->_opnds[0] = op_dst;
 3021       n_cond_set->_opnds[1] = op_crx;
 3022       n_cond_set->_opnds[2] = op_dst;
 3023       n_cond_set->_bottom_type = _bottom_type;
 3024 
 3025       ra_->set_pair(n_compare->_idx, ra_->get_reg_second(n_crx), ra_->get_reg_first(n_crx));
 3026       ra_->set_pair(n_sub_base->_idx, ra_->get_reg_second(this), ra_->get_reg_first(this));
 3027       ra_->set_pair(n_shift->_idx, ra_->get_reg_second(this), ra_->get_reg_first(this));
 3028       ra_->set_pair(n_cond_set->_idx, ra_->get_reg_second(this), ra_->get_reg_first(this));
 3029 
 3030       nodes->push(n_compare);
 3031       nodes->push(n_sub_base);
 3032       nodes->push(n_shift);
 3033       nodes->push(n_cond_set);
 3034 
 3035     } else {
 3036       // before Power 7
 3037       moveRegNode        *n_move     = new moveRegNode();
 3038       cmpP_reg_imm16Node *n_compare  = new cmpP_reg_imm16Node();
 3039       encodeP_shiftNode  *n_shift    = new encodeP_shiftNode();
 3040       cond_sub_baseNode  *n_sub_base = new cond_sub_baseNode();
 3041 
 3042       n_move->add_req(n_region, n_src);
 3043       n_move->_opnds[0] = op_dst;
 3044       n_move->_opnds[1] = op_src;
 3045       ra_->set_oop(n_move, true); // Until here, 'n_move' still produces an oop.
 3046 
 3047       n_compare->add_req(n_region, n_src);
 3048       n_compare->add_prec(n_move);
 3049 
 3050       n_compare->_opnds[0] = op_crx;
 3051       n_compare->_opnds[1] = op_src;
 3052       n_compare->_opnds[2] = new immL16Oper(0);
 3053 
 3054       n_sub_base->add_req(n_region, n_compare, n_src);
 3055       n_sub_base->_opnds[0] = op_dst;
 3056       n_sub_base->_opnds[1] = op_crx;
 3057       n_sub_base->_opnds[2] = op_src;
 3058       n_sub_base->_bottom_type = _bottom_type;
 3059 
 3060       n_shift->add_req(n_region, n_sub_base);
 3061       n_shift->_opnds[0] = op_dst;
 3062       n_shift->_opnds[1] = op_dst;
 3063       n_shift->_bottom_type = _bottom_type;
 3064 
 3065       ra_->set_pair(n_shift->_idx, ra_->get_reg_second(this), ra_->get_reg_first(this));
 3066       ra_->set_pair(n_compare->_idx, ra_->get_reg_second(n_crx), ra_->get_reg_first(n_crx));
 3067       ra_->set_pair(n_sub_base->_idx, ra_->get_reg_second(this), ra_->get_reg_first(this));
 3068       ra_->set_pair(n_move->_idx, ra_->get_reg_second(this), ra_->get_reg_first(this));
 3069 
 3070       nodes->push(n_move);
 3071       nodes->push(n_compare);
 3072       nodes->push(n_sub_base);
 3073       nodes->push(n_shift);
 3074     }
 3075 
 3076     assert(!(ra_->is_oop(this)), "sanity"); // This is not supposed to be GC'ed.
 3077   %}
 3078 
 3079   enc_class postalloc_expand_encode_oop_not_null(iRegNdst dst, iRegPdst src) %{
 3080 
 3081     encodeP_subNode *n1 = new encodeP_subNode();
 3082     n1->add_req(n_region, n_src);
 3083     n1->_opnds[0] = op_dst;
 3084     n1->_opnds[1] = op_src;
 3085     n1->_bottom_type = _bottom_type;
 3086 
 3087     encodeP_shiftNode *n2 = new encodeP_shiftNode();
 3088     n2->add_req(n_region, n1);
 3089     n2->_opnds[0] = op_dst;
 3090     n2->_opnds[1] = op_dst;
 3091     n2->_bottom_type = _bottom_type;
 3092     ra_->set_pair(n1->_idx, ra_->get_reg_second(this), ra_->get_reg_first(this));
 3093     ra_->set_pair(n2->_idx, ra_->get_reg_second(this), ra_->get_reg_first(this));
 3094 
 3095     nodes->push(n1);
 3096     nodes->push(n2);
 3097     assert(!(ra_->is_oop(this)), "sanity"); // This is not supposed to be GC'ed.
 3098   %}
 3099 
 3100   enc_class postalloc_expand_decode_oop(iRegPdst dst, iRegNsrc src, flagsReg crx) %{
 3101     decodeN_shiftNode *n_shift    = new decodeN_shiftNode();
 3102     cmpN_reg_imm0Node *n_compare  = new cmpN_reg_imm0Node();
 3103 
 3104     n_compare->add_req(n_region, n_src);
 3105     n_compare->_opnds[0] = op_crx;
 3106     n_compare->_opnds[1] = op_src;
 3107     n_compare->_opnds[2] = new immN_0Oper(TypeNarrowOop::NULL_PTR);
 3108 
 3109     n_shift->add_req(n_region, n_src);
 3110     n_shift->_opnds[0] = op_dst;
 3111     n_shift->_opnds[1] = op_src;
 3112     n_shift->_bottom_type = _bottom_type;
 3113 
 3114     if (VM_Version::has_isel()) {
 3115       // use isel instruction with Power 7
 3116 
 3117       decodeN_addNode *n_add_base = new decodeN_addNode();
 3118       n_add_base->add_req(n_region, n_shift);
 3119       n_add_base->_opnds[0] = op_dst;
 3120       n_add_base->_opnds[1] = op_dst;
 3121       n_add_base->_bottom_type = _bottom_type;
 3122 
 3123       cond_set_0_ptrNode *n_cond_set = new cond_set_0_ptrNode();
 3124       n_cond_set->add_req(n_region, n_compare, n_add_base);
 3125       n_cond_set->_opnds[0] = op_dst;
 3126       n_cond_set->_opnds[1] = op_crx;
 3127       n_cond_set->_opnds[2] = op_dst;
 3128       n_cond_set->_bottom_type = _bottom_type;
 3129 
 3130       assert(ra_->is_oop(this) == true, "A decodeN node must produce an oop!");
 3131       ra_->set_oop(n_cond_set, true);
 3132 
 3133       ra_->set_pair(n_shift->_idx, ra_->get_reg_second(this), ra_->get_reg_first(this));
 3134       ra_->set_pair(n_compare->_idx, ra_->get_reg_second(n_crx), ra_->get_reg_first(n_crx));
 3135       ra_->set_pair(n_add_base->_idx, ra_->get_reg_second(this), ra_->get_reg_first(this));
 3136       ra_->set_pair(n_cond_set->_idx, ra_->get_reg_second(this), ra_->get_reg_first(this));
 3137 
 3138       nodes->push(n_compare);
 3139       nodes->push(n_shift);
 3140       nodes->push(n_add_base);
 3141       nodes->push(n_cond_set);
 3142 
 3143     } else {
 3144       // before Power 7
 3145       cond_add_baseNode *n_add_base = new cond_add_baseNode();
 3146 
 3147       n_add_base->add_req(n_region, n_compare, n_shift);
 3148       n_add_base->_opnds[0] = op_dst;
 3149       n_add_base->_opnds[1] = op_crx;
 3150       n_add_base->_opnds[2] = op_dst;
 3151       n_add_base->_bottom_type = _bottom_type;
 3152 
 3153       assert(ra_->is_oop(this) == true, "A decodeN node must produce an oop!");
 3154       ra_->set_oop(n_add_base, true);
 3155 
 3156       ra_->set_pair(n_shift->_idx, ra_->get_reg_second(this), ra_->get_reg_first(this));
 3157       ra_->set_pair(n_compare->_idx, ra_->get_reg_second(n_crx), ra_->get_reg_first(n_crx));
 3158       ra_->set_pair(n_add_base->_idx, ra_->get_reg_second(this), ra_->get_reg_first(this));
 3159 
 3160       nodes->push(n_compare);
 3161       nodes->push(n_shift);
 3162       nodes->push(n_add_base);
 3163     }
 3164   %}
 3165 
 3166   enc_class postalloc_expand_decode_oop_not_null(iRegPdst dst, iRegNsrc src) %{
 3167     decodeN_shiftNode *n1 = new decodeN_shiftNode();
 3168     n1->add_req(n_region, n_src);
 3169     n1->_opnds[0] = op_dst;
 3170     n1->_opnds[1] = op_src;
 3171     n1->_bottom_type = _bottom_type;
 3172 
 3173     decodeN_addNode *n2 = new decodeN_addNode();
 3174     n2->add_req(n_region, n1);
 3175     n2->_opnds[0] = op_dst;
 3176     n2->_opnds[1] = op_dst;
 3177     n2->_bottom_type = _bottom_type;
 3178     ra_->set_pair(n1->_idx, ra_->get_reg_second(this), ra_->get_reg_first(this));
 3179     ra_->set_pair(n2->_idx, ra_->get_reg_second(this), ra_->get_reg_first(this));
 3180 
 3181     assert(ra_->is_oop(this) == true, "A decodeN node must produce an oop!");
 3182     ra_->set_oop(n2, true);
 3183 
 3184     nodes->push(n1);
 3185     nodes->push(n2);
 3186   %}
 3187 
 3188   enc_class enc_cmove_reg(iRegIdst dst, flagsRegSrc crx, iRegIsrc src, cmpOp cmp) %{
 3189 
 3190     C2_MacroAssembler _masm(&cbuf);
 3191     int cc        = $cmp$$cmpcode;
 3192     int flags_reg = $crx$$reg;
 3193     Label done;
 3194     assert((Assembler::bcondCRbiIs1 & ~Assembler::bcondCRbiIs0) == 8, "check encoding");
 3195     // Branch if not (cmp crx).
 3196     __ bc(cc_to_inverse_boint(cc), cc_to_biint(cc, flags_reg), done);
 3197     __ mr($dst$$Register, $src$$Register);
 3198     __ bind(done);
 3199   %}
 3200 
 3201   enc_class enc_cmove_imm(iRegIdst dst, flagsRegSrc crx, immI16 src, cmpOp cmp) %{
 3202 
 3203     C2_MacroAssembler _masm(&cbuf);
 3204     Label done;
 3205     assert((Assembler::bcondCRbiIs1 & ~Assembler::bcondCRbiIs0) == 8, "check encoding");
 3206     // Branch if not (cmp crx).
 3207     __ bc(cc_to_inverse_boint($cmp$$cmpcode), cc_to_biint($cmp$$cmpcode, $crx$$reg), done);
 3208     __ li($dst$$Register, $src$$constant);
 3209     __ bind(done);
 3210   %}
 3211 
 3212   // This enc_class is needed so that the scheduler gets proper
 3213   // input mapping for latency computation.
 3214   enc_class enc_andc(iRegIdst dst, iRegIsrc src1, iRegIsrc src2) %{
 3215     C2_MacroAssembler _masm(&cbuf);
 3216     __ andc($dst$$Register, $src1$$Register, $src2$$Register);
 3217   %}
 3218 
 3219   enc_class enc_convI2B_regI__cmove(iRegIdst dst, iRegIsrc src, flagsReg crx, immI16 zero, immI16 notzero) %{
 3220 
 3221     C2_MacroAssembler _masm(&cbuf);
 3222 
 3223     Label done;
 3224     __ cmpwi($crx$$CondRegister, $src$$Register, 0);
 3225     __ li($dst$$Register, $zero$$constant);
 3226     __ beq($crx$$CondRegister, done);
 3227     __ li($dst$$Register, $notzero$$constant);
 3228     __ bind(done);
 3229   %}
 3230 
 3231   enc_class enc_convP2B_regP__cmove(iRegIdst dst, iRegPsrc src, flagsReg crx, immI16 zero, immI16 notzero) %{
 3232 
 3233     C2_MacroAssembler _masm(&cbuf);
 3234 
 3235     Label done;
 3236     __ cmpdi($crx$$CondRegister, $src$$Register, 0);
 3237     __ li($dst$$Register, $zero$$constant);
 3238     __ beq($crx$$CondRegister, done);
 3239     __ li($dst$$Register, $notzero$$constant);
 3240     __ bind(done);
 3241   %}
 3242 
 3243   enc_class enc_cmove_bso_stackSlotL(iRegLdst dst, flagsRegSrc crx, stackSlotL mem ) %{
 3244 
 3245     C2_MacroAssembler _masm(&cbuf);
 3246     int Idisp = $mem$$disp + frame_slots_bias($mem$$base, ra_);
 3247     Label done;
 3248     __ bso($crx$$CondRegister, done);
 3249     __ ld($dst$$Register, Idisp, $mem$$base$$Register);
 3250     __ bind(done);
 3251   %}
 3252 
 3253   enc_class enc_cmove_bso_reg(iRegLdst dst, flagsRegSrc crx, regD src) %{
 3254 
 3255     C2_MacroAssembler _masm(&cbuf);
 3256     Label done;
 3257     __ bso($crx$$CondRegister, done);
 3258     __ mffprd($dst$$Register, $src$$FloatRegister);
 3259     __ bind(done);
 3260   %}
 3261 
 3262   enc_class enc_bc(flagsRegSrc crx, cmpOp cmp, Label lbl) %{
 3263 
 3264     C2_MacroAssembler _masm(&cbuf);
 3265     Label d;   // dummy
 3266     __ bind(d);
 3267     Label* p = ($lbl$$label);
 3268     // `p' is `NULL' when this encoding class is used only to
 3269     // determine the size of the encoded instruction.
 3270     Label& l = (NULL == p)? d : *(p);
 3271     int cc = $cmp$$cmpcode;
 3272     int flags_reg = $crx$$reg;
 3273     assert((Assembler::bcondCRbiIs1 & ~Assembler::bcondCRbiIs0) == 8, "check encoding");
 3274     int bhint = Assembler::bhintNoHint;
 3275 
 3276     if (UseStaticBranchPredictionForUncommonPathsPPC64) {
 3277       if (_prob <= PROB_NEVER) {
 3278         bhint = Assembler::bhintIsNotTaken;
 3279       } else if (_prob >= PROB_ALWAYS) {
 3280         bhint = Assembler::bhintIsTaken;
 3281       }
 3282     }
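    // Note (added for clarity): the hint is folded into the BO field of the
    // conditional branch below, so e.g. _prob <= PROB_NEVER marks the branch
    // as statically predicted not-taken.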
 3283 
 3284     __ bc(Assembler::add_bhint_to_boint(bhint, cc_to_boint(cc)),
 3285           cc_to_biint(cc, flags_reg),
 3286           l);
 3287   %}
 3288 
 3289   enc_class enc_bc_far(flagsRegSrc crx, cmpOp cmp, Label lbl) %{
 3290     // The scheduler doesn't know about branch shortening, so we set the opcode
 3291     // to ppc64Opcode_bc in order to hide this detail from the scheduler.
 3292 
 3293     C2_MacroAssembler _masm(&cbuf);
 3294     Label d;    // dummy
 3295     __ bind(d);
 3296     Label* p = ($lbl$$label);
 3297     // `p' is `NULL' when this encoding class is used only to
 3298     // determine the size of the encoded instruction.
 3299     Label& l = (NULL == p)? d : *(p);
 3300     int cc = $cmp$$cmpcode;
 3301     int flags_reg = $crx$$reg;
 3302     int bhint = Assembler::bhintNoHint;
 3303 
 3304     if (UseStaticBranchPredictionForUncommonPathsPPC64) {
 3305       if (_prob <= PROB_NEVER) {
 3306         bhint = Assembler::bhintIsNotTaken;
 3307       } else if (_prob >= PROB_ALWAYS) {
 3308         bhint = Assembler::bhintIsTaken;
 3309       }
 3310     }
 3311 
 3312     // Tell the conditional far branch to optimize itself when being relocated.
 3313     __ bc_far(Assembler::add_bhint_to_boint(bhint, cc_to_boint(cc)),
 3314                   cc_to_biint(cc, flags_reg),
 3315                   l,
 3316                   MacroAssembler::bc_far_optimize_on_relocate);
 3317   %}
 3318 
 3319   // Postalloc expand emitter for loading a replicated float constant from
 3320   // the method's TOC.
 3321   // Enc_class needed as constanttablebase is not supported by postalloc
 3322   // expand.
 3323   enc_class postalloc_expand_load_replF_constant(iRegLdst dst, immF src, iRegLdst toc) %{
 3324     // Create new nodes.
 3325 
 3326     // Make an operand with the bit pattern to load as float.
 3327     immLOper *op_repl = new immLOper((jlong)replicate_immF(op_src->constantF()));
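    // Hedged illustration: replicate_immF presumably duplicates the float's
    // 32-bit pattern into both halves of the long, e.g. 1.0f (0x3f800000)
    // would yield 0x3f8000003f800000, loaded like any other long constant.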
 3328 
 3329     loadConLNodesTuple loadConLNodes =
 3330       loadConLNodesTuple_create(ra_, n_toc, op_repl,
 3331                                 ra_->get_reg_second(this), ra_->get_reg_first(this));
 3332 
 3333     // Push new nodes.
 3334     if (loadConLNodes._large_hi) nodes->push(loadConLNodes._large_hi);
 3335     if (loadConLNodes._last)     nodes->push(loadConLNodes._last);
 3336 
 3337     assert(nodes->length() >= 1, "must have created at least 1 node");
 3338     assert(loadConLNodes._last->bottom_type()->isa_long(), "must be long");
 3339   %}
 3340 
 3341   enc_class postalloc_expand_load_replF_constant_vsx(vecX dst, immF src, iRegLdst toc, iRegLdst tmp) %{
 3342     // Create new nodes.
 3343 
 3344     // Make an operand with the bit pattern to load as float.
 3345     immLOper *op_repl = new  immLOper((jlong)replicate_immF(op_src->constantF()));
 3346     immI_0Oper *op_zero = new  immI_0Oper(0);
 3347 
 3348     loadConLReplicatedNodesTuple loadConLNodes =
 3349       loadConLReplicatedNodesTuple_create(C, ra_, n_toc, op_repl, op_dst, op_zero,
 3350                                 ra_->get_reg_second(n_tmp), ra_->get_reg_first(n_tmp),
 3351                                 ra_->get_reg_second(this), ra_->get_reg_first(this));
 3352 
 3353     // Push new nodes.
 3354     if (loadConLNodes._large_hi) { nodes->push(loadConLNodes._large_hi); }
 3355     if (loadConLNodes._large_lo) { nodes->push(loadConLNodes._large_lo); }
 3356     if (loadConLNodes._moved)    { nodes->push(loadConLNodes._moved); }
 3357     if (loadConLNodes._last)     { nodes->push(loadConLNodes._last); }
 3358 
 3359     assert(nodes->length() >= 1, "must have created at least 1 node");
 3360   %}
 3361 
 3362   // This enc_class is needed so that the scheduler gets proper
 3363   // input mapping for latency computation.
 3364   enc_class enc_poll(immI dst, iRegLdst poll) %{
 3365     // Fake operand dst needed for PPC scheduler.
 3366     assert($dst$$constant == 0x0, "dst must be 0x0");
 3367 
 3368     C2_MacroAssembler _masm(&cbuf);
 3369     // Mark the code position where the load from the safepoint
 3370     // polling page was emitted as relocInfo::poll_type.
 3371     __ relocate(relocInfo::poll_type);
 3372     __ load_from_polling_page($poll$$Register);
 3373   %}
 3374 
 3375   // A Java static call or a runtime call.
 3376   //
 3377   // Branch-and-link relative to a trampoline.
 3378   // The trampoline loads the target address and does a long branch there.
 3379   // In case we call java, the trampoline branches to an interpreter_stub
 3380   // which loads the inline cache and the real call target from the constant pool.
 3381   //
 3382   // This basically looks like this:
 3383   //
 3384   // >>>> consts      -+  -+
 3385   //                   |   |- offset1
 3386   // [call target1]    | <-+
 3387   // [IC cache]        |- offset2
 3388   // [call target2] <--+
 3389   //
 3390   // <<<< consts
 3391   // >>>> insts
 3392   //
 3393   // bl offset16               -+  -+             ??? // How many bits available?
 3394   //                            |   |
 3395   // <<<< insts                 |   |
 3396   // >>>> stubs                 |   |
 3397   //                            |   |- trampoline_stub_Reloc
 3398   // trampoline stub:           | <-+
 3399   //   r2 = toc                 |
 3400   //   r2 = [r2 + offset1]      |       // Load call target1 from const section
 3401   //   mtctr r2                 |
 3402   //   bctr                     |- static_stub_Reloc
 3403   // comp_to_interp_stub:   <---+
 3404   //   r1 = toc
 3405   //   ICreg = [r1 + IC_offset]         // Load IC from const section
 3406   //   r1    = [r1 + offset2]           // Load call target2 from const section
 3407   //   mtctr r1
 3408   //   bctr
 3409   //
 3410   // <<<< stubs
 3411   //
 3412   // The call instruction in the code either
 3413   // - Branches directly to a compiled method if the offset is encodable in the instruction.
 3414   // - Branches to the trampoline stub if the offset to the compiled method is not encodable.
 3415   // - Branches to the compiled_to_interp stub if the target is interpreted.
 3416   //
 3417   // Furthermore, there are three relocations from the loads to the constants in
 3418   // the constant section.
 3419   //
 3420   // Usage of r1 and r2 in the stubs makes it possible to distinguish them.
 3421   enc_class enc_java_static_call(method meth) %{
 3422 
 3423     C2_MacroAssembler _masm(&cbuf);
 3424     address entry_point = (address)$meth$$method;
 3425 
 3426     if (!_method) {
 3427       // A call to a runtime wrapper, e.g. new, new_typeArray_Java, uncommon_trap.
 3428       emit_call_with_trampoline_stub(_masm, entry_point, relocInfo::runtime_call_type);
 3429       if (ciEnv::current()->failing()) { return; } // Code cache may be full.
 3430     } else {
 3431       // Remember the offset not the address.
 3432       const int start_offset = __ offset();
 3433 
 3434       // The trampoline stub.
 3435       // No entry point given, use the current pc
 3436       // to make sure the branch fits.
 3437       if (entry_point == 0) entry_point = __ pc();
 3438 
 3439       // Put the entry point as a constant into the constant pool.
 3440       const address entry_point_toc_addr = __ address_constant(entry_point, RelocationHolder::none);
 3441       if (entry_point_toc_addr == NULL) {
 3442         ciEnv::current()->record_out_of_memory_failure();
 3443         return;
 3444       }
 3445       const int entry_point_toc_offset = __ offset_to_method_toc(entry_point_toc_addr);
 3446 
 3447       // Emit the trampoline stub which will be related to the branch-and-link below.
 3448       CallStubImpl::emit_trampoline_stub(_masm, entry_point_toc_offset, start_offset);
 3449       if (ciEnv::current()->failing()) { return; } // Code cache may be full.
 3450       int method_index = resolved_method_index(cbuf);
 3451       __ relocate(_optimized_virtual ? opt_virtual_call_Relocation::spec(method_index)
 3452                   : static_call_Relocation::spec(method_index));
 3453 
 3454       // The real call.
 3455       // Note: At this point we do not have the address of the trampoline
 3456       // stub, and the entry point might be too far away for bl, so __ pc()
 3457       // serves as dummy and the bl will be patched later.
 3458       cbuf.set_insts_mark();
 3459       __ bl(__ pc());  // Emits a relocation.
 3460 
 3461       // The stub for call to interpreter.
 3462       address stub = CompiledStaticCall::emit_to_interp_stub(cbuf);
 3463       if (stub == NULL) {
 3464         ciEnv::current()->record_failure("CodeCache is full");
 3465         return;
 3466       }
 3467     }
 3468     __ post_call_nop();
 3469   %}
 3470 
 3471   // Second node of expanded dynamic call - the call.
 3472   enc_class enc_java_dynamic_call_sched(method meth) %{
 3473 
 3474     C2_MacroAssembler _masm(&cbuf);
 3475 
 3476     if (!ra_->C->output()->in_scratch_emit_size()) {
 3477       // Create a call trampoline stub for the given method.
 3478       const address entry_point = !($meth$$method) ? 0 : (address)$meth$$method;
 3479       const address entry_point_const = __ address_constant(entry_point, RelocationHolder::none);
 3480       if (entry_point_const == NULL) {
 3481         ciEnv::current()->record_out_of_memory_failure();
 3482         return;
 3483       }
 3484       const int entry_point_const_toc_offset = __ offset_to_method_toc(entry_point_const);
 3485       CallStubImpl::emit_trampoline_stub(_masm, entry_point_const_toc_offset, __ offset());
 3486       if (ra_->C->env()->failing()) { return; } // Code cache may be full.
 3487 
 3488       // Build relocation at call site with ic position as data.
 3489       assert((_load_ic_hi_node != NULL && _load_ic_node == NULL) ||
 3490              (_load_ic_hi_node == NULL && _load_ic_node != NULL),
 3491              "must have one, but can't have both");
 3492       assert((_load_ic_hi_node != NULL && _load_ic_hi_node->_cbuf_insts_offset != -1) ||
 3493              (_load_ic_node != NULL    && _load_ic_node->_cbuf_insts_offset != -1),
 3494              "must contain instruction offset");
 3495       const int virtual_call_oop_addr_offset = _load_ic_hi_node != NULL
 3496         ? _load_ic_hi_node->_cbuf_insts_offset
 3497         : _load_ic_node->_cbuf_insts_offset;
 3498       const address virtual_call_oop_addr = __ addr_at(virtual_call_oop_addr_offset);
 3499       assert(MacroAssembler::is_load_const_from_method_toc_at(virtual_call_oop_addr),
 3500              "should be load from TOC");
 3501       int method_index = resolved_method_index(cbuf);
 3502       __ relocate(virtual_call_Relocation::spec(virtual_call_oop_addr, method_index));
 3503     }
 3504 
 3505     // At this point I do not have the address of the trampoline stub,
 3506     // and the entry point might be too far away for bl. Pc() serves
 3507     // as dummy and bl will be patched later.
 3508     __ bl((address) __ pc());
 3509     __ post_call_nop();
 3510   %}
 3511 
 3512   // postalloc expand emitter for virtual calls.
 3513   enc_class postalloc_expand_java_dynamic_call_sched(method meth, iRegLdst toc) %{
 3514 
 3515     // Create the nodes for loading the IC from the TOC.
 3516     loadConLNodesTuple loadConLNodes_IC =
 3517       loadConLNodesTuple_create(ra_, n_toc, new immLOper((jlong)Universe::non_oop_word()),
 3518                                 OptoReg::Name(R19_H_num), OptoReg::Name(R19_num));
 3519 
 3520     // Create the call node.
 3521     CallDynamicJavaDirectSchedNode *call = new CallDynamicJavaDirectSchedNode();
 3522     call->_method_handle_invoke = _method_handle_invoke;
 3523     call->_vtable_index      = _vtable_index;
 3524     call->_method            = _method;
 3525     call->_optimized_virtual = _optimized_virtual;
 3526     call->_tf                = _tf;
 3527     call->_entry_point       = _entry_point;
 3528     call->_cnt               = _cnt;
 3529     call->_guaranteed_safepoint = true;
 3530     call->_oop_map           = _oop_map;
 3531     call->_jvms              = _jvms;
 3532     call->_jvmadj            = _jvmadj;
 3533     call->_in_rms            = _in_rms;
 3534     call->_nesting           = _nesting;
 3535     call->_override_symbolic_info = _override_symbolic_info;
 3536 
 3537     // New call needs all inputs of old call.
 3538     // Req...
 3539     for (uint i = 0; i < req(); ++i) {
 3540       // The expanded node does not need toc any more.
 3541       // Add the inline cache constant here instead. This expresses that the
 3542       // register of the inline cache must be live at the call.
 3543       // Otherwise we would have to adapt the JVMState by -1.
 3544       if (i == mach_constant_base_node_input()) {
 3545         call->add_req(loadConLNodes_IC._last);
 3546       } else {
 3547         call->add_req(in(i));
 3548       }
 3549     }
 3550     // ...as well as prec
 3551     for (uint i = req(); i < len(); ++i) {
 3552       call->add_prec(in(i));
 3553     }
 3554 
 3555     // Remember nodes loading the inline cache into r19.
 3556     call->_load_ic_hi_node = loadConLNodes_IC._large_hi;
 3557     call->_load_ic_node    = loadConLNodes_IC._small;
 3558 
 3559     // Operands for new nodes.
 3560     call->_opnds[0] = _opnds[0];
 3561     call->_opnds[1] = _opnds[1];
 3562 
 3563     // Only the inline cache is associated with a register.
 3564     assert(Matcher::inline_cache_reg() == OptoReg::Name(R19_num), "ic reg should be R19");
 3565 
 3566     // Push new nodes.
 3567     if (loadConLNodes_IC._large_hi) nodes->push(loadConLNodes_IC._large_hi);
 3568     if (loadConLNodes_IC._last)     nodes->push(loadConLNodes_IC._last);
 3569     nodes->push(call);
 3570   %}
 3571 
 3572   // Compound version of the dynamic call.
 3573   // Toc is only passed so that it can be used in the ins_encode statement.
 3574   // In the code we have to use $constanttablebase.
 3575   enc_class enc_java_dynamic_call(method meth, iRegLdst toc) %{
 3576     C2_MacroAssembler _masm(&cbuf);
 3577     int start_offset = __ offset();
 3578 
 3579     Register Rtoc = (ra_) ? $constanttablebase : R2_TOC;
 3580 
 3581     int vtable_index = this->_vtable_index;
 3582     if (vtable_index < 0) {
 3583       // Must be invalid_vtable_index, not nonvirtual_vtable_index.
 3584       assert(vtable_index == Method::invalid_vtable_index, "correct sentinel value");
 3585       Register ic_reg = as_Register(Matcher::inline_cache_reg_encode());
 3586 
 3587       // Virtual call relocation will point to ic load.
 3588       address virtual_call_meta_addr = __ pc();
 3589       // Load a clear inline cache.
 3590       AddressLiteral empty_ic((address) Universe::non_oop_word());
 3591       bool success = __ load_const_from_method_toc(ic_reg, empty_ic, Rtoc, /*fixed_size*/ true);
 3592       if (!success) {
 3593         ciEnv::current()->record_out_of_memory_failure();
 3594         return;
 3595       }
 3596       // CALL to fixup routine.  Fixup routine uses ScopeDesc info
 3597       // to determine who we intended to call.
 3598       __ relocate(virtual_call_Relocation::spec(virtual_call_meta_addr));
 3599       emit_call_with_trampoline_stub(_masm, (address)$meth$$method, relocInfo::none);
 3600       if (ciEnv::current()->failing()) { return; } // Code cache may be full.
 3601       assert(((MachCallDynamicJavaNode*)this)->ret_addr_offset() == __ offset() - start_offset,
 3602              "Fix constant in ret_addr_offset(), expected %d", __ offset() - start_offset);
 3603     } else {
 3604       assert(!UseInlineCaches, "expect vtable calls only if not using ICs");
 3605       // Go through the vtable. Get the receiver klass. The receiver has already
 3606       // been checked for non-null. If we go through a C2I adapter, the
 3607       // interpreter expects the method in R19_method.
 3608 
 3609       __ load_klass(R11_scratch1, R3);
 3610 
 3611       int entry_offset = in_bytes(Klass::vtable_start_offset()) + vtable_index * vtableEntry::size_in_bytes();
 3612       int v_off = entry_offset + in_bytes(vtableEntry::method_offset());
 3613       __ li(R19_method, v_off);
 3614       __ ldx(R19_method/*method*/, R19_method/*method offset*/, R11_scratch1/*class*/);
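      // R19_method now holds the Method* taken from the receiver's vtable entry.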
 3615       // NOTE: for vtable dispatches, the vtable entry will never be
 3616       // null. However it may very well end up in handle_wrong_method
 3617       // if the method is abstract for the particular class.
 3618       __ ld(R11_scratch1, in_bytes(Method::from_compiled_offset()), R19_method);
 3619       // Call target. Either compiled code or C2I adapter.
 3620       __ mtctr(R11_scratch1);
 3621       __ bctrl();
 3622       assert(((MachCallDynamicJavaNode*)this)->ret_addr_offset() == __ offset() - start_offset,
 3623              "Fix constant in ret_addr_offset(), expected %d", __ offset() - start_offset);
 3624     }
 3625     __ post_call_nop();
 3626   %}
 3627 
 3628   // A runtime call.
 3629   enc_class enc_java_to_runtime_call (method meth) %{
 3630 
 3631     C2_MacroAssembler _masm(&cbuf);
 3632     const address start_pc = __ pc();
 3633 
 3634 #if defined(ABI_ELFv2)
 3635     address entry= !($meth$$method) ? NULL : (address)$meth$$method;
 3636     __ call_c(entry, relocInfo::runtime_call_type);
 3637     __ post_call_nop();
 3638 #else
 3639     // The function we're going to call.
 3640     FunctionDescriptor fdtemp;
 3641     const FunctionDescriptor* fd = !($meth$$method) ? &fdtemp : (FunctionDescriptor*)$meth$$method;
 3642 
 3643     Register Rtoc = R12_scratch2;
 3644     // Calculate the method's TOC.
 3645     __ calculate_address_from_global_toc(Rtoc, __ method_toc());
 3646     // Put entry, env and toc into the constant pool; this needs up to 3 constant
 3647     // pool entries. call_c_using_toc will optimize the call.
 3648     bool success = __ call_c_using_toc(fd, relocInfo::runtime_call_type, Rtoc);
 3649     if (!success) {
 3650       ciEnv::current()->record_out_of_memory_failure();
 3651       return;
 3652     }
 3653     __ post_call_nop();
 3654 #endif
 3655 
 3656     // Check the ret_addr_offset.
 3657     assert(((MachCallRuntimeNode*)this)->ret_addr_offset() ==  __ last_calls_return_pc() - start_pc,
 3658            "Fix constant in ret_addr_offset()");
 3659   %}
 3660 
 3661   // Move to ctr for leaf call.
 3662   // This enc_class is needed so that the scheduler gets proper
 3663   // input mapping for latency computation.
 3664   enc_class enc_leaf_call_mtctr(iRegLsrc src) %{
 3665     C2_MacroAssembler _masm(&cbuf);
 3666     __ mtctr($src$$Register);
 3667   %}
 3668 
 3669   // Postalloc expand emitter for runtime leaf calls.
 3670   enc_class postalloc_expand_java_to_runtime_call(method meth, iRegLdst toc) %{
 3671     loadConLNodesTuple loadConLNodes_Entry;
 3672 #if defined(ABI_ELFv2)
 3673     jlong entry_address = (jlong) this->entry_point();
 3674     assert(entry_address, "need address here");
 3675     loadConLNodes_Entry = loadConLNodesTuple_create(ra_, n_toc, new immLOper(entry_address),
 3676                                                     OptoReg::Name(R12_H_num), OptoReg::Name(R12_num));
 3677 #else
 3678     // Get the struct that describes the function we are about to call.
 3679     FunctionDescriptor* fd = (FunctionDescriptor*) this->entry_point();
 3680     assert(fd, "need fd here");
 3681     jlong entry_address = (jlong) fd->entry();
 3682     // new nodes
 3683     loadConLNodesTuple loadConLNodes_Env;
 3684     loadConLNodesTuple loadConLNodes_Toc;
 3685 
 3686     // Create nodes and operands for loading the entry point.
 3687     loadConLNodes_Entry = loadConLNodesTuple_create(ra_, n_toc, new immLOper(entry_address),
 3688                                                     OptoReg::Name(R12_H_num), OptoReg::Name(R12_num));
 3689 
 3690 
 3691     // Create nodes and operands for loading the env pointer.
 3692     if (fd->env() != NULL) {
 3693       loadConLNodes_Env = loadConLNodesTuple_create(ra_, n_toc, new immLOper((jlong) fd->env()),
 3694                                                     OptoReg::Name(R11_H_num), OptoReg::Name(R11_num));
 3695     } else {
 3696       loadConLNodes_Env._large_hi = NULL;
 3697       loadConLNodes_Env._large_lo = NULL;
 3698       loadConLNodes_Env._small    = NULL;
 3699       loadConLNodes_Env._last = new loadConL16Node();
 3700       loadConLNodes_Env._last->_opnds[0] = new iRegLdstOper();
 3701       loadConLNodes_Env._last->_opnds[1] = new immL16Oper(0);
 3702       ra_->set_pair(loadConLNodes_Env._last->_idx, OptoReg::Name(R11_H_num), OptoReg::Name(R11_num));
 3703     }
 3704 
 3705     // Create nodes and operands for loading the TOC pointer.
 3706     loadConLNodes_Toc = loadConLNodesTuple_create(ra_, n_toc, new immLOper((jlong) fd->toc()),
 3707                                                   OptoReg::Name(R2_H_num), OptoReg::Name(R2_num));
 3708 #endif // ABI_ELFv2
 3709     // mtctr node
 3710     MachNode *mtctr = new CallLeafDirect_mtctrNode();
 3711 
 3712     assert(loadConLNodes_Entry._last != NULL, "entry must exist");
 3713     mtctr->add_req(0, loadConLNodes_Entry._last);
 3714 
 3715     mtctr->_opnds[0] = new iRegLdstOper();
 3716     mtctr->_opnds[1] = new iRegLdstOper();
 3717 
 3718     // call node
 3719     MachCallLeafNode *call = new CallLeafDirectNode();
 3720 
 3721     call->_opnds[0] = _opnds[0];
 3722     call->_opnds[1] = new methodOper((intptr_t) entry_address); // May get set later.
 3723 
 3724     // Make the new call node look like the old one.
 3725     call->_name        = _name;
 3726     call->_tf          = _tf;
 3727     call->_entry_point = _entry_point;
 3728     call->_cnt         = _cnt;
 3729     call->_guaranteed_safepoint = false;
 3730     call->_oop_map     = _oop_map;
 3731     guarantee(!_jvms, "You must clone the jvms and adapt the offsets by fix_jvms().");
 3732     call->_jvms        = NULL;
 3733     call->_jvmadj      = _jvmadj;
 3734     call->_in_rms      = _in_rms;
 3735     call->_nesting     = _nesting;
 3736 
 3737     // New call needs all inputs of old call.
 3738     // Req...
 3739     for (uint i = 0; i < req(); ++i) {
 3740       if (i != mach_constant_base_node_input()) {
 3741         call->add_req(in(i));
 3742       }
 3743     }
 3744 
 3745     // These must be required edges, as the registers are live up to
 3746     // the call. Otherwise the constants are handled as kills.
 3747     call->add_req(mtctr);
 3748 #if !defined(ABI_ELFv2)
 3749     call->add_req(loadConLNodes_Env._last);
 3750     call->add_req(loadConLNodes_Toc._last);
 3751 #endif
 3752 
 3753     // ...as well as prec
 3754     for (uint i = req(); i < len(); ++i) {
 3755       call->add_prec(in(i));
 3756     }
 3757 
 3758     // registers
 3759     ra_->set1(mtctr->_idx, OptoReg::Name(SR_CTR_num));
 3760 
 3761     // Insert the new nodes.
 3762     if (loadConLNodes_Entry._large_hi) nodes->push(loadConLNodes_Entry._large_hi);
 3763     if (loadConLNodes_Entry._last)     nodes->push(loadConLNodes_Entry._last);
 3764 #if !defined(ABI_ELFv2)
 3765     if (loadConLNodes_Env._large_hi)   nodes->push(loadConLNodes_Env._large_hi);
 3766     if (loadConLNodes_Env._last)       nodes->push(loadConLNodes_Env._last);
 3767     if (loadConLNodes_Toc._large_hi)   nodes->push(loadConLNodes_Toc._large_hi);
 3768     if (loadConLNodes_Toc._last)       nodes->push(loadConLNodes_Toc._last);
 3769 #endif
 3770     nodes->push(mtctr);
 3771     nodes->push(call);
 3772   %}
 3773 %}
 3774 
 3775 //----------FRAME--------------------------------------------------------------
 3776 // Definition of frame structure and management information.
 3777 
 3778 frame %{
 3779   // These two registers define part of the calling convention between
 3780   // compiled code and the interpreter.
 3781 
 3782   // Inline Cache Register or method for I2C.
 3783   inline_cache_reg(R19); // R19_method
 3784 
 3785   // Optional: name the operand used by cisc-spilling to access
 3786   // [stack_pointer + offset].
 3787   cisc_spilling_operand_name(indOffset);
 3788 
 3789   // Number of stack slots consumed by a Monitor enter.
 3790   sync_stack_slots((frame::jit_monitor_size / VMRegImpl::stack_slot_size));
 3791 
 3792   // Compiled code's Frame Pointer.
 3793   frame_pointer(R1); // R1_SP
 3794 
 3795   // Interpreter stores its frame pointer in a register which is
 3796   // stored to the stack by I2CAdaptors. I2CAdaptors convert from
 3797   // interpreted java to compiled java.
 3798   //
 3799   // R14_state holds pointer to caller's cInterpreter.
 3800   interpreter_frame_pointer(R14); // R14_state
 3801 
 3802   stack_alignment(frame::alignment_in_bytes);
 3803 
 3804   // Number of outgoing stack slots killed above the
 3805   // out_preserve_stack_slots for calls to C. Supports the var-args
 3806   // backing area for register parms.
 3807   //
 3808   varargs_C_out_slots_killed(((frame::native_abi_reg_args_size - frame::jit_out_preserve_size) / VMRegImpl::stack_slot_size));
 3809 
 3810   // The after-PROLOG location of the return address. Location of
 3811   // return address specifies a type (REG or STACK) and a number
 3812   // representing the register number (i.e. - use a register name) or
 3813   // stack slot.
 3814   //
 3815   // A: Link register is stored in stack slot ...
 3816   // M:  ... but it's in the caller's frame according to PPC-64 ABI.
 3817   // J: Therefore, we make sure that the link register is also in R11_scratch1
 3818   //    at the end of the prolog.
 3819   // B: We use R20, now.
 3820   //return_addr(REG R20);
 3821 
 3822   // G: After reading the comments made by all the luminaries on their
 3823   //    failure to tell the compiler where the return address really is,
 3824   //    I hardly dare to try myself.  However, I'm convinced it's in slot
 3825   //    4, which apparently works and saves us some spills.
 3826   return_addr(STACK 4);
 3827 
 3828   // Location of native (C/C++) and interpreter return values. This
 3829   // is specified to be the same as Java. In the 32-bit VM, long
 3830   // values are actually returned from native calls in O0:O1 and
 3831   // returned to the interpreter in I0:I1. The copying to and from
 3832   // the register pairs is done by the appropriate call and epilog
 3833   // opcodes. This simplifies the register allocator.
 3834   c_return_value %{
 3835     assert((ideal_reg >= Op_RegI && ideal_reg <= Op_RegL) ||
 3836             (ideal_reg == Op_RegN && CompressedOops::base() == NULL && CompressedOops::shift() == 0),
 3837             "only return normal values");
 3838     // enum names from opcodes.hpp:    Op_Node Op_Set Op_RegN       Op_RegI       Op_RegP       Op_RegF       Op_RegD       Op_RegL
 3839     static int typeToRegLo[Op_RegL+1] = { 0,   0,     R3_num,       R3_num,       R3_num,       F1_num,       F1_num,       R3_num };
 3840     static int typeToRegHi[Op_RegL+1] = { 0,   0,     OptoReg::Bad, R3_H_num, R3_H_num, OptoReg::Bad, F1_H_num, R3_H_num };
 3841     return OptoRegPair(typeToRegHi[ideal_reg], typeToRegLo[ideal_reg]);
 3842   %}
 3843 
 3844   // Location of compiled Java return values.  Same as C
 3845   return_value %{
 3846     assert((ideal_reg >= Op_RegI && ideal_reg <= Op_RegL) ||
 3847             (ideal_reg == Op_RegN && CompressedOops::base() == NULL && CompressedOops::shift() == 0),
 3848             "only return normal values");
 3849     // enum names from opcodes.hpp:    Op_Node Op_Set Op_RegN       Op_RegI       Op_RegP       Op_RegF       Op_RegD       Op_RegL
 3850     static int typeToRegLo[Op_RegL+1] = { 0,   0,     R3_num,       R3_num,       R3_num,       F1_num,       F1_num,       R3_num };
 3851     static int typeToRegHi[Op_RegL+1] = { 0,   0,     OptoReg::Bad, R3_H_num, R3_H_num, OptoReg::Bad, F1_H_num, R3_H_num };
 3852     return OptoRegPair(typeToRegHi[ideal_reg], typeToRegLo[ideal_reg]);
 3853   %}
 3854 %}
 3855 
 3856 
 3857 //----------ATTRIBUTES---------------------------------------------------------
 3858 
 3859 //----------Operand Attributes-------------------------------------------------
 3860 op_attrib op_cost(1);          // Required cost attribute.
 3861 
 3862 //----------Instruction Attributes---------------------------------------------
 3863 
 3864 // Cost attribute (required).
 3865 ins_attrib ins_cost(DEFAULT_COST);
 3866 
 3867 // Is this instruction a non-matching short branch variant of some
 3868 // long branch? Not required.
 3869 ins_attrib ins_short_branch(0);
 3870 
 3871 ins_attrib ins_is_TrapBasedCheckNode(true);
 3872 
 3873 // Number of constants.
 3874 // This instruction uses the given number of constants
 3875 // (optional attribute).
 3876 // This is needed to determine, before postalloc_expand, whether the constant
 3877 // pool will exceed 4000 entries; at that point the overall number of constants
 3878 // is determined. It's also used to compute the constant pool size
 3879 // in Output().
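// Hypothetical example (for illustration only): an instruct that materializes
// one constant via the TOC would declare 'ins_num_consts(1)' so the constant
// is counted towards this estimate.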
 3880 ins_attrib ins_num_consts(0);
 3881 
 3882 // Required alignment attribute (must be a power of 2) specifies the
 3883 // alignment that some part of the instruction (not necessarily the
 3884 // start) requires. If > 1, a compute_padding() function must be
 3885 // provided for the instruction.
 3886 ins_attrib ins_alignment(1);
 3887 
 3888 // Enforce/prohibit rematerializations.
 3889 // - If an instruction is attributed with 'ins_cannot_rematerialize(true)'
 3890 //   then rematerialization of that instruction is prohibited and the
 3891 //   instruction's value will be spilled if necessary.
 3892 //   Causes MachNode::rematerialize() to return false.
 3893 // - If an instruction is attributed with 'ins_should_rematerialize(true)'
 3894 //   then rematerialization should be enforced and a copy of the instruction
 3895 //   should be inserted if possible; rematerialization is not guaranteed.
 3896 //   Note: this may result in rematerializations in front of every use.
 3897 //   Allows MachNode::rematerialize() to return true.
 3898 // (optional attribute)
 3899 ins_attrib ins_cannot_rematerialize(false);
 3900 ins_attrib ins_should_rematerialize(false);
 3901 
 3902 // Instruction has variable size depending on alignment.
 3903 ins_attrib ins_variable_size_depending_on_alignment(false);
 3904 
 3905 // Instruction is a nop.
 3906 ins_attrib ins_is_nop(false);
 3907 
 3908 // Instruction is mapped to a MachIfFastLock node (instead of MachFastLock).
 3909 ins_attrib ins_use_mach_if_fast_lock_node(false);
 3910 
 3911 // Field for the toc offset of a constant.
 3912 //
 3913 // This is needed if the toc offset is not encodable as an immediate in
 3914 // the PPC load instruction. If so, the upper (hi) bits of the offset are
 3915 // added to the toc, and from this a load with immediate is performed.
 3916 // With postalloc expand, we get two nodes that require the same offset
 3917 // but which don't know about each other. The offset is only known
 3918 // when the constant is added to the constant pool during emitting.
 3919 // It is generated in the 'hi'-node adding the upper bits, and saved
 3920 // in this node.  The 'lo'-node has a link to the 'hi'-node and reads
 3921 // the offset from there when it gets encoded.
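// Rough sketch of the resulting code (illustration, not emitted verbatim):
//   addis Rtmp, Rtoc, offset_hi   // 'hi'-node: add the upper bits to the toc
//   ld    Rdst, offset_lo(Rtmp)   // 'lo'-node: load with the low bits
// (cf. the addis with MacroAssembler::largeoffset_si16_si16_hi above).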
 3922 ins_attrib ins_field_const_toc_offset(0);
 3923 ins_attrib ins_field_const_toc_offset_hi_node(0);
 3924 
 3925 // A field that can hold the instructions offset in the code buffer.
 3926 // Set in the nodes emitter.
 3927 ins_attrib ins_field_cbuf_insts_offset(-1);
 3928 
 3929 // Fields for referencing a call's load-IC-node.
 3930 // If the toc offset cannot be encoded as an immediate in a load, we
 3931 // use two nodes.
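// These fields are read by enc_java_dynamic_call_sched, which uses the load-IC
// node's recorded instruction offset to place the virtual_call_Relocation at
// the IC load.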
 3932 ins_attrib ins_field_load_ic_hi_node(0);
 3933 ins_attrib ins_field_load_ic_node(0);
 3934 
 3935 //----------OPERANDS-----------------------------------------------------------
 3936 // Operand definitions must precede instruction definitions for correct
 3937 // parsing in the ADLC because operands constitute user defined types
 3938 // which are used in instruction definitions.
 3939 //
 3940 // Formats are generated automatically for constants and base registers.
 3941 
 3942 operand vecX() %{
 3943   constraint(ALLOC_IN_RC(vs_reg));
 3944   match(VecX);
 3945 
 3946   format %{ %}
 3947   interface(REG_INTER);
 3948 %}
 3949 
 3950 //----------Simple Operands----------------------------------------------------
 3951 // Immediate Operands
 3952 
 3953 // Integer Immediate: 32-bit
 3954 operand immI() %{
 3955   match(ConI);
 3956   op_cost(40);
 3957   format %{ %}
 3958   interface(CONST_INTER);
 3959 %}
 3960 
 3961 operand immI8() %{
 3962   predicate(Assembler::is_simm(n->get_int(), 8));
 3963   op_cost(0);
 3964   match(ConI);
 3965   format %{ %}
 3966   interface(CONST_INTER);
 3967 %}
 3968 
 3969 // Integer Immediate: 16-bit
 3970 operand immI16() %{
 3971   predicate(Assembler::is_simm(n->get_int(), 16));
 3972   op_cost(0);
 3973   match(ConI);
 3974   format %{ %}
 3975   interface(CONST_INTER);
 3976 %}
 3977 
 3978 // Integer Immediate: 32-bit, where lowest 16 bits are 0x0000.
 3979 operand immIhi16() %{
 3980   predicate(((n->get_int() & 0xffff0000) != 0) && ((n->get_int() & 0xffff) == 0));
 3981   match(ConI);
 3982   op_cost(0);
 3983   format %{ %}
 3984   interface(CONST_INTER);
 3985 %}
 3986 
 3987 // Integer Immediate: 32-bit immediate for prefixed addi and load/store.
 3988 operand immI32() %{
 3989   predicate(PowerArchitecturePPC64 >= 10);
 3990   op_cost(0);
 3991   match(ConI);
 3992   format %{ %}
 3993   interface(CONST_INTER);
 3994 %}
 3995 
 3996 operand immInegpow2() %{
 3997   predicate(is_power_of_2(-(juint)(n->get_int())));
 3998   match(ConI);
 3999   op_cost(0);
 4000   format %{ %}
 4001   interface(CONST_INTER);
 4002 %}
 4003 
 4004 operand immIpow2minus1() %{
 4005   predicate(is_power_of_2((juint)(n->get_int()) + 1u));
 4006   match(ConI);
 4007   op_cost(0);
 4008   format %{ %}
 4009   interface(CONST_INTER);
 4010 %}
 4011 
 4012 operand immIpowerOf2() %{
 4013   predicate(is_power_of_2((juint)(n->get_int())));
 4014   match(ConI);
 4015   op_cost(0);
 4016   format %{ %}
 4017   interface(CONST_INTER);
 4018 %}
 4019 
 4020 // Unsigned Integer Immediate: the values 0-31
 4021 operand uimmI5() %{
 4022   predicate(Assembler::is_uimm(n->get_int(), 5));
 4023   match(ConI);
 4024   op_cost(0);
 4025   format %{ %}
 4026   interface(CONST_INTER);
 4027 %}
 4028 
 4029 // Unsigned Integer Immediate: 6-bit
 4030 operand uimmI6() %{
 4031   predicate(Assembler::is_uimm(n->get_int(), 6));
 4032   match(ConI);
 4033   op_cost(0);
 4034   format %{ %}
 4035   interface(CONST_INTER);
 4036 %}
 4037 
 4038 // Unsigned Integer Immediate: 6-bit int, greater than or equal to 32
 4039 operand uimmI6_ge32() %{
 4040   predicate(Assembler::is_uimm(n->get_int(), 6) && n->get_int() >= 32);
 4041   match(ConI);
 4042   op_cost(0);
 4043   format %{ %}
 4044   interface(CONST_INTER);
 4045 %}
 4046 
 4047 // Unsigned Integer Immediate: 15-bit
 4048 operand uimmI15() %{
 4049   predicate(Assembler::is_uimm(n->get_int(), 15));
 4050   match(ConI);
 4051   op_cost(0);
 4052   format %{ %}
 4053   interface(CONST_INTER);
 4054 %}
 4055 
 4056 // Unsigned Integer Immediate: 16-bit
 4057 operand uimmI16() %{
 4058   predicate(Assembler::is_uimm(n->get_int(), 16));
 4059   match(ConI);
 4060   op_cost(0);
 4061   format %{ %}
 4062   interface(CONST_INTER);
 4063 %}
 4064 
 4065 // constant 'int 0'.
 4066 operand immI_0() %{
 4067   predicate(n->get_int() == 0);
 4068   match(ConI);
 4069   op_cost(0);
 4070   format %{ %}
 4071   interface(CONST_INTER);
 4072 %}
 4073 
 4074 // constant 'int 1'.
 4075 operand immI_1() %{
 4076   predicate(n->get_int() == 1);
 4077   match(ConI);
 4078   op_cost(0);
 4079   format %{ %}
 4080   interface(CONST_INTER);
 4081 %}
 4082 
 4083 // constant 'int -1'.
 4084 operand immI_minus1() %{
 4085   predicate(n->get_int() == -1);
 4086   match(ConI);
 4087   op_cost(0);
 4088   format %{ %}
 4089   interface(CONST_INTER);
 4090 %}
 4091 
 4092 // int value 16.
 4093 operand immI_16() %{
 4094   predicate(n->get_int() == 16);
 4095   match(ConI);
 4096   op_cost(0);
 4097   format %{ %}
 4098   interface(CONST_INTER);
 4099 %}
 4100 
 4101 // int value 24.
 4102 operand immI_24() %{
 4103   predicate(n->get_int() == 24);
 4104   match(ConI);
 4105   op_cost(0);
 4106   format %{ %}
 4107   interface(CONST_INTER);
 4108 %}
 4109 
 4110 // Compressed oops constants
 4111 // Pointer Immediate
 4112 operand immN() %{
 4113   match(ConN);
 4114 
 4115   op_cost(10);
 4116   format %{ %}
 4117   interface(CONST_INTER);
 4118 %}
 4119 
 4120 // NULL Pointer Immediate
 4121 operand immN_0() %{
 4122   predicate(n->get_narrowcon() == 0);
 4123   match(ConN);
 4124 
 4125   op_cost(0);
 4126   format %{ %}
 4127   interface(CONST_INTER);
 4128 %}
 4129 
 4130 // Compressed klass constants
 4131 operand immNKlass() %{
 4132   match(ConNKlass);
 4133 
 4134   op_cost(0);
 4135   format %{ %}
 4136   interface(CONST_INTER);
 4137 %}
 4138 
 4139 // This operand can be used to avoid matching of an instruct
 4140 // with chain rule.
 4141 operand immNKlass_NM() %{
 4142   match(ConNKlass);
 4143   predicate(false);
 4144   op_cost(0);
 4145   format %{ %}
 4146   interface(CONST_INTER);
 4147 %}
 4148 
 4149 // Pointer Immediate: 64-bit
 4150 operand immP() %{
 4151   match(ConP);
 4152   op_cost(0);
 4153   format %{ %}
 4154   interface(CONST_INTER);
 4155 %}
 4156 
 4157 // Operand to avoid match of loadConP.
 4158 // This operand can be used to avoid matching of an instruct
 4159 // with chain rule.
 4160 operand immP_NM() %{
 4161   match(ConP);
 4162   predicate(false);
 4163   op_cost(0);
 4164   format %{ %}
 4165   interface(CONST_INTER);
 4166 %}
 4167 
 4168 // constant 'pointer 0'.
 4169 operand immP_0() %{
 4170   predicate(n->get_ptr() == 0);
 4171   match(ConP);
 4172   op_cost(0);
 4173   format %{ %}
 4174   interface(CONST_INTER);
 4175 %}
 4176 
 4177 // pointer 0x0 or 0x1
 4178 operand immP_0or1() %{
 4179   predicate((n->get_ptr() == 0) || (n->get_ptr() == 1));
 4180   match(ConP);
 4181   op_cost(0);
 4182   format %{ %}
 4183   interface(CONST_INTER);
 4184 %}
 4185 
 4186 operand immL() %{
 4187   match(ConL);
 4188   op_cost(40);
 4189   format %{ %}
 4190   interface(CONST_INTER);
 4191 %}
 4192 
 4193 operand immLmax30() %{
 4194   predicate((n->get_long() <= 30));
 4195   match(ConL);
 4196   op_cost(0);
 4197   format %{ %}
 4198   interface(CONST_INTER);
 4199 %}
 4200 
 4201 // Long Immediate: 16-bit
 4202 operand immL16() %{
 4203   predicate(Assembler::is_simm(n->get_long(), 16));
 4204   match(ConL);
 4205   op_cost(0);
 4206   format %{ %}
 4207   interface(CONST_INTER);
 4208 %}
 4209 
 4210 // Long Immediate: 16-bit, 4-aligned
 4211 operand immL16Alg4() %{
 4212   predicate(Assembler::is_simm(n->get_long(), 16) && ((n->get_long() & 0x3) == 0));
 4213   match(ConL);
 4214   op_cost(0);
 4215   format %{ %}
 4216   interface(CONST_INTER);
 4217 %}
 4218 
 4219 // Long Immediate: 32-bit, where lowest 16 bits are 0x0000.
 4220 operand immL32hi16() %{
 4221   predicate(Assembler::is_simm(n->get_long(), 32) && ((n->get_long() & 0xffffL) == 0L));
 4222   match(ConL);
 4223   op_cost(0);
 4224   format %{ %}
 4225   interface(CONST_INTER);
 4226 %}
 4227 
 4228 // Long Immediate: 32-bit
 4229 operand immL32() %{
 4230   predicate(Assembler::is_simm(n->get_long(), 32));
 4231   match(ConL);
 4232   op_cost(0);
 4233   format %{ %}
 4234   interface(CONST_INTER);
 4235 %}
 4236 
 4237 // Long Immediate: 34-bit, immediate field in prefixed addi and load/store.
 4238 operand immL34() %{
 4239   predicate(PowerArchitecturePPC64 >= 10 && Assembler::is_simm(n->get_long(), 34));
 4240   match(ConL);
 4241   op_cost(0);
 4242   format %{ %}
 4243   interface(CONST_INTER);
 4244 %}
 4245 
 4246 // Long Immediate: 64-bit, where highest 16 bits are not 0x0000.
 4247 operand immLhighest16() %{
 4248   predicate((n->get_long() & 0xffff000000000000L) != 0L && (n->get_long() & 0x0000ffffffffffffL) == 0L);
 4249   match(ConL);
 4250   op_cost(0);
 4251   format %{ %}
 4252   interface(CONST_INTER);
 4253 %}
 4254 
 4255 operand immLnegpow2() %{
 4256   predicate(is_power_of_2(-(julong)(n->get_long())));
 4257   match(ConL);
 4258   op_cost(0);
 4259   format %{ %}
 4260   interface(CONST_INTER);
 4261 %}
 4262 
 4263 operand immLpow2minus1() %{
 4264   predicate(is_power_of_2((julong)(n->get_long()) + 1ull));
 4265   match(ConL);
 4266   op_cost(0);
 4267   format %{ %}
 4268   interface(CONST_INTER);
 4269 %}
 4270 
 4271 // constant 'long 0'.
 4272 operand immL_0() %{
 4273   predicate(n->get_long() == 0L);
 4274   match(ConL);
 4275   op_cost(0);
 4276   format %{ %}
 4277   interface(CONST_INTER);
 4278 %}
 4279 
 4280 // constant 'long -1'.
 4281 operand immL_minus1() %{
 4282   predicate(n->get_long() == -1L);
 4283   match(ConL);
 4284   op_cost(0);
 4285   format %{ %}
 4286   interface(CONST_INTER);
 4287 %}
 4288 
 4289 // Long Immediate: low 32-bit mask
 4290 operand immL_32bits() %{
 4291   predicate(n->get_long() == 0xFFFFFFFFL);
 4292   match(ConL);
 4293   op_cost(0);
 4294   format %{ %}
 4295   interface(CONST_INTER);
 4296 %}
 4297 
 4298 // Unsigned Long Immediate: 16-bit
 4299 operand uimmL16() %{
 4300   predicate(Assembler::is_uimm(n->get_long(), 16));
 4301   match(ConL);
 4302   op_cost(0);
 4303   format %{ %}
 4304   interface(CONST_INTER);
 4305 %}
 4306 
 4307 // Float Immediate
 4308 operand immF() %{
 4309   match(ConF);
 4310   op_cost(40);
 4311   format %{ %}
 4312   interface(CONST_INTER);
 4313 %}
 4314 
 4315 // Float Immediate: +0.0f.
 4316 operand immF_0() %{
 4317   predicate(jint_cast(n->getf()) == 0);
 4318   match(ConF);
 4319 
 4320   op_cost(0);
 4321   format %{ %}
 4322   interface(CONST_INTER);
 4323 %}
 4324 
 4325 // Double Immediate
 4326 operand immD() %{
 4327   match(ConD);
 4328   op_cost(40);
 4329   format %{ %}
 4330   interface(CONST_INTER);
 4331 %}
 4332 
 4333 // Double Immediate: +0.0d.
 4334 operand immD_0() %{
 4335   predicate(jlong_cast(n->getd()) == 0);
 4336   match(ConD);
 4337 
 4338   op_cost(0);
 4339   format %{ %}
 4340   interface(CONST_INTER);
 4341 %}
 4342 
 4343 // Integer Register Operands
 4344 // Integer Destination Register
 4345 // See definition of reg_class bits32_reg_rw.
 4346 operand iRegIdst() %{
 4347   constraint(ALLOC_IN_RC(bits32_reg_rw));
 4348   match(RegI);
 4349   match(rscratch1RegI);
 4350   match(rscratch2RegI);
 4351   match(rarg1RegI);
 4352   match(rarg2RegI);
 4353   match(rarg3RegI);
 4354   match(rarg4RegI);
 4355   format %{ %}
 4356   interface(REG_INTER);
 4357 %}
 4358 
 4359 // Integer Source Register
 4360 // See definition of reg_class bits32_reg_ro.
 4361 operand iRegIsrc() %{
 4362   constraint(ALLOC_IN_RC(bits32_reg_ro));
 4363   match(RegI);
 4364   match(rscratch1RegI);
 4365   match(rscratch2RegI);
 4366   match(rarg1RegI);
 4367   match(rarg2RegI);
 4368   match(rarg3RegI);
 4369   match(rarg4RegI);
 4370   format %{ %}
 4371   interface(REG_INTER);
 4372 %}
 4373 
 4374 operand rscratch1RegI() %{
 4375   constraint(ALLOC_IN_RC(rscratch1_bits32_reg));
 4376   match(iRegIdst);
 4377   format %{ %}
 4378   interface(REG_INTER);
 4379 %}
 4380 
 4381 operand rscratch2RegI() %{
 4382   constraint(ALLOC_IN_RC(rscratch2_bits32_reg));
 4383   match(iRegIdst);
 4384   format %{ %}
 4385   interface(REG_INTER);
 4386 %}
 4387 
 4388 operand rarg1RegI() %{
 4389   constraint(ALLOC_IN_RC(rarg1_bits32_reg));
 4390   match(iRegIdst);
 4391   format %{ %}
 4392   interface(REG_INTER);
 4393 %}
 4394 
 4395 operand rarg2RegI() %{
 4396   constraint(ALLOC_IN_RC(rarg2_bits32_reg));
 4397   match(iRegIdst);
 4398   format %{ %}
 4399   interface(REG_INTER);
 4400 %}
 4401 
 4402 operand rarg3RegI() %{
 4403   constraint(ALLOC_IN_RC(rarg3_bits32_reg));
 4404   match(iRegIdst);
 4405   format %{ %}
 4406   interface(REG_INTER);
 4407 %}
 4408 
 4409 operand rarg4RegI() %{
 4410   constraint(ALLOC_IN_RC(rarg4_bits32_reg));
 4411   match(iRegIdst);
 4412   format %{ %}
 4413   interface(REG_INTER);
 4414 %}
 4415 
 4416 operand rarg1RegL() %{
 4417   constraint(ALLOC_IN_RC(rarg1_bits64_reg));
 4418   match(iRegLdst);
 4419   format %{ %}
 4420   interface(REG_INTER);
 4421 %}
 4422 
 4423 operand rarg2RegL() %{
 4424   constraint(ALLOC_IN_RC(rarg2_bits64_reg));
 4425   match(iRegLdst);
 4426   format %{ %}
 4427   interface(REG_INTER);
 4428 %}
 4429 
 4430 operand rarg3RegL() %{
 4431   constraint(ALLOC_IN_RC(rarg3_bits64_reg));
 4432   match(iRegLdst);
 4433   format %{ %}
 4434   interface(REG_INTER);
 4435 %}
 4436 
 4437 operand rarg4RegL() %{
 4438   constraint(ALLOC_IN_RC(rarg4_bits64_reg));
 4439   match(iRegLdst);
 4440   format %{ %}
 4441   interface(REG_INTER);
 4442 %}
 4443 
 4444 // Pointer Destination Register
 4445 // See definition of reg_class bits64_reg_rw.
 4446 operand iRegPdst() %{
 4447   constraint(ALLOC_IN_RC(bits64_reg_rw));
 4448   match(RegP);
 4449   match(rscratch1RegP);
 4450   match(rscratch2RegP);
 4451   match(rarg1RegP);
 4452   match(rarg2RegP);
 4453   match(rarg3RegP);
 4454   match(rarg4RegP);
 4455   format %{ %}
 4456   interface(REG_INTER);
 4457 %}
 4458 
 4459 // Pointer Destination Register
 4460 // Operand not using r11 and r12 (killed in epilog).
 4461 operand iRegPdstNoScratch() %{
 4462   constraint(ALLOC_IN_RC(bits64_reg_leaf_call));
 4463   match(RegP);
 4464   match(rarg1RegP);
 4465   match(rarg2RegP);
 4466   match(rarg3RegP);
 4467   match(rarg4RegP);
 4468   format %{ %}
 4469   interface(REG_INTER);
 4470 %}
 4471 
 4472 // Pointer Source Register
 4473 // See definition of reg_class bits64_reg_ro.
 4474 operand iRegPsrc() %{
 4475   constraint(ALLOC_IN_RC(bits64_reg_ro));
 4476   match(RegP);
 4477   match(iRegPdst);
 4478   match(rscratch1RegP);
 4479   match(rscratch2RegP);
 4480   match(rarg1RegP);
 4481   match(rarg2RegP);
 4482   match(rarg3RegP);
 4483   match(rarg4RegP);
 4484   match(threadRegP);
 4485   format %{ %}
 4486   interface(REG_INTER);
 4487 %}
 4488 
 4489 // Thread operand.
 4490 operand threadRegP() %{
 4491   constraint(ALLOC_IN_RC(thread_bits64_reg));
 4492   match(iRegPdst);
 4493   format %{ "R16" %}
 4494   interface(REG_INTER);
 4495 %}
 4496 
 4497 operand rscratch1RegP() %{
 4498   constraint(ALLOC_IN_RC(rscratch1_bits64_reg));
 4499   match(iRegPdst);
 4500   format %{ "R11" %}
 4501   interface(REG_INTER);
 4502 %}
 4503 
 4504 operand rscratch2RegP() %{
 4505   constraint(ALLOC_IN_RC(rscratch2_bits64_reg));
 4506   match(iRegPdst);
 4507   format %{ %}
 4508   interface(REG_INTER);
 4509 %}
 4510 
 4511 operand rarg1RegP() %{
 4512   constraint(ALLOC_IN_RC(rarg1_bits64_reg));
 4513   match(iRegPdst);
 4514   format %{ %}
 4515   interface(REG_INTER);
 4516 %}
 4517 
 4518 operand rarg2RegP() %{
 4519   constraint(ALLOC_IN_RC(rarg2_bits64_reg));
 4520   match(iRegPdst);
 4521   format %{ %}
 4522   interface(REG_INTER);
 4523 %}
 4524 
 4525 operand rarg3RegP() %{
 4526   constraint(ALLOC_IN_RC(rarg3_bits64_reg));
 4527   match(iRegPdst);
 4528   format %{ %}
 4529   interface(REG_INTER);
 4530 %}
 4531 
 4532 operand rarg4RegP() %{
 4533   constraint(ALLOC_IN_RC(rarg4_bits64_reg));
 4534   match(iRegPdst);
 4535   format %{ %}
 4536   interface(REG_INTER);
 4537 %}
 4538 
 4539 operand iRegNsrc() %{
 4540   constraint(ALLOC_IN_RC(bits32_reg_ro));
 4541   match(RegN);
 4542   match(iRegNdst);
 4543 
 4544   format %{ %}
 4545   interface(REG_INTER);
 4546 %}
 4547 
 4548 operand iRegNdst() %{
 4549   constraint(ALLOC_IN_RC(bits32_reg_rw));
 4550   match(RegN);
 4551 
 4552   format %{ %}
 4553   interface(REG_INTER);
 4554 %}
 4555 
 4556 // Long Destination Register
 4557 // See definition of reg_class bits64_reg_rw.
 4558 operand iRegLdst() %{
 4559   constraint(ALLOC_IN_RC(bits64_reg_rw));
 4560   match(RegL);
 4561   match(rscratch1RegL);
 4562   match(rscratch2RegL);
 4563   format %{ %}
 4564   interface(REG_INTER);
 4565 %}
 4566 
 4567 // Long Source Register
 4568 // See definition of reg_class bits64_reg_ro.
 4569 operand iRegLsrc() %{
 4570   constraint(ALLOC_IN_RC(bits64_reg_ro));
 4571   match(RegL);
 4572   match(iRegLdst);
 4573   match(rscratch1RegL);
 4574   match(rscratch2RegL);
 4575   format %{ %}
 4576   interface(REG_INTER);
 4577 %}
 4578 
 4579 // Special operand for ConvL2I.
 4580 operand iRegL2Isrc(iRegLsrc reg) %{
 4581   constraint(ALLOC_IN_RC(bits64_reg_ro));
 4582   match(ConvL2I reg);
 4583   format %{ "ConvL2I($reg)" %}
 4584   interface(REG_INTER)
 4585 %}
 4586 
 4587 operand rscratch1RegL() %{
 4588   constraint(ALLOC_IN_RC(rscratch1_bits64_reg));
 4589   match(RegL);
 4590   format %{ %}
 4591   interface(REG_INTER);
 4592 %}
 4593 
 4594 operand rscratch2RegL() %{
 4595   constraint(ALLOC_IN_RC(rscratch2_bits64_reg));
 4596   match(RegL);
 4597   format %{ %}
 4598   interface(REG_INTER);
 4599 %}
 4600 
 4601 // Condition Code Flag Registers
 4602 operand flagsReg() %{
 4603   constraint(ALLOC_IN_RC(int_flags));
 4604   match(RegFlags);
 4605   format %{ %}
 4606   interface(REG_INTER);
 4607 %}
 4608 
 4609 operand flagsRegSrc() %{
 4610   constraint(ALLOC_IN_RC(int_flags_ro));
 4611   match(RegFlags);
 4612   match(flagsReg);
 4613   match(flagsRegCR0);
 4614   format %{ %}
 4615   interface(REG_INTER);
 4616 %}
 4617 
 4618 // Condition Code Flag Register CR0
 4619 operand flagsRegCR0() %{
 4620   constraint(ALLOC_IN_RC(int_flags_CR0));
 4621   match(RegFlags);
 4622   format %{ "CR0" %}
 4623   interface(REG_INTER);
 4624 %}
 4625 
 4626 operand flagsRegCR1() %{
 4627   constraint(ALLOC_IN_RC(int_flags_CR1));
 4628   match(RegFlags);
 4629   format %{ "CR1" %}
 4630   interface(REG_INTER);
 4631 %}
 4632 
 4633 operand flagsRegCR6() %{
 4634   constraint(ALLOC_IN_RC(int_flags_CR6));
 4635   match(RegFlags);
 4636   format %{ "CR6" %}
 4637   interface(REG_INTER);
 4638 %}
 4639 
 4640 operand regCTR() %{
 4641   constraint(ALLOC_IN_RC(ctr_reg));
 4642   // RegFlags should work. Introducing a RegSpecial type would cause a
 4643   // lot of changes.
 4644   match(RegFlags);
 4645   format %{"SR_CTR" %}
 4646   interface(REG_INTER);
 4647 %}
 4648 
 4649 operand regD() %{
 4650   constraint(ALLOC_IN_RC(dbl_reg));
 4651   match(RegD);
 4652   format %{ %}
 4653   interface(REG_INTER);
 4654 %}
 4655 
 4656 operand regF() %{
 4657   constraint(ALLOC_IN_RC(flt_reg));
 4658   match(RegF);
 4659   format %{ %}
 4660   interface(REG_INTER);
 4661 %}
 4662 
 4663 // Special Registers
 4664 
 4665 // Method Register
 4666 operand inline_cache_regP(iRegPdst reg) %{
 4667   constraint(ALLOC_IN_RC(r19_bits64_reg)); // inline_cache_reg
 4668   match(reg);
 4669   format %{ %}
 4670   interface(REG_INTER);
 4671 %}
 4672 
 4673 // Operands to remove register moves in unscaled mode.
 4674 // Match read/write registers with an EncodeP node if neither shift nor add are required.
 4675 operand iRegP2N(iRegPsrc reg) %{
  predicate(false /* TODO: PPC port MatchDecodeNodes */ && CompressedOops::shift() == 0);
 4677   constraint(ALLOC_IN_RC(bits64_reg_ro));
 4678   match(EncodeP reg);
 4679   format %{ "$reg" %}
 4680   interface(REG_INTER)
 4681 %}
 4682 
 4683 operand iRegN2P(iRegNsrc reg) %{
 4684   predicate(false /* TODO: PPC port MatchDecodeNodes*/);
 4685   constraint(ALLOC_IN_RC(bits32_reg_ro));
 4686   match(DecodeN reg);
 4687   format %{ "$reg" %}
 4688   interface(REG_INTER)
 4689 %}
 4690 
 4691 operand iRegN2P_klass(iRegNsrc reg) %{
 4692   predicate(CompressedKlassPointers::base() == NULL && CompressedKlassPointers::shift() == 0);
 4693   constraint(ALLOC_IN_RC(bits32_reg_ro));
 4694   match(DecodeNKlass reg);
 4695   format %{ "$reg" %}
 4696   interface(REG_INTER)
 4697 %}
 4698 
 4699 //----------Complex Operands---------------------------------------------------
 4700 // Indirect Memory Reference
 4701 operand indirect(iRegPsrc reg) %{
 4702   constraint(ALLOC_IN_RC(bits64_reg_ro));
 4703   match(reg);
 4704   op_cost(100);
 4705   format %{ "[$reg]" %}
 4706   interface(MEMORY_INTER) %{
 4707     base($reg);
 4708     index(0x0);
 4709     scale(0x0);
 4710     disp(0x0);
 4711   %}
 4712 %}
 4713 
 4714 // Indirect with Offset
 4715 operand indOffset16(iRegPsrc reg, immL16 offset) %{
 4716   constraint(ALLOC_IN_RC(bits64_reg_ro));
 4717   match(AddP reg offset);
 4718   op_cost(100);
 4719   format %{ "[$reg + $offset]" %}
 4720   interface(MEMORY_INTER) %{
 4721     base($reg);
 4722     index(0x0);
 4723     scale(0x0);
 4724     disp($offset);
 4725   %}
 4726 %}
 4727 
 4728 // Indirect with 4-aligned Offset
 4729 operand indOffset16Alg4(iRegPsrc reg, immL16Alg4 offset) %{
 4730   constraint(ALLOC_IN_RC(bits64_reg_ro));
 4731   match(AddP reg offset);
 4732   op_cost(100);
 4733   format %{ "[$reg + $offset]" %}
 4734   interface(MEMORY_INTER) %{
 4735     base($reg);
 4736     index(0x0);
 4737     scale(0x0);
 4738     disp($offset);
 4739   %}
 4740 %}
 4741 
 4742 //----------Complex Operands for Compressed OOPs-------------------------------
 4743 // Compressed OOPs with narrow_oop_shift == 0.
 4744 
 4745 // Indirect Memory Reference, compressed OOP
 4746 operand indirectNarrow(iRegNsrc reg) %{
 4747   predicate(false /* TODO: PPC port MatchDecodeNodes*/);
 4748   constraint(ALLOC_IN_RC(bits64_reg_ro));
 4749   match(DecodeN reg);
 4750   op_cost(100);
 4751   format %{ "[$reg]" %}
 4752   interface(MEMORY_INTER) %{
 4753     base($reg);
 4754     index(0x0);
 4755     scale(0x0);
 4756     disp(0x0);
 4757   %}
 4758 %}
 4759 
 4760 operand indirectNarrow_klass(iRegNsrc reg) %{
 4761   predicate(CompressedKlassPointers::base() == NULL && CompressedKlassPointers::shift() == 0);
 4762   constraint(ALLOC_IN_RC(bits64_reg_ro));
 4763   match(DecodeNKlass reg);
 4764   op_cost(100);
 4765   format %{ "[$reg]" %}
 4766   interface(MEMORY_INTER) %{
 4767     base($reg);
 4768     index(0x0);
 4769     scale(0x0);
 4770     disp(0x0);
 4771   %}
 4772 %}
 4773 
 4774 // Indirect with Offset, compressed OOP
 4775 operand indOffset16Narrow(iRegNsrc reg, immL16 offset) %{
 4776   predicate(false /* TODO: PPC port MatchDecodeNodes*/);
 4777   constraint(ALLOC_IN_RC(bits64_reg_ro));
 4778   match(AddP (DecodeN reg) offset);
 4779   op_cost(100);
 4780   format %{ "[$reg + $offset]" %}
 4781   interface(MEMORY_INTER) %{
 4782     base($reg);
 4783     index(0x0);
 4784     scale(0x0);
 4785     disp($offset);
 4786   %}
 4787 %}
 4788 
 4789 operand indOffset16Narrow_klass(iRegNsrc reg, immL16 offset) %{
 4790   predicate(CompressedKlassPointers::base() == NULL && CompressedKlassPointers::shift() == 0);
 4791   constraint(ALLOC_IN_RC(bits64_reg_ro));
 4792   match(AddP (DecodeNKlass reg) offset);
 4793   op_cost(100);
 4794   format %{ "[$reg + $offset]" %}
 4795   interface(MEMORY_INTER) %{
 4796     base($reg);
 4797     index(0x0);
 4798     scale(0x0);
 4799     disp($offset);
 4800   %}
 4801 %}
 4802 
 4803 // Indirect with 4-aligned Offset, compressed OOP
 4804 operand indOffset16NarrowAlg4(iRegNsrc reg, immL16Alg4 offset) %{
 4805   predicate(false /* TODO: PPC port MatchDecodeNodes*/);
 4806   constraint(ALLOC_IN_RC(bits64_reg_ro));
 4807   match(AddP (DecodeN reg) offset);
 4808   op_cost(100);
 4809   format %{ "[$reg + $offset]" %}
 4810   interface(MEMORY_INTER) %{
 4811     base($reg);
 4812     index(0x0);
 4813     scale(0x0);
 4814     disp($offset);
 4815   %}
 4816 %}
 4817 
 4818 operand indOffset16NarrowAlg4_klass(iRegNsrc reg, immL16Alg4 offset) %{
 4819   predicate(CompressedKlassPointers::base() == NULL && CompressedKlassPointers::shift() == 0);
 4820   constraint(ALLOC_IN_RC(bits64_reg_ro));
 4821   match(AddP (DecodeNKlass reg) offset);
 4822   op_cost(100);
 4823   format %{ "[$reg + $offset]" %}
 4824   interface(MEMORY_INTER) %{
 4825     base($reg);
 4826     index(0x0);
 4827     scale(0x0);
 4828     disp($offset);
 4829   %}
 4830 %}
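
// Example (sketch, not an additional rule): with CompressedKlassPointers::base() == NULL
// and CompressedKlassPointers::shift() == 0, an address of the form
// (AddP (DecodeNKlass narrow_klass) offset) is matched directly by
// indOffset16Narrow_klass, so the access uses the narrow register as base
// (e.g. "LWZ dst, offset(Rnarrow)") instead of first materializing the decoded
// 64-bit pointer. The plain DecodeN variants are currently disabled via
// predicate(false), see the TODOs above.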
 4831 
 4832 //----------Special Memory Operands--------------------------------------------
 4833 // Stack Slot Operand
 4834 //
 4835 // This operand is used for loading and storing temporary values on
 4836 // the stack where a match requires a value to flow through memory.
 4837 operand stackSlotI(sRegI reg) %{
 4838   constraint(ALLOC_IN_RC(stack_slots));
 4839   op_cost(100);
 4840   //match(RegI);
 4841   format %{ "[sp+$reg]" %}
 4842   interface(MEMORY_INTER) %{
 4843     base(0x1);   // R1_SP
 4844     index(0x0);
 4845     scale(0x0);
 4846     disp($reg);  // Stack Offset
 4847   %}
 4848 %}
 4849 
 4850 operand stackSlotL(sRegL reg) %{
 4851   constraint(ALLOC_IN_RC(stack_slots));
 4852   op_cost(100);
 4853   //match(RegL);
 4854   format %{ "[sp+$reg]" %}
 4855   interface(MEMORY_INTER) %{
 4856     base(0x1);   // R1_SP
 4857     index(0x0);
 4858     scale(0x0);
 4859     disp($reg);  // Stack Offset
 4860   %}
 4861 %}
 4862 
 4863 operand stackSlotP(sRegP reg) %{
 4864   constraint(ALLOC_IN_RC(stack_slots));
 4865   op_cost(100);
 4866   //match(RegP);
 4867   format %{ "[sp+$reg]" %}
 4868   interface(MEMORY_INTER) %{
 4869     base(0x1);   // R1_SP
 4870     index(0x0);
 4871     scale(0x0);
 4872     disp($reg);  // Stack Offset
 4873   %}
 4874 %}
 4875 
 4876 operand stackSlotF(sRegF reg) %{
 4877   constraint(ALLOC_IN_RC(stack_slots));
 4878   op_cost(100);
 4879   //match(RegF);
 4880   format %{ "[sp+$reg]" %}
 4881   interface(MEMORY_INTER) %{
 4882     base(0x1);   // R1_SP
 4883     index(0x0);
 4884     scale(0x0);
 4885     disp($reg);  // Stack Offset
 4886   %}
 4887 %}
 4888 
 4889 operand stackSlotD(sRegD reg) %{
 4890   constraint(ALLOC_IN_RC(stack_slots));
 4891   op_cost(100);
 4892   //match(RegD);
 4893   format %{ "[sp+$reg]" %}
 4894   interface(MEMORY_INTER) %{
 4895     base(0x1);   // R1_SP
 4896     index(0x0);
 4897     scale(0x0);
 4898     disp($reg);  // Stack Offset
 4899   %}
 4900 %}
 4901 
 4902 // Operands for expressing Control Flow
 4903 // NOTE: Label is a predefined operand which should not be redefined in
 4904 //       the AD file. It is generically handled within the ADLC.
 4905 
 4906 //----------Conditional Branch Operands----------------------------------------
 4907 // Comparison Op
 4908 //
 4909 // This is the operation of the comparison, and is limited to the
 4910 // following set of codes: L (<), LE (<=), G (>), GE (>=), E (==), NE
 4911 // (!=).
 4912 //
 4913 // Other attributes of the comparison, such as unsignedness, are specified
 4914 // by the comparison instruction that sets a condition code flags register.
 4915 // That result is represented by a flags operand whose subtype is appropriate
 4916 // to the unsignedness (etc.) of the comparison.
 4917 //
 4918 // Later, the instruction which matches both the Comparison Op (a Bool) and
 4919 // the flags (produced by the Cmp) specifies the coding of the comparison op
 4920 // by matching a specific subtype of Bool operand below.
 4921 
 4922 // When used for floating point comparisons: unordered same as less.
 4923 operand cmpOp() %{
 4924   match(Bool);
 4925   format %{ "" %}
 4926   interface(COND_INTER) %{
 4927                            // BO only encodes bit 4 of bcondCRbiIsX, as bits 1-3 are always '100'.
 4928                            //           BO          &  BI
 4929     equal(0xA);            // 10 10:   bcondCRbiIs1 & Condition::equal
 4930     not_equal(0x2);        // 00 10:   bcondCRbiIs0 & Condition::equal
 4931     less(0x8);             // 10 00:   bcondCRbiIs1 & Condition::less
 4932     greater_equal(0x0);    // 00 00:   bcondCRbiIs0 & Condition::less
 4933     less_equal(0x1);       // 00 01:   bcondCRbiIs0 & Condition::greater
 4934     greater(0x9);          // 10 01:   bcondCRbiIs1 & Condition::greater
 4935     overflow(0xB);         // 10 11:   bcondCRbiIs1 & Condition::summary_overflow
 4936     no_overflow(0x3);      // 00 11:   bcondCRbiIs0 & Condition::summary_overflow
 4937   %}
 4938 %}
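
// Decoding example (illustration only): equal(0xA) is binary '10 10'. The 0x8
// bit selects bcondCRbiIs1 (branch if the tested CR bit is set); if it is
// clear, bcondCRbiIs0 is used (branch if the bit is clear, as in not_equal).
// The low two bits select the CR bit within the field: 00 = less,
// 01 = greater, 10 = equal, 11 = summary_overflow.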
 4939 
 4940 //----------OPERAND CLASSES----------------------------------------------------
 4941 // Operand Classes are groups of operands that are used to simplify
 4942 // instruction definitions by not requiring the AD writer to specify
 4943 // separate instructions for every form of operand when the
 4944 // instruction accepts multiple operand types with the same basic
 4945 // encoding and format. The classic case of this is memory operands.
 4946 // Indirect is not included since its use is limited to Compare & Swap.
 4947 
 4948 opclass memory(indirect, indOffset16 /*, indIndex, tlsReference*/, indirectNarrow, indirectNarrow_klass, indOffset16Narrow, indOffset16Narrow_klass);
 4949 // Memory operand where offsets are 4-aligned. Required for ld, std.
 4950 opclass memoryAlg4(indirect, indOffset16Alg4, indirectNarrow, indOffset16NarrowAlg4, indOffset16NarrowAlg4_klass);
 4951 opclass indirectMemory(indirect, indirectNarrow);
 4952 
 4953 // Special opclass for I and ConvL2I.
 4954 opclass iRegIsrc_iRegL2Isrc(iRegIsrc, iRegL2Isrc);
 4955 
 4956 // Operand classes to match encode and decode. iRegN_P2N is only used
 4957 // for storeN. I have never seen an encode node elsewhere.
 4958 opclass iRegN_P2N(iRegNsrc, iRegP2N);
 4959 opclass iRegP_N2P(iRegPsrc, iRegN2P, iRegN2P_klass);
 4960 
 4961 //----------PIPELINE-----------------------------------------------------------
 4962 
 4963 pipeline %{
 4964 
 4965 // See J.M.Tendler et al. "Power4 system microarchitecture", IBM
 4966 // J. Res. & Dev., No. 1, Jan. 2002.
 4967 
 4968 //----------ATTRIBUTES---------------------------------------------------------
 4969 attributes %{
 4970 
 4971   // Power4 instructions are of fixed length.
 4972   fixed_size_instructions;
 4973 
 4974   // TODO: if `bundle' means number of instructions fetched
 4975   // per cycle, this is 8. If `bundle' means Power4 `group', that is
 4976   // max instructions issued per cycle, this is 5.
 4977   max_instructions_per_bundle = 8;
 4978 
 4979   // A Power4 instruction is 4 bytes long.
 4980   instruction_unit_size = 4;
 4981 
 4982   // The Power4 processor fetches 64 bytes...
 4983   instruction_fetch_unit_size = 64;
 4984 
 4985   // ...in one line
  instruction_fetch_units = 1;
 4987 
 4988   // Unused, list one so that array generated by adlc is not empty.
 4989   // Aix compiler chokes if _nop_count = 0.
 4990   nops(fxNop);
 4991 %}
 4992 
 4993 //----------RESOURCES----------------------------------------------------------
 4994 // Resources are the functional units available to the machine
 4995 resources(
 4996    PPC_BR,         // branch unit
 4997    PPC_CR,         // condition unit
 4998    PPC_FX1,        // integer arithmetic unit 1
 4999    PPC_FX2,        // integer arithmetic unit 2
 5000    PPC_LDST1,      // load/store unit 1
 5001    PPC_LDST2,      // load/store unit 2
 5002    PPC_FP1,        // float arithmetic unit 1
 5003    PPC_FP2,        // float arithmetic unit 2
 5004    PPC_LDST = PPC_LDST1 | PPC_LDST2,
 5005    PPC_FX = PPC_FX1 | PPC_FX2,
 5006    PPC_FP = PPC_FP1 | PPC_FP2
 5007  );
 5008 
 5009 //----------PIPELINE DESCRIPTION-----------------------------------------------
 5010 // Pipeline Description specifies the stages in the machine's pipeline
 5011 pipe_desc(
 5012    // Power4 longest pipeline path
 5013    PPC_IF,   // instruction fetch
 5014    PPC_IC,
 5015    //PPC_BP, // branch prediction
 5016    PPC_D0,   // decode
 5017    PPC_D1,   // decode
 5018    PPC_D2,   // decode
 5019    PPC_D3,   // decode
 5020    PPC_Xfer1,
 5021    PPC_GD,   // group definition
 5022    PPC_MP,   // map
 5023    PPC_ISS,  // issue
 5024    PPC_RF,   // resource fetch
 5025    PPC_EX1,  // execute (all units)
 5026    PPC_EX2,  // execute (FP, LDST)
 5027    PPC_EX3,  // execute (FP, LDST)
 5028    PPC_EX4,  // execute (FP)
 5029    PPC_EX5,  // execute (FP)
 5030    PPC_EX6,  // execute (FP)
 5031    PPC_WB,   // write back
 5032    PPC_Xfer2,
 5033    PPC_CP
 5034  );
 5035 
 5036 //----------PIPELINE CLASSES---------------------------------------------------
 5037 // Pipeline Classes describe the stages in which input and output are
 5038 // referenced by the hardware pipeline.
 5039 
 5040 // Simple pipeline classes.
 5041 
 5042 // Default pipeline class.
 5043 pipe_class pipe_class_default() %{
 5044   single_instruction;
 5045   fixed_latency(2);
 5046 %}
 5047 
 5048 // Pipeline class for empty instructions.
 5049 pipe_class pipe_class_empty() %{
 5050   single_instruction;
 5051   fixed_latency(0);
 5052 %}
 5053 
 5054 // Pipeline class for compares.
 5055 pipe_class pipe_class_compare() %{
 5056   single_instruction;
 5057   fixed_latency(16);
 5058 %}
 5059 
 5060 // Pipeline class for traps.
 5061 pipe_class pipe_class_trap() %{
 5062   single_instruction;
 5063   fixed_latency(100);
 5064 %}
 5065 
 5066 // Pipeline class for memory operations.
 5067 pipe_class pipe_class_memory() %{
 5068   single_instruction;
 5069   fixed_latency(16);
 5070 %}
 5071 
 5072 // Pipeline class for call.
 5073 pipe_class pipe_class_call() %{
 5074   single_instruction;
 5075   fixed_latency(100);
 5076 %}
 5077 
 5078 // Define the class for the Nop node.
 5079 define %{
 5080    MachNop = pipe_class_default;
 5081 %}
 5082 
 5083 %}
 5084 
 5085 //----------INSTRUCTIONS-------------------------------------------------------
 5086 
 5087 // Naming of instructions:
 5088 //   opA_operB / opA_operB_operC:
 5089 //     Operation 'op' with one or two source operands 'oper'. Result
 5090 //     type is A, source operand types are B and C.
 5091 //     Iff A == B == C, B and C are left out.
 5092 //
 5093 // The instructions are ordered according to the following scheme:
 5094 //  - loads
 5095 //  - load constants
 5096 //  - prefetch
 5097 //  - store
 5098 //  - encode/decode
 5099 //  - membar
 5100 //  - conditional moves
 5101 //  - compare & swap
 5102 //  - arithmetic and logic operations
 5103 //    * int: Add, Sub, Mul, Div, Mod
 5104 //    * int: lShift, arShift, urShift, rot
 5105 //    * float: Add, Sub, Mul, Div
 5106 //    * and, or, xor ...
 5107 //  - register moves: float <-> int, reg <-> stack, repl
//  - cast (high level type cast, XtoP, castPP, castII, not_null etc.)
//  - conv (low level type cast requiring bit changes (sign extend etc.))
 5110 //  - compares, range & zero checks.
 5111 //  - branches
 5112 //  - complex operations, intrinsics, min, max, replicate
 5113 //  - lock
 5114 //  - Calls
 5115 //
 5116 // If there are similar instructions with different types they are sorted:
 5117 // int before float
 5118 // small before big
 5119 // signed before unsigned
 5120 // e.g., loadS before loadUS before loadI before loadF.
 5121 
 5122 
 5123 //----------Load/Store Instructions--------------------------------------------
 5124 
 5125 //----------Load Instructions--------------------------------------------------
 5126 
 5127 // Converts byte to int.
 5128 // As convB2I_reg, but without match rule.  The match rule of convB2I_reg
 5129 // reuses the 'amount' operand, but adlc expects that operand specification
 5130 // and operands in match rule are equivalent.
 5131 instruct convB2I_reg_2(iRegIdst dst, iRegIsrc src) %{
 5132   effect(DEF dst, USE src);
 5133   format %{ "EXTSB   $dst, $src \t// byte->int" %}
 5134   size(4);
 5135   ins_encode %{
 5136     __ extsb($dst$$Register, $src$$Register);
 5137   %}
 5138   ins_pipe(pipe_class_default);
 5139 %}
 5140 
 5141 instruct loadUB_indirect(iRegIdst dst, indirectMemory mem) %{
 5142   // match-rule, false predicate
 5143   match(Set dst (LoadB mem));
 5144   predicate(false);
 5145 
 5146   format %{ "LBZ     $dst, $mem" %}
 5147   size(4);
 5148   ins_encode( enc_lbz(dst, mem) );
 5149   ins_pipe(pipe_class_memory);
 5150 %}
 5151 
 5152 instruct loadUB_indirect_ac(iRegIdst dst, indirectMemory mem) %{
 5153   // match-rule, false predicate
 5154   match(Set dst (LoadB mem));
 5155   predicate(false);
 5156 
 5157   format %{ "LBZ     $dst, $mem\n\t"
 5158             "TWI     $dst\n\t"
 5159             "ISYNC" %}
 5160   size(12);
 5161   ins_encode( enc_lbz_ac(dst, mem) );
 5162   ins_pipe(pipe_class_memory);
 5163 %}
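
// Rough shape of the acquire variants (see enc_lbz_ac; illustration only):
//   LBZ   $dst, $mem   // the load itself
//   TWI   $dst         // twi_0: never-trapping, creates a dependency on $dst
//   ISYNC              // dependency + isync yield load-acquire semantics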
 5164 
 5165 // Load Byte (8bit signed). LoadB = LoadUB + ConvUB2B.
 5166 instruct loadB_indirect_Ex(iRegIdst dst, indirectMemory mem) %{
 5167   match(Set dst (LoadB mem));
 5168   predicate(n->as_Load()->is_unordered() || followed_by_acquire(n));
 5169   ins_cost(MEMORY_REF_COST + DEFAULT_COST);
 5170   expand %{
 5171     iRegIdst tmp;
 5172     loadUB_indirect(tmp, mem);
 5173     convB2I_reg_2(dst, tmp);
 5174   %}
 5175 %}
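
// Sketch of the expansion above (illustration only): an unordered signed byte
// load becomes two instructions,
//   LBZ   tmp, $mem   // zero-extending byte load (loadUB_indirect)
//   EXTSB dst, tmp    // sign-extend byte to int  (convB2I_reg_2)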
 5176 
 5177 instruct loadB_indirect_ac_Ex(iRegIdst dst, indirectMemory mem) %{
 5178   match(Set dst (LoadB mem));
 5179   ins_cost(3*MEMORY_REF_COST + DEFAULT_COST);
 5180   expand %{
 5181     iRegIdst tmp;
 5182     loadUB_indirect_ac(tmp, mem);
 5183     convB2I_reg_2(dst, tmp);
 5184   %}
 5185 %}
 5186 
 5187 instruct loadUB_indOffset16(iRegIdst dst, indOffset16 mem) %{
 5188   // match-rule, false predicate
 5189   match(Set dst (LoadB mem));
 5190   predicate(false);
 5191 
 5192   format %{ "LBZ     $dst, $mem" %}
 5193   size(4);
 5194   ins_encode( enc_lbz(dst, mem) );
 5195   ins_pipe(pipe_class_memory);
 5196 %}
 5197 
 5198 instruct loadUB_indOffset16_ac(iRegIdst dst, indOffset16 mem) %{
 5199   // match-rule, false predicate
 5200   match(Set dst (LoadB mem));
 5201   predicate(false);
 5202 
 5203   format %{ "LBZ     $dst, $mem\n\t"
 5204             "TWI     $dst\n\t"
 5205             "ISYNC" %}
 5206   size(12);
 5207   ins_encode( enc_lbz_ac(dst, mem) );
 5208   ins_pipe(pipe_class_memory);
 5209 %}
 5210 
 5211 // Load Byte (8bit signed). LoadB = LoadUB + ConvUB2B.
 5212 instruct loadB_indOffset16_Ex(iRegIdst dst, indOffset16 mem) %{
 5213   match(Set dst (LoadB mem));
 5214   predicate(n->as_Load()->is_unordered() || followed_by_acquire(n));
 5215   ins_cost(MEMORY_REF_COST + DEFAULT_COST);
 5216 
 5217   expand %{
 5218     iRegIdst tmp;
 5219     loadUB_indOffset16(tmp, mem);
 5220     convB2I_reg_2(dst, tmp);
 5221   %}
 5222 %}
 5223 
 5224 instruct loadB_indOffset16_ac_Ex(iRegIdst dst, indOffset16 mem) %{
 5225   match(Set dst (LoadB mem));
 5226   ins_cost(3*MEMORY_REF_COST + DEFAULT_COST);
 5227 
 5228   expand %{
 5229     iRegIdst tmp;
 5230     loadUB_indOffset16_ac(tmp, mem);
 5231     convB2I_reg_2(dst, tmp);
 5232   %}
 5233 %}
 5234 
 5235 // Load Unsigned Byte (8bit UNsigned) into an int reg.
 5236 instruct loadUB(iRegIdst dst, memory mem) %{
 5237   predicate(n->as_Load()->is_unordered() || followed_by_acquire(n));
 5238   match(Set dst (LoadUB mem));
 5239   ins_cost(MEMORY_REF_COST);
 5240 
 5241   format %{ "LBZ     $dst, $mem \t// byte, zero-extend to int" %}
 5242   size(4);
 5243   ins_encode( enc_lbz(dst, mem) );
 5244   ins_pipe(pipe_class_memory);
 5245 %}
 5246 
 5247 // Load  Unsigned Byte (8bit UNsigned) acquire.
 5248 instruct loadUB_ac(iRegIdst dst, memory mem) %{
 5249   match(Set dst (LoadUB mem));
 5250   ins_cost(3*MEMORY_REF_COST);
 5251 
 5252   format %{ "LBZ     $dst, $mem \t// byte, zero-extend to int, acquire\n\t"
 5253             "TWI     $dst\n\t"
 5254             "ISYNC" %}
 5255   size(12);
 5256   ins_encode( enc_lbz_ac(dst, mem) );
 5257   ins_pipe(pipe_class_memory);
 5258 %}
 5259 
 5260 // Load Unsigned Byte (8bit UNsigned) into a Long Register.
 5261 instruct loadUB2L(iRegLdst dst, memory mem) %{
 5262   match(Set dst (ConvI2L (LoadUB mem)));
 5263   predicate(_kids[0]->_leaf->as_Load()->is_unordered() || followed_by_acquire(_kids[0]->_leaf));
 5264   ins_cost(MEMORY_REF_COST);
 5265 
 5266   format %{ "LBZ     $dst, $mem \t// byte, zero-extend to long" %}
 5267   size(4);
 5268   ins_encode( enc_lbz(dst, mem) );
 5269   ins_pipe(pipe_class_memory);
 5270 %}
 5271 
 5272 instruct loadUB2L_ac(iRegLdst dst, memory mem) %{
 5273   match(Set dst (ConvI2L (LoadUB mem)));
 5274   ins_cost(3*MEMORY_REF_COST);
 5275 
 5276   format %{ "LBZ     $dst, $mem \t// byte, zero-extend to long, acquire\n\t"
 5277             "TWI     $dst\n\t"
 5278             "ISYNC" %}
 5279   size(12);
 5280   ins_encode( enc_lbz_ac(dst, mem) );
 5281   ins_pipe(pipe_class_memory);
 5282 %}
 5283 
 5284 // Load Short (16bit signed)
 5285 instruct loadS(iRegIdst dst, memory mem) %{
 5286   match(Set dst (LoadS mem));
 5287   predicate(n->as_Load()->is_unordered() || followed_by_acquire(n));
 5288   ins_cost(MEMORY_REF_COST);
 5289 
 5290   format %{ "LHA     $dst, $mem" %}
 5291   size(4);
 5292   ins_encode %{
 5293     int Idisp = $mem$$disp + frame_slots_bias($mem$$base, ra_);
 5294     __ lha($dst$$Register, Idisp, $mem$$base$$Register);
 5295   %}
 5296   ins_pipe(pipe_class_memory);
 5297 %}
 5298 
 5299 // Load Short (16bit signed) acquire.
 5300 instruct loadS_ac(iRegIdst dst, memory mem) %{
 5301   match(Set dst (LoadS mem));
 5302   ins_cost(3*MEMORY_REF_COST);
 5303 
  format %{ "LHA     $dst, $mem \t// acquire\n\t"
 5305             "TWI     $dst\n\t"
 5306             "ISYNC" %}
 5307   size(12);
 5308   ins_encode %{
 5309     int Idisp = $mem$$disp + frame_slots_bias($mem$$base, ra_);
 5310     __ lha($dst$$Register, Idisp, $mem$$base$$Register);
 5311     __ twi_0($dst$$Register);
 5312     __ isync();
 5313   %}
 5314   ins_pipe(pipe_class_memory);
 5315 %}
 5316 
 5317 // Load Char (16bit unsigned)
 5318 instruct loadUS(iRegIdst dst, memory mem) %{
 5319   match(Set dst (LoadUS mem));
 5320   predicate(n->as_Load()->is_unordered() || followed_by_acquire(n));
 5321   ins_cost(MEMORY_REF_COST);
 5322 
 5323   format %{ "LHZ     $dst, $mem" %}
 5324   size(4);
 5325   ins_encode( enc_lhz(dst, mem) );
 5326   ins_pipe(pipe_class_memory);
 5327 %}
 5328 
 5329 // Load Char (16bit unsigned) acquire.
 5330 instruct loadUS_ac(iRegIdst dst, memory mem) %{
 5331   match(Set dst (LoadUS mem));
 5332   ins_cost(3*MEMORY_REF_COST);
 5333 
 5334   format %{ "LHZ     $dst, $mem \t// acquire\n\t"
 5335             "TWI     $dst\n\t"
 5336             "ISYNC" %}
 5337   size(12);
 5338   ins_encode( enc_lhz_ac(dst, mem) );
 5339   ins_pipe(pipe_class_memory);
 5340 %}
 5341 
 5342 // Load Unsigned Short/Char (16bit UNsigned) into a Long Register.
 5343 instruct loadUS2L(iRegLdst dst, memory mem) %{
 5344   match(Set dst (ConvI2L (LoadUS mem)));
 5345   predicate(_kids[0]->_leaf->as_Load()->is_unordered() || followed_by_acquire(_kids[0]->_leaf));
 5346   ins_cost(MEMORY_REF_COST);
 5347 
 5348   format %{ "LHZ     $dst, $mem \t// short, zero-extend to long" %}
 5349   size(4);
 5350   ins_encode( enc_lhz(dst, mem) );
 5351   ins_pipe(pipe_class_memory);
 5352 %}
 5353 
 5354 // Load Unsigned Short/Char (16bit UNsigned) into a Long Register acquire.
 5355 instruct loadUS2L_ac(iRegLdst dst, memory mem) %{
 5356   match(Set dst (ConvI2L (LoadUS mem)));
 5357   ins_cost(3*MEMORY_REF_COST);
 5358 
 5359   format %{ "LHZ     $dst, $mem \t// short, zero-extend to long, acquire\n\t"
 5360             "TWI     $dst\n\t"
 5361             "ISYNC" %}
 5362   size(12);
 5363   ins_encode( enc_lhz_ac(dst, mem) );
 5364   ins_pipe(pipe_class_memory);
 5365 %}
 5366 
 5367 // Load Integer.
 5368 instruct loadI(iRegIdst dst, memory mem) %{
 5369   match(Set dst (LoadI mem));
 5370   predicate(n->as_Load()->is_unordered() || followed_by_acquire(n));
 5371   ins_cost(MEMORY_REF_COST);
 5372 
 5373   format %{ "LWZ     $dst, $mem" %}
 5374   size(4);
 5375   ins_encode( enc_lwz(dst, mem) );
 5376   ins_pipe(pipe_class_memory);
 5377 %}
 5378 
 5379 // Load Integer acquire.
 5380 instruct loadI_ac(iRegIdst dst, memory mem) %{
 5381   match(Set dst (LoadI mem));
 5382   ins_cost(3*MEMORY_REF_COST);
 5383 
 5384   format %{ "LWZ     $dst, $mem \t// load acquire\n\t"
 5385             "TWI     $dst\n\t"
 5386             "ISYNC" %}
 5387   size(12);
 5388   ins_encode( enc_lwz_ac(dst, mem) );
 5389   ins_pipe(pipe_class_memory);
 5390 %}
 5391 
 5392 // Match loading integer and casting it to unsigned int in
 5393 // long register.
 5394 // LoadI + ConvI2L + AndL 0xffffffff.
 5395 instruct loadUI2L(iRegLdst dst, memory mem, immL_32bits mask) %{
 5396   match(Set dst (AndL (ConvI2L (LoadI mem)) mask));
 5397   predicate(_kids[0]->_kids[0]->_leaf->as_Load()->is_unordered());
 5398   ins_cost(MEMORY_REF_COST);
 5399 
 5400   format %{ "LWZ     $dst, $mem \t// zero-extend to long" %}
 5401   size(4);
 5402   ins_encode( enc_lwz(dst, mem) );
 5403   ins_pipe(pipe_class_memory);
 5404 %}
 5405 
 5406 // Match loading integer and casting it to long.
 5407 instruct loadI2L(iRegLdst dst, memoryAlg4 mem) %{
 5408   match(Set dst (ConvI2L (LoadI mem)));
 5409   predicate(_kids[0]->_leaf->as_Load()->is_unordered());
 5410   ins_cost(MEMORY_REF_COST);
 5411 
 5412   format %{ "LWA     $dst, $mem \t// loadI2L" %}
 5413   size(4);
 5414   ins_encode %{
 5415     int Idisp = $mem$$disp + frame_slots_bias($mem$$base, ra_);
 5416     __ lwa($dst$$Register, Idisp, $mem$$base$$Register);
 5417   %}
 5418   ins_pipe(pipe_class_memory);
 5419 %}
 5420 
 5421 // Match loading integer and casting it to long - acquire.
 5422 instruct loadI2L_ac(iRegLdst dst, memoryAlg4 mem) %{
 5423   match(Set dst (ConvI2L (LoadI mem)));
 5424   ins_cost(3*MEMORY_REF_COST);
 5425 
  format %{ "LWA     $dst, $mem \t// loadI2L acquire\n\t"
 5427             "TWI     $dst\n\t"
 5428             "ISYNC" %}
 5429   size(12);
 5430   ins_encode %{
 5431     int Idisp = $mem$$disp + frame_slots_bias($mem$$base, ra_);
 5432     __ lwa($dst$$Register, Idisp, $mem$$base$$Register);
 5433     __ twi_0($dst$$Register);
 5434     __ isync();
 5435   %}
 5436   ins_pipe(pipe_class_memory);
 5437 %}
 5438 
 5439 // Load Long - aligned
 5440 instruct loadL(iRegLdst dst, memoryAlg4 mem) %{
 5441   match(Set dst (LoadL mem));
 5442   predicate(n->as_Load()->is_unordered() || followed_by_acquire(n));
 5443   ins_cost(MEMORY_REF_COST);
 5444 
 5445   format %{ "LD      $dst, $mem \t// long" %}
 5446   size(4);
 5447   ins_encode( enc_ld(dst, mem) );
 5448   ins_pipe(pipe_class_memory);
 5449 %}
 5450 
 5451 // Load Long - aligned acquire.
 5452 instruct loadL_ac(iRegLdst dst, memoryAlg4 mem) %{
 5453   match(Set dst (LoadL mem));
 5454   ins_cost(3*MEMORY_REF_COST);
 5455 
 5456   format %{ "LD      $dst, $mem \t// long acquire\n\t"
 5457             "TWI     $dst\n\t"
 5458             "ISYNC" %}
 5459   size(12);
 5460   ins_encode( enc_ld_ac(dst, mem) );
 5461   ins_pipe(pipe_class_memory);
 5462 %}
 5463 
 5464 // Load Long - UNaligned
 5465 instruct loadL_unaligned(iRegLdst dst, memoryAlg4 mem) %{
 5466   match(Set dst (LoadL_unaligned mem));
 5467   // predicate(...) // Unaligned_ac is not needed (and wouldn't make sense).
 5468   ins_cost(MEMORY_REF_COST);
 5469 
 5470   format %{ "LD      $dst, $mem \t// unaligned long" %}
 5471   size(4);
 5472   ins_encode( enc_ld(dst, mem) );
 5473   ins_pipe(pipe_class_memory);
 5474 %}
 5475 
 5476 // Load nodes for superwords
 5477 
 5478 // Load Aligned Packed Byte
 5479 instruct loadV8(iRegLdst dst, memoryAlg4 mem) %{
 5480   predicate(n->as_LoadVector()->memory_size() == 8);
 5481   match(Set dst (LoadVector mem));
 5482   ins_cost(MEMORY_REF_COST);
 5483 
 5484   format %{ "LD      $dst, $mem \t// load 8-byte Vector" %}
 5485   size(4);
 5486   ins_encode( enc_ld(dst, mem) );
 5487   ins_pipe(pipe_class_memory);
 5488 %}
 5489 
 5490 // Load Aligned Packed Byte
 5491 instruct loadV16(vecX dst, indirect mem) %{
 5492   predicate(n->as_LoadVector()->memory_size() == 16);
 5493   match(Set dst (LoadVector mem));
 5494   ins_cost(MEMORY_REF_COST);
 5495 
 5496   format %{ "LXVD2X      $dst, $mem \t// load 16-byte Vector" %}
 5497   size(4);
 5498   ins_encode %{
 5499     __ lxvd2x($dst$$VectorSRegister, $mem$$Register);
 5500   %}
 5501   ins_pipe(pipe_class_default);
 5502 %}
 5503 
 5504 // Load Range, range = array length (=jint)
 5505 instruct loadRange(iRegIdst dst, memory mem) %{
 5506   match(Set dst (LoadRange mem));
 5507   ins_cost(MEMORY_REF_COST);
 5508 
 5509   format %{ "LWZ     $dst, $mem \t// range" %}
 5510   size(4);
 5511   ins_encode( enc_lwz(dst, mem) );
 5512   ins_pipe(pipe_class_memory);
 5513 %}
 5514 
 5515 // Load Compressed Pointer
 5516 instruct loadN(iRegNdst dst, memory mem) %{
 5517   match(Set dst (LoadN mem));
 5518   predicate(n->as_Load()->is_unordered() || followed_by_acquire(n));
 5519   ins_cost(MEMORY_REF_COST);
 5520 
 5521   format %{ "LWZ     $dst, $mem \t// load compressed ptr" %}
 5522   size(4);
 5523   ins_encode( enc_lwz(dst, mem) );
 5524   ins_pipe(pipe_class_memory);
 5525 %}
 5526 
 5527 // Load Compressed Pointer acquire.
 5528 instruct loadN_ac(iRegNdst dst, memory mem) %{
 5529   match(Set dst (LoadN mem));
 5530   ins_cost(3*MEMORY_REF_COST);
 5531 
 5532   format %{ "LWZ     $dst, $mem \t// load acquire compressed ptr\n\t"
 5533             "TWI     $dst\n\t"
 5534             "ISYNC" %}
 5535   size(12);
 5536   ins_encode( enc_lwz_ac(dst, mem) );
 5537   ins_pipe(pipe_class_memory);
 5538 %}
 5539 
 5540 // Load Compressed Pointer and decode it if narrow_oop_shift == 0.
 5541 instruct loadN2P_unscaled(iRegPdst dst, memory mem) %{
 5542   match(Set dst (DecodeN (LoadN mem)));
 5543   predicate(_kids[0]->_leaf->as_Load()->is_unordered() && CompressedOops::shift() == 0);
 5544   ins_cost(MEMORY_REF_COST);
 5545 
 5546   format %{ "LWZ     $dst, $mem \t// DecodeN (unscaled)" %}
 5547   size(4);
 5548   ins_encode( enc_lwz(dst, mem) );
 5549   ins_pipe(pipe_class_memory);
 5550 %}
 5551 
 5552 instruct loadN2P_klass_unscaled(iRegPdst dst, memory mem) %{
 5553   match(Set dst (DecodeNKlass (LoadNKlass mem)));
 5554   predicate(CompressedKlassPointers::base() == NULL && CompressedKlassPointers::shift() == 0 &&
 5555             _kids[0]->_leaf->as_Load()->is_unordered());
 5556   ins_cost(MEMORY_REF_COST);
 5557 
  format %{ "LWZ     $dst, $mem \t// DecodeNKlass (unscaled)" %}
 5559   size(4);
 5560   ins_encode( enc_lwz(dst, mem) );
 5561   ins_pipe(pipe_class_memory);
 5562 %}
 5563 
 5564 // Load Pointer
 5565 instruct loadP(iRegPdst dst, memoryAlg4 mem) %{
 5566   match(Set dst (LoadP mem));
 5567   predicate((n->as_Load()->is_unordered() || followed_by_acquire(n)) && n->as_Load()->barrier_data() == 0);
 5568   ins_cost(MEMORY_REF_COST);
 5569 
 5570   format %{ "LD      $dst, $mem \t// ptr" %}
 5571   size(4);
 5572   ins_encode( enc_ld(dst, mem) );
 5573   ins_pipe(pipe_class_memory);
 5574 %}
 5575 
 5576 // Load Pointer acquire.
 5577 instruct loadP_ac(iRegPdst dst, memoryAlg4 mem) %{
 5578   match(Set dst (LoadP mem));
 5579   ins_cost(3*MEMORY_REF_COST);
 5580 
 5581   predicate(n->as_Load()->barrier_data() == 0);
 5582 
 5583   format %{ "LD      $dst, $mem \t// ptr acquire\n\t"
 5584             "TWI     $dst\n\t"
 5585             "ISYNC" %}
 5586   size(12);
 5587   ins_encode( enc_ld_ac(dst, mem) );
 5588   ins_pipe(pipe_class_memory);
 5589 %}
 5590 
 5591 // LoadP + CastP2L
 5592 instruct loadP2X(iRegLdst dst, memoryAlg4 mem) %{
 5593   match(Set dst (CastP2X (LoadP mem)));
 5594   predicate(_kids[0]->_leaf->as_Load()->is_unordered() && _kids[0]->_leaf->as_Load()->barrier_data() == 0);
 5595   ins_cost(MEMORY_REF_COST);
 5596 
 5597   format %{ "LD      $dst, $mem \t// ptr + p2x" %}
 5598   size(4);
 5599   ins_encode( enc_ld(dst, mem) );
 5600   ins_pipe(pipe_class_memory);
 5601 %}
 5602 
 5603 // Load compressed klass pointer.
 5604 instruct loadNKlass(iRegNdst dst, memory mem) %{
 5605   match(Set dst (LoadNKlass mem));
 5606   ins_cost(MEMORY_REF_COST);
 5607 
 5608   format %{ "LWZ     $dst, $mem \t// compressed klass ptr" %}
 5609   size(4);
 5610   ins_encode( enc_lwz(dst, mem) );
 5611   ins_pipe(pipe_class_memory);
 5612 %}
 5613 
 5614 // Load Klass Pointer
 5615 instruct loadKlass(iRegPdst dst, memoryAlg4 mem) %{
 5616   match(Set dst (LoadKlass mem));
 5617   ins_cost(MEMORY_REF_COST);
 5618 
 5619   format %{ "LD      $dst, $mem \t// klass ptr" %}
 5620   size(4);
 5621   ins_encode( enc_ld(dst, mem) );
 5622   ins_pipe(pipe_class_memory);
 5623 %}
 5624 
 5625 // Load Float
 5626 instruct loadF(regF dst, memory mem) %{
 5627   match(Set dst (LoadF mem));
 5628   predicate(n->as_Load()->is_unordered() || followed_by_acquire(n));
 5629   ins_cost(MEMORY_REF_COST);
 5630 
 5631   format %{ "LFS     $dst, $mem" %}
 5632   size(4);
 5633   ins_encode %{
 5634     int Idisp = $mem$$disp + frame_slots_bias($mem$$base, ra_);
 5635     __ lfs($dst$$FloatRegister, Idisp, $mem$$base$$Register);
 5636   %}
 5637   ins_pipe(pipe_class_memory);
 5638 %}
 5639 
 5640 // Load Float acquire.
 5641 instruct loadF_ac(regF dst, memory mem, flagsRegCR0 cr0) %{
 5642   match(Set dst (LoadF mem));
 5643   effect(TEMP cr0);
 5644   ins_cost(3*MEMORY_REF_COST);
 5645 
 5646   format %{ "LFS     $dst, $mem \t// acquire\n\t"
 5647             "FCMPU   cr0, $dst, $dst\n\t"
 5648             "BNE     cr0, next\n"
 5649             "next:\n\t"
 5650             "ISYNC" %}
 5651   size(16);
 5652   ins_encode %{
 5653     int Idisp = $mem$$disp + frame_slots_bias($mem$$base, ra_);
 5654     Label next;
 5655     __ lfs($dst$$FloatRegister, Idisp, $mem$$base$$Register);
 5656     __ fcmpu(CCR0, $dst$$FloatRegister, $dst$$FloatRegister);
 5657     __ bne(CCR0, next);
 5658     __ bind(next);
 5659     __ isync();
 5660   %}
 5661   ins_pipe(pipe_class_memory);
 5662 %}
 5663 
 5664 // Load Double - aligned
 5665 instruct loadD(regD dst, memory mem) %{
 5666   match(Set dst (LoadD mem));
 5667   predicate(n->as_Load()->is_unordered() || followed_by_acquire(n));
 5668   ins_cost(MEMORY_REF_COST);
 5669 
 5670   format %{ "LFD     $dst, $mem" %}
 5671   size(4);
 5672   ins_encode( enc_lfd(dst, mem) );
 5673   ins_pipe(pipe_class_memory);
 5674 %}
 5675 
 5676 // Load Double - aligned acquire.
 5677 instruct loadD_ac(regD dst, memory mem, flagsRegCR0 cr0) %{
 5678   match(Set dst (LoadD mem));
 5679   effect(TEMP cr0);
 5680   ins_cost(3*MEMORY_REF_COST);
 5681 
 5682   format %{ "LFD     $dst, $mem \t// acquire\n\t"
 5683             "FCMPU   cr0, $dst, $dst\n\t"
 5684             "BNE     cr0, next\n"
 5685             "next:\n\t"
 5686             "ISYNC" %}
 5687   size(16);
 5688   ins_encode %{
 5689     int Idisp = $mem$$disp + frame_slots_bias($mem$$base, ra_);
 5690     Label next;
 5691     __ lfd($dst$$FloatRegister, Idisp, $mem$$base$$Register);
 5692     __ fcmpu(CCR0, $dst$$FloatRegister, $dst$$FloatRegister);
 5693     __ bne(CCR0, next);
 5694     __ bind(next);
 5695     __ isync();
 5696   %}
 5697   ins_pipe(pipe_class_memory);
 5698 %}
 5699 
 5700 // Load Double - UNaligned
 5701 instruct loadD_unaligned(regD dst, memory mem) %{
 5702   match(Set dst (LoadD_unaligned mem));
 5703   // predicate(...) // Unaligned_ac is not needed (and wouldn't make sense).
 5704   ins_cost(MEMORY_REF_COST);
 5705 
 5706   format %{ "LFD     $dst, $mem" %}
 5707   size(4);
 5708   ins_encode( enc_lfd(dst, mem) );
 5709   ins_pipe(pipe_class_memory);
 5710 %}
 5711 
 5712 //----------Constants--------------------------------------------------------
 5713 
 5714 // Load MachConstantTableBase: add hi offset to global toc.
 5715 // TODO: Handle hidden register r29 in bundler!
 5716 instruct loadToc_hi(iRegLdst dst) %{
 5717   effect(DEF dst);
 5718   ins_cost(DEFAULT_COST);
 5719 
 5720   format %{ "ADDIS   $dst, R29, DISP.hi \t// load TOC hi" %}
 5721   size(4);
 5722   ins_encode %{
 5723     __ calculate_address_from_global_toc_hi16only($dst$$Register, __ method_toc());
 5724   %}
 5725   ins_pipe(pipe_class_default);
 5726 %}
 5727 
 5728 // Load MachConstantTableBase: add lo offset to global toc.
 5729 instruct loadToc_lo(iRegLdst dst, iRegLdst src) %{
 5730   effect(DEF dst, USE src);
 5731   ins_cost(DEFAULT_COST);
 5732 
 5733   format %{ "ADDI    $dst, $src, DISP.lo \t// load TOC lo" %}
 5734   size(4);
 5735   ins_encode %{
 5736     __ calculate_address_from_global_toc_lo16only($dst$$Register, __ method_toc());
 5737   %}
 5738   ins_pipe(pipe_class_default);
 5739 %}
 5740 
 5741 // Load 16-bit integer constant 0xssss????
 5742 instruct loadConI16(iRegIdst dst, immI16 src) %{
 5743   match(Set dst src);
 5744 
 5745   format %{ "LI      $dst, $src" %}
 5746   size(4);
 5747   ins_encode %{
 5748     __ li($dst$$Register, (int)((short)($src$$constant & 0xFFFF)));
 5749   %}
 5750   ins_pipe(pipe_class_default);
 5751 %}
 5752 
 5753 // Load integer constant 0x????0000
 5754 instruct loadConIhi16(iRegIdst dst, immIhi16 src) %{
 5755   match(Set dst src);
 5756   ins_cost(DEFAULT_COST);
 5757 
 5758   format %{ "LIS     $dst, $src.hi" %}
 5759   size(4);
 5760   ins_encode %{
    // Lis sign extends the 16-bit src and then shifts it 16 bits to the left.
 5762     __ lis($dst$$Register, (int)((short)(($src$$constant & 0xFFFF0000) >> 16)));
 5763   %}
 5764   ins_pipe(pipe_class_default);
 5765 %}
 5766 
// Part 2 of loading a 32-bit constant: hi16 is in src1 (properly shifted
// and sign extended); this adds the low 16 bits.
 5769 instruct loadConI32_lo16(iRegIdst dst, iRegIsrc src1, immI16 src2) %{
 5770   // no match-rule, false predicate
 5771   effect(DEF dst, USE src1, USE src2);
 5772   predicate(false);
 5773 
 5774   format %{ "ORI     $dst, $src1.hi, $src2.lo" %}
 5775   size(4);
 5776   ins_encode %{
 5777     __ ori($dst$$Register, $src1$$Register, ($src2$$constant) & 0xFFFF);
 5778   %}
 5779   ins_pipe(pipe_class_default);
 5780 %}
 5781 
 5782 instruct loadConI32(iRegIdst dst, immI32 src) %{
 5783   match(Set dst src);
 5784   // This macro is valid only in Power 10 and up, but adding the following predicate here
 5785   // caused a build error, so we comment it out for now.
 5786   // predicate(PowerArchitecturePPC64 >= 10);
 5787   ins_cost(DEFAULT_COST+1);
 5788 
 5789   format %{ "PLI     $dst, $src" %}
 5790   size(8);
 5791   ins_encode %{
 5792     assert( ((intptr_t)(__ pc()) & 0x3c) != 0x3c, "Bad alignment for prefixed instruction at " INTPTR_FORMAT, (intptr_t)(__ pc()));
 5793     __ pli($dst$$Register, $src$$constant);
 5794   %}
 5795   ins_pipe(pipe_class_default);
 5796   ins_alignment(2);
 5797 %}
 5798 
 5799 instruct loadConI_Ex(iRegIdst dst, immI src) %{
 5800   match(Set dst src);
 5801   ins_cost(DEFAULT_COST*2);
 5802 
 5803   expand %{
 5804     // Would like to use $src$$constant.
 5805     immI16 srcLo %{ _opnds[1]->constant() %}
 5806     // srcHi can be 0000 if srcLo sign-extends to a negative number.
 5807     immIhi16 srcHi %{ _opnds[1]->constant() %}
 5808     iRegIdst tmpI;
 5809     loadConIhi16(tmpI, srcHi);
 5810     loadConI32_lo16(dst, tmpI, srcLo);
 5811   %}
 5812 %}
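
// Example (sketch only): the 32-bit constant 0x12345678 is built as
//   LIS  tmp, 0x1234        // tmp = 0x12340000    (loadConIhi16)
//   ORI  dst, tmp, 0x5678   // dst = 0x12345678    (loadConI32_lo16)
// Constants that already fit into 16 signed bits are expected to be matched by
// the cheaper loadConI16 rule instead.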
 5813 
 5814 // No constant pool entries required.
 5815 instruct loadConL16(iRegLdst dst, immL16 src) %{
 5816   match(Set dst src);
 5817 
 5818   format %{ "LI      $dst, $src \t// long" %}
 5819   size(4);
 5820   ins_encode %{
 5821     __ li($dst$$Register, (int)((short) ($src$$constant & 0xFFFF)));
 5822   %}
 5823   ins_pipe(pipe_class_default);
 5824 %}
 5825 
 5826 // Load long constant 0xssssssss????0000
 5827 instruct loadConL32hi16(iRegLdst dst, immL32hi16 src) %{
 5828   match(Set dst src);
 5829   ins_cost(DEFAULT_COST);
 5830 
 5831   format %{ "LIS     $dst, $src.hi \t// long" %}
 5832   size(4);
 5833   ins_encode %{
 5834     __ lis($dst$$Register, (int)((short)(($src$$constant & 0xFFFF0000) >> 16)));
 5835   %}
 5836   ins_pipe(pipe_class_default);
 5837 %}
 5838 
 5839 // To load a 32 bit constant: merge lower 16 bits into already loaded
 5840 // high 16 bits.
 5841 instruct loadConL32_lo16(iRegLdst dst, iRegLsrc src1, immL16 src2) %{
 5842   // no match-rule, false predicate
 5843   effect(DEF dst, USE src1, USE src2);
 5844   predicate(false);
 5845 
 5846   format %{ "ORI     $dst, $src1, $src2.lo" %}
 5847   size(4);
 5848   ins_encode %{
 5849     __ ori($dst$$Register, $src1$$Register, ($src2$$constant) & 0xFFFF);
 5850   %}
 5851   ins_pipe(pipe_class_default);
 5852 %}
 5853 
 5854 // Load 32-bit long constant
 5855 instruct loadConL32_Ex(iRegLdst dst, immL32 src) %{
 5856   match(Set dst src);
 5857   ins_cost(DEFAULT_COST*2);
 5858 
 5859   expand %{
 5860     // Would like to use $src$$constant.
 5861     immL16     srcLo %{ _opnds[1]->constant() /*& 0x0000FFFFL */%}
 5862     // srcHi can be 0000 if srcLo sign-extends to a negative number.
 5863     immL32hi16 srcHi %{ _opnds[1]->constant() /*& 0xFFFF0000L */%}
 5864     iRegLdst tmpL;
 5865     loadConL32hi16(tmpL, srcHi);
 5866     loadConL32_lo16(dst, tmpL, srcLo);
 5867   %}
 5868 %}
 5869 
 5870 // Load 34-bit long constant using prefixed addi. No constant pool entries required.
 5871 instruct loadConL34(iRegLdst dst, immL34 src) %{
 5872   match(Set dst src);
 5873   // This macro is valid only in Power 10 and up, but adding the following predicate here
 5874   // caused a build error, so we comment it out for now.
 5875   // predicate(PowerArchitecturePPC64 >= 10);
 5876   ins_cost(DEFAULT_COST+1);
 5877 
 5878   format %{ "PLI     $dst, $src \t// long" %}
 5879   size(8);
 5880   ins_encode %{
 5881     assert( ((intptr_t)(__ pc()) & 0x3c) != 0x3c, "Bad alignment for prefixed instruction at " INTPTR_FORMAT, (intptr_t)(__ pc()));
 5882     __ pli($dst$$Register, $src$$constant);
 5883   %}
 5884   ins_pipe(pipe_class_default);
 5885   ins_alignment(2);
 5886 %}
 5887 
 5888 // Load long constant 0x????000000000000.
 5889 instruct loadConLhighest16_Ex(iRegLdst dst, immLhighest16 src) %{
 5890   match(Set dst src);
 5891   ins_cost(DEFAULT_COST);
 5892 
 5893   expand %{
 5894     immL32hi16 srcHi %{ _opnds[1]->constant() >> 32 /*& 0xFFFF0000L */%}
 5895     immI shift32 %{ 32 %}
 5896     iRegLdst tmpL;
 5897     loadConL32hi16(tmpL, srcHi);
 5898     lshiftL_regL_immI(dst, tmpL, shift32);
 5899   %}
 5900 %}
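
// Example (sketch only): the constant 0xABCD000000000000 is built as
//   LIS  tmp, 0xABCD   // tmp = 0xFFFFFFFFABCD0000 (LIS sign-extends)
//   tmp << 32          // lshiftL_regL_immI: 0xABCD000000000000
// The sign extension introduced by LIS is shifted out again by the shift by 32.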
 5901 
 5902 // Expand node for constant pool load: small offset.
 5903 instruct loadConL(iRegLdst dst, immL src, iRegLdst toc) %{
 5904   effect(DEF dst, USE src, USE toc);
 5905   ins_cost(MEMORY_REF_COST);
 5906 
 5907   ins_num_consts(1);
 5908   // Needed so that CallDynamicJavaDirect can compute the address of this
 5909   // instruction for relocation.
 5910   ins_field_cbuf_insts_offset(int);
 5911 
 5912   format %{ "LD      $dst, offset, $toc \t// load long $src from TOC" %}
 5913   size(4);
 5914   ins_encode( enc_load_long_constL(dst, src, toc) );
 5915   ins_pipe(pipe_class_memory);
 5916 %}
 5917 
 5918 // Expand node for constant pool load: large offset.
 5919 instruct loadConL_hi(iRegLdst dst, immL src, iRegLdst toc) %{
 5920   effect(DEF dst, USE src, USE toc);
 5921   predicate(false);
 5922 
 5923   ins_num_consts(1);
 5924   ins_field_const_toc_offset(int);
 5925   // Needed so that CallDynamicJavaDirect can compute the address of this
 5926   // instruction for relocation.
 5927   ins_field_cbuf_insts_offset(int);
 5928 
 5929   format %{ "ADDIS   $dst, $toc, offset \t// load long $src from TOC (hi)" %}
 5930   size(4);
 5931   ins_encode( enc_load_long_constL_hi(dst, toc, src) );
 5932   ins_pipe(pipe_class_default);
 5933 %}
 5934 
 5935 // Expand node for constant pool load: large offset.
 5936 // No constant pool entries required.
 5937 instruct loadConL_lo(iRegLdst dst, immL src, iRegLdst base) %{
 5938   effect(DEF dst, USE src, USE base);
 5939   predicate(false);
 5940 
 5941   ins_field_const_toc_offset_hi_node(loadConL_hiNode*);
 5942 
 5943   format %{ "LD      $dst, offset, $base \t// load long $src from TOC (lo)" %}
 5944   size(4);
 5945   ins_encode %{
 5946     int offset = ra_->C->output()->in_scratch_emit_size() ? 0 : _const_toc_offset_hi_node->_const_toc_offset;
 5947     __ ld($dst$$Register, MacroAssembler::largeoffset_si16_si16_lo(offset), $base$$Register);
 5948   %}
 5949   ins_pipe(pipe_class_memory);
 5950 %}
 5951 
// Load long constant from constant table. Postalloc expanded in case an
// offset > 16 bits is needed.
 5954 // Adlc adds toc node MachConstantTableBase.
 5955 instruct loadConL_Ex(iRegLdst dst, immL src) %{
 5956   match(Set dst src);
 5957   ins_cost(MEMORY_REF_COST);
 5958 
 5959   format %{ "LD      $dst, offset, $constanttablebase\t// load long $src from table, postalloc expanded" %}
 5960   // We can not inline the enc_class for the expand as that does not support constanttablebase.
 5961   postalloc_expand( postalloc_expand_load_long_constant(dst, src, constanttablebase) );
 5962 %}
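
// Rough shape after postalloc expansion (illustration only, assuming the
// expansion chooses between the small- and large-offset forms above):
//   small TOC offset:  LD    dst, offset(toc)          // loadConL
//   large TOC offset:  ADDIS tmp, toc, offset.hi       // loadConL_hi
//                      LD    dst, offset.lo(tmp)       // loadConL_lo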
 5963 
 5964 // Load NULL as compressed oop.
 5965 instruct loadConN0(iRegNdst dst, immN_0 src) %{
 5966   match(Set dst src);
 5967   ins_cost(DEFAULT_COST);
 5968 
 5969   format %{ "LI      $dst, $src \t// compressed ptr" %}
 5970   size(4);
 5971   ins_encode %{
 5972     __ li($dst$$Register, 0);
 5973   %}
 5974   ins_pipe(pipe_class_default);
 5975 %}
 5976 
 5977 // Load hi part of compressed oop constant.
 5978 instruct loadConN_hi(iRegNdst dst, immN src) %{
 5979   effect(DEF dst, USE src);
 5980   ins_cost(DEFAULT_COST);
 5981 
 5982   format %{ "LIS     $dst, $src \t// narrow oop hi" %}
 5983   size(4);
 5984   ins_encode %{
 5985     __ lis($dst$$Register, 0); // Will get patched.
 5986   %}
 5987   ins_pipe(pipe_class_default);
 5988 %}
 5989 
 5990 // Add lo part of compressed oop constant to already loaded hi part.
 5991 instruct loadConN_lo(iRegNdst dst, iRegNsrc src1, immN src2) %{
 5992   effect(DEF dst, USE src1, USE src2);
 5993   ins_cost(DEFAULT_COST);
 5994 
 5995   format %{ "ORI     $dst, $src1, $src2 \t// narrow oop lo" %}
 5996   size(4);
 5997   ins_encode %{
 5998     AddressLiteral addrlit = __ constant_oop_address((jobject)$src2$$constant);
 5999     __ relocate(addrlit.rspec(), /*compressed format*/ 1);
 6000     __ ori($dst$$Register, $src1$$Register, 0); // Will get patched.
 6001   %}
 6002   ins_pipe(pipe_class_default);
 6003 %}
 6004 
 6005 instruct rldicl(iRegLdst dst, iRegLsrc src, immI16 shift, immI16 mask_begin) %{
 6006   effect(DEF dst, USE src, USE shift, USE mask_begin);
 6007 
 6008   size(4);
 6009   ins_encode %{
 6010     __ rldicl($dst$$Register, $src$$Register, $shift$$constant, $mask_begin$$constant);
 6011   %}
 6012   ins_pipe(pipe_class_default);
 6013 %}
 6014 
 6015 // Needed to postalloc expand loadConN: ConN is loaded as ConI
 6016 // leaving the upper 32 bits with sign-extension bits.
 6017 // This clears these bits: dst = src & 0xFFFFFFFF.
 6018 // TODO: Eventually call this maskN_regN_FFFFFFFF.
 6019 instruct clearMs32b(iRegNdst dst, iRegNsrc src) %{
 6020   effect(DEF dst, USE src);
 6021   predicate(false);
 6022 
 6023   format %{ "MASK    $dst, $src, 0xFFFFFFFF" %} // mask
 6024   size(4);
 6025   ins_encode %{
 6026     __ clrldi($dst$$Register, $src$$Register, 0x20);
 6027   %}
 6028   ins_pipe(pipe_class_default);
 6029 %}
 6030 
 6031 // Optimize DecodeN for disjoint base.
 6032 // Load base of compressed oops into a register
 6033 instruct loadBase(iRegLdst dst) %{
 6034   effect(DEF dst);
 6035 
 6036   format %{ "LoadConst $dst, heapbase" %}
 6037   ins_encode %{
 6038     __ load_const_optimized($dst$$Register, CompressedOops::base(), R0);
 6039   %}
 6040   ins_pipe(pipe_class_default);
 6041 %}
 6042 
// Loading ConN must be postalloc expanded so that edges between
// the nodes are safe. They must not interfere with a safepoint.
// GL TODO: This needs three instructions; better to put this into the constant pool.
 6046 instruct loadConN_Ex(iRegNdst dst, immN src) %{
 6047   match(Set dst src);
 6048   ins_cost(DEFAULT_COST*2);
 6049 
 6050   format %{ "LoadN   $dst, $src \t// postalloc expanded" %} // mask
 6051   postalloc_expand %{
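    // Expansion chain: m1 loads the hi part (LIS), m2 ORs in the lo part (ORI),
    // and m3 clears the sign-extension bits in the upper 32 bits (see clearMs32b).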
 6052     MachNode *m1 = new loadConN_hiNode();
 6053     MachNode *m2 = new loadConN_loNode();
 6054     MachNode *m3 = new clearMs32bNode();
 6055     m1->add_req(NULL);
 6056     m2->add_req(NULL, m1);
 6057     m3->add_req(NULL, m2);
 6058     m1->_opnds[0] = op_dst;
 6059     m1->_opnds[1] = op_src;
 6060     m2->_opnds[0] = op_dst;
 6061     m2->_opnds[1] = op_dst;
 6062     m2->_opnds[2] = op_src;
 6063     m3->_opnds[0] = op_dst;
 6064     m3->_opnds[1] = op_dst;
 6065     ra_->set_pair(m1->_idx, ra_->get_reg_second(this), ra_->get_reg_first(this));
 6066     ra_->set_pair(m2->_idx, ra_->get_reg_second(this), ra_->get_reg_first(this));
 6067     ra_->set_pair(m3->_idx, ra_->get_reg_second(this), ra_->get_reg_first(this));
 6068     nodes->push(m1);
 6069     nodes->push(m2);
 6070     nodes->push(m3);
 6071   %}
 6072 %}
 6073 
 6074 // We have seen a safepoint between the hi and lo parts, and this node was handled
 6075 // as an oop. Therefore this needs a match rule so that build_oop_map knows this is
 6076 // not a narrow oop.
 6077 instruct loadConNKlass_hi(iRegNdst dst, immNKlass_NM src) %{
 6078   match(Set dst src);
 6079   effect(DEF dst, USE src);
 6080   ins_cost(DEFAULT_COST);
 6081 
 6082   format %{ "LIS     $dst, $src \t// narrow klass hi" %}
 6083   size(4);
 6084   ins_encode %{
 6085     intptr_t Csrc = CompressedKlassPointers::encode((Klass *)$src$$constant);
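    // Load the upper halfword of the encoded klass into bits 16..31; LIS sign-extends the result.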
 6086     __ lis($dst$$Register, (int)(short)((Csrc >> 16) & 0xffff));
 6087   %}
 6088   ins_pipe(pipe_class_default);
 6089 %}
 6090 
// Like loadConNKlass_hi, this must be recognized as a narrow klass, not an oop!
 6092 instruct loadConNKlass_mask(iRegNdst dst, immNKlass_NM src1, iRegNsrc src2) %{
 6093   match(Set dst src1);
 6094   effect(TEMP src2);
 6095   ins_cost(DEFAULT_COST);
 6096 
 6097   format %{ "MASK    $dst, $src2, 0xFFFFFFFF" %} // mask
 6098   size(4);
 6099   ins_encode %{
 6100     __ clrldi($dst$$Register, $src2$$Register, 0x20);
 6101   %}
 6102   ins_pipe(pipe_class_default);
 6103 %}
 6104 
 6105 // This needs a match rule so that build_oop_map knows this is
 6106 // not a narrow oop.
 6107 instruct loadConNKlass_lo(iRegNdst dst, immNKlass_NM src1, iRegNsrc src2) %{
 6108   match(Set dst src1);
 6109   effect(TEMP src2);
 6110   ins_cost(DEFAULT_COST);
 6111 
 6112   format %{ "ORI     $dst, $src1, $src2 \t// narrow klass lo" %}
 6113   size(4);
 6114   ins_encode %{
 6115     // Notify OOP recorder (don't need the relocation)
 6116     AddressLiteral md = __ constant_metadata_address((Klass*)$src1$$constant);
 6117     intptr_t Csrc = CompressedKlassPointers::encode((Klass*)md.value());
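    // $src2 already holds the hi (possibly masked) part; OR in the low halfword of the encoded klass.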
 6118     __ ori($dst$$Register, $src2$$Register, Csrc & 0xffff);
 6119   %}
 6120   ins_pipe(pipe_class_default);
 6121 %}
 6122 
 6123 // Loading ConNKlass must be postalloc expanded so that edges between
 6124 // the nodes are safe. They may not interfere with a safepoint.
 6125 instruct loadConNKlass_Ex(iRegNdst dst, immNKlass src) %{
 6126   match(Set dst src);
 6127   ins_cost(DEFAULT_COST*2);
 6128 
 6129   format %{ "LoadN   $dst, $src \t// postalloc expanded" %} // mask
 6130   postalloc_expand %{
 6131     // Load high bits into register. Sign extended.
 6132     MachNode *m1 = new loadConNKlass_hiNode();
 6133     m1->add_req(NULL);
 6134     m1->_opnds[0] = op_dst;
 6135     m1->_opnds[1] = op_src;
 6136     ra_->set_pair(m1->_idx, ra_->get_reg_second(this), ra_->get_reg_first(this));
 6137     nodes->push(m1);
 6138 
 6139     MachNode *m2 = m1;
 6140     if (!Assembler::is_uimm((jlong)CompressedKlassPointers::encode((Klass *)op_src->constant()), 31)) {
      // The value might have been sign-extended with ones. Mask out these bits.
 6142       m2 = new loadConNKlass_maskNode();
 6143       m2->add_req(NULL, m1);
 6144       m2->_opnds[0] = op_dst;
 6145       m2->_opnds[1] = op_src;
 6146       m2->_opnds[2] = op_dst;
 6147       ra_->set_pair(m2->_idx, ra_->get_reg_second(this), ra_->get_reg_first(this));
 6148       nodes->push(m2);
 6149     }
 6150 
 6151     MachNode *m3 = new loadConNKlass_loNode();
 6152     m3->add_req(NULL, m2);
 6153     m3->_opnds[0] = op_dst;
 6154     m3->_opnds[1] = op_src;
 6155     m3->_opnds[2] = op_dst;
 6156     ra_->set_pair(m3->_idx, ra_->get_reg_second(this), ra_->get_reg_first(this));
 6157     nodes->push(m3);
 6158   %}
 6159 %}
 6160 
 6161 // 0x1 is used in object initialization (initial object header).
 6162 // No constant pool entries required.
 6163 instruct loadConP0or1(iRegPdst dst, immP_0or1 src) %{
 6164   match(Set dst src);
 6165 
 6166   format %{ "LI      $dst, $src \t// ptr" %}
 6167   size(4);
 6168   ins_encode %{
 6169     __ li($dst$$Register, (int)((short)($src$$constant & 0xFFFF)));
 6170   %}
 6171   ins_pipe(pipe_class_default);
 6172 %}
 6173 
 6174 // Expand node for constant pool load: small offset.
 6175 // The match rule is needed to generate the correct bottom_type(),
 6176 // however this node should never match. The use of predicate is not
 6177 // possible since ADLC forbids predicates for chain rules. The higher
 6178 // costs do not prevent matching in this case. For that reason the
 6179 // operand immP_NM with predicate(false) is used.
 6180 instruct loadConP(iRegPdst dst, immP_NM src, iRegLdst toc) %{
 6181   match(Set dst src);
 6182   effect(TEMP toc);
 6183 
 6184   ins_num_consts(1);
 6185 
 6186   format %{ "LD      $dst, offset, $toc \t// load ptr $src from TOC" %}
 6187   size(4);
 6188   ins_encode( enc_load_long_constP(dst, src, toc) );
 6189   ins_pipe(pipe_class_memory);
 6190 %}
 6191 
 6192 // Expand node for constant pool load: large offset.
 6193 instruct loadConP_hi(iRegPdst dst, immP_NM src, iRegLdst toc) %{
 6194   effect(DEF dst, USE src, USE toc);
 6195   predicate(false);
 6196 
 6197   ins_num_consts(1);
 6198   ins_field_const_toc_offset(int);
 6199 
 6200   format %{ "ADDIS   $dst, $toc, offset \t// load ptr $src from TOC (hi)" %}
 6201   size(4);
 6202   ins_encode( enc_load_long_constP_hi(dst, src, toc) );
 6203   ins_pipe(pipe_class_default);
 6204 %}
 6205 
 6206 // Expand node for constant pool load: large offset.
 6207 instruct loadConP_lo(iRegPdst dst, immP_NM src, iRegLdst base) %{
 6208   match(Set dst src);
 6209   effect(TEMP base);
 6210 
 6211   ins_field_const_toc_offset_hi_node(loadConP_hiNode*);
 6212 
 6213   format %{ "LD      $dst, offset, $base \t// load ptr $src from TOC (lo)" %}
 6214   size(4);
 6215   ins_encode %{
 6216     int offset = ra_->C->output()->in_scratch_emit_size() ? 0 : _const_toc_offset_hi_node->_const_toc_offset;
 6217     __ ld($dst$$Register, MacroAssembler::largeoffset_si16_si16_lo(offset), $base$$Register);
 6218   %}
 6219   ins_pipe(pipe_class_memory);
 6220 %}
 6221 
 6222 // Load pointer constant from constant table. Expand in case an
// offset > 16 bits is needed.
 6224 // Adlc adds toc node MachConstantTableBase.
 6225 instruct loadConP_Ex(iRegPdst dst, immP src) %{
 6226   match(Set dst src);
 6227   ins_cost(MEMORY_REF_COST);
 6228 
 6229   // This rule does not use "expand" because then
 6230   // the result type is not known to be an Oop.  An ADLC
 6231   // enhancement will be needed to make that work - not worth it!
 6232 
 6233   // If this instruction rematerializes, it prolongs the live range
 6234   // of the toc node, causing illegal graphs.
 6235   // assert(edge_from_to(_reg_node[reg_lo],def)) fails in verify_good_schedule().
 6236   ins_cannot_rematerialize(true);
 6237 
 6238   format %{ "LD    $dst, offset, $constanttablebase \t//  load ptr $src from table, postalloc expanded" %}
 6239   postalloc_expand( postalloc_expand_load_ptr_constant(dst, src, constanttablebase) );
 6240 %}
 6241 
 6242 // Expand node for constant pool load: small offset.
 6243 instruct loadConF(regF dst, immF src, iRegLdst toc) %{
 6244   effect(DEF dst, USE src, USE toc);
 6245   ins_cost(MEMORY_REF_COST);
 6246 
 6247   ins_num_consts(1);
 6248 
 6249   format %{ "LFS     $dst, offset, $toc \t// load float $src from TOC" %}
 6250   size(4);
 6251   ins_encode %{
 6252     address float_address = __ float_constant($src$$constant);
 6253     if (float_address == NULL) {
 6254       ciEnv::current()->record_out_of_memory_failure();
 6255       return;
 6256     }
 6257     __ lfs($dst$$FloatRegister, __ offset_to_method_toc(float_address), $toc$$Register);
 6258   %}
 6259   ins_pipe(pipe_class_memory);
 6260 %}
 6261 
 6262 // Expand node for constant pool load: large offset.
 6263 instruct loadConFComp(regF dst, immF src, iRegLdst toc) %{
 6264   effect(DEF dst, USE src, USE toc);
 6265   ins_cost(MEMORY_REF_COST);
 6266 
 6267   ins_num_consts(1);
 6268 
 6269   format %{ "ADDIS   $toc, $toc, offset_hi\n\t"
 6270             "LFS     $dst, offset_lo, $toc \t// load float $src from TOC (hi/lo)\n\t"
 6271             "ADDIS   $toc, $toc, -offset_hi"%}
 6272   size(12);
 6273   ins_encode %{
 6274     FloatRegister Rdst    = $dst$$FloatRegister;
 6275     Register Rtoc         = $toc$$Register;
 6276     address float_address = __ float_constant($src$$constant);
 6277     if (float_address == NULL) {
 6278       ciEnv::current()->record_out_of_memory_failure();
 6279       return;
 6280     }
 6281     int offset            = __ offset_to_method_toc(float_address);
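    // Split the TOC offset so that offset == hi * 2^16 + lo with lo in [-2^15, 2^15):
    // both the ADDIS immediate and the LFS displacement then fit into signed 16 bits,
    // and the trailing ADDIS with -hi restores $toc.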
 6282     int hi = (offset + (1<<15))>>16;
 6283     int lo = offset - hi * (1<<16);
 6284 
 6285     __ addis(Rtoc, Rtoc, hi);
 6286     __ lfs(Rdst, lo, Rtoc);
 6287     __ addis(Rtoc, Rtoc, -hi);
 6288   %}
 6289   ins_pipe(pipe_class_memory);
 6290 %}
 6291 
 6292 // Adlc adds toc node MachConstantTableBase.
 6293 instruct loadConF_Ex(regF dst, immF src) %{
 6294   match(Set dst src);
 6295   ins_cost(MEMORY_REF_COST);
 6296 
 6297   // See loadConP.
 6298   ins_cannot_rematerialize(true);
 6299 
 6300   format %{ "LFS     $dst, offset, $constanttablebase \t// load $src from table, postalloc expanded" %}
 6301   postalloc_expand( postalloc_expand_load_float_constant(dst, src, constanttablebase) );
 6302 %}
 6303 
 6304 // Expand node for constant pool load: small offset.
 6305 instruct loadConD(regD dst, immD src, iRegLdst toc) %{
 6306   effect(DEF dst, USE src, USE toc);
 6307   ins_cost(MEMORY_REF_COST);
 6308 
 6309   ins_num_consts(1);
 6310 
 6311   format %{ "LFD     $dst, offset, $toc \t// load double $src from TOC" %}
 6312   size(4);
 6313   ins_encode %{
 6314     address float_address = __ double_constant($src$$constant);
 6315     if (float_address == NULL) {
 6316       ciEnv::current()->record_out_of_memory_failure();
 6317       return;
 6318     }
 6319     int offset =  __ offset_to_method_toc(float_address);
 6320     __ lfd($dst$$FloatRegister, offset, $toc$$Register);
 6321   %}
 6322   ins_pipe(pipe_class_memory);
 6323 %}
 6324 
 6325 // Expand node for constant pool load: large offset.
 6326 instruct loadConDComp(regD dst, immD src, iRegLdst toc) %{
 6327   effect(DEF dst, USE src, USE toc);
 6328   ins_cost(MEMORY_REF_COST);
 6329 
 6330   ins_num_consts(1);
 6331 
 6332   format %{ "ADDIS   $toc, $toc, offset_hi\n\t"
 6333             "LFD     $dst, offset_lo, $toc \t// load double $src from TOC (hi/lo)\n\t"
 6334             "ADDIS   $toc, $toc, -offset_hi" %}
 6335   size(12);
 6336   ins_encode %{
 6337     FloatRegister Rdst    = $dst$$FloatRegister;
 6338     Register      Rtoc    = $toc$$Register;
 6339     address float_address = __ double_constant($src$$constant);
 6340     if (float_address == NULL) {
 6341       ciEnv::current()->record_out_of_memory_failure();
 6342       return;
 6343     }
 6344     int offset = __ offset_to_method_toc(float_address);
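    // Same hi/lo split as in loadConFComp: hi and lo both fit signed 16-bit
    // immediates, and the trailing ADDIS with -hi restores $toc.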
 6345     int hi = (offset + (1<<15))>>16;
 6346     int lo = offset - hi * (1<<16);
 6347 
 6348     __ addis(Rtoc, Rtoc, hi);
 6349     __ lfd(Rdst, lo, Rtoc);
 6350     __ addis(Rtoc, Rtoc, -hi);
 6351   %}
 6352   ins_pipe(pipe_class_memory);
 6353 %}
 6354 
 6355 // Adlc adds toc node MachConstantTableBase.
 6356 instruct loadConD_Ex(regD dst, immD src) %{
 6357   match(Set dst src);
 6358   ins_cost(MEMORY_REF_COST);
 6359 
 6360   // See loadConP.
 6361   ins_cannot_rematerialize(true);
 6362 
 6363   format %{ "ConD    $dst, offset, $constanttablebase \t// load $src from table, postalloc expanded" %}
 6364   postalloc_expand( postalloc_expand_load_double_constant(dst, src, constanttablebase) );
 6365 %}
 6366 
 6367 // Prefetch instructions.
 6368 // Must be safe to execute with invalid address (cannot fault).
 6369 
 6370 // Special prefetch versions which use the dcbz instruction.
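// dcbz establishes the cache block and zeroes it without reading it from memory, so it
// acts as a write-many prefetch that also pre-zeroes the newly allocated memory
// (used when AllocatePrefetchStyle == 3).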
 6371 instruct prefetch_alloc_zero(indirectMemory mem, iRegLsrc src) %{
 6372   match(PrefetchAllocation (AddP mem src));
 6373   predicate(AllocatePrefetchStyle == 3);
 6374   ins_cost(MEMORY_REF_COST);
 6375 
 6376   format %{ "PREFETCH $mem, 2, $src \t// Prefetch write-many with zero" %}
 6377   size(4);
 6378   ins_encode %{
 6379     __ dcbz($src$$Register, $mem$$base$$Register);
 6380   %}
 6381   ins_pipe(pipe_class_memory);
 6382 %}
 6383 
 6384 instruct prefetch_alloc_zero_no_offset(indirectMemory mem) %{
 6385   match(PrefetchAllocation mem);
 6386   predicate(AllocatePrefetchStyle == 3);
 6387   ins_cost(MEMORY_REF_COST);
 6388 
 6389   format %{ "PREFETCH $mem, 2 \t// Prefetch write-many with zero" %}
 6390   size(4);
 6391   ins_encode %{
 6392     __ dcbz($mem$$base$$Register);
 6393   %}
 6394   ins_pipe(pipe_class_memory);
 6395 %}
 6396 
 6397 instruct prefetch_alloc(indirectMemory mem, iRegLsrc src) %{
 6398   match(PrefetchAllocation (AddP mem src));
 6399   predicate(AllocatePrefetchStyle != 3);
 6400   ins_cost(MEMORY_REF_COST);
 6401 
 6402   format %{ "PREFETCH $mem, 2, $src \t// Prefetch write-many" %}
 6403   size(4);
 6404   ins_encode %{
 6405     __ dcbtst($src$$Register, $mem$$base$$Register);
 6406   %}
 6407   ins_pipe(pipe_class_memory);
 6408 %}
 6409 
 6410 instruct prefetch_alloc_no_offset(indirectMemory mem) %{
 6411   match(PrefetchAllocation mem);
 6412   predicate(AllocatePrefetchStyle != 3);
 6413   ins_cost(MEMORY_REF_COST);
 6414 
 6415   format %{ "PREFETCH $mem, 2 \t// Prefetch write-many" %}
 6416   size(4);
 6417   ins_encode %{
 6418     __ dcbtst($mem$$base$$Register);
 6419   %}
 6420   ins_pipe(pipe_class_memory);
 6421 %}
 6422 
 6423 //----------Store Instructions-------------------------------------------------
 6424 
 6425 // Store Byte
 6426 instruct storeB(memory mem, iRegIsrc src) %{
 6427   match(Set mem (StoreB mem src));
 6428   ins_cost(MEMORY_REF_COST);
 6429 
 6430   format %{ "STB     $src, $mem \t// byte" %}
 6431   size(4);
 6432   ins_encode %{
 6433     int Idisp = $mem$$disp + frame_slots_bias($mem$$base, ra_);
 6434     __ stb($src$$Register, Idisp, $mem$$base$$Register);
 6435   %}
 6436   ins_pipe(pipe_class_memory);
 6437 %}
 6438 
 6439 // Store Char/Short
 6440 instruct storeC(memory mem, iRegIsrc src) %{
 6441   match(Set mem (StoreC mem src));
 6442   ins_cost(MEMORY_REF_COST);
 6443 
 6444   format %{ "STH     $src, $mem \t// short" %}
 6445   size(4);
 6446   ins_encode %{
 6447     int Idisp = $mem$$disp + frame_slots_bias($mem$$base, ra_);
 6448     __ sth($src$$Register, Idisp, $mem$$base$$Register);
 6449   %}
 6450   ins_pipe(pipe_class_memory);
 6451 %}
 6452 
 6453 // Store Integer
 6454 instruct storeI(memory mem, iRegIsrc src) %{
 6455   match(Set mem (StoreI mem src));
 6456   ins_cost(MEMORY_REF_COST);
 6457 
 6458   format %{ "STW     $src, $mem" %}
 6459   size(4);
 6460   ins_encode( enc_stw(src, mem) );
 6461   ins_pipe(pipe_class_memory);
 6462 %}
 6463 
 6464 // ConvL2I + StoreI.
 6465 instruct storeI_convL2I(memory mem, iRegLsrc src) %{
 6466   match(Set mem (StoreI mem (ConvL2I src)));
 6467   ins_cost(MEMORY_REF_COST);
 6468 
 6469   format %{ "STW     l2i($src), $mem" %}
 6470   size(4);
 6471   ins_encode( enc_stw(src, mem) );
 6472   ins_pipe(pipe_class_memory);
 6473 %}
 6474 
 6475 // Store Long
 6476 instruct storeL(memoryAlg4 mem, iRegLsrc src) %{
 6477   match(Set mem (StoreL mem src));
 6478   ins_cost(MEMORY_REF_COST);
 6479 
 6480   format %{ "STD     $src, $mem \t// long" %}
 6481   size(4);
 6482   ins_encode( enc_std(src, mem) );
 6483   ins_pipe(pipe_class_memory);
 6484 %}
 6485 
 6486 // Store super word nodes.
 6487 
 6488 // Store Aligned Packed Byte long register to memory
 6489 instruct storeA8B(memoryAlg4 mem, iRegLsrc src) %{
 6490   predicate(n->as_StoreVector()->memory_size() == 8);
 6491   match(Set mem (StoreVector mem src));
 6492   ins_cost(MEMORY_REF_COST);
 6493 
 6494   format %{ "STD     $mem, $src \t// packed8B" %}
 6495   size(4);
 6496   ins_encode( enc_std(src, mem) );
 6497   ins_pipe(pipe_class_memory);
 6498 %}
 6499 
// Store Packed Byte vector register (16 bytes) to memory
 6501 instruct storeV16(indirect mem, vecX src) %{
 6502   predicate(n->as_StoreVector()->memory_size() == 16);
 6503   match(Set mem (StoreVector mem src));
 6504   ins_cost(MEMORY_REF_COST);
 6505 
 6506   format %{ "STXVD2X     $mem, $src \t// store 16-byte Vector" %}
 6507   size(4);
 6508   ins_encode %{
 6509     __ stxvd2x($src$$VectorSRegister, $mem$$Register);
 6510   %}
 6511   ins_pipe(pipe_class_default);
 6512 %}
 6513 
// Reinterpret: only one vector size is used, either L or X.
 6515 instruct reinterpretL(iRegLdst dst) %{
 6516   match(Set dst (VectorReinterpret dst));
 6517   ins_cost(0);
 6518   format %{ "reinterpret $dst" %}
 6519   ins_encode( /*empty*/ );
 6520   ins_pipe(pipe_class_empty);
 6521 %}
 6522 
 6523 instruct reinterpretX(vecX dst) %{
 6524   match(Set dst (VectorReinterpret dst));
 6525   ins_cost(0);
 6526   format %{ "reinterpret $dst" %}
 6527   ins_encode( /*empty*/ );
 6528   ins_pipe(pipe_class_empty);
 6529 %}
 6530 
 6531 // Store Compressed Oop
 6532 instruct storeN(memory dst, iRegN_P2N src) %{
 6533   match(Set dst (StoreN dst src));
 6534   ins_cost(MEMORY_REF_COST);
 6535 
 6536   format %{ "STW     $src, $dst \t// compressed oop" %}
 6537   size(4);
 6538   ins_encode( enc_stw(src, dst) );
 6539   ins_pipe(pipe_class_memory);
 6540 %}
 6541 
// Store Compressed Klass
 6543 instruct storeNKlass(memory dst, iRegN_P2N src) %{
 6544   match(Set dst (StoreNKlass dst src));
 6545   ins_cost(MEMORY_REF_COST);
 6546 
 6547   format %{ "STW     $src, $dst \t// compressed klass" %}
 6548   size(4);
 6549   ins_encode( enc_stw(src, dst) );
 6550   ins_pipe(pipe_class_memory);
 6551 %}
 6552 
 6553 // Store Pointer
 6554 instruct storeP(memoryAlg4 dst, iRegPsrc src) %{
 6555   match(Set dst (StoreP dst src));
 6556   predicate(n->as_Store()->barrier_data() == 0);
 6557   ins_cost(MEMORY_REF_COST);
 6558 
 6559   format %{ "STD     $src, $dst \t// ptr" %}
 6560   size(4);
 6561   ins_encode( enc_std(src, dst) );
 6562   ins_pipe(pipe_class_memory);
 6563 %}
 6564 
 6565 // Store Float
 6566 instruct storeF(memory mem, regF src) %{
 6567   match(Set mem (StoreF mem src));
 6568   ins_cost(MEMORY_REF_COST);
 6569 
 6570   format %{ "STFS    $src, $mem" %}
 6571   size(4);
 6572   ins_encode( enc_stfs(src, mem) );
 6573   ins_pipe(pipe_class_memory);
 6574 %}
 6575 
 6576 // Store Double
 6577 instruct storeD(memory mem, regD src) %{
 6578   match(Set mem (StoreD mem src));
 6579   ins_cost(MEMORY_REF_COST);
 6580 
 6581   format %{ "STFD    $src, $mem" %}
 6582   size(4);
 6583   ins_encode( enc_stfd(src, mem) );
 6584   ins_pipe(pipe_class_memory);
 6585 %}
 6586 
 6587 //----------Store Instructions With Zeros--------------------------------------
 6588 
 6589 instruct storeCM(memory mem, immI_0 zero) %{
 6590   match(Set mem (StoreCM mem zero));
 6591   ins_cost(MEMORY_REF_COST);
 6592 
 6593   format %{ "STB     #0, $mem \t// CMS card-mark byte store" %}
 6594   size(8);
 6595   ins_encode %{
 6596     __ li(R0, 0);
    // No release barrier: Oops are allowed to become visible after marking.
 6598     guarantee($mem$$base$$Register != R1_SP, "use frame_slots_bias");
 6599     __ stb(R0, $mem$$disp, $mem$$base$$Register);
 6600   %}
 6601   ins_pipe(pipe_class_memory);
 6602 %}
 6603 
 6604 // Convert oop pointer into compressed form.
 6605 
 6606 // Nodes for postalloc expand.
 6607 
 6608 // Shift node for expand.
 6609 instruct encodeP_shift(iRegNdst dst, iRegNsrc src) %{
 6610   // The match rule is needed to make it a 'MachTypeNode'!
 6611   match(Set dst (EncodeP src));
 6612   predicate(false);
 6613 
 6614   format %{ "SRDI    $dst, $src, 3 \t// encode" %}
 6615   size(4);
 6616   ins_encode %{
 6617     __ srdi($dst$$Register, $src$$Register, CompressedOops::shift() & 0x3f);
 6618   %}
 6619   ins_pipe(pipe_class_default);
 6620 %}
 6621 
// Subtract node for expand.
 6623 instruct encodeP_sub(iRegPdst dst, iRegPdst src) %{
 6624   // The match rule is needed to make it a 'MachTypeNode'!
 6625   match(Set dst (EncodeP src));
 6626   predicate(false);
 6627 
 6628   format %{ "SUB     $dst, $src, oop_base \t// encode" %}
 6629   ins_encode %{
 6630     __ sub_const_optimized($dst$$Register, $src$$Register, CompressedOops::base(), R0);
 6631   %}
 6632   ins_pipe(pipe_class_default);
 6633 %}
 6634 
 6635 // Conditional sub base.
 6636 instruct cond_sub_base(iRegNdst dst, flagsRegSrc crx, iRegPsrc src1) %{
 6637   // The match rule is needed to make it a 'MachTypeNode'!
 6638   match(Set dst (EncodeP (Binary crx src1)));
 6639   predicate(false);
 6640 
 6641   format %{ "BEQ     $crx, done\n\t"
 6642             "SUB     $dst, $src1, heapbase \t// encode: subtract base if != NULL\n"
 6643             "done:" %}
 6644   ins_encode %{
 6645     Label done;
 6646     __ beq($crx$$CondRegister, done);
 6647     __ sub_const_optimized($dst$$Register, $src1$$Register, CompressedOops::base(), R0);
 6648     __ bind(done);
 6649   %}
 6650   ins_pipe(pipe_class_default);
 6651 %}
 6652 
// Power7 can use the isel instruction.
 6654 instruct cond_set_0_oop(iRegNdst dst, flagsRegSrc crx, iRegPsrc src1) %{
 6655   // The match rule is needed to make it a 'MachTypeNode'!
 6656   match(Set dst (EncodeP (Binary crx src1)));
 6657   predicate(false);
 6658 
 6659   format %{ "CMOVE   $dst, $crx eq, 0, $src1 \t// encode: preserve 0" %}
 6660   size(4);
 6661   ins_encode %{
 6662     // This is a Power7 instruction for which no machine description exists.
 6663     __ isel_0($dst$$Register, $crx$$CondRegister, Assembler::equal, $src1$$Register);
 6664   %}
 6665   ins_pipe(pipe_class_default);
 6666 %}
 6667 
 6668 // Disjoint narrow oop base.
 6669 instruct encodeP_Disjoint(iRegNdst dst, iRegPsrc src) %{
 6670   match(Set dst (EncodeP src));
 6671   predicate(CompressedOops::base_disjoint());
 6672 
 6673   format %{ "EXTRDI  $dst, $src, #32, #3 \t// encode with disjoint base" %}
 6674   size(4);
 6675   ins_encode %{
 6676     __ rldicl($dst$$Register, $src$$Register, 64-CompressedOops::shift(), 32);
 6677   %}
 6678   ins_pipe(pipe_class_default);
 6679 %}
 6680 
 6681 // shift != 0, base != 0
 6682 instruct encodeP_Ex(iRegNdst dst, flagsReg crx, iRegPsrc src) %{
 6683   match(Set dst (EncodeP src));
 6684   effect(TEMP crx);
 6685   predicate(n->bottom_type()->make_ptr()->ptr() != TypePtr::NotNull &&
 6686             CompressedOops::shift() != 0 &&
 6687             CompressedOops::base_overlaps());
 6688 
 6689   format %{ "EncodeP $dst, $crx, $src \t// postalloc expanded" %}
 6690   postalloc_expand( postalloc_expand_encode_oop(dst, src, crx));
 6691 %}
 6692 
 6693 // shift != 0, base != 0
 6694 instruct encodeP_not_null_Ex(iRegNdst dst, iRegPsrc src) %{
 6695   match(Set dst (EncodeP src));
 6696   predicate(n->bottom_type()->make_ptr()->ptr() == TypePtr::NotNull &&
 6697             CompressedOops::shift() != 0 &&
 6698             CompressedOops::base_overlaps());
 6699 
 6700   format %{ "EncodeP $dst, $src\t// $src != Null, postalloc expanded" %}
 6701   postalloc_expand( postalloc_expand_encode_oop_not_null(dst, src) );
 6702 %}
 6703 
 6704 // shift != 0, base == 0
 6705 // TODO: This is the same as encodeP_shift. Merge!
 6706 instruct encodeP_not_null_base_null(iRegNdst dst, iRegPsrc src) %{
 6707   match(Set dst (EncodeP src));
 6708   predicate(CompressedOops::shift() != 0 &&
            CompressedOops::base() == 0);
 6710 
 6711   format %{ "SRDI    $dst, $src, #3 \t// encodeP, $src != NULL" %}
 6712   size(4);
 6713   ins_encode %{
 6714     __ srdi($dst$$Register, $src$$Register, CompressedOops::shift() & 0x3f);
 6715   %}
 6716   ins_pipe(pipe_class_default);
 6717 %}
 6718 
 6719 // Compressed OOPs with narrow_oop_shift == 0.
 6720 // shift == 0, base == 0
 6721 instruct encodeP_narrow_oop_shift_0(iRegNdst dst, iRegPsrc src) %{
 6722   match(Set dst (EncodeP src));
 6723   predicate(CompressedOops::shift() == 0);
 6724 
 6725   format %{ "MR      $dst, $src \t// Ptr->Narrow" %}
 6726   // variable size, 0 or 4.
 6727   ins_encode %{
 6728     __ mr_if_needed($dst$$Register, $src$$Register);
 6729   %}
 6730   ins_pipe(pipe_class_default);
 6731 %}
 6732 
 6733 // Decode nodes.
 6734 
 6735 // Shift node for expand.
 6736 instruct decodeN_shift(iRegPdst dst, iRegPsrc src) %{
 6737   // The match rule is needed to make it a 'MachTypeNode'!
 6738   match(Set dst (DecodeN src));
 6739   predicate(false);
 6740 
 6741   format %{ "SLDI    $dst, $src, #3 \t// DecodeN" %}
 6742   size(4);
 6743   ins_encode %{
 6744     __ sldi($dst$$Register, $src$$Register, CompressedOops::shift());
 6745   %}
 6746   ins_pipe(pipe_class_default);
 6747 %}
 6748 
 6749 // Add node for expand.
 6750 instruct decodeN_add(iRegPdst dst, iRegPdst src) %{
 6751   // The match rule is needed to make it a 'MachTypeNode'!
 6752   match(Set dst (DecodeN src));
 6753   predicate(false);
 6754 
 6755   format %{ "ADD     $dst, $src, heapbase \t// DecodeN, add oop base" %}
 6756   ins_encode %{
 6757     __ add_const_optimized($dst$$Register, $src$$Register, CompressedOops::base(), R0);
 6758   %}
 6759   ins_pipe(pipe_class_default);
 6760 %}
 6761 
// Conditional add base for expand.
 6763 instruct cond_add_base(iRegPdst dst, flagsRegSrc crx, iRegPsrc src) %{
 6764   // The match rule is needed to make it a 'MachTypeNode'!
 6765   // NOTICE that the rule is nonsense - we just have to make sure that:
 6766   //  - _matrule->_rChild->_opType == "DecodeN" (see InstructForm::captures_bottom_type() in formssel.cpp)
 6767   //  - we have to match 'crx' to avoid an "illegal USE of non-input: flagsReg crx" error in ADLC.
 6768   match(Set dst (DecodeN (Binary crx src)));
 6769   predicate(false);
 6770 
 6771   format %{ "BEQ     $crx, done\n\t"
 6772             "ADD     $dst, $src, heapbase \t// DecodeN: add oop base if $src != NULL\n"
 6773             "done:" %}
 6774   ins_encode %{
 6775     Label done;
 6776     __ beq($crx$$CondRegister, done);
 6777     __ add_const_optimized($dst$$Register, $src$$Register, CompressedOops::base(), R0);
 6778     __ bind(done);
 6779   %}
 6780   ins_pipe(pipe_class_default);
 6781 %}
 6782 
 6783 instruct cond_set_0_ptr(iRegPdst dst, flagsRegSrc crx, iRegPsrc src1) %{
 6784   // The match rule is needed to make it a 'MachTypeNode'!
 6785   // NOTICE that the rule is nonsense - we just have to make sure that:
 6786   //  - _matrule->_rChild->_opType == "DecodeN" (see InstructForm::captures_bottom_type() in formssel.cpp)
 6787   //  - we have to match 'crx' to avoid an "illegal USE of non-input: flagsReg crx" error in ADLC.
 6788   match(Set dst (DecodeN (Binary crx src1)));
 6789   predicate(false);
 6790 
 6791   format %{ "CMOVE   $dst, $crx eq, 0, $src1 \t// decode: preserve 0" %}
 6792   size(4);
 6793   ins_encode %{
 6794     // This is a Power7 instruction for which no machine description exists.
 6795     __ isel_0($dst$$Register, $crx$$CondRegister, Assembler::equal, $src1$$Register);
 6796   %}
 6797   ins_pipe(pipe_class_default);
 6798 %}
 6799 
 6800 //  shift != 0, base != 0
 6801 instruct decodeN_Ex(iRegPdst dst, iRegNsrc src, flagsReg crx) %{
 6802   match(Set dst (DecodeN src));
 6803   predicate((n->bottom_type()->is_oopptr()->ptr() != TypePtr::NotNull &&
 6804              n->bottom_type()->is_oopptr()->ptr() != TypePtr::Constant) &&
 6805             CompressedOops::shift() != 0 &&
 6806             CompressedOops::base() != 0);
 6807   ins_cost(4 * DEFAULT_COST); // Should be more expensive than decodeN_Disjoint_isel_Ex.
 6808   effect(TEMP crx);
 6809 
 6810   format %{ "DecodeN $dst, $src \t// Kills $crx, postalloc expanded" %}
 6811   postalloc_expand( postalloc_expand_decode_oop(dst, src, crx) );
 6812 %}
 6813 
 6814 // shift != 0, base == 0
 6815 instruct decodeN_nullBase(iRegPdst dst, iRegNsrc src) %{
 6816   match(Set dst (DecodeN src));
 6817   predicate(CompressedOops::shift() != 0 &&
 6818             CompressedOops::base() == 0);
 6819 
 6820   format %{ "SLDI    $dst, $src, #3 \t// DecodeN (zerobased)" %}
 6821   size(4);
 6822   ins_encode %{
 6823     __ sldi($dst$$Register, $src$$Register, CompressedOops::shift());
 6824   %}
 6825   ins_pipe(pipe_class_default);
 6826 %}
 6827 
 6828 // Optimize DecodeN for disjoint base.
// Shift the narrow oop and OR it into a register that already contains the heap base.
// Base == dst must hold, and is assured by construction in postalloc_expand.
 6831 instruct decodeN_mergeDisjoint(iRegPdst dst, iRegNsrc src, iRegLsrc base) %{
 6832   match(Set dst (DecodeN src));
 6833   effect(TEMP base);
 6834   predicate(false);
 6835 
 6836   format %{ "RLDIMI  $dst, $src, shift, 32-shift \t// DecodeN (disjoint base)" %}
 6837   size(4);
 6838   ins_encode %{
 6839     __ rldimi($dst$$Register, $src$$Register, CompressedOops::shift(), 32-CompressedOops::shift());
 6840   %}
 6841   ins_pipe(pipe_class_default);
 6842 %}
 6843 
 6844 // Optimize DecodeN for disjoint base.
 6845 // This node requires only one cycle on the critical path.
// We must postalloc_expand because we cannot express use_def effects where
// the used register is L and the def'ed register is P.
 6848 instruct decodeN_Disjoint_notNull_Ex(iRegPdst dst, iRegNsrc src) %{
 6849   match(Set dst (DecodeN src));
 6850   effect(TEMP_DEF dst);
 6851   predicate((n->bottom_type()->is_oopptr()->ptr() == TypePtr::NotNull ||
 6852              n->bottom_type()->is_oopptr()->ptr() == TypePtr::Constant) &&
 6853             CompressedOops::base_disjoint());
 6854   ins_cost(DEFAULT_COST);
 6855 
 6856   format %{ "MOV     $dst, heapbase \t\n"
 6857             "RLDIMI  $dst, $src, shift, 32-shift \t// decode with disjoint base" %}
 6858   postalloc_expand %{
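    // Expansion: n1 loads the heap base into dst, n2 merges the shifted narrow
    // oop into it (see decodeN_mergeDisjoint).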
 6859     loadBaseNode *n1 = new loadBaseNode();
 6860     n1->add_req(NULL);
 6861     n1->_opnds[0] = op_dst;
 6862 
 6863     decodeN_mergeDisjointNode *n2 = new decodeN_mergeDisjointNode();
 6864     n2->add_req(n_region, n_src, n1);
 6865     n2->_opnds[0] = op_dst;
 6866     n2->_opnds[1] = op_src;
 6867     n2->_opnds[2] = op_dst;
 6868     n2->_bottom_type = _bottom_type;
 6869 
 6870     assert(ra_->is_oop(this) == true, "A decodeN node must produce an oop!");
 6871     ra_->set_oop(n2, true);
 6872 
 6873     ra_->set_pair(n1->_idx, ra_->get_reg_second(this), ra_->get_reg_first(this));
 6874     ra_->set_pair(n2->_idx, ra_->get_reg_second(this), ra_->get_reg_first(this));
 6875 
 6876     nodes->push(n1);
 6877     nodes->push(n2);
 6878   %}
 6879 %}
 6880 
 6881 instruct decodeN_Disjoint_isel_Ex(iRegPdst dst, iRegNsrc src, flagsReg crx) %{
 6882   match(Set dst (DecodeN src));
 6883   effect(TEMP_DEF dst, TEMP crx);
 6884   predicate((n->bottom_type()->is_oopptr()->ptr() != TypePtr::NotNull &&
 6885              n->bottom_type()->is_oopptr()->ptr() != TypePtr::Constant) &&
 6886             CompressedOops::base_disjoint() && VM_Version::has_isel());
 6887   ins_cost(3 * DEFAULT_COST);
 6888 
 6889   format %{ "DecodeN  $dst, $src \t// decode with disjoint base using isel" %}
 6890   postalloc_expand %{
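    // Expansion: load the heap base into dst, compare the narrow oop against 0,
    // merge the shifted narrow oop into dst, then use isel (cond_set_0_ptr) to
    // set dst back to 0 if the narrow oop was 0.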
 6891     loadBaseNode *n1 = new loadBaseNode();
 6892     n1->add_req(NULL);
 6893     n1->_opnds[0] = op_dst;
 6894 
 6895     cmpN_reg_imm0Node *n_compare  = new cmpN_reg_imm0Node();
 6896     n_compare->add_req(n_region, n_src);
 6897     n_compare->_opnds[0] = op_crx;
 6898     n_compare->_opnds[1] = op_src;
 6899     n_compare->_opnds[2] = new immN_0Oper(TypeNarrowOop::NULL_PTR);
 6900 
 6901     decodeN_mergeDisjointNode *n2 = new decodeN_mergeDisjointNode();
 6902     n2->add_req(n_region, n_src, n1);
 6903     n2->_opnds[0] = op_dst;
 6904     n2->_opnds[1] = op_src;
 6905     n2->_opnds[2] = op_dst;
 6906     n2->_bottom_type = _bottom_type;
 6907 
 6908     cond_set_0_ptrNode *n_cond_set = new cond_set_0_ptrNode();
 6909     n_cond_set->add_req(n_region, n_compare, n2);
 6910     n_cond_set->_opnds[0] = op_dst;
 6911     n_cond_set->_opnds[1] = op_crx;
 6912     n_cond_set->_opnds[2] = op_dst;
 6913     n_cond_set->_bottom_type = _bottom_type;
 6914 
 6915     assert(ra_->is_oop(this) == true, "A decodeN node must produce an oop!");
 6916     ra_->set_oop(n_cond_set, true);
 6917 
 6918     ra_->set_pair(n1->_idx, ra_->get_reg_second(this), ra_->get_reg_first(this));
 6919     ra_->set_pair(n_compare->_idx, ra_->get_reg_second(n_crx), ra_->get_reg_first(n_crx));
 6920     ra_->set_pair(n2->_idx, ra_->get_reg_second(this), ra_->get_reg_first(this));
 6921     ra_->set_pair(n_cond_set->_idx, ra_->get_reg_second(this), ra_->get_reg_first(this));
 6922 
 6923     nodes->push(n1);
 6924     nodes->push(n_compare);
 6925     nodes->push(n2);
 6926     nodes->push(n_cond_set);
 6927   %}
 6928 %}
 6929 
 6930 // src != 0, shift != 0, base != 0
 6931 instruct decodeN_notNull_addBase_Ex(iRegPdst dst, iRegNsrc src) %{
 6932   match(Set dst (DecodeN src));
 6933   predicate((n->bottom_type()->is_oopptr()->ptr() == TypePtr::NotNull ||
 6934              n->bottom_type()->is_oopptr()->ptr() == TypePtr::Constant) &&
 6935             CompressedOops::shift() != 0 &&
 6936             CompressedOops::base() != 0);
 6937   ins_cost(2 * DEFAULT_COST);
 6938 
 6939   format %{ "DecodeN $dst, $src \t// $src != NULL, postalloc expanded" %}
 6940   postalloc_expand( postalloc_expand_decode_oop_not_null(dst, src));
 6941 %}
 6942 
 6943 // Compressed OOPs with narrow_oop_shift == 0.
 6944 instruct decodeN_unscaled(iRegPdst dst, iRegNsrc src) %{
 6945   match(Set dst (DecodeN src));
 6946   predicate(CompressedOops::shift() == 0);
 6947   ins_cost(DEFAULT_COST);
 6948 
 6949   format %{ "MR      $dst, $src \t// DecodeN (unscaled)" %}
 6950   // variable size, 0 or 4.
 6951   ins_encode %{
 6952     __ mr_if_needed($dst$$Register, $src$$Register);
 6953   %}
 6954   ins_pipe(pipe_class_default);
 6955 %}
 6956 
// Convert compressed oop into int for vector alignment masking.
 6958 instruct decodeN2I_unscaled(iRegIdst dst, iRegNsrc src) %{
 6959   match(Set dst (ConvL2I (CastP2X (DecodeN src))));
 6960   predicate(CompressedOops::shift() == 0);
 6961   ins_cost(DEFAULT_COST);
 6962 
 6963   format %{ "MR      $dst, $src \t// (int)DecodeN (unscaled)" %}
 6964   // variable size, 0 or 4.
 6965   ins_encode %{
 6966     __ mr_if_needed($dst$$Register, $src$$Register);
 6967   %}
 6968   ins_pipe(pipe_class_default);
 6969 %}
 6970 
 6971 // Convert klass pointer into compressed form.
 6972 
 6973 // Nodes for postalloc expand.
 6974 
 6975 // Shift node for expand.
 6976 instruct encodePKlass_shift(iRegNdst dst, iRegNsrc src) %{
 6977   // The match rule is needed to make it a 'MachTypeNode'!
 6978   match(Set dst (EncodePKlass src));
 6979   predicate(false);
 6980 
 6981   format %{ "SRDI    $dst, $src, 3 \t// encode" %}
 6982   size(4);
 6983   ins_encode %{
 6984     __ srdi($dst$$Register, $src$$Register, CompressedKlassPointers::shift());
 6985   %}
 6986   ins_pipe(pipe_class_default);
 6987 %}
 6988 
// Subtract node for expand.
 6990 instruct encodePKlass_sub_base(iRegPdst dst, iRegLsrc base, iRegPdst src) %{
 6991   // The match rule is needed to make it a 'MachTypeNode'!
 6992   match(Set dst (EncodePKlass (Binary base src)));
 6993   predicate(false);
 6994 
 6995   format %{ "SUB     $dst, $base, $src \t// encode" %}
 6996   size(4);
 6997   ins_encode %{
 6998     __ subf($dst$$Register, $base$$Register, $src$$Register);
 6999   %}
 7000   ins_pipe(pipe_class_default);
 7001 %}
 7002 
// Disjoint narrow klass base.
 7004 instruct encodePKlass_Disjoint(iRegNdst dst, iRegPsrc src) %{
 7005   match(Set dst (EncodePKlass src));
 7006   predicate(false /* TODO: PPC port CompressedKlassPointers::base_disjoint()*/);
 7007 
 7008   format %{ "EXTRDI  $dst, $src, #32, #3 \t// encode with disjoint base" %}
 7009   size(4);
 7010   ins_encode %{
 7011     __ rldicl($dst$$Register, $src$$Register, 64-CompressedKlassPointers::shift(), 32);
 7012   %}
 7013   ins_pipe(pipe_class_default);
 7014 %}
 7015 
 7016 // shift != 0, base != 0
 7017 instruct encodePKlass_not_null_Ex(iRegNdst dst, iRegLsrc base, iRegPsrc src) %{
 7018   match(Set dst (EncodePKlass (Binary base src)));
 7019   predicate(false);
 7020 
 7021   format %{ "EncodePKlass $dst, $src\t// $src != Null, postalloc expanded" %}
 7022   postalloc_expand %{
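    // Expansion: n1 subtracts the klass base from the pointer, n2 shifts the
    // result right by the compressed klass shift.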
 7023     encodePKlass_sub_baseNode *n1 = new encodePKlass_sub_baseNode();
 7024     n1->add_req(n_region, n_base, n_src);
 7025     n1->_opnds[0] = op_dst;
 7026     n1->_opnds[1] = op_base;
 7027     n1->_opnds[2] = op_src;
 7028     n1->_bottom_type = _bottom_type;
 7029 
 7030     encodePKlass_shiftNode *n2 = new encodePKlass_shiftNode();
 7031     n2->add_req(n_region, n1);
 7032     n2->_opnds[0] = op_dst;
 7033     n2->_opnds[1] = op_dst;
 7034     n2->_bottom_type = _bottom_type;
 7035     ra_->set_pair(n1->_idx, ra_->get_reg_second(this), ra_->get_reg_first(this));
 7036     ra_->set_pair(n2->_idx, ra_->get_reg_second(this), ra_->get_reg_first(this));
 7037 
 7038     nodes->push(n1);
 7039     nodes->push(n2);
 7040   %}
 7041 %}
 7042 
 7043 // shift != 0, base != 0
 7044 instruct encodePKlass_not_null_ExEx(iRegNdst dst, iRegPsrc src) %{
 7045   match(Set dst (EncodePKlass src));
 7046   //predicate(CompressedKlassPointers::shift() != 0 &&
 7047   //          true /* TODO: PPC port CompressedKlassPointers::base_overlaps()*/);
 7048 
 7049   //format %{ "EncodePKlass $dst, $src\t// $src != Null, postalloc expanded" %}
 7050   ins_cost(DEFAULT_COST*2);  // Don't count constant.
 7051   expand %{
 7052     immL baseImm %{ (jlong)(intptr_t)CompressedKlassPointers::base() %}
 7053     iRegLdst base;
 7054     loadConL_Ex(base, baseImm);
 7055     encodePKlass_not_null_Ex(dst, base, src);
 7056   %}
 7057 %}
 7058 
 7059 // Decode nodes.
 7060 
 7061 // Shift node for expand.
 7062 instruct decodeNKlass_shift(iRegPdst dst, iRegPsrc src) %{
 7063   // The match rule is needed to make it a 'MachTypeNode'!
 7064   match(Set dst (DecodeNKlass src));
 7065   predicate(false);
 7066 
 7067   format %{ "SLDI    $dst, $src, #3 \t// DecodeNKlass" %}
 7068   size(4);
 7069   ins_encode %{
 7070     __ sldi($dst$$Register, $src$$Register, CompressedKlassPointers::shift());
 7071   %}
 7072   ins_pipe(pipe_class_default);
 7073 %}
 7074 
 7075 // Add node for expand.
 7076 
 7077 instruct decodeNKlass_add_base(iRegPdst dst, iRegLsrc base, iRegPdst src) %{
 7078   // The match rule is needed to make it a 'MachTypeNode'!
 7079   match(Set dst (DecodeNKlass (Binary base src)));
 7080   predicate(false);
 7081 
 7082   format %{ "ADD     $dst, $base, $src \t// DecodeNKlass, add klass base" %}
 7083   size(4);
 7084   ins_encode %{
 7085     __ add($dst$$Register, $base$$Register, $src$$Register);
 7086   %}
 7087   ins_pipe(pipe_class_default);
 7088 %}
 7089 
 7090 // src != 0, shift != 0, base != 0
 7091 instruct decodeNKlass_notNull_addBase_Ex(iRegPdst dst, iRegLsrc base, iRegNsrc src) %{
 7092   match(Set dst (DecodeNKlass (Binary base src)));
 7093   //effect(kill src); // We need a register for the immediate result after shifting.
 7094   predicate(false);
 7095 
 7096   format %{ "DecodeNKlass $dst =  $base + ($src << 3) \t// $src != NULL, postalloc expanded" %}
 7097   postalloc_expand %{
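    // Expansion: n1 adds the pre-shifted klass base to the narrow klass, n2 then
    // shifts left by the compressed klass shift (see decodeNKlass_notNull_addBase_ExEx
    // for why the base is loaded pre-shifted).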
 7098     decodeNKlass_add_baseNode *n1 = new decodeNKlass_add_baseNode();
 7099     n1->add_req(n_region, n_base, n_src);
 7100     n1->_opnds[0] = op_dst;
 7101     n1->_opnds[1] = op_base;
 7102     n1->_opnds[2] = op_src;
 7103     n1->_bottom_type = _bottom_type;
 7104 
 7105     decodeNKlass_shiftNode *n2 = new decodeNKlass_shiftNode();
 7106     n2->add_req(n_region, n1);
 7107     n2->_opnds[0] = op_dst;
 7108     n2->_opnds[1] = op_dst;
 7109     n2->_bottom_type = _bottom_type;
 7110 
 7111     ra_->set_pair(n1->_idx, ra_->get_reg_second(this), ra_->get_reg_first(this));
 7112     ra_->set_pair(n2->_idx, ra_->get_reg_second(this), ra_->get_reg_first(this));
 7113 
 7114     nodes->push(n1);
 7115     nodes->push(n2);
 7116   %}
 7117 %}
 7118 
 7119 // src != 0, shift != 0, base != 0
 7120 instruct decodeNKlass_notNull_addBase_ExEx(iRegPdst dst, iRegNsrc src) %{
 7121   match(Set dst (DecodeNKlass src));
 7122   // predicate(CompressedKlassPointers::shift() != 0 &&
 7123   //           CompressedKlassPointers::base() != 0);
 7124 
 7125   //format %{ "DecodeNKlass $dst, $src \t// $src != NULL, expanded" %}
 7126 
 7127   ins_cost(DEFAULT_COST*2);  // Don't count constant.
 7128   expand %{
    // We add first, then we shift. This way we get along with one register less,
    // but we have to load the base pre-shifted.
 7131     immL baseImm %{ (jlong)((intptr_t)CompressedKlassPointers::base() >> CompressedKlassPointers::shift()) %}
 7132     iRegLdst base;
 7133     loadConL_Ex(base, baseImm);
 7134     decodeNKlass_notNull_addBase_Ex(dst, base, src);
 7135   %}
 7136 %}
 7137 
 7138 //----------MemBar Instructions-----------------------------------------------
 7139 // Memory barrier flavors
 7140 
 7141 instruct membar_acquire() %{
 7142   match(LoadFence);
 7143   ins_cost(4*MEMORY_REF_COST);
 7144 
 7145   format %{ "MEMBAR-acquire" %}
 7146   size(4);
 7147   ins_encode %{
 7148     __ acquire();
 7149   %}
 7150   ins_pipe(pipe_class_default);
 7151 %}
 7152 
 7153 instruct unnecessary_membar_acquire() %{
 7154   match(MemBarAcquire);
 7155   ins_cost(0);
 7156 
 7157   format %{ " -- \t// redundant MEMBAR-acquire - empty" %}
 7158   size(0);
 7159   ins_encode( /*empty*/ );
 7160   ins_pipe(pipe_class_default);
 7161 %}
 7162 
 7163 instruct membar_acquire_lock() %{
 7164   match(MemBarAcquireLock);
 7165   ins_cost(0);
 7166 
 7167   format %{ " -- \t// redundant MEMBAR-acquire - empty (acquire as part of CAS in prior FastLock)" %}
 7168   size(0);
 7169   ins_encode( /*empty*/ );
 7170   ins_pipe(pipe_class_default);
 7171 %}
 7172 
 7173 instruct membar_release() %{
 7174   match(MemBarRelease);
 7175   match(StoreFence);
 7176   ins_cost(4*MEMORY_REF_COST);
 7177 
 7178   format %{ "MEMBAR-release" %}
 7179   size(4);
 7180   ins_encode %{
 7181     __ release();
 7182   %}
 7183   ins_pipe(pipe_class_default);
 7184 %}
 7185 
 7186 instruct membar_storestore() %{
 7187   match(MemBarStoreStore);
 7188   match(StoreStoreFence);
 7189   ins_cost(4*MEMORY_REF_COST);
 7190 
 7191   format %{ "MEMBAR-store-store" %}
 7192   size(4);
 7193   ins_encode %{
 7194     __ membar(Assembler::StoreStore);
 7195   %}
 7196   ins_pipe(pipe_class_default);
 7197 %}
 7198 
 7199 instruct membar_release_lock() %{
 7200   match(MemBarReleaseLock);
 7201   ins_cost(0);
 7202 
 7203   format %{ " -- \t// redundant MEMBAR-release - empty (release in FastUnlock)" %}
 7204   size(0);
 7205   ins_encode( /*empty*/ );
 7206   ins_pipe(pipe_class_default);
 7207 %}
 7208 
 7209 instruct membar_volatile() %{
 7210   match(MemBarVolatile);
 7211   ins_cost(4*MEMORY_REF_COST);
 7212 
 7213   format %{ "MEMBAR-volatile" %}
 7214   size(4);
 7215   ins_encode %{
 7216     __ fence();
 7217   %}
 7218   ins_pipe(pipe_class_default);
 7219 %}
 7220 
 7221 // This optimization is wrong on PPC. The following pattern is not supported:
 7222 //  MemBarVolatile
 7223 //   ^        ^
 7224 //   |        |
 7225 //  CtrlProj MemProj
 7226 //   ^        ^
 7227 //   |        |
 7228 //   |       Load
 7229 //   |
 7230 //  MemBarVolatile
 7231 //
//  The first MemBarVolatile could get optimized out! According to
//  Vladimir, this pattern cannot occur on Oracle platforms.
 7234 //  However, it does occur on PPC64 (because of membars in
 7235 //  inline_unsafe_load_store).
 7236 //
 7237 // Add this node again if we found a good solution for inline_unsafe_load_store().
 7238 // Don't forget to look at the implementation of post_store_load_barrier again,
 7239 // we did other fixes in that method.
 7240 //instruct unnecessary_membar_volatile() %{
 7241 //  match(MemBarVolatile);
 7242 //  predicate(Matcher::post_store_load_barrier(n));
 7243 //  ins_cost(0);
 7244 //
 7245 //  format %{ " -- \t// redundant MEMBAR-volatile - empty" %}
 7246 //  size(0);
 7247 //  ins_encode( /*empty*/ );
 7248 //  ins_pipe(pipe_class_default);
 7249 //%}
 7250 
 7251 instruct membar_CPUOrder() %{
 7252   match(MemBarCPUOrder);
 7253   ins_cost(0);
 7254 
 7255   format %{ " -- \t// MEMBAR-CPUOrder - empty: PPC64 processors are self-consistent." %}
 7256   size(0);
 7257   ins_encode( /*empty*/ );
 7258   ins_pipe(pipe_class_default);
 7259 %}
 7260 
 7261 //----------Conditional Move---------------------------------------------------
 7262 
 7263 // Cmove using isel.
 7264 instruct cmovI_reg_isel(cmpOp cmp, flagsRegSrc crx, iRegIdst dst, iRegIsrc src) %{
 7265   match(Set dst (CMoveI (Binary cmp crx) (Binary dst src)));
 7266   predicate(VM_Version::has_isel());
 7267   ins_cost(DEFAULT_COST);
 7268 
 7269   format %{ "CMOVE   $cmp, $crx, $dst, $src\n\t" %}
 7270   size(4);
 7271   ins_encode %{
 7272     // This is a Power7 instruction for which no machine description
    // exists. Anyway, the scheduler should be off on Power7.
 7274     int cc        = $cmp$$cmpcode;
 7275     __ isel($dst$$Register, $crx$$CondRegister,
 7276             (Assembler::Condition)(cc & 3), /*invert*/((~cc) & 8), $src$$Register);
 7277   %}
 7278   ins_pipe(pipe_class_default);
 7279 %}
 7280 
 7281 instruct cmovI_reg(cmpOp cmp, flagsRegSrc crx, iRegIdst dst, iRegIsrc src) %{
 7282   match(Set dst (CMoveI (Binary cmp crx) (Binary dst src)));
 7283   predicate(!VM_Version::has_isel());
 7284   ins_cost(DEFAULT_COST+BRANCH_COST);
 7285 
 7286   ins_variable_size_depending_on_alignment(true);
 7287 
 7288   format %{ "CMOVE   $cmp, $crx, $dst, $src\n\t" %}
 7289   // Worst case is branch + move + stop, no stop without scheduler
 7290   size(8);
 7291   ins_encode( enc_cmove_reg(dst, crx, src, cmp) );
 7292   ins_pipe(pipe_class_default);
 7293 %}
 7294 
 7295 instruct cmovI_imm(cmpOp cmp, flagsRegSrc crx, iRegIdst dst, immI16 src) %{
 7296   match(Set dst (CMoveI (Binary cmp crx) (Binary dst src)));
 7297   ins_cost(DEFAULT_COST+BRANCH_COST);
 7298 
 7299   ins_variable_size_depending_on_alignment(true);
 7300 
 7301   format %{ "CMOVE   $cmp, $crx, $dst, $src\n\t" %}
 7302   // Worst case is branch + move + stop, no stop without scheduler
 7303   size(8);
 7304   ins_encode( enc_cmove_imm(dst, crx, src, cmp) );
 7305   ins_pipe(pipe_class_default);
 7306 %}
 7307 
 7308 // Cmove using isel.
 7309 instruct cmovL_reg_isel(cmpOp cmp, flagsRegSrc crx, iRegLdst dst, iRegLsrc src) %{
 7310   match(Set dst (CMoveL (Binary cmp crx) (Binary dst src)));
 7311   predicate(VM_Version::has_isel());
 7312   ins_cost(DEFAULT_COST);
 7313 
 7314   format %{ "CMOVE   $cmp, $crx, $dst, $src\n\t" %}
 7315   size(4);
 7316   ins_encode %{
 7317     // This is a Power7 instruction for which no machine description
    // exists. Anyway, the scheduler should be off on Power7.
 7319     int cc        = $cmp$$cmpcode;
 7320     __ isel($dst$$Register, $crx$$CondRegister,
 7321             (Assembler::Condition)(cc & 3), /*invert*/((~cc) & 8), $src$$Register);
 7322   %}
 7323   ins_pipe(pipe_class_default);
 7324 %}
 7325 
 7326 instruct cmovL_reg(cmpOp cmp, flagsRegSrc crx, iRegLdst dst, iRegLsrc src) %{
 7327   match(Set dst (CMoveL (Binary cmp crx) (Binary dst src)));
 7328   predicate(!VM_Version::has_isel());
 7329   ins_cost(DEFAULT_COST+BRANCH_COST);
 7330 
 7331   ins_variable_size_depending_on_alignment(true);
 7332 
 7333   format %{ "CMOVE   $cmp, $crx, $dst, $src\n\t" %}
 7334   // Worst case is branch + move + stop, no stop without scheduler.
 7335   size(8);
 7336   ins_encode( enc_cmove_reg(dst, crx, src, cmp) );
 7337   ins_pipe(pipe_class_default);
 7338 %}
 7339 
 7340 instruct cmovL_imm(cmpOp cmp, flagsRegSrc crx, iRegLdst dst, immL16 src) %{
 7341   match(Set dst (CMoveL (Binary cmp crx) (Binary dst src)));
 7342   ins_cost(DEFAULT_COST+BRANCH_COST);
 7343 
 7344   ins_variable_size_depending_on_alignment(true);
 7345 
 7346   format %{ "CMOVE   $cmp, $crx, $dst, $src\n\t" %}
 7347   // Worst case is branch + move + stop, no stop without scheduler.
 7348   size(8);
 7349   ins_encode( enc_cmove_imm(dst, crx, src, cmp) );
 7350   ins_pipe(pipe_class_default);
 7351 %}
 7352 
 7353 // Cmove using isel.
 7354 instruct cmovN_reg_isel(cmpOp cmp, flagsRegSrc crx, iRegNdst dst, iRegNsrc src) %{
 7355   match(Set dst (CMoveN (Binary cmp crx) (Binary dst src)));
 7356   predicate(VM_Version::has_isel());
 7357   ins_cost(DEFAULT_COST);
 7358 
 7359   format %{ "CMOVE   $cmp, $crx, $dst, $src\n\t" %}
 7360   size(4);
 7361   ins_encode %{
 7362     // This is a Power7 instruction for which no machine description
    // exists. Anyway, the scheduler should be off on Power7.
 7364     int cc        = $cmp$$cmpcode;
 7365     __ isel($dst$$Register, $crx$$CondRegister,
 7366             (Assembler::Condition)(cc & 3), /*invert*/((~cc) & 8), $src$$Register);
 7367   %}
 7368   ins_pipe(pipe_class_default);
 7369 %}
 7370 
 7371 // Conditional move for RegN. Only cmov(reg, reg).
 7372 instruct cmovN_reg(cmpOp cmp, flagsRegSrc crx, iRegNdst dst, iRegNsrc src) %{
 7373   match(Set dst (CMoveN (Binary cmp crx) (Binary dst src)));
 7374   predicate(!VM_Version::has_isel());
 7375   ins_cost(DEFAULT_COST+BRANCH_COST);
 7376 
 7377   ins_variable_size_depending_on_alignment(true);
 7378 
 7379   format %{ "CMOVE   $cmp, $crx, $dst, $src\n\t" %}
 7380   // Worst case is branch + move + stop, no stop without scheduler.
 7381   size(8);
 7382   ins_encode( enc_cmove_reg(dst, crx, src, cmp) );
 7383   ins_pipe(pipe_class_default);
 7384 %}
 7385 
 7386 instruct cmovN_imm(cmpOp cmp, flagsRegSrc crx, iRegNdst dst, immN_0 src) %{
 7387   match(Set dst (CMoveN (Binary cmp crx) (Binary dst src)));
 7388   ins_cost(DEFAULT_COST+BRANCH_COST);
 7389 
 7390   ins_variable_size_depending_on_alignment(true);
 7391 
 7392   format %{ "CMOVE   $cmp, $crx, $dst, $src\n\t" %}
 7393   // Worst case is branch + move + stop, no stop without scheduler.
 7394   size(8);
 7395   ins_encode( enc_cmove_imm(dst, crx, src, cmp) );
 7396   ins_pipe(pipe_class_default);
 7397 %}
 7398 
 7399 // Cmove using isel.
 7400 instruct cmovP_reg_isel(cmpOp cmp, flagsRegSrc crx, iRegPdst dst, iRegPsrc src) %{
 7401   match(Set dst (CMoveP (Binary cmp crx) (Binary dst src)));
 7402   predicate(VM_Version::has_isel());
 7403   ins_cost(DEFAULT_COST);
 7404 
 7405   format %{ "CMOVE   $cmp, $crx, $dst, $src\n\t" %}
 7406   size(4);
 7407   ins_encode %{
 7408     // This is a Power7 instruction for which no machine description
    // exists. Anyway, the scheduler should be off on Power7.
 7410     int cc        = $cmp$$cmpcode;
 7411     __ isel($dst$$Register, $crx$$CondRegister,
 7412             (Assembler::Condition)(cc & 3), /*invert*/((~cc) & 8), $src$$Register);
 7413   %}
 7414   ins_pipe(pipe_class_default);
 7415 %}
 7416 
 7417 instruct cmovP_reg(cmpOp cmp, flagsRegSrc crx, iRegPdst dst, iRegP_N2P src) %{
 7418   match(Set dst (CMoveP (Binary cmp crx) (Binary dst src)));
 7419   predicate(!VM_Version::has_isel());
 7420   ins_cost(DEFAULT_COST+BRANCH_COST);
 7421 
 7422   ins_variable_size_depending_on_alignment(true);
 7423 
 7424   format %{ "CMOVE   $cmp, $crx, $dst, $src\n\t" %}
 7425   // Worst case is branch + move + stop, no stop without scheduler.
 7426   size(8);
 7427   ins_encode( enc_cmove_reg(dst, crx, src, cmp) );
 7428   ins_pipe(pipe_class_default);
 7429 %}
 7430 
 7431 instruct cmovP_imm(cmpOp cmp, flagsRegSrc crx, iRegPdst dst, immP_0 src) %{
 7432   match(Set dst (CMoveP (Binary cmp crx) (Binary dst src)));
 7433   ins_cost(DEFAULT_COST+BRANCH_COST);
 7434 
 7435   ins_variable_size_depending_on_alignment(true);
 7436 
 7437   format %{ "CMOVE   $cmp, $crx, $dst, $src\n\t" %}
 7438   // Worst case is branch + move + stop, no stop without scheduler.
 7439   size(8);
 7440   ins_encode( enc_cmove_imm(dst, crx, src, cmp) );
 7441   ins_pipe(pipe_class_default);
 7442 %}
 7443 
 7444 instruct cmovF_reg(cmpOp cmp, flagsRegSrc crx, regF dst, regF src) %{
 7445   match(Set dst (CMoveF (Binary cmp crx) (Binary dst src)));
 7446   ins_cost(DEFAULT_COST+BRANCH_COST);
 7447 
 7448   ins_variable_size_depending_on_alignment(true);
 7449 
 7450   format %{ "CMOVEF  $cmp, $crx, $dst, $src\n\t" %}
 7451   // Worst case is branch + move + stop, no stop without scheduler.
 7452   size(8);
 7453   ins_encode %{
 7454     Label done;
 7455     assert((Assembler::bcondCRbiIs1 & ~Assembler::bcondCRbiIs0) == 8, "check encoding");
 7456     // Branch if not (cmp crx).
 7457     __ bc(cc_to_inverse_boint($cmp$$cmpcode), cc_to_biint($cmp$$cmpcode, $crx$$reg), done);
 7458     __ fmr($dst$$FloatRegister, $src$$FloatRegister);
 7459     __ bind(done);
 7460   %}
 7461   ins_pipe(pipe_class_default);
 7462 %}
 7463 
 7464 instruct cmovD_reg(cmpOp cmp, flagsRegSrc crx, regD dst, regD src) %{
 7465   match(Set dst (CMoveD (Binary cmp crx) (Binary dst src)));
 7466   ins_cost(DEFAULT_COST+BRANCH_COST);
 7467 
 7468   ins_variable_size_depending_on_alignment(true);
 7469 
 7470   format %{ "CMOVEF  $cmp, $crx, $dst, $src\n\t" %}
 7471   // Worst case is branch + move + stop, no stop without scheduler.
 7472   size(8);
 7473   ins_encode %{
 7474     Label done;
 7475     assert((Assembler::bcondCRbiIs1 & ~Assembler::bcondCRbiIs0) == 8, "check encoding");
 7476     // Branch if not (cmp crx).
 7477     __ bc(cc_to_inverse_boint($cmp$$cmpcode), cc_to_biint($cmp$$cmpcode, $crx$$reg), done);
 7478     __ fmr($dst$$FloatRegister, $src$$FloatRegister);
 7479     __ bind(done);
 7480   %}
 7481   ins_pipe(pipe_class_default);
 7482 %}
 7483 
 7484 //----------Compare-And-Swap---------------------------------------------------
 7485 
// CompareAndSwap{P,I,L} have more than one output, therefore "CmpI
// (CompareAndSwap ...)" or "If (CmpI (CompareAndSwap ...))" cannot be
// matched.
 7489 
 7490 // Strong versions:
 7491 
 7492 instruct compareAndSwapB_regP_regI_regI(iRegIdst res, iRegPdst mem_ptr, iRegIsrc src1, iRegIsrc src2, flagsRegCR0 cr0) %{
 7493   match(Set res (CompareAndSwapB mem_ptr (Binary src1 src2)));
 7494   predicate(VM_Version::has_lqarx());
 7495   effect(TEMP_DEF res, TEMP cr0); // TEMP_DEF to avoid jump
 7496   format %{ "CMPXCHGB $res, $mem_ptr, $src1, $src2; as bool" %}
 7497   ins_encode %{
 7498     // CmpxchgX sets CCR0 to cmpX(src1, src2) and Rres to 'true'/'false'.
 7499     __ cmpxchgb(CCR0, R0, $src1$$Register, $src2$$Register, $mem_ptr$$Register, noreg, noreg,
 7500                 MacroAssembler::MemBarNone, MacroAssembler::cmpxchgx_hint_atomic_update(),
 7501                 $res$$Register, true);
 7502     if (support_IRIW_for_not_multiple_copy_atomic_cpu) {
 7503       __ isync();
 7504     } else {
 7505       __ sync();
 7506     }
 7507   %}
 7508   ins_pipe(pipe_class_default);
 7509 %}
 7510 
 7511 instruct compareAndSwapB4_regP_regI_regI(iRegIdst res, rarg3RegP mem_ptr, iRegIsrc src1, rarg4RegI src2, iRegIdst tmp1, iRegIdst tmp2, flagsRegCR0 cr0) %{
 7512   match(Set res (CompareAndSwapB mem_ptr (Binary src1 src2)));
 7513   predicate(!VM_Version::has_lqarx());
 7514   effect(TEMP_DEF res, USE_KILL src2, USE_KILL mem_ptr, TEMP tmp1, TEMP tmp2, TEMP cr0); // TEMP_DEF to avoid jump
 7515   format %{ "CMPXCHGB $res, $mem_ptr, $src1, $src2; as bool" %}
 7516   ins_encode %{
 7517     // CmpxchgX sets CCR0 to cmpX(src1, src2) and Rres to 'true'/'false'.
 7518     __ cmpxchgb(CCR0, R0, $src1$$Register, $src2$$Register, $mem_ptr$$Register, $tmp1$$Register, $tmp2$$Register,
 7519                 MacroAssembler::MemBarNone, MacroAssembler::cmpxchgx_hint_atomic_update(),
 7520                 $res$$Register, true);
 7521     if (support_IRIW_for_not_multiple_copy_atomic_cpu) {
 7522       __ isync();
 7523     } else {
 7524       __ sync();
 7525     }
 7526   %}
 7527   ins_pipe(pipe_class_default);
 7528 %}
 7529 
 7530 instruct compareAndSwapS_regP_regI_regI(iRegIdst res, iRegPdst mem_ptr, iRegIsrc src1, iRegIsrc src2, flagsRegCR0 cr0) %{
 7531   match(Set res (CompareAndSwapS mem_ptr (Binary src1 src2)));
 7532   predicate(VM_Version::has_lqarx());
 7533   effect(TEMP_DEF res, TEMP cr0); // TEMP_DEF to avoid jump
 7534   format %{ "CMPXCHGH $res, $mem_ptr, $src1, $src2; as bool" %}
 7535   ins_encode %{
 7536     // CmpxchgX sets CCR0 to cmpX(src1, src2) and Rres to 'true'/'false'.
 7537     __ cmpxchgh(CCR0, R0, $src1$$Register, $src2$$Register, $mem_ptr$$Register, noreg, noreg,
 7538                 MacroAssembler::MemBarNone, MacroAssembler::cmpxchgx_hint_atomic_update(),
 7539                 $res$$Register, true);
 7540     if (support_IRIW_for_not_multiple_copy_atomic_cpu) {
 7541       __ isync();
 7542     } else {
 7543       __ sync();
 7544     }
 7545   %}
 7546   ins_pipe(pipe_class_default);
 7547 %}
 7548 
 7549 instruct compareAndSwapS4_regP_regI_regI(iRegIdst res, rarg3RegP mem_ptr, iRegIsrc src1, rarg4RegI src2, iRegIdst tmp1, iRegIdst tmp2, flagsRegCR0 cr0) %{
 7550   match(Set res (CompareAndSwapS mem_ptr (Binary src1 src2)));
 7551   predicate(!VM_Version::has_lqarx());
 7552   effect(TEMP_DEF res, USE_KILL src2, USE_KILL mem_ptr, TEMP tmp1, TEMP tmp2, TEMP cr0); // TEMP_DEF to avoid jump
 7553   format %{ "CMPXCHGH $res, $mem_ptr, $src1, $src2; as bool" %}
 7554   ins_encode %{
 7555     // CmpxchgX sets CCR0 to cmpX(src1, src2) and Rres to 'true'/'false'.
 7556     __ cmpxchgh(CCR0, R0, $src1$$Register, $src2$$Register, $mem_ptr$$Register, $tmp1$$Register, $tmp2$$Register,
 7557                 MacroAssembler::MemBarNone, MacroAssembler::cmpxchgx_hint_atomic_update(),
 7558                 $res$$Register, true);
 7559     if (support_IRIW_for_not_multiple_copy_atomic_cpu) {
 7560       __ isync();
 7561     } else {
 7562       __ sync();
 7563     }
 7564   %}
 7565   ins_pipe(pipe_class_default);
 7566 %}
 7567 
 7568 instruct compareAndSwapI_regP_regI_regI(iRegIdst res, iRegPdst mem_ptr, iRegIsrc src1, iRegIsrc src2, flagsRegCR0 cr0) %{
 7569   match(Set res (CompareAndSwapI mem_ptr (Binary src1 src2)));
 7570   effect(TEMP_DEF res, TEMP cr0); // TEMP_DEF to avoid jump
 7571   format %{ "CMPXCHGW $res, $mem_ptr, $src1, $src2; as bool" %}
 7572   ins_encode %{
 7573     // CmpxchgX sets CCR0 to cmpX(src1, src2) and Rres to 'true'/'false'.
 7574     __ cmpxchgw(CCR0, R0, $src1$$Register, $src2$$Register, $mem_ptr$$Register,
 7575                 MacroAssembler::MemBarNone, MacroAssembler::cmpxchgx_hint_atomic_update(),
 7576                 $res$$Register, true);
 7577     if (support_IRIW_for_not_multiple_copy_atomic_cpu) {
 7578       __ isync();
 7579     } else {
 7580       __ sync();
 7581     }
 7582   %}
 7583   ins_pipe(pipe_class_default);
 7584 %}
 7585 
 7586 instruct compareAndSwapN_regP_regN_regN(iRegIdst res, iRegPdst mem_ptr, iRegNsrc src1, iRegNsrc src2, flagsRegCR0 cr0) %{
 7587   match(Set res (CompareAndSwapN mem_ptr (Binary src1 src2)));
 7588   effect(TEMP_DEF res, TEMP cr0); // TEMP_DEF to avoid jump
 7589   format %{ "CMPXCHGW $res, $mem_ptr, $src1, $src2; as bool" %}
 7590   ins_encode %{
 7591     // CmpxchgX sets CCR0 to cmpX(src1, src2) and Rres to 'true'/'false'.
 7592     __ cmpxchgw(CCR0, R0, $src1$$Register, $src2$$Register, $mem_ptr$$Register,
 7593                 MacroAssembler::MemBarNone, MacroAssembler::cmpxchgx_hint_atomic_update(),
 7594                 $res$$Register, true);
 7595     if (support_IRIW_for_not_multiple_copy_atomic_cpu) {
 7596       __ isync();
 7597     } else {
 7598       __ sync();
 7599     }
 7600   %}
 7601   ins_pipe(pipe_class_default);
 7602 %}
 7603 
 7604 instruct compareAndSwapL_regP_regL_regL(iRegIdst res, iRegPdst mem_ptr, iRegLsrc src1, iRegLsrc src2, flagsRegCR0 cr0) %{
 7605   match(Set res (CompareAndSwapL mem_ptr (Binary src1 src2)));
 7606   effect(TEMP_DEF res, TEMP cr0); // TEMP_DEF to avoid jump
 7607   format %{ "CMPXCHGD $res, $mem_ptr, $src1, $src2; as bool" %}
 7608   ins_encode %{
 7609     // CmpxchgX sets CCR0 to cmpX(src1, src2) and Rres to 'true'/'false'.
 7610     __ cmpxchgd(CCR0, R0, $src1$$Register, $src2$$Register, $mem_ptr$$Register,
 7611                 MacroAssembler::MemBarNone, MacroAssembler::cmpxchgx_hint_atomic_update(),
 7612                 $res$$Register, NULL, true);
 7613     if (support_IRIW_for_not_multiple_copy_atomic_cpu) {
 7614       __ isync();
 7615     } else {
 7616       __ sync();
 7617     }
 7618   %}
 7619   ins_pipe(pipe_class_default);
 7620 %}
 7621 
 7622 instruct compareAndSwapP_regP_regP_regP(iRegIdst res, iRegPdst mem_ptr, iRegPsrc src1, iRegPsrc src2, flagsRegCR0 cr0) %{
 7623   match(Set res (CompareAndSwapP mem_ptr (Binary src1 src2)));
 7624   effect(TEMP_DEF res, TEMP cr0); // TEMP_DEF to avoid jump
 7625   predicate(n->as_LoadStore()->barrier_data() == 0);
 7626   format %{ "CMPXCHGD $res, $mem_ptr, $src1, $src2; as bool; ptr" %}
 7627   ins_encode %{
 7628     // CmpxchgX sets CCR0 to cmpX(src1, src2) and Rres to 'true'/'false'.
 7629     __ cmpxchgd(CCR0, R0, $src1$$Register, $src2$$Register, $mem_ptr$$Register,
 7630                 MacroAssembler::MemBarNone, MacroAssembler::cmpxchgx_hint_atomic_update(),
 7631                 $res$$Register, NULL, true);
 7632     if (support_IRIW_for_not_multiple_copy_atomic_cpu) {
 7633       __ isync();
 7634     } else {
 7635       __ sync();
 7636     }
 7637   %}
 7638   ins_pipe(pipe_class_default);
 7639 %}
 7640 
 7641 // Weak versions:
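      // Weak variants may fail spuriously. The plain forms emit no trailing barrier at all;
      // the _acq forms instead pass MemBarAcq (or MemBarFenceAfter) into cmpxchgx so that
      // the acquire barrier can be restricted to the successful case (see the comments in
      // the _acq encodings below).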
 7642 
 7643 instruct weakCompareAndSwapB_regP_regI_regI(iRegIdst res, iRegPdst mem_ptr, iRegIsrc src1, iRegIsrc src2, flagsRegCR0 cr0) %{
 7644   match(Set res (WeakCompareAndSwapB mem_ptr (Binary src1 src2)));
 7645   predicate(((CompareAndSwapNode*)n)->order() != MemNode::acquire && ((CompareAndSwapNode*)n)->order() != MemNode::seqcst && VM_Version::has_lqarx());
 7646   effect(TEMP_DEF res, TEMP cr0); // TEMP_DEF to avoid jump
 7647   format %{ "weak CMPXCHGB $res, $mem_ptr, $src1, $src2; as bool" %}
 7648   ins_encode %{
 7649     // CmpxchgX sets CCR0 to cmpX(src1, src2) and Rres to 'true'/'false'.
 7650     __ cmpxchgb(CCR0, R0, $src1$$Register, $src2$$Register, $mem_ptr$$Register, noreg, noreg,
 7651                 MacroAssembler::MemBarNone,
 7652                 MacroAssembler::cmpxchgx_hint_atomic_update(), $res$$Register, true, /*weak*/ true);
 7653   %}
 7654   ins_pipe(pipe_class_default);
 7655 %}
 7656 
 7657 instruct weakCompareAndSwapB4_regP_regI_regI(iRegIdst res, rarg3RegP mem_ptr, iRegIsrc src1, rarg4RegI src2, iRegIdst tmp1, iRegIdst tmp2, flagsRegCR0 cr0) %{
 7658   match(Set res (WeakCompareAndSwapB mem_ptr (Binary src1 src2)));
 7659   predicate(((CompareAndSwapNode*)n)->order() != MemNode::acquire && ((CompareAndSwapNode*)n)->order() != MemNode::seqcst && !VM_Version::has_lqarx());
 7660   effect(TEMP_DEF res, USE_KILL src2, USE_KILL mem_ptr, TEMP tmp1, TEMP tmp2, TEMP cr0); // TEMP_DEF to avoid jump
 7661   format %{ "weak CMPXCHGB $res, $mem_ptr, $src1, $src2; as bool" %}
 7662   ins_encode %{
 7663     // CmpxchgX sets CCR0 to cmpX(src1, src2) and Rres to 'true'/'false'.
 7664     __ cmpxchgb(CCR0, R0, $src1$$Register, $src2$$Register, $mem_ptr$$Register, $tmp1$$Register, $tmp2$$Register,
 7665                 MacroAssembler::MemBarNone,
 7666                 MacroAssembler::cmpxchgx_hint_atomic_update(), $res$$Register, true, /*weak*/ true);
 7667   %}
 7668   ins_pipe(pipe_class_default);
 7669 %}
 7670 
 7671 instruct weakCompareAndSwapB_acq_regP_regI_regI(iRegIdst res, iRegPdst mem_ptr, iRegIsrc src1, iRegIsrc src2, flagsRegCR0 cr0) %{
 7672   match(Set res (WeakCompareAndSwapB mem_ptr (Binary src1 src2)));
 7673   predicate((((CompareAndSwapNode*)n)->order() == MemNode::acquire || ((CompareAndSwapNode*)n)->order() == MemNode::seqcst) && VM_Version::has_lqarx());
 7674   effect(TEMP_DEF res, TEMP cr0); // TEMP_DEF to avoid jump
 7675   format %{ "weak CMPXCHGB acq $res, $mem_ptr, $src1, $src2; as bool" %}
 7676   ins_encode %{
 7677     // CmpxchgX sets CCR0 to cmpX(src1, src2) and Rres to 'true'/'false'.
 7678     __ cmpxchgb(CCR0, R0, $src1$$Register, $src2$$Register, $mem_ptr$$Register, noreg, noreg,
 7679                 support_IRIW_for_not_multiple_copy_atomic_cpu ? MacroAssembler::MemBarAcq : MacroAssembler::MemBarFenceAfter,
 7680                 MacroAssembler::cmpxchgx_hint_atomic_update(), $res$$Register, true, /*weak*/ true);
 7681   %}
 7682   ins_pipe(pipe_class_default);
 7683 %}
 7684 
 7685 instruct weakCompareAndSwapB4_acq_regP_regI_regI(iRegIdst res, rarg3RegP mem_ptr, iRegIsrc src1, rarg4RegI src2, iRegIdst tmp1, iRegIdst tmp2, flagsRegCR0 cr0) %{
 7686   match(Set res (WeakCompareAndSwapB mem_ptr (Binary src1 src2)));
 7687   predicate((((CompareAndSwapNode*)n)->order() == MemNode::acquire || ((CompareAndSwapNode*)n)->order() == MemNode::seqcst) && !VM_Version::has_lqarx());
 7688   effect(TEMP_DEF res, USE_KILL src2, USE_KILL mem_ptr, TEMP tmp1, TEMP tmp2, TEMP cr0); // TEMP_DEF to avoid jump
 7689   format %{ "weak CMPXCHGB acq $res, $mem_ptr, $src1, $src2; as bool" %}
 7690   ins_encode %{
 7691     // CmpxchgX sets CCR0 to cmpX(src1, src2) and Rres to 'true'/'false'.
 7692     __ cmpxchgb(CCR0, R0, $src1$$Register, $src2$$Register, $mem_ptr$$Register, $tmp1$$Register, $tmp2$$Register,
 7693                 support_IRIW_for_not_multiple_copy_atomic_cpu ? MacroAssembler::MemBarAcq : MacroAssembler::MemBarFenceAfter,
 7694                 MacroAssembler::cmpxchgx_hint_atomic_update(), $res$$Register, true, /*weak*/ true);
 7695   %}
 7696   ins_pipe(pipe_class_default);
 7697 %}
 7698 
 7699 instruct weakCompareAndSwapS_regP_regI_regI(iRegIdst res, iRegPdst mem_ptr, iRegIsrc src1, iRegIsrc src2, flagsRegCR0 cr0) %{
 7700   match(Set res (WeakCompareAndSwapS mem_ptr (Binary src1 src2)));
 7701   predicate(((CompareAndSwapNode*)n)->order() != MemNode::acquire && ((CompareAndSwapNode*)n)->order() != MemNode::seqcst && VM_Version::has_lqarx());
 7702   effect(TEMP_DEF res, TEMP cr0); // TEMP_DEF to avoid jump
 7703   format %{ "weak CMPXCHGH $res, $mem_ptr, $src1, $src2; as bool" %}
 7704   ins_encode %{
 7705     // CmpxchgX sets CCR0 to cmpX(src1, src2) and Rres to 'true'/'false'.
 7706     __ cmpxchgh(CCR0, R0, $src1$$Register, $src2$$Register, $mem_ptr$$Register, noreg, noreg,
 7707                 MacroAssembler::MemBarNone,
 7708                 MacroAssembler::cmpxchgx_hint_atomic_update(), $res$$Register, true, /*weak*/ true);
 7709   %}
 7710   ins_pipe(pipe_class_default);
 7711 %}
 7712 
 7713 instruct weakCompareAndSwapS4_regP_regI_regI(iRegIdst res, rarg3RegP mem_ptr, iRegIsrc src1, rarg4RegI src2, iRegIdst tmp1, iRegIdst tmp2, flagsRegCR0 cr0) %{
 7714   match(Set res (WeakCompareAndSwapS mem_ptr (Binary src1 src2)));
 7715   predicate(((CompareAndSwapNode*)n)->order() != MemNode::acquire && ((CompareAndSwapNode*)n)->order() != MemNode::seqcst && !VM_Version::has_lqarx());
 7716   effect(TEMP_DEF res, USE_KILL src2, USE_KILL mem_ptr, TEMP tmp1, TEMP tmp2, TEMP cr0); // TEMP_DEF to avoid jump
 7717   format %{ "weak CMPXCHGH $res, $mem_ptr, $src1, $src2; as bool" %}
 7718   ins_encode %{
 7719     // CmpxchgX sets CCR0 to cmpX(src1, src2) and Rres to 'true'/'false'.
 7720     __ cmpxchgh(CCR0, R0, $src1$$Register, $src2$$Register, $mem_ptr$$Register, $tmp1$$Register, $tmp2$$Register,
 7721                 MacroAssembler::MemBarNone,
 7722                 MacroAssembler::cmpxchgx_hint_atomic_update(), $res$$Register, true, /*weak*/ true);
 7723   %}
 7724   ins_pipe(pipe_class_default);
 7725 %}
 7726 
 7727 instruct weakCompareAndSwapS_acq_regP_regI_regI(iRegIdst res, iRegPdst mem_ptr, iRegIsrc src1, iRegIsrc src2, flagsRegCR0 cr0) %{
 7728   match(Set res (WeakCompareAndSwapS mem_ptr (Binary src1 src2)));
 7729   predicate((((CompareAndSwapNode*)n)->order() == MemNode::acquire || ((CompareAndSwapNode*)n)->order() == MemNode::seqcst) && VM_Version::has_lqarx());
 7730   effect(TEMP_DEF res, TEMP cr0); // TEMP_DEF to avoid jump
 7731   format %{ "weak CMPXCHGH acq $res, $mem_ptr, $src1, $src2; as bool" %}
 7732   ins_encode %{
 7733     // CmpxchgX sets CCR0 to cmpX(src1, src2) and Rres to 'true'/'false'.
 7734     __ cmpxchgh(CCR0, R0, $src1$$Register, $src2$$Register, $mem_ptr$$Register, noreg, noreg,
 7735                 support_IRIW_for_not_multiple_copy_atomic_cpu ? MacroAssembler::MemBarAcq : MacroAssembler::MemBarFenceAfter,
 7736                 MacroAssembler::cmpxchgx_hint_atomic_update(), $res$$Register, true, /*weak*/ true);
 7737   %}
 7738   ins_pipe(pipe_class_default);
 7739 %}
 7740 
 7741 instruct weakCompareAndSwapS4_acq_regP_regI_regI(iRegIdst res, rarg3RegP mem_ptr, iRegIsrc src1, rarg4RegI src2, iRegIdst tmp1, iRegIdst tmp2, flagsRegCR0 cr0) %{
 7742   match(Set res (WeakCompareAndSwapS mem_ptr (Binary src1 src2)));
 7743   predicate((((CompareAndSwapNode*)n)->order() == MemNode::acquire || ((CompareAndSwapNode*)n)->order() == MemNode::seqcst) && !VM_Version::has_lqarx());
 7744   effect(TEMP_DEF res, USE_KILL src2, USE_KILL mem_ptr, TEMP tmp1, TEMP tmp2, TEMP cr0); // TEMP_DEF to avoid jump
 7745   format %{ "weak CMPXCHGH acq $res, $mem_ptr, $src1, $src2; as bool" %}
 7746   ins_encode %{
 7747     // CmpxchgX sets CCR0 to cmpX(src1, src2) and Rres to 'true'/'false'.
 7748     __ cmpxchgh(CCR0, R0, $src1$$Register, $src2$$Register, $mem_ptr$$Register, $tmp1$$Register, $tmp2$$Register,
 7749                 support_IRIW_for_not_multiple_copy_atomic_cpu ? MacroAssembler::MemBarAcq : MacroAssembler::MemBarFenceAfter,
 7750                 MacroAssembler::cmpxchgx_hint_atomic_update(), $res$$Register, true, /*weak*/ true);
 7751   %}
 7752   ins_pipe(pipe_class_default);
 7753 %}
 7754 
 7755 instruct weakCompareAndSwapI_regP_regI_regI(iRegIdst res, iRegPdst mem_ptr, iRegIsrc src1, iRegIsrc src2, flagsRegCR0 cr0) %{
 7756   match(Set res (WeakCompareAndSwapI mem_ptr (Binary src1 src2)));
 7757   predicate(((CompareAndSwapNode*)n)->order() != MemNode::acquire && ((CompareAndSwapNode*)n)->order() != MemNode::seqcst);
 7758   effect(TEMP_DEF res, TEMP cr0); // TEMP_DEF to avoid jump
 7759   format %{ "weak CMPXCHGW $res, $mem_ptr, $src1, $src2; as bool" %}
 7760   ins_encode %{
 7761     // CmpxchgX sets CCR0 to cmpX(src1, src2) and Rres to 'true'/'false'.
 7762     __ cmpxchgw(CCR0, R0, $src1$$Register, $src2$$Register, $mem_ptr$$Register,
 7763                 MacroAssembler::MemBarNone,
 7764                 MacroAssembler::cmpxchgx_hint_atomic_update(), $res$$Register, true, /*weak*/ true);
 7765   %}
 7766   ins_pipe(pipe_class_default);
 7767 %}
 7768 
 7769 instruct weakCompareAndSwapI_acq_regP_regI_regI(iRegIdst res, iRegPdst mem_ptr, iRegIsrc src1, iRegIsrc src2, flagsRegCR0 cr0) %{
 7770   match(Set res (WeakCompareAndSwapI mem_ptr (Binary src1 src2)));
 7771   predicate(((CompareAndSwapNode*)n)->order() == MemNode::acquire || ((CompareAndSwapNode*)n)->order() == MemNode::seqcst);
 7772   effect(TEMP_DEF res, TEMP cr0); // TEMP_DEF to avoid jump
 7773   format %{ "weak CMPXCHGW acq $res, $mem_ptr, $src1, $src2; as bool" %}
 7774   ins_encode %{
 7775     // CmpxchgX sets CCR0 to cmpX(src1, src2) and Rres to 'true'/'false'.
 7776     // Acquire only needed in successful case. Weak node is allowed to report unsuccessful in additional rare cases and
 7777     // value is never passed to caller.
 7778     __ cmpxchgw(CCR0, R0, $src1$$Register, $src2$$Register, $mem_ptr$$Register,
 7779                 support_IRIW_for_not_multiple_copy_atomic_cpu ? MacroAssembler::MemBarAcq : MacroAssembler::MemBarFenceAfter,
 7780                 MacroAssembler::cmpxchgx_hint_atomic_update(), $res$$Register, true, /*weak*/ true);
 7781   %}
 7782   ins_pipe(pipe_class_default);
 7783 %}
 7784 
 7785 instruct weakCompareAndSwapN_regP_regN_regN(iRegIdst res, iRegPdst mem_ptr, iRegNsrc src1, iRegNsrc src2, flagsRegCR0 cr0) %{
 7786   match(Set res (WeakCompareAndSwapN mem_ptr (Binary src1 src2)));
 7787   predicate(((CompareAndSwapNode*)n)->order() != MemNode::acquire && ((CompareAndSwapNode*)n)->order() != MemNode::seqcst);
 7788   effect(TEMP_DEF res, TEMP cr0); // TEMP_DEF to avoid jump
 7789   format %{ "weak CMPXCHGW $res, $mem_ptr, $src1, $src2; as bool" %}
 7790   ins_encode %{
 7791     // CmpxchgX sets CCR0 to cmpX(src1, src2) and Rres to 'true'/'false'.
 7792     __ cmpxchgw(CCR0, R0, $src1$$Register, $src2$$Register, $mem_ptr$$Register,
 7793                 MacroAssembler::MemBarNone,
 7794                 MacroAssembler::cmpxchgx_hint_atomic_update(), $res$$Register, true, /*weak*/ true);
 7795   %}
 7796   ins_pipe(pipe_class_default);
 7797 %}
 7798 
 7799 instruct weakCompareAndSwapN_acq_regP_regN_regN(iRegIdst res, iRegPdst mem_ptr, iRegNsrc src1, iRegNsrc src2, flagsRegCR0 cr0) %{
 7800   match(Set res (WeakCompareAndSwapN mem_ptr (Binary src1 src2)));
 7801   predicate(((CompareAndSwapNode*)n)->order() == MemNode::acquire || ((CompareAndSwapNode*)n)->order() == MemNode::seqcst);
 7802   effect(TEMP_DEF res, TEMP cr0); // TEMP_DEF to avoid jump
 7803   format %{ "weak CMPXCHGW acq $res, $mem_ptr, $src1, $src2; as bool" %}
 7804   ins_encode %{
 7805     // CmpxchgX sets CCR0 to cmpX(src1, src2) and Rres to 'true'/'false'.
 7806     // Acquire only needed in successful case. Weak node is allowed to report unsuccessful in additional rare cases and
 7807     // value is never passed to caller.
 7808     __ cmpxchgw(CCR0, R0, $src1$$Register, $src2$$Register, $mem_ptr$$Register,
 7809                 support_IRIW_for_not_multiple_copy_atomic_cpu ? MacroAssembler::MemBarAcq : MacroAssembler::MemBarFenceAfter,
 7810                 MacroAssembler::cmpxchgx_hint_atomic_update(), $res$$Register, true, /*weak*/ true);
 7811   %}
 7812   ins_pipe(pipe_class_default);
 7813 %}
 7814 
 7815 instruct weakCompareAndSwapL_regP_regL_regL(iRegIdst res, iRegPdst mem_ptr, iRegLsrc src1, iRegLsrc src2, flagsRegCR0 cr0) %{
 7816   match(Set res (WeakCompareAndSwapL mem_ptr (Binary src1 src2)));
 7817   predicate(((CompareAndSwapNode*)n)->order() != MemNode::acquire && ((CompareAndSwapNode*)n)->order() != MemNode::seqcst);
 7818   effect(TEMP_DEF res, TEMP cr0); // TEMP_DEF to avoid jump
 7819   format %{ "weak CMPXCHGD $res, $mem_ptr, $src1, $src2; as bool" %}
 7820   ins_encode %{
 7821     // CmpxchgX sets CCR0 to cmpX(src1, src2) and Rres to 'true'/'false'.
 7823     __ cmpxchgd(CCR0, R0, $src1$$Register, $src2$$Register, $mem_ptr$$Register,
 7824                 MacroAssembler::MemBarNone,
 7825                 MacroAssembler::cmpxchgx_hint_atomic_update(), $res$$Register, NULL, true, /*weak*/ true);
 7826   %}
 7827   ins_pipe(pipe_class_default);
 7828 %}
 7829 
 7830 instruct weakCompareAndSwapL_acq_regP_regL_regL(iRegIdst res, iRegPdst mem_ptr, iRegLsrc src1, iRegLsrc src2, flagsRegCR0 cr0) %{
 7831   match(Set res (WeakCompareAndSwapL mem_ptr (Binary src1 src2)));
 7832   predicate(((CompareAndSwapNode*)n)->order() == MemNode::acquire || ((CompareAndSwapNode*)n)->order() == MemNode::seqcst);
 7833   effect(TEMP_DEF res, TEMP cr0); // TEMP_DEF to avoid jump
 7834   format %{ "weak CMPXCHGD acq $res, $mem_ptr, $src1, $src2; as bool" %}
 7835   ins_encode %{
 7836     // CmpxchgX sets CCR0 to cmpX(src1, src2) and Rres to 'true'/'false'.
 7837     // Acquire only needed in successful case. Weak node is allowed to report unsuccessful in additional rare cases and
 7838     // value is never passed to caller.
 7839     __ cmpxchgd(CCR0, R0, $src1$$Register, $src2$$Register, $mem_ptr$$Register,
 7840                 support_IRIW_for_not_multiple_copy_atomic_cpu ? MacroAssembler::MemBarAcq : MacroAssembler::MemBarFenceAfter,
 7841                 MacroAssembler::cmpxchgx_hint_atomic_update(), $res$$Register, NULL, true, /*weak*/ true);
 7842   %}
 7843   ins_pipe(pipe_class_default);
 7844 %}
 7845 
 7846 instruct weakCompareAndSwapP_regP_regP_regP(iRegIdst res, iRegPdst mem_ptr, iRegPsrc src1, iRegPsrc src2, flagsRegCR0 cr0) %{
 7847   match(Set res (WeakCompareAndSwapP mem_ptr (Binary src1 src2)));
 7848   predicate((((CompareAndSwapNode*)n)->order() != MemNode::acquire && ((CompareAndSwapNode*)n)->order() != MemNode::seqcst) && n->as_LoadStore()->barrier_data() == 0);
 7849   effect(TEMP_DEF res, TEMP cr0); // TEMP_DEF to avoid jump
 7850   format %{ "weak CMPXCHGD $res, $mem_ptr, $src1, $src2; as bool; ptr" %}
 7851   ins_encode %{
 7852     // CmpxchgX sets CCR0 to cmpX(src1, src2) and Rres to 'true'/'false'.
 7853     __ cmpxchgd(CCR0, R0, $src1$$Register, $src2$$Register, $mem_ptr$$Register,
 7854                 MacroAssembler::MemBarNone,
 7855                 MacroAssembler::cmpxchgx_hint_atomic_update(), $res$$Register, NULL, true, /*weak*/ true);
 7856   %}
 7857   ins_pipe(pipe_class_default);
 7858 %}
 7859 
 7860 instruct weakCompareAndSwapP_acq_regP_regP_regP(iRegIdst res, iRegPdst mem_ptr, iRegPsrc src1, iRegPsrc src2, flagsRegCR0 cr0) %{
 7861   match(Set res (WeakCompareAndSwapP mem_ptr (Binary src1 src2)));
 7862   predicate((((CompareAndSwapNode*)n)->order() == MemNode::acquire || ((CompareAndSwapNode*)n)->order() == MemNode::seqcst) && n->as_LoadStore()->barrier_data() == 0);
 7863   effect(TEMP_DEF res, TEMP cr0); // TEMP_DEF to avoid jump
 7864   format %{ "weak CMPXCHGD acq $res, $mem_ptr, $src1, $src2; as bool; ptr" %}
 7865   ins_encode %{
 7866     // CmpxchgX sets CCR0 to cmpX(src1, src2) and Rres to 'true'/'false'.
 7867     // Acquire only needed in successful case. Weak node is allowed to report unsuccessful in additional rare cases and
 7868     // value is never passed to caller.
 7869     __ cmpxchgd(CCR0, R0, $src1$$Register, $src2$$Register, $mem_ptr$$Register,
 7870                 support_IRIW_for_not_multiple_copy_atomic_cpu ? MacroAssembler::MemBarAcq : MacroAssembler::MemBarFenceAfter,
 7871                 MacroAssembler::cmpxchgx_hint_atomic_update(), $res$$Register, NULL, true, /*weak*/ true);
 7872   %}
 7873   ins_pipe(pipe_class_default);
 7874 %}
 7875 
 7876 // CompareAndExchange
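      // Unlike CompareAndSwap, these rules produce the previous memory value: cmpxchgx
      // loads the old value into $res and gets noreg in place of a boolean result register.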
 7877 
 7878 instruct compareAndExchangeB_regP_regI_regI(iRegIdst res, iRegPdst mem_ptr, iRegIsrc src1, iRegIsrc src2, flagsRegCR0 cr0) %{
 7879   match(Set res (CompareAndExchangeB mem_ptr (Binary src1 src2)));
 7880   predicate(((CompareAndSwapNode*)n)->order() != MemNode::acquire && ((CompareAndSwapNode*)n)->order() != MemNode::seqcst && VM_Version::has_lqarx());
 7881   effect(TEMP_DEF res, TEMP cr0);
 7882   format %{ "CMPXCHGB $res, $mem_ptr, $src1, $src2; as int" %}
 7883   ins_encode %{
 7884     // CmpxchgX sets CCR0 to cmpX(src1, src2) and Rres to 'true'/'false'.
 7885     __ cmpxchgb(CCR0, $res$$Register, $src1$$Register, $src2$$Register, $mem_ptr$$Register, noreg, noreg,
 7886                 MacroAssembler::MemBarNone, MacroAssembler::cmpxchgx_hint_atomic_update(),
 7887                 noreg, true);
 7888   %}
 7889   ins_pipe(pipe_class_default);
 7890 %}
 7891 
 7892 instruct compareAndExchangeB4_regP_regI_regI(iRegIdst res, rarg3RegP mem_ptr, iRegIsrc src1, rarg4RegI src2, iRegIdst tmp1, flagsRegCR0 cr0) %{
 7893   match(Set res (CompareAndExchangeB mem_ptr (Binary src1 src2)));
 7894   predicate(((CompareAndSwapNode*)n)->order() != MemNode::acquire && ((CompareAndSwapNode*)n)->order() != MemNode::seqcst && !VM_Version::has_lqarx());
 7895   effect(TEMP_DEF res, USE_KILL src2, USE_KILL mem_ptr, TEMP tmp1, TEMP cr0);
 7896   format %{ "CMPXCHGB $res, $mem_ptr, $src1, $src2; as int" %}
 7897   ins_encode %{
 7898     // CmpxchgX sets CCR0 to cmpX(src1, src2) and Rres to 'true'/'false'.
 7899     __ cmpxchgb(CCR0, $res$$Register, $src1$$Register, $src2$$Register, $mem_ptr$$Register, $tmp1$$Register, R0,
 7900                 MacroAssembler::MemBarNone, MacroAssembler::cmpxchgx_hint_atomic_update(),
 7901                 noreg, true);
 7902   %}
 7903   ins_pipe(pipe_class_default);
 7904 %}
 7905 
 7906 instruct compareAndExchangeB_acq_regP_regI_regI(iRegIdst res, iRegPdst mem_ptr, iRegIsrc src1, iRegIsrc src2, flagsRegCR0 cr0) %{
 7907   match(Set res (CompareAndExchangeB mem_ptr (Binary src1 src2)));
 7908   predicate((((CompareAndSwapNode*)n)->order() == MemNode::acquire || ((CompareAndSwapNode*)n)->order() == MemNode::seqcst) && VM_Version::has_lqarx());
 7909   effect(TEMP_DEF res, TEMP cr0);
 7910   format %{ "CMPXCHGB acq $res, $mem_ptr, $src1, $src2; as int" %}
 7911   ins_encode %{
 7912     // CmpxchgX sets CCR0 to cmpX(src1, src2) and Rres to 'true'/'false'.
 7913     __ cmpxchgb(CCR0, $res$$Register, $src1$$Register, $src2$$Register, $mem_ptr$$Register, noreg, noreg,
 7914                 MacroAssembler::MemBarNone, MacroAssembler::cmpxchgx_hint_atomic_update(),
 7915                 noreg, true);
 7916     if (support_IRIW_for_not_multiple_copy_atomic_cpu) {
 7917       __ isync();
 7918     } else {
 7919       // isync would be sufficient in case of CompareAndExchangeAcquire, but we currently don't optimize for that.
 7920       __ sync();
 7921     }
 7922   %}
 7923   ins_pipe(pipe_class_default);
 7924 %}
 7925 
 7926 instruct compareAndExchangeB4_acq_regP_regI_regI(iRegIdst res, rarg3RegP mem_ptr, iRegIsrc src1, rarg4RegI src2, iRegIdst tmp1, flagsRegCR0 cr0) %{
 7927   match(Set res (CompareAndExchangeB mem_ptr (Binary src1 src2)));
 7928   predicate((((CompareAndSwapNode*)n)->order() == MemNode::acquire || ((CompareAndSwapNode*)n)->order() == MemNode::seqcst) && !VM_Version::has_lqarx());
 7929   effect(TEMP_DEF res, USE_KILL src2, USE_KILL mem_ptr, TEMP tmp1, TEMP cr0);
 7930   format %{ "CMPXCHGB acq $res, $mem_ptr, $src1, $src2; as int" %}
 7931   ins_encode %{
 7932     // CmpxchgX sets CCR0 to cmpX(src1, src2) and Rres to 'true'/'false'.
 7933     __ cmpxchgb(CCR0, $res$$Register, $src1$$Register, $src2$$Register, $mem_ptr$$Register, $tmp1$$Register, R0,
 7934                 MacroAssembler::MemBarNone, MacroAssembler::cmpxchgx_hint_atomic_update(),
 7935                 noreg, true);
 7936     if (support_IRIW_for_not_multiple_copy_atomic_cpu) {
 7937       __ isync();
 7938     } else {
 7939       // isync would be sufficient in case of CompareAndExchangeAcquire, but we currently don't optimize for that.
 7940       __ sync();
 7941     }
 7942   %}
 7943   ins_pipe(pipe_class_default);
 7944 %}
 7945 
 7946 instruct compareAndExchangeS_regP_regI_regI(iRegIdst res, iRegPdst mem_ptr, iRegIsrc src1, iRegIsrc src2, flagsRegCR0 cr0) %{
 7947   match(Set res (CompareAndExchangeS mem_ptr (Binary src1 src2)));
 7948   predicate(((CompareAndSwapNode*)n)->order() != MemNode::acquire && ((CompareAndSwapNode*)n)->order() != MemNode::seqcst && VM_Version::has_lqarx());
 7949   effect(TEMP_DEF res, TEMP cr0);
 7950   format %{ "CMPXCHGH $res, $mem_ptr, $src1, $src2; as int" %}
 7951   ins_encode %{
 7952     // CmpxchgX sets CCR0 to cmpX(src1, src2) and Rres to 'true'/'false'.
 7953     __ cmpxchgh(CCR0, $res$$Register, $src1$$Register, $src2$$Register, $mem_ptr$$Register, noreg, noreg,
 7954                 MacroAssembler::MemBarNone, MacroAssembler::cmpxchgx_hint_atomic_update(),
 7955                 noreg, true);
 7956   %}
 7957   ins_pipe(pipe_class_default);
 7958 %}
 7959 
 7960 instruct compareAndExchangeS4_regP_regI_regI(iRegIdst res, rarg3RegP mem_ptr, iRegIsrc src1, rarg4RegI src2, iRegIdst tmp1, flagsRegCR0 cr0) %{
 7961   match(Set res (CompareAndExchangeS mem_ptr (Binary src1 src2)));
 7962   predicate(((CompareAndSwapNode*)n)->order() != MemNode::acquire && ((CompareAndSwapNode*)n)->order() != MemNode::seqcst && !VM_Version::has_lqarx());
 7963   effect(TEMP_DEF res, USE_KILL src2, USE_KILL mem_ptr, TEMP tmp1, TEMP cr0);
 7964   format %{ "CMPXCHGH $res, $mem_ptr, $src1, $src2; as int" %}
 7965   ins_encode %{
 7966     // CmpxchgX sets CCR0 to cmpX(src1, src2) and Rres to 'true'/'false'.
 7967     __ cmpxchgh(CCR0, $res$$Register, $src1$$Register, $src2$$Register, $mem_ptr$$Register, $tmp1$$Register, R0,
 7968                 MacroAssembler::MemBarNone, MacroAssembler::cmpxchgx_hint_atomic_update(),
 7969                 noreg, true);
 7970   %}
 7971   ins_pipe(pipe_class_default);
 7972 %}
 7973 
 7974 instruct compareAndExchangeS_acq_regP_regI_regI(iRegIdst res, iRegPdst mem_ptr, iRegIsrc src1, iRegIsrc src2, flagsRegCR0 cr0) %{
 7975   match(Set res (CompareAndExchangeS mem_ptr (Binary src1 src2)));
 7976   predicate((((CompareAndSwapNode*)n)->order() == MemNode::acquire || ((CompareAndSwapNode*)n)->order() == MemNode::seqcst) && VM_Version::has_lqarx());
 7977   effect(TEMP_DEF res, TEMP cr0);
 7978   format %{ "CMPXCHGH acq $res, $mem_ptr, $src1, $src2; as int" %}
 7979   ins_encode %{
 7980     // CmpxchgX sets CCR0 to cmpX(src1, src2) and Rres to 'true'/'false'.
 7981     __ cmpxchgh(CCR0, $res$$Register, $src1$$Register, $src2$$Register, $mem_ptr$$Register, noreg, noreg,
 7982                 MacroAssembler::MemBarNone, MacroAssembler::cmpxchgx_hint_atomic_update(),
 7983                 noreg, true);
 7984     if (support_IRIW_for_not_multiple_copy_atomic_cpu) {
 7985       __ isync();
 7986     } else {
 7987       // isync would be sufficient in case of CompareAndExchangeAcquire, but we currently don't optimize for that.
 7988       __ sync();
 7989     }
 7990   %}
 7991   ins_pipe(pipe_class_default);
 7992 %}
 7993 
 7994 instruct compareAndExchangeS4_acq_regP_regI_regI(iRegIdst res, rarg3RegP mem_ptr, iRegIsrc src1, rarg4RegI src2, iRegIdst tmp1, flagsRegCR0 cr0) %{
 7995   match(Set res (CompareAndExchangeS mem_ptr (Binary src1 src2)));
 7996   predicate((((CompareAndSwapNode*)n)->order() == MemNode::acquire || ((CompareAndSwapNode*)n)->order() == MemNode::seqcst) && !VM_Version::has_lqarx());
 7997   effect(TEMP_DEF res, USE_KILL src2, USE_KILL mem_ptr, TEMP tmp1, TEMP cr0);
 7998   format %{ "CMPXCHGH acq $res, $mem_ptr, $src1, $src2; as int" %}
 7999   ins_encode %{
 8000     // CmpxchgX sets CCR0 to cmpX(src1, src2) and Rres to 'true'/'false'.
 8001     __ cmpxchgh(CCR0, $res$$Register, $src1$$Register, $src2$$Register, $mem_ptr$$Register, $tmp1$$Register, R0,
 8002                 MacroAssembler::MemBarNone, MacroAssembler::cmpxchgx_hint_atomic_update(),
 8003                 noreg, true);
 8004     if (support_IRIW_for_not_multiple_copy_atomic_cpu) {
 8005       __ isync();
 8006     } else {
 8007       // isync would be sufficient in case of CompareAndExchangeAcquire, but we currently don't optimize for that.
 8008       __ sync();
 8009     }
 8010   %}
 8011   ins_pipe(pipe_class_default);
 8012 %}
 8013 
 8014 instruct compareAndExchangeI_regP_regI_regI(iRegIdst res, iRegPdst mem_ptr, iRegIsrc src1, iRegIsrc src2, flagsRegCR0 cr0) %{
 8015   match(Set res (CompareAndExchangeI mem_ptr (Binary src1 src2)));
 8016   predicate(((CompareAndSwapNode*)n)->order() != MemNode::acquire && ((CompareAndSwapNode*)n)->order() != MemNode::seqcst);
 8017   effect(TEMP_DEF res, TEMP cr0);
 8018   format %{ "CMPXCHGW $res, $mem_ptr, $src1, $src2; as int" %}
 8019   ins_encode %{
 8020     // CmpxchgX sets CCR0 to cmpX(src1, src2) and Rres to 'true'/'false'.
 8021     __ cmpxchgw(CCR0, $res$$Register, $src1$$Register, $src2$$Register, $mem_ptr$$Register,
 8022                 MacroAssembler::MemBarNone, MacroAssembler::cmpxchgx_hint_atomic_update(),
 8023                 noreg, true);
 8024   %}
 8025   ins_pipe(pipe_class_default);
 8026 %}
 8027 
 8028 instruct compareAndExchangeI_acq_regP_regI_regI(iRegIdst res, iRegPdst mem_ptr, iRegIsrc src1, iRegIsrc src2, flagsRegCR0 cr0) %{
 8029   match(Set res (CompareAndExchangeI mem_ptr (Binary src1 src2)));
 8030   predicate(((CompareAndSwapNode*)n)->order() == MemNode::acquire || ((CompareAndSwapNode*)n)->order() == MemNode::seqcst);
 8031   effect(TEMP_DEF res, TEMP cr0);
 8032   format %{ "CMPXCHGW acq $res, $mem_ptr, $src1, $src2; as int" %}
 8033   ins_encode %{
 8034     // CmpxchgX sets CCR0 to cmpX(src1, src2) and Rres to 'true'/'false'.
 8035     __ cmpxchgw(CCR0, $res$$Register, $src1$$Register, $src2$$Register, $mem_ptr$$Register,
 8036                 MacroAssembler::MemBarNone, MacroAssembler::cmpxchgx_hint_atomic_update(),
 8037                 noreg, true);
 8038     if (support_IRIW_for_not_multiple_copy_atomic_cpu) {
 8039       __ isync();
 8040     } else {
 8041       // isync would be sufficient in case of CompareAndExchangeAcquire, but we currently don't optimize for that.
 8042       __ sync();
 8043     }
 8044   %}
 8045   ins_pipe(pipe_class_default);
 8046 %}
 8047 
 8048 instruct compareAndExchangeN_regP_regN_regN(iRegNdst res, iRegPdst mem_ptr, iRegNsrc src1, iRegNsrc src2, flagsRegCR0 cr0) %{
 8049   match(Set res (CompareAndExchangeN mem_ptr (Binary src1 src2)));
 8050   predicate(((CompareAndSwapNode*)n)->order() != MemNode::acquire && ((CompareAndSwapNode*)n)->order() != MemNode::seqcst);
 8051   effect(TEMP_DEF res, TEMP cr0);
 8052   format %{ "CMPXCHGW $res, $mem_ptr, $src1, $src2; as narrow oop" %}
 8053   ins_encode %{
 8054     // CmpxchgX sets CCR0 to cmpX(src1, src2) and Rres to 'true'/'false'.
 8055     __ cmpxchgw(CCR0, $res$$Register, $src1$$Register, $src2$$Register, $mem_ptr$$Register,
 8056                 MacroAssembler::MemBarNone, MacroAssembler::cmpxchgx_hint_atomic_update(),
 8057                 noreg, true);
 8058   %}
 8059   ins_pipe(pipe_class_default);
 8060 %}
 8061 
 8062 instruct compareAndExchangeN_acq_regP_regN_regN(iRegNdst res, iRegPdst mem_ptr, iRegNsrc src1, iRegNsrc src2, flagsRegCR0 cr0) %{
 8063   match(Set res (CompareAndExchangeN mem_ptr (Binary src1 src2)));
 8064   predicate(((CompareAndSwapNode*)n)->order() == MemNode::acquire || ((CompareAndSwapNode*)n)->order() == MemNode::seqcst);
 8065   effect(TEMP_DEF res, TEMP cr0);
 8066   format %{ "CMPXCHGW acq $res, $mem_ptr, $src1, $src2; as narrow oop" %}
 8067   ins_encode %{
 8068     // CmpxchgX sets CCR0 to cmpX(src1, src2) and Rres to 'true'/'false'.
 8069     __ cmpxchgw(CCR0, $res$$Register, $src1$$Register, $src2$$Register, $mem_ptr$$Register,
 8070                 MacroAssembler::MemBarNone, MacroAssembler::cmpxchgx_hint_atomic_update(),
 8071                 noreg, true);
 8072     if (support_IRIW_for_not_multiple_copy_atomic_cpu) {
 8073       __ isync();
 8074     } else {
 8075       // isync would be sufficient in case of CompareAndExchangeAcquire, but we currently don't optimize for that.
 8076       __ sync();
 8077     }
 8078   %}
 8079   ins_pipe(pipe_class_default);
 8080 %}
 8081 
 8082 instruct compareAndExchangeL_regP_regL_regL(iRegLdst res, iRegPdst mem_ptr, iRegLsrc src1, iRegLsrc src2, flagsRegCR0 cr0) %{
 8083   match(Set res (CompareAndExchangeL mem_ptr (Binary src1 src2)));
 8084   predicate(((CompareAndSwapNode*)n)->order() != MemNode::acquire && ((CompareAndSwapNode*)n)->order() != MemNode::seqcst);
 8085   effect(TEMP_DEF res, TEMP cr0);
 8086   format %{ "CMPXCHGD $res, $mem_ptr, $src1, $src2; as long" %}
 8087   ins_encode %{
 8088     // CmpxchgX sets CCR0 to cmpX(src1, src2) and Rres to 'true'/'false'.
 8089     __ cmpxchgd(CCR0, $res$$Register, $src1$$Register, $src2$$Register, $mem_ptr$$Register,
 8090                 MacroAssembler::MemBarNone, MacroAssembler::cmpxchgx_hint_atomic_update(),
 8091                 noreg, NULL, true);
 8092   %}
 8093   ins_pipe(pipe_class_default);
 8094 %}
 8095 
 8096 instruct compareAndExchangeL_acq_regP_regL_regL(iRegLdst res, iRegPdst mem_ptr, iRegLsrc src1, iRegLsrc src2, flagsRegCR0 cr0) %{
 8097   match(Set res (CompareAndExchangeL mem_ptr (Binary src1 src2)));
 8098   predicate(((CompareAndSwapNode*)n)->order() == MemNode::acquire || ((CompareAndSwapNode*)n)->order() == MemNode::seqcst);
 8099   effect(TEMP_DEF res, TEMP cr0);
 8100   format %{ "CMPXCHGD acq $res, $mem_ptr, $src1, $src2; as long" %}
 8101   ins_encode %{
 8102     // CmpxchgX sets CCR0 to cmpX(src1, src2) and Rres to 'true'/'false'.
 8103     __ cmpxchgd(CCR0, $res$$Register, $src1$$Register, $src2$$Register, $mem_ptr$$Register,
 8104                 MacroAssembler::MemBarNone, MacroAssembler::cmpxchgx_hint_atomic_update(),
 8105                 noreg, NULL, true);
 8106     if (support_IRIW_for_not_multiple_copy_atomic_cpu) {
 8107       __ isync();
 8108     } else {
 8109       // isync would be sufficient in case of CompareAndExchangeAcquire, but we currently don't optimize for that.
 8110       __ sync();
 8111     }
 8112   %}
 8113   ins_pipe(pipe_class_default);
 8114 %}
 8115 
 8116 instruct compareAndExchangeP_regP_regP_regP(iRegPdst res, iRegPdst mem_ptr, iRegPsrc src1, iRegPsrc src2, flagsRegCR0 cr0) %{
 8117   match(Set res (CompareAndExchangeP mem_ptr (Binary src1 src2)));
 8118   predicate((((CompareAndSwapNode*)n)->order() != MemNode::acquire && ((CompareAndSwapNode*)n)->order() != MemNode::seqcst)
 8119             && n->as_LoadStore()->barrier_data() == 0);
 8120   effect(TEMP_DEF res, TEMP cr0);
 8121   format %{ "CMPXCHGD $res, $mem_ptr, $src1, $src2; as ptr; ptr" %}
 8122   ins_encode %{
 8123     // CmpxchgX sets CCR0 to cmpX(src1, src2) and Rres to 'true'/'false'.
 8124     __ cmpxchgd(CCR0, $res$$Register, $src1$$Register, $src2$$Register, $mem_ptr$$Register,
 8125                 MacroAssembler::MemBarNone, MacroAssembler::cmpxchgx_hint_atomic_update(),
 8126                 noreg, NULL, true);
 8127   %}
 8128   ins_pipe(pipe_class_default);
 8129 %}
 8130 
 8131 instruct compareAndExchangeP_acq_regP_regP_regP(iRegPdst res, iRegPdst mem_ptr, iRegPsrc src1, iRegPsrc src2, flagsRegCR0 cr0) %{
 8132   match(Set res (CompareAndExchangeP mem_ptr (Binary src1 src2)));
 8133   predicate((((CompareAndSwapNode*)n)->order() == MemNode::acquire || ((CompareAndSwapNode*)n)->order() == MemNode::seqcst)
 8134             && n->as_LoadStore()->barrier_data() == 0);
 8135   effect(TEMP_DEF res, TEMP cr0);
 8136   format %{ "CMPXCHGD acq $res, $mem_ptr, $src1, $src2; as ptr; ptr" %}
 8137   ins_encode %{
 8138     // CmpxchgX sets CCR0 to cmpX(src1, src2) and Rres to 'true'/'false'.
 8139     __ cmpxchgd(CCR0, $res$$Register, $src1$$Register, $src2$$Register, $mem_ptr$$Register,
 8140                 MacroAssembler::MemBarNone, MacroAssembler::cmpxchgx_hint_atomic_update(),
 8141                 noreg, NULL, true);
 8142     if (support_IRIW_for_not_multiple_copy_atomic_cpu) {
 8143       __ isync();
 8144     } else {
 8145       // isync would be sufficient in case of CompareAndExchangeAcquire, but we currently don't optimize for that.
 8146       __ sync();
 8147     }
 8148   %}
 8149   ins_pipe(pipe_class_default);
 8150 %}
 8151 
 8152 // Special RMW
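      // GetAndAdd / GetAndSet. The *4 variants are selected when !VM_Version::has_lqarx()
      // (no sub-word larx forms); they need two extra temps and pin mem_ptr to a fixed
      // argument register (rarg3RegP, USE_KILL).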
 8153 
 8154 instruct getAndAddB(iRegIdst res, iRegPdst mem_ptr, iRegIsrc src, flagsRegCR0 cr0) %{
 8155   match(Set res (GetAndAddB mem_ptr src));
 8156   predicate(VM_Version::has_lqarx());
 8157   effect(TEMP_DEF res, TEMP cr0);
 8158   format %{ "GetAndAddB $res, $mem_ptr, $src" %}
 8159   ins_encode %{
 8160     __ getandaddb($res$$Register, $src$$Register, $mem_ptr$$Register,
 8161                   R0, noreg, noreg, MacroAssembler::cmpxchgx_hint_atomic_update());
 8162     if (support_IRIW_for_not_multiple_copy_atomic_cpu) {
 8163       __ isync();
 8164     } else {
 8165       __ sync();
 8166     }
 8167   %}
 8168   ins_pipe(pipe_class_default);
 8169 %}
 8170 
 8171 instruct getAndAddB4(iRegIdst res, rarg3RegP mem_ptr, iRegIsrc src, iRegIsrc tmp1, iRegIsrc tmp2, flagsRegCR0 cr0) %{
 8172   match(Set res (GetAndAddB mem_ptr src));
 8173   predicate(!VM_Version::has_lqarx());
 8174   effect(TEMP_DEF res, USE_KILL mem_ptr, TEMP tmp1, TEMP tmp2, TEMP cr0);
 8175   format %{ "GetAndAddB $res, $mem_ptr, $src" %}
 8176   ins_encode %{
 8177     __ getandaddb($res$$Register, $src$$Register, $mem_ptr$$Register,
 8178                   R0, $tmp1$$Register, $tmp2$$Register, MacroAssembler::cmpxchgx_hint_atomic_update());
 8179     if (support_IRIW_for_not_multiple_copy_atomic_cpu) {
 8180       __ isync();
 8181     } else {
 8182       __ sync();
 8183     }
 8184   %}
 8185   ins_pipe(pipe_class_default);
 8186 %}
 8187 
 8188 instruct getAndAddS(iRegIdst res, iRegPdst mem_ptr, iRegIsrc src, flagsRegCR0 cr0) %{
 8189   match(Set res (GetAndAddS mem_ptr src));
 8190   predicate(VM_Version::has_lqarx());
 8191   effect(TEMP_DEF res, TEMP cr0);
 8192   format %{ "GetAndAddS $res, $mem_ptr, $src" %}
 8193   ins_encode %{
 8194     __ getandaddh($res$$Register, $src$$Register, $mem_ptr$$Register,
 8195                   R0, noreg, noreg, MacroAssembler::cmpxchgx_hint_atomic_update());
 8196     if (support_IRIW_for_not_multiple_copy_atomic_cpu) {
 8197       __ isync();
 8198     } else {
 8199       __ sync();
 8200     }
 8201   %}
 8202   ins_pipe(pipe_class_default);
 8203 %}
 8204 
 8205 instruct getAndAddS4(iRegIdst res, rarg3RegP mem_ptr, iRegIsrc src, iRegIsrc tmp1, iRegIsrc tmp2, flagsRegCR0 cr0) %{
 8206   match(Set res (GetAndAddS mem_ptr src));
 8207   predicate(!VM_Version::has_lqarx());
 8208   effect(TEMP_DEF res, USE_KILL mem_ptr, TEMP tmp1, TEMP tmp2, TEMP cr0);
 8209   format %{ "GetAndAddS $res, $mem_ptr, $src" %}
 8210   ins_encode %{
 8211     __ getandaddh($res$$Register, $src$$Register, $mem_ptr$$Register,
 8212                   R0, $tmp1$$Register, $tmp2$$Register, MacroAssembler::cmpxchgx_hint_atomic_update());
 8213     if (support_IRIW_for_not_multiple_copy_atomic_cpu) {
 8214       __ isync();
 8215     } else {
 8216       __ sync();
 8217     }
 8218   %}
 8219   ins_pipe(pipe_class_default);
 8220 %}
 8221 
 8222 instruct getAndAddI(iRegIdst res, iRegPdst mem_ptr, iRegIsrc src, flagsRegCR0 cr0) %{
 8223   match(Set res (GetAndAddI mem_ptr src));
 8224   effect(TEMP_DEF res, TEMP cr0);
 8225   format %{ "GetAndAddI $res, $mem_ptr, $src" %}
 8226   ins_encode %{
 8227     __ getandaddw($res$$Register, $src$$Register, $mem_ptr$$Register,
 8228                   R0, MacroAssembler::cmpxchgx_hint_atomic_update());
 8229     if (support_IRIW_for_not_multiple_copy_atomic_cpu) {
 8230       __ isync();
 8231     } else {
 8232       __ sync();
 8233     }
 8234   %}
 8235   ins_pipe(pipe_class_default);
 8236 %}
 8237 
 8238 instruct getAndAddL(iRegLdst res, iRegPdst mem_ptr, iRegLsrc src, flagsRegCR0 cr0) %{
 8239   match(Set res (GetAndAddL mem_ptr src));
 8240   effect(TEMP_DEF res, TEMP cr0);
 8241   format %{ "GetAndAddL $res, $mem_ptr, $src" %}
 8242   ins_encode %{
 8243     __ getandaddd($res$$Register, $src$$Register, $mem_ptr$$Register,
 8244                   R0, MacroAssembler::cmpxchgx_hint_atomic_update());
 8245     if (support_IRIW_for_not_multiple_copy_atomic_cpu) {
 8246       __ isync();
 8247     } else {
 8248       __ sync();
 8249     }
 8250   %}
 8251   ins_pipe(pipe_class_default);
 8252 %}
 8253 
 8254 instruct getAndSetB(iRegIdst res, iRegPdst mem_ptr, iRegIsrc src, flagsRegCR0 cr0) %{
 8255   match(Set res (GetAndSetB mem_ptr src));
 8256   predicate(VM_Version::has_lqarx());
 8257   effect(TEMP_DEF res, TEMP cr0);
 8258   format %{ "GetAndSetB $res, $mem_ptr, $src" %}
 8259   ins_encode %{
 8260     __ getandsetb($res$$Register, $src$$Register, $mem_ptr$$Register,
 8261                   noreg, noreg, noreg, MacroAssembler::cmpxchgx_hint_atomic_update());
 8262     if (support_IRIW_for_not_multiple_copy_atomic_cpu) {
 8263       __ isync();
 8264     } else {
 8265       __ sync();
 8266     }
 8267   %}
 8268   ins_pipe(pipe_class_default);
 8269 %}
 8270 
 8271 instruct getAndSetB4(iRegIdst res, rarg3RegP mem_ptr, iRegIsrc src, iRegIsrc tmp1, iRegIsrc tmp2, flagsRegCR0 cr0) %{
 8272   match(Set res (GetAndSetB mem_ptr src));
 8273   predicate(!VM_Version::has_lqarx());
 8274   effect(TEMP_DEF res, USE_KILL mem_ptr, TEMP tmp1, TEMP tmp2, TEMP cr0);
 8275   format %{ "GetAndSetB $res, $mem_ptr, $src" %}
 8276   ins_encode %{
 8277     __ getandsetb($res$$Register, $src$$Register, $mem_ptr$$Register,
 8278                   R0, $tmp1$$Register, $tmp2$$Register, MacroAssembler::cmpxchgx_hint_atomic_update());
 8279     if (support_IRIW_for_not_multiple_copy_atomic_cpu) {
 8280       __ isync();
 8281     } else {
 8282       __ sync();
 8283     }
 8284   %}
 8285   ins_pipe(pipe_class_default);
 8286 %}
 8287 
 8288 instruct getAndSetS(iRegIdst res, iRegPdst mem_ptr, iRegIsrc src, flagsRegCR0 cr0) %{
 8289   match(Set res (GetAndSetS mem_ptr src));
 8290   predicate(VM_Version::has_lqarx());
 8291   effect(TEMP_DEF res, TEMP cr0);
 8292   format %{ "GetAndSetS $res, $mem_ptr, $src" %}
 8293   ins_encode %{
 8294     __ getandseth($res$$Register, $src$$Register, $mem_ptr$$Register,
 8295                   noreg, noreg, noreg, MacroAssembler::cmpxchgx_hint_atomic_update());
 8296     if (support_IRIW_for_not_multiple_copy_atomic_cpu) {
 8297       __ isync();
 8298     } else {
 8299       __ sync();
 8300     }
 8301   %}
 8302   ins_pipe(pipe_class_default);
 8303 %}
 8304 
 8305 instruct getAndSetS4(iRegIdst res, rarg3RegP mem_ptr, iRegIsrc src, iRegIsrc tmp1, iRegIsrc tmp2, flagsRegCR0 cr0) %{
 8306   match(Set res (GetAndSetS mem_ptr src));
 8307   predicate(!VM_Version::has_lqarx());
 8308   effect(TEMP_DEF res, USE_KILL mem_ptr, TEMP tmp1, TEMP tmp2, TEMP cr0);
 8309   format %{ "GetAndSetS $res, $mem_ptr, $src" %}
 8310   ins_encode %{
 8311     __ getandseth($res$$Register, $src$$Register, $mem_ptr$$Register,
 8312                   R0, $tmp1$$Register, $tmp2$$Register, MacroAssembler::cmpxchgx_hint_atomic_update());
 8313     if (support_IRIW_for_not_multiple_copy_atomic_cpu) {
 8314       __ isync();
 8315     } else {
 8316       __ sync();
 8317     }
 8318   %}
 8319   ins_pipe(pipe_class_default);
 8320 %}
 8321 
 8322 instruct getAndSetI(iRegIdst res, iRegPdst mem_ptr, iRegIsrc src, flagsRegCR0 cr0) %{
 8323   match(Set res (GetAndSetI mem_ptr src));
 8324   effect(TEMP_DEF res, TEMP cr0);
 8325   format %{ "GetAndSetI $res, $mem_ptr, $src" %}
 8326   ins_encode %{
 8327     __ getandsetw($res$$Register, $src$$Register, $mem_ptr$$Register,
 8328                   MacroAssembler::cmpxchgx_hint_atomic_update());
 8329     if (support_IRIW_for_not_multiple_copy_atomic_cpu) {
 8330       __ isync();
 8331     } else {
 8332       __ sync();
 8333     }
 8334   %}
 8335   ins_pipe(pipe_class_default);
 8336 %}
 8337 
 8338 instruct getAndSetL(iRegLdst res, iRegPdst mem_ptr, iRegLsrc src, flagsRegCR0 cr0) %{
 8339   match(Set res (GetAndSetL mem_ptr src));
 8340   effect(TEMP_DEF res, TEMP cr0);
 8341   format %{ "GetAndSetL $res, $mem_ptr, $src" %}
 8342   ins_encode %{
 8343     __ getandsetd($res$$Register, $src$$Register, $mem_ptr$$Register,
 8344                   MacroAssembler::cmpxchgx_hint_atomic_update());
 8345     if (support_IRIW_for_not_multiple_copy_atomic_cpu) {
 8346       __ isync();
 8347     } else {
 8348       __ sync();
 8349     }
 8350   %}
 8351   ins_pipe(pipe_class_default);
 8352 %}
 8353 
 8354 instruct getAndSetP(iRegPdst res, iRegPdst mem_ptr, iRegPsrc src, flagsRegCR0 cr0) %{
 8355   match(Set res (GetAndSetP mem_ptr src));
 8356   predicate(n->as_LoadStore()->barrier_data() == 0);
 8357   effect(TEMP_DEF res, TEMP cr0);
 8358   format %{ "GetAndSetP $res, $mem_ptr, $src" %}
 8359   ins_encode %{
 8360     __ getandsetd($res$$Register, $src$$Register, $mem_ptr$$Register,
 8361                   MacroAssembler::cmpxchgx_hint_atomic_update());
 8362     if (support_IRIW_for_not_multiple_copy_atomic_cpu) {
 8363       __ isync();
 8364     } else {
 8365       __ sync();
 8366     }
 8367   %}
 8368   ins_pipe(pipe_class_default);
 8369 %}
 8370 
 8371 instruct getAndSetN(iRegNdst res, iRegPdst mem_ptr, iRegNsrc src, flagsRegCR0 cr0) %{
 8372   match(Set res (GetAndSetN mem_ptr src));
 8373   effect(TEMP_DEF res, TEMP cr0);
 8374   format %{ "GetAndSetN $res, $mem_ptr, $src" %}
 8375   ins_encode %{
 8376     __ getandsetw($res$$Register, $src$$Register, $mem_ptr$$Register,
 8377                   MacroAssembler::cmpxchgx_hint_atomic_update());
 8378     if (support_IRIW_for_not_multiple_copy_atomic_cpu) {
 8379       __ isync();
 8380     } else {
 8381       __ sync();
 8382     }
 8383   %}
 8384   ins_pipe(pipe_class_default);
 8385 %}
 8386 
 8387 //----------Arithmetic Instructions--------------------------------------------
 8388 // Addition Instructions
 8389 
 8390 // Register Addition
 8391 instruct addI_reg_reg(iRegIdst dst, iRegIsrc_iRegL2Isrc src1, iRegIsrc_iRegL2Isrc src2) %{
 8392   match(Set dst (AddI src1 src2));
 8393   format %{ "ADD     $dst, $src1, $src2" %}
 8394   size(4);
 8395   ins_encode %{
 8396     __ add($dst$$Register, $src1$$Register, $src2$$Register);
 8397   %}
 8398   ins_pipe(pipe_class_default);
 8399 %}
 8400 
 8401 // Expand does not work with the above instruct. (??)
 8402 instruct addI_reg_reg_2(iRegIdst dst, iRegIsrc src1, iRegIsrc src2) %{
 8403   // no match-rule
 8404   effect(DEF dst, USE src1, USE src2);
 8405   format %{ "ADD     $dst, $src1, $src2" %}
 8406   size(4);
 8407   ins_encode %{
 8408     __ add($dst$$Register, $src1$$Register, $src2$$Register);
 8409   %}
 8410   ins_pipe(pipe_class_default);
 8411 %}
 8412 
 8413 instruct tree_addI_addI_addI_reg_reg_Ex(iRegIdst dst, iRegIsrc src1, iRegIsrc src2, iRegIsrc src3, iRegIsrc src4) %{
 8414   match(Set dst (AddI (AddI (AddI src1 src2) src3) src4));
 8415   ins_cost(DEFAULT_COST*3);
 8416 
 8417   expand %{
 8418     // FIXME: we should do this in the ideal world.
 8419     iRegIdst tmp1;
 8420     iRegIdst tmp2;
 8421     addI_reg_reg(tmp1, src1, src2);
 8422     addI_reg_reg_2(tmp2, src3, src4); // Adlc complains about addI_reg_reg.
 8423     addI_reg_reg(dst, tmp1, tmp2);
 8424   %}
 8425 %}
 8426 
 8427 // Immediate Addition
 8428 instruct addI_reg_imm16(iRegIdst dst, iRegIsrc src1, immI16 src2) %{
 8429   match(Set dst (AddI src1 src2));
 8430   format %{ "ADDI    $dst, $src1, $src2" %}
 8431   size(4);
 8432   ins_encode %{
 8433     __ addi($dst$$Register, $src1$$Register, $src2$$constant);
 8434   %}
 8435   ins_pipe(pipe_class_default);
 8436 %}
 8437 
 8438 // Immediate Addition with 16-bit shifted operand
 8439 instruct addI_reg_immhi16(iRegIdst dst, iRegIsrc src1, immIhi16 src2) %{
 8440   match(Set dst (AddI src1 src2));
 8441   format %{ "ADDIS   $dst, $src1, $src2" %}
 8442   size(4);
 8443   ins_encode %{
 8444     __ addis($dst$$Register, $src1$$Register, ($src2$$constant)>>16);
 8445   %}
 8446   ins_pipe(pipe_class_default);
 8447 %}
 8448 
 8449 // Immediate Addition using prefixed addi
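      // Power10 (ISA 3.1) prefixed instructions are 8 bytes long and must not cross a
      // 64-byte boundary; the assert below rejects placing the prefix word in the last
      // slot of a 64-byte block. The same applies to the other PADDI rules further down.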
 8450 instruct addI_reg_imm32(iRegIdst dst, iRegIsrc src1, immI32 src2) %{
 8451   match(Set dst (AddI src1 src2));
 8452   predicate(PowerArchitecturePPC64 >= 10);
 8453   ins_cost(DEFAULT_COST+1);
 8454   format %{ "PADDI   $dst, $src1, $src2" %}
 8455   size(8);
 8456   ins_encode %{
 8457     assert( ((intptr_t)(__ pc()) & 0x3c) != 0x3c, "Bad alignment for prefixed instruction at " INTPTR_FORMAT, (intptr_t)(__ pc()));
 8458     __ paddi($dst$$Register, $src1$$Register, $src2$$constant);
 8459   %}
 8460   ins_pipe(pipe_class_default);
 8461   ins_alignment(2);
 8462 %}
 8463 
 8464 // Long Addition
 8465 instruct addL_reg_reg(iRegLdst dst, iRegLsrc src1, iRegLsrc src2) %{
 8466   match(Set dst (AddL src1 src2));
 8467   format %{ "ADD     $dst, $src1, $src2 \t// long" %}
 8468   size(4);
 8469   ins_encode %{
 8470     __ add($dst$$Register, $src1$$Register, $src2$$Register);
 8471   %}
 8472   ins_pipe(pipe_class_default);
 8473 %}
 8474 
 8475 // Expand does not work with the above instruct. (??)
 8476 instruct addL_reg_reg_2(iRegLdst dst, iRegLsrc src1, iRegLsrc src2) %{
 8477   // no match-rule
 8478   effect(DEF dst, USE src1, USE src2);
 8479   format %{ "ADD     $dst, $src1, $src2 \t// long" %}
 8480   size(4);
 8481   ins_encode %{
 8482     __ add($dst$$Register, $src1$$Register, $src2$$Register);
 8483   %}
 8484   ins_pipe(pipe_class_default);
 8485 %}
 8486 
 8487 instruct tree_addL_addL_addL_reg_reg_Ex(iRegLdst dst, iRegLsrc src1, iRegLsrc src2, iRegLsrc src3, iRegLsrc src4) %{
 8488   match(Set dst (AddL (AddL (AddL src1 src2) src3) src4));
 8489   ins_cost(DEFAULT_COST*3);
 8490 
 8491   expand %{
 8492     // FIXME: we should do this in the ideal world.
 8493     iRegLdst tmp1;
 8494     iRegLdst tmp2;
 8495     addL_reg_reg(tmp1, src1, src2);
 8496     addL_reg_reg_2(tmp2, src3, src4); // Adlc complains about addL_reg_reg.
 8497     addL_reg_reg(dst, tmp1, tmp2);
 8498   %}
 8499 %}
 8500 
 8501 // AddL + ConvL2I.
 8502 instruct addI_regL_regL(iRegIdst dst, iRegLsrc src1, iRegLsrc src2) %{
 8503   match(Set dst (ConvL2I (AddL src1 src2)));
 8504 
 8505   format %{ "ADD     $dst, $src1, $src2 \t// long + l2i" %}
 8506   size(4);
 8507   ins_encode %{
 8508     __ add($dst$$Register, $src1$$Register, $src2$$Register);
 8509   %}
 8510   ins_pipe(pipe_class_default);
 8511 %}
 8512 
 8513 // No constant pool entries required.
 8514 instruct addL_reg_imm16(iRegLdst dst, iRegLsrc src1, immL16 src2) %{
 8515   match(Set dst (AddL src1 src2));
 8516 
 8517   format %{ "ADDI    $dst, $src1, $src2" %}
 8518   size(4);
 8519   ins_encode %{
 8520     __ addi($dst$$Register, $src1$$Register, $src2$$constant);
 8521   %}
 8522   ins_pipe(pipe_class_default);
 8523 %}
 8524 
 8525 // Long Immediate Addition with 16-bit shifted operand.
 8526 // No constant pool entries required.
 8527 instruct addL_reg_immhi16(iRegLdst dst, iRegLsrc src1, immL32hi16 src2) %{
 8528   match(Set dst (AddL src1 src2));
 8529 
 8530   format %{ "ADDIS   $dst, $src1, $src2" %}
 8531   size(4);
 8532   ins_encode %{
 8533     __ addis($dst$$Register, $src1$$Register, ($src2$$constant)>>16);
 8534   %}
 8535   ins_pipe(pipe_class_default);
 8536 %}
 8537 
 8538 // Long Immediate Addition using prefixed addi
 8539 // No constant pool entries required.
 8540 instruct addL_reg_imm34(iRegLdst dst, iRegLsrc src1, immL34 src2) %{
 8541   match(Set dst (AddL src1 src2));
 8542   predicate(PowerArchitecturePPC64 >= 10);
 8543   ins_cost(DEFAULT_COST+1);
 8544 
 8545   format %{ "PADDI   $dst, $src1, $src2" %}
 8546   size(8);
 8547   ins_encode %{
 8548     assert( ((intptr_t)(__ pc()) & 0x3c) != 0x3c, "Bad alignment for prefixed instruction at " INTPTR_FORMAT, (intptr_t)(__ pc()));
 8549     __ paddi($dst$$Register, $src1$$Register, $src2$$constant);
 8550   %}
 8551   ins_pipe(pipe_class_default);
 8552   ins_alignment(2);
 8553 %}
 8554 
 8555 // Pointer Register Addition
 8556 instruct addP_reg_reg(iRegPdst dst, iRegP_N2P src1, iRegLsrc src2) %{
 8557   match(Set dst (AddP src1 src2));
 8558   format %{ "ADD     $dst, $src1, $src2" %}
 8559   size(4);
 8560   ins_encode %{
 8561     __ add($dst$$Register, $src1$$Register, $src2$$Register);
 8562   %}
 8563   ins_pipe(pipe_class_default);
 8564 %}
 8565 
 8566 // Pointer Immediate Addition
 8567 // No constant pool entries required.
 8568 instruct addP_reg_imm16(iRegPdst dst, iRegP_N2P src1, immL16 src2) %{
 8569   match(Set dst (AddP src1 src2));
 8570 
 8571   format %{ "ADDI    $dst, $src1, $src2" %}
 8572   size(4);
 8573   ins_encode %{
 8574     __ addi($dst$$Register, $src1$$Register, $src2$$constant);
 8575   %}
 8576   ins_pipe(pipe_class_default);
 8577 %}
 8578 
 8579 // Pointer Immediate Addition with 16-bit shifted operand.
 8580 // No constant pool entries required.
 8581 instruct addP_reg_immhi16(iRegPdst dst, iRegP_N2P src1, immL32hi16 src2) %{
 8582   match(Set dst (AddP src1 src2));
 8583 
 8584   format %{ "ADDIS   $dst, $src1, $src2" %}
 8585   size(4);
 8586   ins_encode %{
 8587     __ addis($dst$$Register, $src1$$Register, ($src2$$constant)>>16);
 8588   %}
 8589   ins_pipe(pipe_class_default);
 8590 %}
 8591 
 8592 // Pointer Immediate Addition using prefixed addi
 8593 // No constant pool entries required.
 8594 instruct addP_reg_imm34(iRegPdst dst, iRegP_N2P src1, immL34 src2) %{
 8595   match(Set dst (AddP src1 src2));
 8596   predicate(PowerArchitecturePPC64 >= 10);
 8597   ins_cost(DEFAULT_COST+1);
 8598 
 8599   format %{ "PADDI    $dst, $src1, $src2" %}
 8600   size(8);
 8601   ins_encode %{
 8602     assert( ((intptr_t)(__ pc()) & 0x3c) != 0x3c, "Bad alignment for prefixed instruction at " INTPTR_FORMAT, (intptr_t)(__ pc()));
 8603     __ paddi($dst$$Register, $src1$$Register, $src2$$constant);
 8604   %}
 8605   ins_pipe(pipe_class_default);
 8606   ins_alignment(2);
 8607 %}
 8608 
 8609 //---------------------
 8610 // Subtraction Instructions
 8611 
 8612 // Register Subtraction
 8613 instruct subI_reg_reg(iRegIdst dst, iRegIsrc src1, iRegIsrc src2) %{
 8614   match(Set dst (SubI src1 src2));
 8615   format %{ "SUBF    $dst, $src2, $src1" %}
 8616   size(4);
 8617   ins_encode %{
 8618     __ subf($dst$$Register, $src2$$Register, $src1$$Register);
 8619   %}
 8620   ins_pipe(pipe_class_default);
 8621 %}
 8622 
 8623 // Immediate Subtraction
 8624 // The compiler converts "x-c0" into "x+ -c0" (see SubLNode::Ideal), so the add rules above handle it.
 8625 // Don't try to use addi with -$src2$$constant, since that negation can overflow when $src2$$constant == minI16.
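      // Example: for $src2$$constant == -32768 (minI16), the negated value 32768 does not fit
      // into addi's signed 16-bit immediate field.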
 8626 
 8627 // SubI from constant (using subfic).
 8628 instruct subI_imm16_reg(iRegIdst dst, immI16 src1, iRegIsrc src2) %{
 8629   match(Set dst (SubI src1 src2));
 8630   format %{ "SUBI    $dst, $src1, $src2" %}
 8631 
 8632   size(4);
 8633   ins_encode %{
 8634     __ subfic($dst$$Register, $src2$$Register, $src1$$constant);
 8635   %}
 8636   ins_pipe(pipe_class_default);
 8637 %}
 8638 
 8639 // Turn the sign-bit of an integer into a 32-bit mask, 0x0...0 for
 8640 // positive integers and 0xF...F for negative ones.
 8641 instruct signmask32I_regI(iRegIdst dst, iRegIsrc src) %{
 8642   // no match-rule, false predicate
 8643   effect(DEF dst, USE src);
 8644   predicate(false);
 8645 
 8646   format %{ "SRAWI   $dst, $src, #31" %}
 8647   size(4);
 8648   ins_encode %{
 8649     __ srawi($dst$$Register, $src$$Register, 0x1f);
 8650   %}
 8651   ins_pipe(pipe_class_default);
 8652 %}
 8653 
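      // Branch-free absolute value: with mask = src >> 31 (all zeros or all ones),
      // abs(src) = (src ^ mask) - mask, which is what the expand below emits.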
 8654 instruct absI_reg_Ex(iRegIdst dst, iRegIsrc src) %{
 8655   match(Set dst (AbsI src));
 8656   ins_cost(DEFAULT_COST*3);
 8657 
 8658   expand %{
 8659     iRegIdst tmp1;
 8660     iRegIdst tmp2;
 8661     signmask32I_regI(tmp1, src);
 8662     xorI_reg_reg(tmp2, tmp1, src);
 8663     subI_reg_reg(dst, tmp2, tmp1);
 8664   %}
 8665 %}
 8666 
 8667 instruct negI_regI(iRegIdst dst, immI_0 zero, iRegIsrc src2) %{
 8668   match(Set dst (SubI zero src2));
 8669   format %{ "NEG     $dst, $src2" %}
 8670   size(4);
 8671   ins_encode %{
 8672     __ neg($dst$$Register, $src2$$Register);
 8673   %}
 8674   ins_pipe(pipe_class_default);
 8675 %}
 8676 
 8677 // Long subtraction
 8678 instruct subL_reg_reg(iRegLdst dst, iRegLsrc src1, iRegLsrc src2) %{
 8679   match(Set dst (SubL src1 src2));
 8680   format %{ "SUBF    $dst, $src2, $src1 \t// long" %}
 8681   size(4);
 8682   ins_encode %{
 8683     __ subf($dst$$Register, $src2$$Register, $src1$$Register);
 8684   %}
 8685   ins_pipe(pipe_class_default);
 8686 %}
 8687 
 8688 // SubL + convL2I.
 8689 instruct subI_regL_regL(iRegIdst dst, iRegLsrc src1, iRegLsrc src2) %{
 8690   match(Set dst (ConvL2I (SubL src1 src2)));
 8691 
 8692   format %{ "SUBF    $dst, $src2, $src1 \t// long + l2i" %}
 8693   size(4);
 8694   ins_encode %{
 8695     __ subf($dst$$Register, $src2$$Register, $src1$$Register);
 8696   %}
 8697   ins_pipe(pipe_class_default);
 8698 %}
 8699 
 8700 // Turn the sign-bit of a long into a 64-bit mask, 0x0...0 for
 8701 // positive longs and 0xF...F for negative ones.
 8702 instruct signmask64I_regL(iRegIdst dst, iRegLsrc src) %{
 8703   // no match-rule, false predicate
 8704   effect(DEF dst, USE src);
 8705   predicate(false);
 8706 
 8707   format %{ "SRADI   $dst, $src, #63" %}
 8708   size(4);
 8709   ins_encode %{
 8710     __ sradi($dst$$Register, $src$$Register, 0x3f);
 8711   %}
 8712   ins_pipe(pipe_class_default);
 8713 %}
 8714 
 8715 // Turn the sign-bit of a long into a 64-bit mask, 0x0...0 for
 8716 // positive longs and 0xF...F for negative ones.
 8717 instruct signmask64L_regL(iRegLdst dst, iRegLsrc src) %{
 8718   // no match-rule, false predicate
 8719   effect(DEF dst, USE src);
 8720   predicate(false);
 8721 
 8722   format %{ "SRADI   $dst, $src, #63" %}
 8723   size(4);
 8724   ins_encode %{
 8725     __ sradi($dst$$Register, $src$$Register, 0x3f);
 8726   %}
 8727   ins_pipe(pipe_class_default);
 8728 %}
 8729 
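      // Same branch-free absolute value trick as absI_reg_Ex, using the 64-bit sign mask.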
 8730 instruct absL_reg_Ex(iRegLdst dst, iRegLsrc src) %{
 8731   match(Set dst (AbsL src));
 8732   ins_cost(DEFAULT_COST*3);
 8733 
 8734   expand %{
 8735     iRegLdst tmp1;
 8736     iRegLdst tmp2;
 8737     signmask64L_regL(tmp1, src);
 8738     xorL_reg_reg(tmp2, tmp1, src);
 8739     subL_reg_reg(dst, tmp2, tmp1);
 8740   %}
 8741 %}
 8742 
 8743 // Long negation
 8744 instruct negL_reg_reg(iRegLdst dst, immL_0 zero, iRegLsrc src2) %{
 8745   match(Set dst (SubL zero src2));
 8746   format %{ "NEG     $dst, $src2 \t// long" %}
 8747   size(4);
 8748   ins_encode %{
 8749     __ neg($dst$$Register, $src2$$Register);
 8750   %}
 8751   ins_pipe(pipe_class_default);
 8752 %}
 8753 
 8754 // NegL + ConvL2I.
 8755 instruct negI_con0_regL(iRegIdst dst, immL_0 zero, iRegLsrc src2) %{
 8756   match(Set dst (ConvL2I (SubL zero src2)));
 8757 
 8758   format %{ "NEG     $dst, $src2 \t// long + l2i" %}
 8759   size(4);
 8760   ins_encode %{
 8761     __ neg($dst$$Register, $src2$$Register);
 8762   %}
 8763   ins_pipe(pipe_class_default);
 8764 %}
 8765 
 8766 // Multiplication Instructions
 8767 // Integer Multiplication
 8768 
 8769 // Register Multiplication
 8770 instruct mulI_reg_reg(iRegIdst dst, iRegIsrc src1, iRegIsrc src2) %{
 8771   match(Set dst (MulI src1 src2));
 8772   ins_cost(DEFAULT_COST);
 8773 
 8774   format %{ "MULLW   $dst, $src1, $src2" %}
 8775   size(4);
 8776   ins_encode %{
 8777     __ mullw($dst$$Register, $src1$$Register, $src2$$Register);
 8778   %}
 8779   ins_pipe(pipe_class_default);
 8780 %}
 8781 
 8782 // Immediate Multiplication
 8783 instruct mulI_reg_imm16(iRegIdst dst, iRegIsrc src1, immI16 src2) %{
 8784   match(Set dst (MulI src1 src2));
 8785   ins_cost(DEFAULT_COST);
 8786 
 8787   format %{ "MULLI   $dst, $src1, $src2" %}
 8788   size(4);
 8789   ins_encode %{
 8790     __ mulli($dst$$Register, $src1$$Register, $src2$$constant);
 8791   %}
 8792   ins_pipe(pipe_class_default);
 8793 %}
 8794 
 8795 instruct mulL_reg_reg(iRegLdst dst, iRegLsrc src1, iRegLsrc src2) %{
 8796   match(Set dst (MulL src1 src2));
 8797   ins_cost(DEFAULT_COST);
 8798 
 8799   format %{ "MULLD   $dst, $src1, $src2 \t// long" %}
 8800   size(4);
 8801   ins_encode %{
 8802     __ mulld($dst$$Register, $src1$$Register, $src2$$Register);
 8803   %}
 8804   ins_pipe(pipe_class_default);
 8805 %}
 8806 
 8807 // Multiply high for optimized long division by constant.
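      // MULHD returns the upper 64 bits of the 128-bit signed product, as needed by the
      // magic-number transformation of long division by a constant.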
 8808 instruct mulHighL_reg_reg(iRegLdst dst, iRegLsrc src1, iRegLsrc src2) %{
 8809   match(Set dst (MulHiL src1 src2));
 8810   ins_cost(DEFAULT_COST);
 8811 
 8812   format %{ "MULHD   $dst, $src1, $src2 \t// long" %}
 8813   size(4);
 8814   ins_encode %{
 8815     __ mulhd($dst$$Register, $src1$$Register, $src2$$Register);
 8816   %}
 8817   ins_pipe(pipe_class_default);
 8818 %}
 8819 
 8820 // Immediate Multiplication
 8821 instruct mulL_reg_imm16(iRegLdst dst, iRegLsrc src1, immL16 src2) %{
 8822   match(Set dst (MulL src1 src2));
 8823   ins_cost(DEFAULT_COST);
 8824 
 8825   format %{ "MULLI   $dst, $src1, $src2" %}
 8826   size(4);
 8827   ins_encode %{
 8828     __ mulli($dst$$Register, $src1$$Register, $src2$$constant);
 8829   %}
 8830   ins_pipe(pipe_class_default);
 8831 %}
 8832 
 8833 // Integer Division with Immediate -1: Negate.
 8834 instruct divI_reg_immIvalueMinus1(iRegIdst dst, iRegIsrc src1, immI_minus1 src2) %{
 8835   match(Set dst (DivI src1 src2));
 8836   ins_cost(DEFAULT_COST);
 8837 
 8838   format %{ "NEG     $dst, $src1 \t// /-1" %}
 8839   size(4);
 8840   ins_encode %{
 8841     __ neg($dst$$Register, $src1$$Register);
 8842   %}
 8843   ins_pipe(pipe_class_default);
 8844 %}
 8845 
 8846 // Integer Division with constant, but not -1.
 8847 // We should be able to improve this by checking the type of src2.
 8848 // It might well be that src2 is known to be positive.
 8849 instruct divI_reg_regnotMinus1(iRegIdst dst, iRegIsrc src1, iRegIsrc src2) %{
 8850   match(Set dst (DivI src1 src2));
 8851   predicate(n->in(2)->find_int_con(-1) != -1); // src2 is a constant, but not -1
 8852   ins_cost(2*DEFAULT_COST);
 8853 
 8854   format %{ "DIVW    $dst, $src1, $src2 \t// /not-1" %}
 8855   size(4);
 8856   ins_encode %{
 8857     __ divw($dst$$Register, $src1$$Register, $src2$$Register);
 8858   %}
 8859   ins_pipe(pipe_class_default);
 8860 %}
 8861 
 8862 instruct cmovI_bne_negI_reg(iRegIdst dst, flagsRegSrc crx, iRegIsrc src1) %{
 8863   effect(USE_DEF dst, USE src1, USE crx);
 8864   predicate(false);
 8865 
 8866   ins_variable_size_depending_on_alignment(true);
 8867 
 8868   format %{ "CMOVE   $dst, neg($src1), $crx" %}
 8869   // Worst case is branch + move + stop, no stop without scheduler.
 8870   size(8);
 8871   ins_encode %{
 8872     Label done;
 8873     __ bne($crx$$CondRegister, done);
 8874     __ neg($dst$$Register, $src1$$Register);
 8875     __ bind(done);
 8876   %}
 8877   ins_pipe(pipe_class_default);
 8878 %}
 8879 
 8880 // Integer Division with Registers not containing constants.
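      // divw's result is undefined for minI / -1 (and for division by zero), while Java requires
      // minI / -1 == minI, so the expand below overrides the quotient with neg(src1) when src2 == -1.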
 8881 instruct divI_reg_reg_Ex(iRegIdst dst, iRegIsrc src1, iRegIsrc src2) %{
 8882   match(Set dst (DivI src1 src2));
 8883   ins_cost(10*DEFAULT_COST);
 8884 
 8885   expand %{
 8886     immI16 imm %{ (int)-1 %}
 8887     flagsReg tmp1;
 8888     cmpI_reg_imm16(tmp1, src2, imm);          // check src2 == -1
 8889     divI_reg_regnotMinus1(dst, src1, src2);   // dst = src1 / src2
 8890     cmovI_bne_negI_reg(dst, tmp1, src1);      // cmove dst = neg(src1) if src2 == -1
 8891   %}
 8892 %}
 8893 
 8894 // Long Division with Immediate -1: Negate.
 8895 instruct divL_reg_immLvalueMinus1(iRegLdst dst, iRegLsrc src1, immL_minus1 src2) %{
 8896   match(Set dst (DivL src1 src2));
 8897   ins_cost(DEFAULT_COST);
 8898 
 8899   format %{ "NEG     $dst, $src1 \t// /-1, long" %}
 8900   size(4);
 8901   ins_encode %{
 8902     __ neg($dst$$Register, $src1$$Register);
 8903   %}
 8904   ins_pipe(pipe_class_default);
 8905 %}
 8906 
 8907 // Long Division with constant, but not -1.
 8908 instruct divL_reg_regnotMinus1(iRegLdst dst, iRegLsrc src1, iRegLsrc src2) %{
 8909   match(Set dst (DivL src1 src2));
 8910   predicate(n->in(2)->find_long_con(-1L) != -1L); // Src2 is a constant, but not -1.
 8911   ins_cost(2*DEFAULT_COST);
 8912 
 8913   format %{ "DIVD    $dst, $src1, $src2 \t// /not-1, long" %}
 8914   size(4);
 8915   ins_encode %{
 8916     __ divd($dst$$Register, $src1$$Register, $src2$$Register);
 8917   %}
 8918   ins_pipe(pipe_class_default);
 8919 %}
 8920 
 8921 instruct cmovL_bne_negL_reg(iRegLdst dst, flagsRegSrc crx, iRegLsrc src1) %{
 8922   effect(USE_DEF dst, USE src1, USE crx);
 8923   predicate(false);
 8924 
 8925   ins_variable_size_depending_on_alignment(true);
 8926 
 8927   format %{ "CMOVE   $dst, neg($src1), $crx" %}
 8928   // Worst case is branch + move + stop, no stop without scheduler.
 8929   size(8);
 8930   ins_encode %{
 8931     Label done;
 8932     __ bne($crx$$CondRegister, done);
 8933     __ neg($dst$$Register, $src1$$Register);
 8934     __ bind(done);
 8935   %}
 8936   ins_pipe(pipe_class_default);
 8937 %}
 8938 
 8939 // Long Division with Registers not containing constants.
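      // As in the 32-bit case: divd's result is undefined for minL / -1, so the quotient is
      // overridden with neg(src1) when src2 == -1.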
 8940 instruct divL_reg_reg_Ex(iRegLdst dst, iRegLsrc src1, iRegLsrc src2) %{
 8941   match(Set dst (DivL src1 src2));
 8942   ins_cost(10*DEFAULT_COST);
 8943 
 8944   expand %{
 8945     immL16 imm %{ (int)-1 %}
 8946     flagsReg tmp1;
 8947     cmpL_reg_imm16(tmp1, src2, imm);          // check src2 == -1
 8948     divL_reg_regnotMinus1(dst, src1, src2);   // dst = src1 / src2
 8949     cmovL_bne_negL_reg(dst, tmp1, src1);      // cmove dst = neg(src1) if src2 == -1
 8950   %}
 8951 %}
 8952 
 8953 // Integer Remainder with registers.
 8954 instruct modI_reg_reg_Ex(iRegIdst dst, iRegIsrc src1, iRegIsrc src2) %{
 8955   match(Set dst (ModI src1 src2));
 8956   ins_cost(10*DEFAULT_COST);
 8957 
 8958   expand %{
 8959     immI16 imm %{ (int)-1 %}
 8960     flagsReg tmp1;
 8961     iRegIdst tmp2;
 8962     iRegIdst tmp3;
 8963     cmpI_reg_imm16(tmp1, src2, imm);           // check src2 == -1
 8964     divI_reg_regnotMinus1(tmp2, src1, src2);   // tmp2 = src1 / src2
 8965     cmovI_bne_negI_reg(tmp2, tmp1, src1);      // cmove tmp2 = neg(src1) if src2 == -1
 8966     mulI_reg_reg(tmp3, src2, tmp2);            // tmp3 = src2 * tmp2
 8967     subI_reg_reg(dst, src1, tmp3);             // dst = src1 - tmp3
 8968   %}
 8969 %}
 8970 
 8971 // Long Remainder with registers
 8972 instruct modL_reg_reg_Ex(iRegLdst dst, iRegLsrc src1, iRegLsrc src2) %{
 8973   match(Set dst (ModL src1 src2));
 8974   ins_cost(10*DEFAULT_COST);
 8975 
 8976   expand %{
 8977     immL16 imm %{ (int)-1 %}
 8978     flagsReg tmp1;
 8979     iRegLdst tmp2;
 8980     iRegLdst tmp3;
 8981     cmpL_reg_imm16(tmp1, src2, imm);             // check src2 == -1
 8982     divL_reg_regnotMinus1(tmp2, src1, src2);     // tmp2 = src1 / src2
 8983     cmovL_bne_negL_reg(tmp2, tmp1, src1);        // cmove tmp2 = neg(src1) if src2 == -1
 8984     mulL_reg_reg(tmp3, src2, tmp2);              // tmp3 = src2 * tmp2
 8985     subL_reg_reg(dst, src1, tmp3);               // dst = src1 - tmp3
 8986   %}
 8987 %}
 8988 
 8989 instruct udivI_reg_reg(iRegIdst dst, iRegIsrc src1, iRegIsrc src2) %{
 8990   match(Set dst (UDivI src1 src2));
 8991   format %{ "DIVWU   $dst, $src1, $src2" %}
 8992   size(4);
 8993   ins_encode %{
 8994     __ divwu($dst$$Register, $src1$$Register, $src2$$Register);
 8995   %}
 8996   ins_pipe(pipe_class_default);
 8997 %}
 8998 
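      // Unsigned remainder computed as dst = src1 - (src1 /u src2) * src2.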
 8999 instruct umodI_reg_reg(iRegIdst dst, iRegIsrc src1, iRegIsrc src2) %{
 9000   match(Set dst (UModI src1 src2));
 9001   expand %{
 9002     iRegIdst tmp1;
 9003     iRegIdst tmp2;
 9004     udivI_reg_reg(tmp1, src1, src2);
 9005     // Compute the lower 32 bits of the result using signed instructions, as suggested by the ISA.
 9006     // The upper 32 bits will contain garbage.
 9007     mulI_reg_reg(tmp2, src2, tmp1);
 9008     subI_reg_reg(dst, src1, tmp2);
 9009   %}
 9010 %}
 9011 
 9012 instruct udivL_reg_reg(iRegLdst dst, iRegLsrc src1, iRegLsrc src2) %{
 9013   match(Set dst (UDivL src1 src2));
 9014   format %{ "DIVDU   $dst, $src1, $src2" %}
 9015   size(4);
 9016   ins_encode %{
 9017     __ divdu($dst$$Register, $src1$$Register, $src2$$Register);
 9018   %}
 9019   ins_pipe(pipe_class_default);
 9020 %}
 9021 
 9022 instruct umodL_reg_reg(iRegLdst dst, iRegLsrc src1, iRegLsrc src2) %{
 9023   match(Set dst (UModL src1 src2));
 9024   expand %{
 9025     iRegLdst tmp1;
 9026     iRegLdst tmp2;
 9027     udivL_reg_reg(tmp1, src1, src2);
 9028     mulL_reg_reg(tmp2, src2, tmp1);
 9029     subL_reg_reg(dst, src1, tmp2);
 9030   %}
 9031 %}
 9032 
 9033 // Integer Shift Instructions
 9034 
 9035 // Register Shift Left
 9036 
 9037 // Clear the uppermost #mask bits, keeping only the low-order (64 - #mask) bits.
 9038 // Used to normalize shift amounts in registers.
 9039 instruct maskI_reg_imm(iRegIdst dst, iRegIsrc src, uimmI6 mask) %{
 9040   // no match-rule, false predicate
 9041   effect(DEF dst, USE src, USE mask);
 9042   predicate(false);
 9043 
 9044   format %{ "MASK    $dst, $src, $mask \t// clear $mask upper bits" %}
 9045   size(4);
 9046   ins_encode %{
 9047     __ clrldi($dst$$Register, $src$$Register, $mask$$constant);
 9048   %}
 9049   ins_pipe(pipe_class_default);
 9050 %}
 9051 
 9052 instruct lShiftI_reg_reg(iRegIdst dst, iRegIsrc src1, iRegIsrc src2) %{
 9053   // no match-rule, false predicate
 9054   effect(DEF dst, USE src1, USE src2);
 9055   predicate(false);
 9056 
 9057   format %{ "SLW     $dst, $src1, $src2" %}
 9058   size(4);
 9059   ins_encode %{
 9060     __ slw($dst$$Register, $src1$$Register, $src2$$Register);
 9061   %}
 9062   ins_pipe(pipe_class_default);
 9063 %}
 9064 
 9065 instruct lShiftI_reg_reg_Ex(iRegIdst dst, iRegIsrc src1, iRegIsrc src2) %{
 9066   match(Set dst (LShiftI src1 src2));
 9067   ins_cost(DEFAULT_COST*2);
 9068   expand %{
 9069     uimmI6 mask %{ 0x3b /* clear 59 bits, keep 5 */ %}
 9070     iRegIdst tmpI;
 9071     maskI_reg_imm(tmpI, src2, mask);
 9072     lShiftI_reg_reg(dst, src1, tmpI);
 9073   %}
 9074 %}
 9075 
 9076 // Register Shift Left Immediate
 9077 instruct lShiftI_reg_imm(iRegIdst dst, iRegIsrc src1, immI src2) %{
 9078   match(Set dst (LShiftI src1 src2));
 9079 
 9080   format %{ "SLWI    $dst, $src1, ($src2 & 0x1f)" %}
 9081   size(4);
 9082   ins_encode %{
 9083     __ slwi($dst$$Register, $src1$$Register, ($src2$$constant) & 0x1f);
 9084   %}
 9085   ins_pipe(pipe_class_default);
 9086 %}
 9087 
 9088 // AndI with negpow2-constant + LShiftI
 9089 instruct lShiftI_andI_immInegpow2_imm5(iRegIdst dst, iRegIsrc src1, immInegpow2 src2, uimmI5 src3) %{
 9090   match(Set dst (LShiftI (AndI src1 src2) src3));
 9091   predicate(UseRotateAndMaskInstructionsPPC64);
 9092 
 9093   format %{ "RLWINM  $dst, lShiftI(AndI($src1, $src2), $src3)" %}
 9094   size(4);
 9095   ins_encode %{
 9096     long src3      = $src3$$constant;
 9097     long maskbits  = src3 + log2i_exact(-(juint)$src2$$constant);
 9098     if (maskbits >= 32) {
 9099       __ li($dst$$Register, 0); // addi
 9100     } else {
 9101       __ rlwinm($dst$$Register, $src1$$Register, src3 & 0x1f, 0, (31-maskbits) & 0x1f);
 9102     }
 9103   %}
 9104   ins_pipe(pipe_class_default);
 9105 %}
 9106 
 9107 // RShiftI + AndI with negpow2-constant + LShiftI
 9108 instruct lShiftI_andI_immInegpow2_rShiftI_imm5(iRegIdst dst, iRegIsrc src1, immInegpow2 src2, uimmI5 src3) %{
 9109   match(Set dst (LShiftI (AndI (RShiftI src1 src3) src2) src3));
 9110   predicate(UseRotateAndMaskInstructionsPPC64);
 9111 
 9112   format %{ "RLWINM  $dst, lShiftI(AndI(RShiftI($src1, $src3), $src2), $src3)" %}
 9113   size(4);
 9114   ins_encode %{
 9115     long src3      = $src3$$constant;
 9116     long maskbits  = src3 + log2i_exact(-(juint)$src2$$constant);
 9117     if (maskbits >= 32) {
 9118       __ li($dst$$Register, 0); // addi
 9119     } else {
 9120       __ rlwinm($dst$$Register, $src1$$Register, 0, 0, (31-maskbits) & 0x1f);
 9121     }
 9122   %}
 9123   ins_pipe(pipe_class_default);
 9124 %}
 9125 
 9126 instruct lShiftL_regL_regI(iRegLdst dst, iRegLsrc src1, iRegIsrc src2) %{
 9127   // no match-rule, false predicate
 9128   effect(DEF dst, USE src1, USE src2);
 9129   predicate(false);
 9130 
 9131   format %{ "SLD     $dst, $src1, $src2" %}
 9132   size(4);
 9133   ins_encode %{
 9134     __ sld($dst$$Register, $src1$$Register, $src2$$Register);
 9135   %}
 9136   ins_pipe(pipe_class_default);
 9137 %}
 9138 
 9139 // Register Shift Left
 9140 instruct lShiftL_regL_regI_Ex(iRegLdst dst, iRegLsrc src1, iRegIsrc src2) %{
 9141   match(Set dst (LShiftL src1 src2));
 9142   ins_cost(DEFAULT_COST*2);
 9143   expand %{
 9144     uimmI6 mask %{ 0x3a /* clear 58 bits, keep 6 */ %}
 9145     iRegIdst tmpI;
 9146     maskI_reg_imm(tmpI, src2, mask);
 9147     lShiftL_regL_regI(dst, src1, tmpI);
 9148   %}
 9149 %}
 9150 
 9151 // Register Shift Left Immediate
 9152 instruct lshiftL_regL_immI(iRegLdst dst, iRegLsrc src1, immI src2) %{
 9153   match(Set dst (LShiftL src1 src2));
 9154   format %{ "SLDI    $dst, $src1, ($src2 & 0x3f)" %}
 9155   size(4);
 9156   ins_encode %{
 9157     __ sldi($dst$$Register, $src1$$Register, ($src2$$constant) & 0x3f);
 9158   %}
 9159   ins_pipe(pipe_class_default);
 9160 %}
 9161 
 9162 // If we shift by 32 or more bits, we need not convert I2L: the sign-extension bits are shifted out anyway.
 9163 instruct lShiftL_regI_immGE32(iRegLdst dst, iRegIsrc src1, uimmI6_ge32 src2) %{
 9164   match(Set dst (LShiftL (ConvI2L src1) src2));
 9165   ins_cost(DEFAULT_COST);
 9166 
 9167   size(4);
 9168   format %{ "SLDI    $dst, i2l($src1), $src2" %}
 9169   ins_encode %{
 9170     __ sldi($dst$$Register, $src1$$Register, ($src2$$constant) & 0x3f);
 9171   %}
 9172   ins_pipe(pipe_class_default);
 9173 %}
 9174 
 9175 // Shift a positive int to the left.
 9176 // Clrlsldi clears the upper 32 bits and shifts.
 9177 instruct scaledPositiveI2L_lShiftL_convI2L_reg_imm6(iRegLdst dst, iRegIsrc src1, uimmI6 src2) %{
 9178   match(Set dst (LShiftL (ConvI2L src1) src2));
 9179   predicate(((ConvI2LNode*)(_kids[0]->_leaf))->type()->is_long()->is_positive_int());
 9180 
 9181   format %{ "SLDI    $dst, i2l(positive_int($src1)), $src2" %}
 9182   size(4);
 9183   ins_encode %{
 9184     __ clrlsldi($dst$$Register, $src1$$Register, 0x20, $src2$$constant);
 9185   %}
 9186   ins_pipe(pipe_class_default);
 9187 %}
 9188 
 9189 instruct arShiftI_reg_reg(iRegIdst dst, iRegIsrc src1, iRegIsrc src2) %{
 9190   // no match-rule, false predicate
 9191   effect(DEF dst, USE src1, USE src2);
 9192   predicate(false);
 9193 
 9194   format %{ "SRAW    $dst, $src1, $src2" %}
 9195   size(4);
 9196   ins_encode %{
 9197     __ sraw($dst$$Register, $src1$$Register, $src2$$Register);
 9198   %}
 9199   ins_pipe(pipe_class_default);
 9200 %}
 9201 
 9202 // Register Arithmetic Shift Right
 9203 instruct arShiftI_reg_reg_Ex(iRegIdst dst, iRegIsrc src1, iRegIsrc src2) %{
 9204   match(Set dst (RShiftI src1 src2));
 9205   ins_cost(DEFAULT_COST*2);
 9206   expand %{
 9207     uimmI6 mask %{ 0x3b /* clear 59 bits, keep 5 */ %}
 9208     iRegIdst tmpI;
 9209     maskI_reg_imm(tmpI, src2, mask);
 9210     arShiftI_reg_reg(dst, src1, tmpI);
 9211   %}
 9212 %}
 9213 
 9214 // Register Arithmetic Shift Right Immediate
 9215 instruct arShiftI_reg_imm(iRegIdst dst, iRegIsrc src1, immI src2) %{
 9216   match(Set dst (RShiftI src1 src2));
 9217 
 9218   format %{ "SRAWI   $dst, $src1, ($src2 & 0x1f)" %}
 9219   size(4);
 9220   ins_encode %{
 9221     __ srawi($dst$$Register, $src1$$Register, ($src2$$constant) & 0x1f);
 9222   %}
 9223   ins_pipe(pipe_class_default);
 9224 %}
 9225 
 9226 instruct arShiftL_regL_regI(iRegLdst dst, iRegLsrc src1, iRegIsrc src2) %{
 9227   // no match-rule, false predicate
 9228   effect(DEF dst, USE src1, USE src2);
 9229   predicate(false);
 9230 
 9231   format %{ "SRAD    $dst, $src1, $src2" %}
 9232   size(4);
 9233   ins_encode %{
 9234     __ srad($dst$$Register, $src1$$Register, $src2$$Register);
 9235   %}
 9236   ins_pipe(pipe_class_default);
 9237 %}
 9238 
 9239 // Register Shift Right Arithmetic Long
 9240 instruct arShiftL_regL_regI_Ex(iRegLdst dst, iRegLsrc src1, iRegIsrc src2) %{
 9241   match(Set dst (RShiftL src1 src2));
 9242   ins_cost(DEFAULT_COST*2);
 9243 
 9244   expand %{
 9245     uimmI6 mask %{ 0x3a /* clear 58 bits, keep 6 */ %}
 9246     iRegIdst tmpI;
 9247     maskI_reg_imm(tmpI, src2, mask);
 9248     arShiftL_regL_regI(dst, src1, tmpI);
 9249   %}
 9250 %}
 9251 
 9252 // Register Shift Right Immediate
 9253 instruct arShiftL_regL_immI(iRegLdst dst, iRegLsrc src1, immI src2) %{
 9254   match(Set dst (RShiftL src1 src2));
 9255 
 9256   format %{ "SRADI   $dst, $src1, ($src2 & 0x3f)" %}
 9257   size(4);
 9258   ins_encode %{
 9259     __ sradi($dst$$Register, $src1$$Register, ($src2$$constant) & 0x3f);
 9260   %}
 9261   ins_pipe(pipe_class_default);
 9262 %}
 9263 
 9264 // RShiftL + ConvL2I
 9265 instruct convL2I_arShiftL_regL_immI(iRegIdst dst, iRegLsrc src1, immI src2) %{
 9266   match(Set dst (ConvL2I (RShiftL src1 src2)));
 9267 
 9268   format %{ "SRADI   $dst, $src1, ($src2 & 0x3f) \t// long + l2i" %}
 9269   size(4);
 9270   ins_encode %{
 9271     __ sradi($dst$$Register, $src1$$Register, ($src2$$constant) & 0x3f);
 9272   %}
 9273   ins_pipe(pipe_class_default);
 9274 %}
 9275 
 9276 instruct urShiftI_reg_reg(iRegIdst dst, iRegIsrc src1, iRegIsrc src2) %{
 9277   // no match-rule, false predicate
 9278   effect(DEF dst, USE src1, USE src2);
 9279   predicate(false);
 9280 
 9281   format %{ "SRW     $dst, $src1, $src2" %}
 9282   size(4);
 9283   ins_encode %{
 9284     __ srw($dst$$Register, $src1$$Register, $src2$$Register);
 9285   %}
 9286   ins_pipe(pipe_class_default);
 9287 %}
 9288 
 9289 // Register Shift Right
 9290 instruct urShiftI_reg_reg_Ex(iRegIdst dst, iRegIsrc src1, iRegIsrc src2) %{
 9291   match(Set dst (URShiftI src1 src2));
 9292   ins_cost(DEFAULT_COST*2);
 9293 
 9294   expand %{
 9295     uimmI6 mask %{ 0x3b /* clear 59 bits, keep 5 */ %}
 9296     iRegIdst tmpI;
 9297     maskI_reg_imm(tmpI, src2, mask);
 9298     urShiftI_reg_reg(dst, src1, tmpI);
 9299   %}
 9300 %}
 9301 
 9302 // Register Shift Right Immediate
 9303 instruct urShiftI_reg_imm(iRegIdst dst, iRegIsrc src1, immI src2) %{
 9304   match(Set dst (URShiftI src1 src2));
 9305 
 9306   format %{ "SRWI    $dst, $src1, ($src2 & 0x1f)" %}
 9307   size(4);
 9308   ins_encode %{
 9309     __ srwi($dst$$Register, $src1$$Register, ($src2$$constant) & 0x1f);
 9310   %}
 9311   ins_pipe(pipe_class_default);
 9312 %}
 9313 
 9314 instruct urShiftL_regL_regI(iRegLdst dst, iRegLsrc src1, iRegIsrc src2) %{
 9315   // no match-rule, false predicate
 9316   effect(DEF dst, USE src1, USE src2);
 9317   predicate(false);
 9318 
 9319   format %{ "SRD     $dst, $src1, $src2" %}
 9320   size(4);
 9321   ins_encode %{
 9322     __ srd($dst$$Register, $src1$$Register, $src2$$Register);
 9323   %}
 9324   ins_pipe(pipe_class_default);
 9325 %}
 9326 
 9327 // Register Shift Right
 9328 instruct urShiftL_regL_regI_Ex(iRegLdst dst, iRegLsrc src1, iRegIsrc src2) %{
 9329   match(Set dst (URShiftL src1 src2));
 9330   ins_cost(DEFAULT_COST*2);
 9331 
 9332   expand %{
 9333     uimmI6 mask %{ 0x3a /* clear 58 bits, keep 6 */ %}
 9334     iRegIdst tmpI;
 9335     maskI_reg_imm(tmpI, src2, mask);
 9336     urShiftL_regL_regI(dst, src1, tmpI);
 9337   %}
 9338 %}
 9339 
 9340 // Register Shift Right Immediate
 9341 instruct urShiftL_regL_immI(iRegLdst dst, iRegLsrc src1, immI src2) %{
 9342   match(Set dst (URShiftL src1 src2));
 9343 
 9344   format %{ "SRDI    $dst, $src1, ($src2 & 0x3f)" %}
 9345   size(4);
 9346   ins_encode %{
 9347     __ srdi($dst$$Register, $src1$$Register, ($src2$$constant) & 0x3f);
 9348   %}
 9349   ins_pipe(pipe_class_default);
 9350 %}
 9351 
 9352 // URShiftL + ConvL2I.
 9353 instruct convL2I_urShiftL_regL_immI(iRegIdst dst, iRegLsrc src1, immI src2) %{
 9354   match(Set dst (ConvL2I (URShiftL src1 src2)));
 9355 
 9356   format %{ "SRDI    $dst, $src1, ($src2 & 0x3f) \t// long + l2i" %}
 9357   size(4);
 9358   ins_encode %{
 9359     __ srdi($dst$$Register, $src1$$Register, ($src2$$constant) & 0x3f);
 9360   %}
 9361   ins_pipe(pipe_class_default);
 9362 %}
 9363 
 9364 // Register Shift Right Immediate with a CastP2X
 9365 instruct shrP_convP2X_reg_imm6(iRegLdst dst, iRegP_N2P src1, uimmI6 src2) %{
 9366   match(Set dst (URShiftL (CastP2X src1) src2));
 9367 
 9368   format %{ "SRDI    $dst, $src1, $src2 \t// Cast ptr $src1 to long and shift" %}
 9369   size(4);
 9370   ins_encode %{
 9371     __ srdi($dst$$Register, $src1$$Register, ($src2$$constant) & 0x3f);
 9372   %}
 9373   ins_pipe(pipe_class_default);
 9374 %}
 9375 
 9376 // Bitfield Extract: URShiftI + AndI
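      // extrdi(dst, src, length, 64-(rshift+length)) right-justifies the length-bit field whose least
      // significant bit is bit 'rshift', i.e. dst = (src >>> rshift) & ((1 << length) - 1).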
 9377 instruct andI_urShiftI_regI_immI_immIpow2minus1(iRegIdst dst, iRegIsrc src1, immI src2, immIpow2minus1 src3) %{
 9378   match(Set dst (AndI (URShiftI src1 src2) src3));
 9379 
 9380   format %{ "EXTRDI  $dst, $src1, shift=$src2, mask=$src3 \t// int bitfield extract" %}
 9381   size(4);
 9382   ins_encode %{
 9383     int rshift = ($src2$$constant) & 0x1f;
 9384     int length = log2i_exact((juint)$src3$$constant + 1u);
 9385     if (rshift + length > 32) {
 9386       // if necessary, adjust mask to omit rotated bits.
 9387       length = 32 - rshift;
 9388     }
 9389     __ extrdi($dst$$Register, $src1$$Register, length, 64 - (rshift + length));
 9390   %}
 9391   ins_pipe(pipe_class_default);
 9392 %}
 9393 
 9394 // Bitfield Extract: URShiftL + AndL
 9395 instruct andL_urShiftL_regL_immI_immLpow2minus1(iRegLdst dst, iRegLsrc src1, immI src2, immLpow2minus1 src3) %{
 9396   match(Set dst (AndL (URShiftL src1 src2) src3));
 9397 
 9398   format %{ "EXTRDI  $dst, $src1, shift=$src2, mask=$src3 \t// long bitfield extract" %}
 9399   size(4);
 9400   ins_encode %{
 9401     int rshift  = ($src2$$constant) & 0x3f;
 9402     int length = log2i_exact((julong)$src3$$constant + 1ull);
 9403     if (rshift + length > 64) {
 9404       // if necessary, adjust mask to omit rotated bits.
 9405       length = 64 - rshift;
 9406     }
 9407     __ extrdi($dst$$Register, $src1$$Register, length, 64 - (rshift + length));
 9408   %}
 9409   ins_pipe(pipe_class_default);
 9410 %}
 9411 
 9412 instruct sxtI_reg(iRegIdst dst, iRegIsrc src) %{
 9413   match(Set dst (ConvL2I (ConvI2L src)));
 9414 
 9415   format %{ "EXTSW   $dst, $src \t// int->int" %}
 9416   size(4);
 9417   ins_encode %{
 9418     __ extsw($dst$$Register, $src$$Register);
 9419   %}
 9420   ins_pipe(pipe_class_default);
 9421 %}
 9422 
 9423 //----------Rotate Instructions------------------------------------------------
 9424 
 9425 // Rotate Left by 8-bit immediate
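      // Matches (x << lshift) | (x >>> rshift) only if the shift amounts sum to 32 (mod 32),
      // i.e. if the two shifts together form a rotate.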
 9426 instruct rotlI_reg_immi8(iRegIdst dst, iRegIsrc src, immI8 lshift, immI8 rshift) %{
 9427   match(Set dst (OrI (LShiftI src lshift) (URShiftI src rshift)));
 9428   predicate(0 == ((n->in(1)->in(2)->get_int() + n->in(2)->in(2)->get_int()) & 0x1f));
 9429 
 9430   format %{ "ROTLWI  $dst, $src, $lshift" %}
 9431   size(4);
 9432   ins_encode %{
 9433     __ rotlwi($dst$$Register, $src$$Register, $lshift$$constant);
 9434   %}
 9435   ins_pipe(pipe_class_default);
 9436 %}
 9437 
 9438 // Rotate Right by 8-bit immediate
 9439 instruct rotrI_reg_immi8(iRegIdst dst, iRegIsrc src, immI8 rshift, immI8 lshift) %{
 9440   match(Set dst (OrI (URShiftI src rshift) (LShiftI src lshift)));
 9441   predicate(0 == ((n->in(1)->in(2)->get_int() + n->in(2)->in(2)->get_int()) & 0x1f));
 9442 
 9443   format %{ "ROTRWI  $dst, $src, $rshift" %}
 9444   size(4);
 9445   ins_encode %{
 9446     __ rotrwi($dst$$Register, $src$$Register, $rshift$$constant);
 9447   %}
 9448   ins_pipe(pipe_class_default);
 9449 %}
 9450 
 9451 //----------Floating Point Arithmetic Instructions-----------------------------
 9452 
 9453 // Add float single precision
 9454 instruct addF_reg_reg(regF dst, regF src1, regF src2) %{
 9455   match(Set dst (AddF src1 src2));
 9456 
 9457   format %{ "FADDS   $dst, $src1, $src2" %}
 9458   size(4);
 9459   ins_encode %{
 9460     __ fadds($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister);
 9461   %}
 9462   ins_pipe(pipe_class_default);
 9463 %}
 9464 
 9465 // Add float double precision
 9466 instruct addD_reg_reg(regD dst, regD src1, regD src2) %{
 9467   match(Set dst (AddD src1 src2));
 9468 
 9469   format %{ "FADD    $dst, $src1, $src2" %}
 9470   size(4);
 9471   ins_encode %{
 9472     __ fadd($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister);
 9473   %}
 9474   ins_pipe(pipe_class_default);
 9475 %}
 9476 
 9477 // Sub float single precision
 9478 instruct subF_reg_reg(regF dst, regF src1, regF src2) %{
 9479   match(Set dst (SubF src1 src2));
 9480 
 9481   format %{ "FSUBS   $dst, $src1, $src2" %}
 9482   size(4);
 9483   ins_encode %{
 9484     __ fsubs($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister);
 9485   %}
 9486   ins_pipe(pipe_class_default);
 9487 %}
 9488 
 9489 // Sub float double precision
 9490 instruct subD_reg_reg(regD dst, regD src1, regD src2) %{
 9491   match(Set dst (SubD src1 src2));
 9492   format %{ "FSUB    $dst, $src1, $src2" %}
 9493   size(4);
 9494   ins_encode %{
 9495     __ fsub($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister);
 9496   %}
 9497   ins_pipe(pipe_class_default);
 9498 %}
 9499 
 9500 // Mul float single precision
 9501 instruct mulF_reg_reg(regF dst, regF src1, regF src2) %{
 9502   match(Set dst (MulF src1 src2));
 9503   format %{ "FMULS   $dst, $src1, $src2" %}
 9504   size(4);
 9505   ins_encode %{
 9506     __ fmuls($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister);
 9507   %}
 9508   ins_pipe(pipe_class_default);
 9509 %}
 9510 
 9511 // Mul float double precision
 9512 instruct mulD_reg_reg(regD dst, regD src1, regD src2) %{
 9513   match(Set dst (MulD src1 src2));
 9514   format %{ "FMUL    $dst, $src1, $src2" %}
 9515   size(4);
 9516   ins_encode %{
 9517     __ fmul($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister);
 9518   %}
 9519   ins_pipe(pipe_class_default);
 9520 %}
 9521 
 9522 // Div float single precision
 9523 instruct divF_reg_reg(regF dst, regF src1, regF src2) %{
 9524   match(Set dst (DivF src1 src2));
 9525   format %{ "FDIVS   $dst, $src1, $src2" %}
 9526   size(4);
 9527   ins_encode %{
 9528     __ fdivs($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister);
 9529   %}
 9530   ins_pipe(pipe_class_default);
 9531 %}
 9532 
 9533 // Div float double precision
 9534 instruct divD_reg_reg(regD dst, regD src1, regD src2) %{
 9535   match(Set dst (DivD src1 src2));
 9536   format %{ "FDIV    $dst, $src1, $src2" %}
 9537   size(4);
 9538   ins_encode %{
 9539     __ fdiv($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister);
 9540   %}
 9541   ins_pipe(pipe_class_default);
 9542 %}
 9543 
 9544 // Absolute float single precision
 9545 instruct absF_reg(regF dst, regF src) %{
 9546   match(Set dst (AbsF src));
 9547   format %{ "FABS    $dst, $src \t// float" %}
 9548   size(4);
 9549   ins_encode %{
 9550     __ fabs($dst$$FloatRegister, $src$$FloatRegister);
 9551   %}
 9552   ins_pipe(pipe_class_default);
 9553 %}
 9554 
 9555 // Absolute float double precision
 9556 instruct absD_reg(regD dst, regD src) %{
 9557   match(Set dst (AbsD src));
 9558   format %{ "FABS    $dst, $src \t// double" %}
 9559   size(4);
 9560   ins_encode %{
 9561     __ fabs($dst$$FloatRegister, $src$$FloatRegister);
 9562   %}
 9563   ins_pipe(pipe_class_default);
 9564 %}
 9565 
 9566 instruct negF_reg(regF dst, regF src) %{
 9567   match(Set dst (NegF src));
 9568   format %{ "FNEG    $dst, $src \t// float" %}
 9569   size(4);
 9570   ins_encode %{
 9571     __ fneg($dst$$FloatRegister, $src$$FloatRegister);
 9572   %}
 9573   ins_pipe(pipe_class_default);
 9574 %}
 9575 
 9576 instruct negD_reg(regD dst, regD src) %{
 9577   match(Set dst (NegD src));
 9578   format %{ "FNEG    $dst, $src \t// double" %}
 9579   size(4);
 9580   ins_encode %{
 9581     __ fneg($dst$$FloatRegister, $src$$FloatRegister);
 9582   %}
 9583   ins_pipe(pipe_class_default);
 9584 %}
 9585 
 9586 // AbsF + NegF.
 9587 instruct negF_absF_reg(regF dst, regF src) %{
 9588   match(Set dst (NegF (AbsF src)));
 9589   format %{ "FNABS   $dst, $src \t// float" %}
 9590   size(4);
 9591   ins_encode %{
 9592     __ fnabs($dst$$FloatRegister, $src$$FloatRegister);
 9593   %}
 9594   ins_pipe(pipe_class_default);
 9595 %}
 9596 
 9597 // AbsD + NegD.
 9598 instruct negD_absD_reg(regD dst, regD src) %{
 9599   match(Set dst (NegD (AbsD src)));
 9600   format %{ "FNABS   $dst, $src \t// double" %}
 9601   size(4);
 9602   ins_encode %{
 9603     __ fnabs($dst$$FloatRegister, $src$$FloatRegister);
 9604   %}
 9605   ins_pipe(pipe_class_default);
 9606 %}
 9607 
 9608 // VM_Version::has_fsqrt() decides if this node will be used.
 9609 // Sqrt float double precision
 9610 instruct sqrtD_reg(regD dst, regD src) %{
 9611   match(Set dst (SqrtD src));
 9612   format %{ "FSQRT   $dst, $src" %}
 9613   size(4);
 9614   ins_encode %{
 9615     __ fsqrt($dst$$FloatRegister, $src$$FloatRegister);
 9616   %}
 9617   ins_pipe(pipe_class_default);
 9618 %}
 9619 
 9620 // Single-precision sqrt.
 9621 instruct sqrtF_reg(regF dst, regF src) %{
 9622   match(Set dst (SqrtF src));
 9623   predicate(VM_Version::has_fsqrts());
 9624   ins_cost(DEFAULT_COST);
 9625 
 9626   format %{ "FSQRTS  $dst, $src" %}
 9627   size(4);
 9628   ins_encode %{
 9629     __ fsqrts($dst$$FloatRegister, $src$$FloatRegister);
 9630   %}
 9631   ins_pipe(pipe_class_default);
 9632 %}
 9633 
 9634 instruct roundDouble_nop(regD dst) %{
 9635   match(Set dst (RoundDouble dst));
 9636   ins_cost(0);
 9637 
 9638   format %{ " -- \t// RoundDouble not needed - empty" %}
 9639   size(0);
 9640   // PPC results are already "rounded" (i.e., normal-format IEEE).
 9641   ins_encode( /*empty*/ );
 9642   ins_pipe(pipe_class_default);
 9643 %}
 9644 
 9645 instruct roundFloat_nop(regF dst) %{
 9646   match(Set dst (RoundFloat dst));
 9647   ins_cost(0);
 9648 
 9649   format %{ " -- \t// RoundFloat not needed - empty" %}
 9650   size(0);
 9651   // PPC results are already "rounded" (i.e., normal-format IEEE).
 9652   ins_encode( /*empty*/ );
 9653   ins_pipe(pipe_class_default);
 9654 %}
 9655 
 9656 
 9657 // Multiply-Accumulate
 9658 // src1 * src2 + src3
 9659 instruct maddF_reg_reg(regF dst, regF src1, regF src2, regF src3) %{
 9660   match(Set dst (FmaF src3 (Binary src1 src2)));
 9661 
 9662   format %{ "FMADDS  $dst, $src1, $src2, $src3" %}
 9663   size(4);
 9664   ins_encode %{
 9665     __ fmadds($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister, $src3$$FloatRegister);
 9666   %}
 9667   ins_pipe(pipe_class_default);
 9668 %}
 9669 
 9670 // src1 * src2 + src3
 9671 instruct maddD_reg_reg(regD dst, regD src1, regD src2, regD src3) %{
 9672   match(Set dst (FmaD src3 (Binary src1 src2)));
 9673 
 9674   format %{ "FMADD   $dst, $src1, $src2, $src3" %}
 9675   size(4);
 9676   ins_encode %{
 9677     __ fmadd($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister, $src3$$FloatRegister);
 9678   %}
 9679   ins_pipe(pipe_class_default);
 9680 %}
 9681 
 9682 // -src1 * src2 + src3 = -(src1*src2-src3)
 9683 instruct mnsubF_reg_reg(regF dst, regF src1, regF src2, regF src3) %{
 9684   match(Set dst (FmaF src3 (Binary (NegF src1) src2)));
 9685   match(Set dst (FmaF src3 (Binary src1 (NegF src2))));
 9686 
 9687   format %{ "FNMSUBS $dst, $src1, $src2, $src3" %}
 9688   size(4);
 9689   ins_encode %{
 9690     __ fnmsubs($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister, $src3$$FloatRegister);
 9691   %}
 9692   ins_pipe(pipe_class_default);
 9693 %}
 9694 
 9695 // -src1 * src2 + src3 = -(src1*src2-src3)
 9696 instruct mnsubD_reg_reg(regD dst, regD src1, regD src2, regD src3) %{
 9697   match(Set dst (FmaD src3 (Binary (NegD src1) src2)));
 9698   match(Set dst (FmaD src3 (Binary src1 (NegD src2))));
 9699 
 9700   format %{ "FNMSUB  $dst, $src1, $src2, $src3" %}
 9701   size(4);
 9702   ins_encode %{
 9703     __ fnmsub($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister, $src3$$FloatRegister);
 9704   %}
 9705   ins_pipe(pipe_class_default);
 9706 %}
 9707 
 9708 // -src1 * src2 - src3 = -(src1*src2+src3)
 9709 instruct mnaddF_reg_reg(regF dst, regF src1, regF src2, regF src3) %{
 9710   match(Set dst (FmaF (NegF src3) (Binary (NegF src1) src2)));
 9711   match(Set dst (FmaF (NegF src3) (Binary src1 (NegF src2))));
 9712 
 9713   format %{ "FNMADDS $dst, $src1, $src2, $src3" %}
 9714   size(4);
 9715   ins_encode %{
 9716     __ fnmadds($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister, $src3$$FloatRegister);
 9717   %}
 9718   ins_pipe(pipe_class_default);
 9719 %}
 9720 
 9721 // -src1 * src2 - src3 = -(src1*src2+src3)
 9722 instruct mnaddD_reg_reg(regD dst, regD src1, regD src2, regD src3) %{
 9723   match(Set dst (FmaD (NegD src3) (Binary (NegD src1) src2)));
 9724   match(Set dst (FmaD (NegD src3) (Binary src1 (NegD src2))));
 9725 
 9726   format %{ "FNMADD  $dst, $src1, $src2, $src3" %}
 9727   size(4);
 9728   ins_encode %{
 9729     __ fnmadd($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister, $src3$$FloatRegister);
 9730   %}
 9731   ins_pipe(pipe_class_default);
 9732 %}
 9733 
 9734 // src1 * src2 - src3
 9735 instruct msubF_reg_reg(regF dst, regF src1, regF src2, regF src3) %{
 9736   match(Set dst (FmaF (NegF src3) (Binary src1 src2)));
 9737 
 9738   format %{ "FMSUBS  $dst, $src1, $src2, $src3" %}
 9739   size(4);
 9740   ins_encode %{
 9741     __ fmsubs($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister, $src3$$FloatRegister);
 9742   %}
 9743   ins_pipe(pipe_class_default);
 9744 %}
 9745 
 9746 // src1 * src2 - src3
 9747 instruct msubD_reg_reg(regD dst, regD src1, regD src2, regD src3) %{
 9748   match(Set dst (FmaD (NegD src3) (Binary src1 src2)));
 9749 
 9750   format %{ "FMSUB   $dst, $src1, $src2, $src3" %}
 9751   size(4);
 9752   ins_encode %{
 9753     __ fmsub($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister, $src3$$FloatRegister);
 9754   %}
 9755   ins_pipe(pipe_class_default);
 9756 %}
 9757 
 9758 
 9759 //----------Logical Instructions-----------------------------------------------
 9760 
 9761 // And Instructions
 9762 
 9763 // Register And
 9764 instruct andI_reg_reg(iRegIdst dst, iRegIsrc src1, iRegIsrc src2) %{
 9765   match(Set dst (AndI src1 src2));
 9766   format %{ "AND     $dst, $src1, $src2" %}
 9767   size(4);
 9768   ins_encode %{
 9769     __ andr($dst$$Register, $src1$$Register, $src2$$Register);
 9770   %}
 9771   ins_pipe(pipe_class_default);
 9772 %}
 9773 
 9774 // Left shifted Immediate And
 9775 instruct andI_reg_immIhi16(iRegIdst dst, iRegIsrc src1, immIhi16  src2, flagsRegCR0 cr0) %{
 9776   match(Set dst (AndI src1 src2));
 9777   effect(KILL cr0);
 9778   format %{ "ANDIS   $dst, $src1, $src2.hi" %}
 9779   size(4);
 9780   ins_encode %{
 9781     __ andis_($dst$$Register, $src1$$Register, (int)((unsigned short)(($src2$$constant & 0xFFFF0000) >> 16)));
 9782   %}
 9783   ins_pipe(pipe_class_default);
 9784 %}
 9785 
 9786 // Immediate And
 9787 instruct andI_reg_uimm16(iRegIdst dst, iRegIsrc src1, uimmI16 src2, flagsRegCR0 cr0) %{
 9788   match(Set dst (AndI src1 src2));
 9789   effect(KILL cr0);
 9790 
 9791   format %{ "ANDI    $dst, $src1, $src2" %}
 9792   size(4);
 9793   ins_encode %{
 9794     // FIXME: avoid andi_ ?
 9795     __ andi_($dst$$Register, $src1$$Register, $src2$$constant);
 9796   %}
 9797   ins_pipe(pipe_class_default);
 9798 %}
 9799 
 9800 // Immediate And where the immediate is a negative power of 2.
 9801 instruct andI_reg_immInegpow2(iRegIdst dst, iRegIsrc src1, immInegpow2 src2) %{
 9802   match(Set dst (AndI src1 src2));
 9803   format %{ "ANDWI   $dst, $src1, $src2" %}
 9804   size(4);
 9805   ins_encode %{
 9806     __ clrrdi($dst$$Register, $src1$$Register, log2i_exact(-(juint)$src2$$constant));
 9807   %}
 9808   ins_pipe(pipe_class_default);
 9809 %}
 9810 
 9811 instruct andI_reg_immIpow2minus1(iRegIdst dst, iRegIsrc src1, immIpow2minus1 src2) %{
 9812   match(Set dst (AndI src1 src2));
 9813   format %{ "ANDWI   $dst, $src1, $src2" %}
 9814   size(4);
 9815   ins_encode %{
 9816     __ clrldi($dst$$Register, $src1$$Register, 64 - log2i_exact((juint)$src2$$constant + 1u));
 9817   %}
 9818   ins_pipe(pipe_class_default);
 9819 %}
 9820 
 9821 instruct andI_reg_immIpowerOf2(iRegIdst dst, iRegIsrc src1, immIpowerOf2 src2) %{
 9822   match(Set dst (AndI src1 src2));
 9823   predicate(UseRotateAndMaskInstructionsPPC64);
 9824   format %{ "ANDWI   $dst, $src1, $src2" %}
 9825   size(4);
 9826   ins_encode %{
 9827     int bitpos = 31 - log2i_exact((juint)$src2$$constant);
 9828     __ rlwinm($dst$$Register, $src1$$Register, 0, bitpos, bitpos);
 9829   %}
 9830   ins_pipe(pipe_class_default);
 9831 %}
 9832 
 9833 // Register And Long
 9834 instruct andL_reg_reg(iRegLdst dst, iRegLsrc src1, iRegLsrc src2) %{
 9835   match(Set dst (AndL src1 src2));
 9836   ins_cost(DEFAULT_COST);
 9837 
 9838   format %{ "AND     $dst, $src1, $src2 \t// long" %}
 9839   size(4);
 9840   ins_encode %{
 9841     __ andr($dst$$Register, $src1$$Register, $src2$$Register);
 9842   %}
 9843   ins_pipe(pipe_class_default);
 9844 %}
 9845 
 9846 // Immediate And long
 9847 instruct andL_reg_uimm16(iRegLdst dst, iRegLsrc src1, uimmL16 src2, flagsRegCR0 cr0) %{
 9848   match(Set dst (AndL src1 src2));
 9849   effect(KILL cr0);
 9850 
 9851   format %{ "ANDI    $dst, $src1, $src2 \t// long" %}
 9852   size(4);
 9853   ins_encode %{
 9854     // FIXME: avoid andi_ ?
 9855     __ andi_($dst$$Register, $src1$$Register, $src2$$constant);
 9856   %}
 9857   ins_pipe(pipe_class_default);
 9858 %}
 9859 
 9860 // Immediate And Long where the immediate is a negative power of 2.
 9861 instruct andL_reg_immLnegpow2(iRegLdst dst, iRegLsrc src1, immLnegpow2 src2) %{
 9862   match(Set dst (AndL src1 src2));
 9863   format %{ "ANDDI   $dst, $src1, $src2" %}
 9864   size(4);
 9865   ins_encode %{
 9866     __ clrrdi($dst$$Register, $src1$$Register, log2i_exact(-(julong)$src2$$constant));
 9867   %}
 9868   ins_pipe(pipe_class_default);
 9869 %}
 9870 
 9871 instruct andL_reg_immLpow2minus1(iRegLdst dst, iRegLsrc src1, immLpow2minus1 src2) %{
 9872   match(Set dst (AndL src1 src2));
 9873   format %{ "ANDDI   $dst, $src1, $src2" %}
 9874   size(4);
 9875   ins_encode %{
 9876     __ clrldi($dst$$Register, $src1$$Register, 64 - log2i_exact((julong)$src2$$constant + 1ull));
 9877   %}
 9878   ins_pipe(pipe_class_default);
 9879 %}
 9880 
 9881 // AndL + ConvL2I.
 9882 instruct convL2I_andL_reg_immLpow2minus1(iRegIdst dst, iRegLsrc src1, immLpow2minus1 src2) %{
 9883   match(Set dst (ConvL2I (AndL src1 src2)));
 9884   ins_cost(DEFAULT_COST);
 9885 
 9886   format %{ "ANDDI   $dst, $src1, $src2 \t// long + l2i" %}
 9887   size(4);
 9888   ins_encode %{
 9889     __ clrldi($dst$$Register, $src1$$Register, 64 - log2i_exact((julong)$src2$$constant + 1ull));
 9890   %}
 9891   ins_pipe(pipe_class_default);
 9892 %}
 9893 
 9894 // Or Instructions
 9895 
 9896 // Register Or
 9897 instruct orI_reg_reg(iRegIdst dst, iRegIsrc src1, iRegIsrc src2) %{
 9898   match(Set dst (OrI src1 src2));
 9899   format %{ "OR      $dst, $src1, $src2" %}
 9900   size(4);
 9901   ins_encode %{
 9902     __ or_unchecked($dst$$Register, $src1$$Register, $src2$$Register);
 9903   %}
 9904   ins_pipe(pipe_class_default);
 9905 %}
 9906 
 9907 // Expand does not work with above instruct. (??)
 9908 instruct orI_reg_reg_2(iRegIdst dst, iRegIsrc src1, iRegIsrc src2) %{
 9909   // no match-rule
 9910   effect(DEF dst, USE src1, USE src2);
 9911   format %{ "OR      $dst, $src1, $src2" %}
 9912   size(4);
 9913   ins_encode %{
 9914     __ or_unchecked($dst$$Register, $src1$$Register, $src2$$Register);
 9915   %}
 9916   ins_pipe(pipe_class_default);
 9917 %}
 9918 
 9919 instruct tree_orI_orI_orI_reg_reg_Ex(iRegIdst dst, iRegIsrc src1, iRegIsrc src2, iRegIsrc src3, iRegIsrc src4) %{
 9920   match(Set dst (OrI (OrI (OrI src1 src2) src3) src4));
 9921   ins_cost(DEFAULT_COST*3);
 9922 
 9923   expand %{
 9924     // FIXME: we should do this in the ideal world.
 9925     iRegIdst tmp1;
 9926     iRegIdst tmp2;
 9927     orI_reg_reg(tmp1, src1, src2);
 9928     orI_reg_reg_2(tmp2, src3, src4); // Adlc complains about orI_reg_reg.
 9929     orI_reg_reg(dst, tmp1, tmp2);
 9930   %}
 9931 %}
 9932 
 9933 // Immediate Or
 9934 instruct orI_reg_uimm16(iRegIdst dst, iRegIsrc src1, uimmI16 src2) %{
 9935   match(Set dst (OrI src1 src2));
 9936   format %{ "ORI     $dst, $src1, $src2" %}
 9937   size(4);
 9938   ins_encode %{
 9939     __ ori($dst$$Register, $src1$$Register, ($src2$$constant) & 0xFFFF);
 9940   %}
 9941   ins_pipe(pipe_class_default);
 9942 %}
 9943 
 9944 // Register Or Long
 9945 instruct orL_reg_reg(iRegLdst dst, iRegLsrc src1, iRegLsrc src2) %{
 9946   match(Set dst (OrL src1 src2));
 9947   ins_cost(DEFAULT_COST);
 9948 
 9949   size(4);
 9950   format %{ "OR      $dst, $src1, $src2 \t// long" %}
 9951   ins_encode %{
 9952     __ or_unchecked($dst$$Register, $src1$$Register, $src2$$Register);
 9953   %}
 9954   ins_pipe(pipe_class_default);
 9955 %}
 9956 
 9957 // OrL + ConvL2I.
 9958 instruct orI_regL_regL(iRegIdst dst, iRegLsrc src1, iRegLsrc src2) %{
 9959   match(Set dst (ConvL2I (OrL src1 src2)));
 9960   ins_cost(DEFAULT_COST);
 9961 
 9962   format %{ "OR      $dst, $src1, $src2 \t// long + l2i" %}
 9963   size(4);
 9964   ins_encode %{
 9965     __ or_unchecked($dst$$Register, $src1$$Register, $src2$$Register);
 9966   %}
 9967   ins_pipe(pipe_class_default);
 9968 %}
 9969 
 9970 // Immediate Or long
 9971 instruct orL_reg_uimm16(iRegLdst dst, iRegLsrc src1, uimmL16 con) %{
 9972   match(Set dst (OrL src1 con));
 9973   ins_cost(DEFAULT_COST);
 9974 
 9975   format %{ "ORI     $dst, $src1, $con \t// long" %}
 9976   size(4);
 9977   ins_encode %{
 9978     __ ori($dst$$Register, $src1$$Register, ($con$$constant) & 0xFFFF);
 9979   %}
 9980   ins_pipe(pipe_class_default);
 9981 %}
 9982 
 9983 // Xor Instructions
 9984 
 9985 // Register Xor
 9986 instruct xorI_reg_reg(iRegIdst dst, iRegIsrc src1, iRegIsrc src2) %{
 9987   match(Set dst (XorI src1 src2));
 9988   format %{ "XOR     $dst, $src1, $src2" %}
 9989   size(4);
 9990   ins_encode %{
 9991     __ xorr($dst$$Register, $src1$$Register, $src2$$Register);
 9992   %}
 9993   ins_pipe(pipe_class_default);
 9994 %}
 9995 
 9996 // Expand does not work with above instruct. (??)
 9997 instruct xorI_reg_reg_2(iRegIdst dst, iRegIsrc src1, iRegIsrc src2) %{
 9998   // no match-rule
 9999   effect(DEF dst, USE src1, USE src2);
10000   format %{ "XOR     $dst, $src1, $src2" %}
10001   size(4);
10002   ins_encode %{
10003     __ xorr($dst$$Register, $src1$$Register, $src2$$Register);
10004   %}
10005   ins_pipe(pipe_class_default);
10006 %}
10007 
10008 instruct tree_xorI_xorI_xorI_reg_reg_Ex(iRegIdst dst, iRegIsrc src1, iRegIsrc src2, iRegIsrc src3, iRegIsrc src4) %{
10009   match(Set dst (XorI (XorI (XorI src1 src2) src3) src4));
10010   ins_cost(DEFAULT_COST*3);
10011 
10012   expand %{
10013     // FIXME: we should do this in the ideal world.
10014     iRegIdst tmp1;
10015     iRegIdst tmp2;
10016     xorI_reg_reg(tmp1, src1, src2);
10017     xorI_reg_reg_2(tmp2, src3, src4); // Adlc complains about xorI_reg_reg.
10018     xorI_reg_reg(dst, tmp1, tmp2);
10019   %}
10020 %}
10021 
10022 // Immediate Xor
10023 instruct xorI_reg_uimm16(iRegIdst dst, iRegIsrc src1, uimmI16 src2) %{
10024   match(Set dst (XorI src1 src2));
10025   format %{ "XORI    $dst, $src1, $src2" %}
10026   size(4);
10027   ins_encode %{
10028     __ xori($dst$$Register, $src1$$Register, $src2$$constant);
10029   %}
10030   ins_pipe(pipe_class_default);
10031 %}
10032 
10033 // Register Xor Long
10034 instruct xorL_reg_reg(iRegLdst dst, iRegLsrc src1, iRegLsrc src2) %{
10035   match(Set dst (XorL src1 src2));
10036   ins_cost(DEFAULT_COST);
10037 
10038   format %{ "XOR     $dst, $src1, $src2 \t// long" %}
10039   size(4);
10040   ins_encode %{
10041     __ xorr($dst$$Register, $src1$$Register, $src2$$Register);
10042   %}
10043   ins_pipe(pipe_class_default);
10044 %}
10045 
10046 // XorL + ConvL2I.
10047 instruct xorI_regL_regL(iRegIdst dst, iRegLsrc src1, iRegLsrc src2) %{
10048   match(Set dst (ConvL2I (XorL src1 src2)));
10049   ins_cost(DEFAULT_COST);
10050 
10051   format %{ "XOR     $dst, $src1, $src2 \t// long + l2i" %}
10052   size(4);
10053   ins_encode %{
10054     __ xorr($dst$$Register, $src1$$Register, $src2$$Register);
10055   %}
10056   ins_pipe(pipe_class_default);
10057 %}
10058 
10059 // Immediate Xor Long
10060 instruct xorL_reg_uimm16(iRegLdst dst, iRegLsrc src1, uimmL16 src2) %{
10061   match(Set dst (XorL src1 src2));
10062   ins_cost(DEFAULT_COST);
10063 
10064   format %{ "XORI    $dst, $src1, $src2 \t// long" %}
10065   size(4);
10066   ins_encode %{
10067     __ xori($dst$$Register, $src1$$Register, $src2$$constant);
10068   %}
10069   ins_pipe(pipe_class_default);
10070 %}
10071 
10072 instruct notI_reg(iRegIdst dst, iRegIsrc src1, immI_minus1 src2) %{
10073   match(Set dst (XorI src1 src2));
10074   ins_cost(DEFAULT_COST);
10075 
10076   format %{ "NOT     $dst, $src1 ($src2)" %}
10077   size(4);
10078   ins_encode %{
10079     __ nor($dst$$Register, $src1$$Register, $src1$$Register);
10080   %}
10081   ins_pipe(pipe_class_default);
10082 %}
10083 
10084 instruct notL_reg(iRegLdst dst, iRegLsrc src1, immL_minus1 src2) %{
10085   match(Set dst (XorL src1 src2));
10086   ins_cost(DEFAULT_COST);
10087 
10088   format %{ "NOT     $dst, $src1 ($src2) \t// long" %}
10089   size(4);
10090   ins_encode %{
10091     __ nor($dst$$Register, $src1$$Register, $src1$$Register);
10092   %}
10093   ins_pipe(pipe_class_default);
10094 %}
10095 
10096 // And-complement
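      // (src1 ^ -1) & src3 == ~src1 & src3, implemented as andc(src3, src1).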
10097 instruct andcI_reg_reg(iRegIdst dst, iRegIsrc src1, immI_minus1 src2, iRegIsrc src3) %{
10098   match(Set dst (AndI (XorI src1 src2) src3));
10099   ins_cost(DEFAULT_COST);
10100 
10101   format %{ "ANDW    $dst, xori($src1, $src2), $src3" %}
10102   size(4);
10103   ins_encode( enc_andc(dst, src3, src1) );
10104   ins_pipe(pipe_class_default);
10105 %}
10106 
10107 // And-complement
10108 instruct andcL_reg_reg(iRegLdst dst, iRegLsrc src1, iRegLsrc src2) %{
10109   // no match-rule, false predicate
10110   effect(DEF dst, USE src1, USE src2);
10111   predicate(false);
10112 
10113   format %{ "ANDC    $dst, $src1, $src2" %}
10114   size(4);
10115   ins_encode %{
10116     __ andc($dst$$Register, $src1$$Register, $src2$$Register);
10117   %}
10118   ins_pipe(pipe_class_default);
10119 %}
10120 
10121 //----------Moves between int/long and float/double----------------------------
10122 //
10123 // The following rules move values from int/long registers/stack-locations
10124 // to float/double registers/stack-locations and vice versa, without doing any
10125 // conversions. These rules are used to implement the bit-conversion methods
10126 // of java.lang.Float etc., e.g.
10127 //   int   floatToIntBits(float value)
10128 //   float intBitsToFloat(int bits)
10129 //
10130 // Notes on the implementation on ppc64:
10131 // For Power7 and earlier, the rules are limited to those which move between a
10132 // register and a stack-location, because we always have to go through memory
10133 // when moving between a float register and an integer register.
10134 // This restriction is removed in Power8 with the introduction of the mtfprd
10135 // and mffprd instructions.
10136 
10137 instruct moveL2D_reg(regD dst, iRegLsrc src) %{
10138   match(Set dst (MoveL2D src));
10139   predicate(VM_Version::has_mtfprd());
10140 
10141   format %{ "MTFPRD  $dst, $src" %}
10142   size(4);
10143   ins_encode %{
10144     __ mtfprd($dst$$FloatRegister, $src$$Register);
10145   %}
10146   ins_pipe(pipe_class_default);
10147 %}
10148 
10149 instruct moveI2D_reg(regD dst, iRegIsrc src) %{
10150   // no match-rule, false predicate
10151   effect(DEF dst, USE src);
10152   predicate(false);
10153 
10154   format %{ "MTFPRWA $dst, $src" %}
10155   size(4);
10156   ins_encode %{
10157     __ mtfprwa($dst$$FloatRegister, $src$$Register);
10158   %}
10159   ins_pipe(pipe_class_default);
10160 %}
10161 
10162 //---------- Chain stack slots between similar types --------
10163 
10164 // These are needed so that the rules below can match.
10165 
10166 // Load integer from stack slot
10167 instruct stkI_to_regI(iRegIdst dst, stackSlotI src) %{
10168   match(Set dst src);
10169   ins_cost(MEMORY_REF_COST);
10170 
10171   format %{ "LWZ     $dst, $src" %}
10172   size(4);
10173   ins_encode( enc_lwz(dst, src) );
10174   ins_pipe(pipe_class_memory);
10175 %}
10176 
10177 // Store integer to stack slot
10178 instruct regI_to_stkI(stackSlotI dst, iRegIsrc src) %{
10179   match(Set dst src);
10180   ins_cost(MEMORY_REF_COST);
10181 
10182   format %{ "STW     $src, $dst \t// stk" %}
10183   size(4);
10184   ins_encode( enc_stw(src, dst) ); // rs=rt
10185   ins_pipe(pipe_class_memory);
10186 %}
10187 
10188 // Load long from stack slot
10189 instruct stkL_to_regL(iRegLdst dst, stackSlotL src) %{
10190   match(Set dst src);
10191   ins_cost(MEMORY_REF_COST);
10192 
10193   format %{ "LD      $dst, $src \t// long" %}
10194   size(4);
10195   ins_encode( enc_ld(dst, src) );
10196   ins_pipe(pipe_class_memory);
10197 %}
10198 
10199 // Store long to stack slot
10200 instruct regL_to_stkL(stackSlotL dst, iRegLsrc src) %{
10201   match(Set dst src);
10202   ins_cost(MEMORY_REF_COST);
10203 
10204   format %{ "STD     $src, $dst \t// long" %}
10205   size(4);
10206   ins_encode( enc_std(src, dst) ); // rs=rt
10207   ins_pipe(pipe_class_memory);
10208 %}
10209 
10210 //----------Moves between int and float
10211 
10212 // Move float value from float stack-location to integer register.
10213 instruct moveF2I_stack_reg(iRegIdst dst, stackSlotF src) %{
10214   match(Set dst (MoveF2I src));
10215   ins_cost(MEMORY_REF_COST);
10216 
10217   format %{ "LWZ     $dst, $src \t// MoveF2I" %}
10218   size(4);
10219   ins_encode( enc_lwz(dst, src) );
10220   ins_pipe(pipe_class_memory);
10221 %}
10222 
10223 // Move float value from float register to integer stack-location.
10224 instruct moveF2I_reg_stack(stackSlotI dst, regF src) %{
10225   match(Set dst (MoveF2I src));
10226   ins_cost(MEMORY_REF_COST);
10227 
10228   format %{ "STFS    $src, $dst \t// MoveF2I" %}
10229   size(4);
10230   ins_encode( enc_stfs(src, dst) );
10231   ins_pipe(pipe_class_memory);
10232 %}
10233 
10234 // Move integer value from integer stack-location to float register.
10235 instruct moveI2F_stack_reg(regF dst, stackSlotI src) %{
10236   match(Set dst (MoveI2F src));
10237   ins_cost(MEMORY_REF_COST);
10238 
10239   format %{ "LFS     $dst, $src \t// MoveI2F" %}
10240   size(4);
10241   ins_encode %{
10242     int Idisp = $src$$disp + frame_slots_bias($src$$base, ra_);
10243     __ lfs($dst$$FloatRegister, Idisp, $src$$base$$Register);
10244   %}
10245   ins_pipe(pipe_class_memory);
10246 %}
10247 
10248 // Move integer value from integer register to float stack-location.
10249 instruct moveI2F_reg_stack(stackSlotF dst, iRegIsrc src) %{
10250   match(Set dst (MoveI2F src));
10251   ins_cost(MEMORY_REF_COST);
10252 
10253   format %{ "STW     $src, $dst \t// MoveI2F" %}
10254   size(4);
10255   ins_encode( enc_stw(src, dst) );
10256   ins_pipe(pipe_class_memory);
10257 %}
10258 
10259 //----------Moves between long and float
10260 
10261 instruct moveF2L_reg_stack(stackSlotL dst, regF src) %{
10262   // no match-rule, false predicate
10263   effect(DEF dst, USE src);
10264   predicate(false);
10265 
10266   format %{ "storeD  $src, $dst \t// STACK" %}
10267   size(4);
10268   ins_encode( enc_stfd(src, dst) );
10269   ins_pipe(pipe_class_default);
10270 %}
10271 
10272 //----------Moves between long and double
10273 
10274 // Move double value from double stack-location to long register.
10275 instruct moveD2L_stack_reg(iRegLdst dst, stackSlotD src) %{
10276   match(Set dst (MoveD2L src));
10277   ins_cost(MEMORY_REF_COST);
10278   size(4);
10279   format %{ "LD      $dst, $src \t// MoveD2L" %}
10280   ins_encode( enc_ld(dst, src) );
10281   ins_pipe(pipe_class_memory);
10282 %}
10283 
10284 // Move double value from double register to long stack-location.
10285 instruct moveD2L_reg_stack(stackSlotL dst, regD src) %{
10286   match(Set dst (MoveD2L src));
10287   effect(DEF dst, USE src);
10288   ins_cost(MEMORY_REF_COST);
10289 
10290   format %{ "STFD    $src, $dst \t// MoveD2L" %}
10291   size(4);
10292   ins_encode( enc_stfd(src, dst) );
10293   ins_pipe(pipe_class_memory);
10294 %}
10295 
10296 // Move long value from long stack-location to double register.
10297 instruct moveL2D_stack_reg(regD dst, stackSlotL src) %{
10298   match(Set dst (MoveL2D src));
10299   ins_cost(MEMORY_REF_COST);
10300 
10301   format %{ "LFD     $dst, $src \t// MoveL2D" %}
10302   size(4);
10303   ins_encode( enc_lfd(dst, src) );
10304   ins_pipe(pipe_class_memory);
10305 %}
10306 
10307 // Move long value from long register to double stack-location.
10308 instruct moveL2D_reg_stack(stackSlotD dst, iRegLsrc src) %{
10309   match(Set dst (MoveL2D src));
10310   ins_cost(MEMORY_REF_COST);
10311 
10312   format %{ "STD     $src, $dst \t// MoveL2D" %}
10313   size(4);
10314   ins_encode( enc_std(src, dst) );
10315   ins_pipe(pipe_class_memory);
10316 %}
10317 
10318 //----------Register Move Instructions-----------------------------------------
10319 
10320 // Replicate for Superword
10321 
10322 instruct moveReg(iRegLdst dst, iRegIsrc src) %{
10323   predicate(false);
10324   effect(DEF dst, USE src);
10325 
10326   format %{ "MR      $dst, $src \t// replicate " %}
10327   // variable size, 0 or 4.
10328   ins_encode %{
10329     __ mr_if_needed($dst$$Register, $src$$Register);
10330   %}
10331   ins_pipe(pipe_class_default);
10332 %}
10333 
10334 //----------Cast instructions (Java-level type cast)---------------------------
10335 
10336 // Cast Long to Pointer for unsafe natives.
10337 instruct castX2P(iRegPdst dst, iRegLsrc src) %{
10338   match(Set dst (CastX2P src));
10339 
10340   format %{ "MR      $dst, $src \t// Long->Ptr" %}
10341   // variable size, 0 or 4.
10342   ins_encode %{
10343     __ mr_if_needed($dst$$Register, $src$$Register);
10344   %}
  ins_pipe(pipe_class_default);
10346 %}
10347 
10348 // Cast Pointer to Long for unsafe natives.
10349 instruct castP2X(iRegLdst dst, iRegP_N2P src) %{
10350   match(Set dst (CastP2X src));
10351 
10352   format %{ "MR      $dst, $src \t// Ptr->Long" %}
10353   // variable size, 0 or 4.
10354   ins_encode %{
10355     __ mr_if_needed($dst$$Register, $src$$Register);
10356   %}
10357   ins_pipe(pipe_class_default);
10358 %}
10359 
10360 instruct castPP(iRegPdst dst) %{
10361   match(Set dst (CastPP dst));
10362   format %{ " -- \t// castPP of $dst" %}
10363   size(0);
10364   ins_encode( /*empty*/ );
10365   ins_pipe(pipe_class_default);
10366 %}
10367 
10368 instruct castII(iRegIdst dst) %{
10369   match(Set dst (CastII dst));
10370   format %{ " -- \t// castII of $dst" %}
10371   size(0);
10372   ins_encode( /*empty*/ );
10373   ins_pipe(pipe_class_default);
10374 %}
10375 
10376 instruct castLL(iRegLdst dst) %{
10377   match(Set dst (CastLL dst));
10378   format %{ " -- \t// castLL of $dst" %}
10379   size(0);
10380   ins_encode( /*empty*/ );
10381   ins_pipe(pipe_class_default);
10382 %}
10383 
10384 instruct castFF(regF dst) %{
10385   match(Set dst (CastFF dst));
10386   format %{ " -- \t// castFF of $dst" %}
10387   size(0);
10388   ins_encode( /*empty*/ );
10389   ins_pipe(pipe_class_default);
10390 %}
10391 
10392 instruct castDD(regD dst) %{
10393   match(Set dst (CastDD dst));
10394   format %{ " -- \t// castDD of $dst" %}
10395   size(0);
10396   ins_encode( /*empty*/ );
10397   ins_pipe(pipe_class_default);
10398 %}
10399 
10400 instruct castVV8(iRegLdst dst) %{
10401   match(Set dst (CastVV dst));
10402   format %{ " -- \t// castVV of $dst" %}
10403   size(0);
10404   ins_encode( /*empty*/ );
10405   ins_pipe(pipe_class_default);
10406 %}
10407 
10408 instruct castVV16(vecX dst) %{
10409   match(Set dst (CastVV dst));
10410   format %{ " -- \t// castVV of $dst" %}
10411   size(0);
10412   ins_encode( /*empty*/ );
10413   ins_pipe(pipe_class_default);
10414 %}
10415 
10416 instruct checkCastPP(iRegPdst dst) %{
10417   match(Set dst (CheckCastPP dst));
10418   format %{ " -- \t// checkcastPP of $dst" %}
10419   size(0);
10420   ins_encode( /*empty*/ );
10421   ins_pipe(pipe_class_default);
10422 %}
10423 
10424 //----------Convert instructions-----------------------------------------------
10425 
10426 // Convert to boolean.
10427 
10428 // int_to_bool(src) : { 1   if src != 0
10429 //                    { 0   else
10430 //
10431 // strategy:
10432 // 1) Count leading zeros of 32 bit-value src,
10433 //    this returns 32 (0b10.0000) iff src == 0 and <32 otherwise.
10434 // 2) Shift 5 bits to the right, result is 0b1 iff src == 0, 0b0 otherwise.
10435 // 3) Xori the result to get 0b1 if src != 0 and 0b0 if src == 0.
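//
// Illustrative walk-through (input values chosen here for exposition):
//   src = 0x00000040: cntlzw -> 25, 25 >> 5 = 0, 0 ^ 1 = 1
//   src = 0x00000000: cntlzw -> 32, 32 >> 5 = 1, 1 ^ 1 = 0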
10436 
10437 // convI2Bool
10438 instruct convI2Bool_reg__cntlz_Ex(iRegIdst dst, iRegIsrc src) %{
10439   match(Set dst (Conv2B src));
10440   predicate(UseCountLeadingZerosInstructionsPPC64);
10441   ins_cost(DEFAULT_COST);
10442 
10443   expand %{
10444     immI shiftAmount %{ 0x5 %}
10445     uimmI16 mask %{ 0x1 %}
10446     iRegIdst tmp1;
10447     iRegIdst tmp2;
10448     countLeadingZerosI(tmp1, src);
10449     urShiftI_reg_imm(tmp2, tmp1, shiftAmount);
10450     xorI_reg_uimm16(dst, tmp2, mask);
10451   %}
10452 %}
10453 
10454 instruct convI2Bool_reg__cmove(iRegIdst dst, iRegIsrc src, flagsReg crx) %{
10455   match(Set dst (Conv2B src));
10456   effect(TEMP crx);
10457   predicate(!UseCountLeadingZerosInstructionsPPC64);
10458   ins_cost(DEFAULT_COST);
10459 
10460   format %{ "CMPWI   $crx, $src, #0 \t// convI2B"
10461             "LI      $dst, #0\n\t"
10462             "BEQ     $crx, done\n\t"
10463             "LI      $dst, #1\n"
10464             "done:" %}
10465   size(16);
10466   ins_encode( enc_convI2B_regI__cmove(dst, src, crx, 0x0, 0x1) );
10467   ins_pipe(pipe_class_compare);
10468 %}
10469 
10470 // ConvI2B + XorI
10471 instruct xorI_convI2Bool_reg_immIvalue1__cntlz_Ex(iRegIdst dst, iRegIsrc src, immI_1 mask) %{
10472   match(Set dst (XorI (Conv2B src) mask));
10473   predicate(UseCountLeadingZerosInstructionsPPC64);
10474   ins_cost(DEFAULT_COST);
10475 
10476   expand %{
10477     immI shiftAmount %{ 0x5 %}
10478     iRegIdst tmp1;
10479     countLeadingZerosI(tmp1, src);
10480     urShiftI_reg_imm(dst, tmp1, shiftAmount);
10481   %}
10482 %}
10483 
10484 instruct xorI_convI2Bool_reg_immIvalue1__cmove(iRegIdst dst, iRegIsrc src, flagsReg crx, immI_1 mask) %{
10485   match(Set dst (XorI (Conv2B src) mask));
10486   effect(TEMP crx);
10487   predicate(!UseCountLeadingZerosInstructionsPPC64);
10488   ins_cost(DEFAULT_COST);
10489 
10490   format %{ "CMPWI   $crx, $src, #0 \t// Xor(convI2B($src), $mask)"
10491             "LI      $dst, #1\n\t"
10492             "BEQ     $crx, done\n\t"
10493             "LI      $dst, #0\n"
10494             "done:" %}
10495   size(16);
10496   ins_encode( enc_convI2B_regI__cmove(dst, src, crx, 0x1, 0x0) );
10497   ins_pipe(pipe_class_compare);
10498 %}
10499 
10500 // AndI 0b0..010..0 + ConvI2B
10501 instruct convI2Bool_andI_reg_immIpowerOf2(iRegIdst dst, iRegIsrc src, immIpowerOf2 mask) %{
10502   match(Set dst (Conv2B (AndI src mask)));
10503   predicate(UseRotateAndMaskInstructionsPPC64);
10504   ins_cost(DEFAULT_COST);
10505 
10506   format %{ "RLWINM  $dst, $src, $mask \t// convI2B(AndI($src, $mask))" %}
10507   size(4);
10508   ins_encode %{
10509     __ rlwinm($dst$$Register, $src$$Register, 32 - log2i_exact((juint)($mask$$constant)), 31, 31);
10510   %}
10511   ins_pipe(pipe_class_default);
10512 %}
10513 
10514 // Convert pointer to boolean.
10515 //
10516 // ptr_to_bool(src) : { 1   if src != 0
10517 //                    { 0   else
10518 //
10519 // strategy:
10520 // 1) Count leading zeros of 64 bit-value src,
10521 //    this returns 64 (0b100.0000) iff src == 0 and <64 otherwise.
10522 // 2) Shift 6 bits to the right, result is 0b1 iff src == 0, 0b0 otherwise.
10523 // 3) Xori the result to get 0b1 if src != 0 and 0b0 if src == 0.
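//
// Illustrative walk-through (input values chosen here for exposition):
//   src = 0x0000000000001000: cntlzd -> 51, 51 >> 6 = 0, 0 ^ 1 = 1
//   src = 0x0000000000000000: cntlzd -> 64, 64 >> 6 = 1, 1 ^ 1 = 0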
10524 
10525 // ConvP2B
10526 instruct convP2Bool_reg__cntlz_Ex(iRegIdst dst, iRegP_N2P src) %{
10527   match(Set dst (Conv2B src));
10528   predicate(UseCountLeadingZerosInstructionsPPC64);
10529   ins_cost(DEFAULT_COST);
10530 
10531   expand %{
10532     immI shiftAmount %{ 0x6 %}
10533     uimmI16 mask %{ 0x1 %}
10534     iRegIdst tmp1;
10535     iRegIdst tmp2;
10536     countLeadingZerosP(tmp1, src);
10537     urShiftI_reg_imm(tmp2, tmp1, shiftAmount);
10538     xorI_reg_uimm16(dst, tmp2, mask);
10539   %}
10540 %}
10541 
10542 instruct convP2Bool_reg__cmove(iRegIdst dst, iRegP_N2P src, flagsReg crx) %{
10543   match(Set dst (Conv2B src));
10544   effect(TEMP crx);
10545   predicate(!UseCountLeadingZerosInstructionsPPC64);
10546   ins_cost(DEFAULT_COST);
10547 
10548   format %{ "CMPDI   $crx, $src, #0 \t// convP2B"
10549             "LI      $dst, #0\n\t"
10550             "BEQ     $crx, done\n\t"
10551             "LI      $dst, #1\n"
10552             "done:" %}
10553   size(16);
10554   ins_encode( enc_convP2B_regP__cmove(dst, src, crx, 0x0, 0x1) );
10555   ins_pipe(pipe_class_compare);
10556 %}
10557 
10558 // ConvP2B + XorI
10559 instruct xorI_convP2Bool_reg__cntlz_Ex(iRegIdst dst, iRegP_N2P src, immI_1 mask) %{
10560   match(Set dst (XorI (Conv2B src) mask));
10561   predicate(UseCountLeadingZerosInstructionsPPC64);
10562   ins_cost(DEFAULT_COST);
10563 
10564   expand %{
10565     immI shiftAmount %{ 0x6 %}
10566     iRegIdst tmp1;
10567     countLeadingZerosP(tmp1, src);
10568     urShiftI_reg_imm(dst, tmp1, shiftAmount);
10569   %}
10570 %}
10571 
10572 instruct xorI_convP2Bool_reg_immIvalue1__cmove(iRegIdst dst, iRegP_N2P src, flagsReg crx, immI_1 mask) %{
10573   match(Set dst (XorI (Conv2B src) mask));
10574   effect(TEMP crx);
10575   predicate(!UseCountLeadingZerosInstructionsPPC64);
10576   ins_cost(DEFAULT_COST);
10577 
10578   format %{ "CMPDI   $crx, $src, #0 \t// XorI(convP2B($src), $mask)"
10579             "LI      $dst, #1\n\t"
10580             "BEQ     $crx, done\n\t"
10581             "LI      $dst, #0\n"
10582             "done:" %}
10583   size(16);
10584   ins_encode( enc_convP2B_regP__cmove(dst, src, crx, 0x1, 0x0) );
10585   ins_pipe(pipe_class_compare);
10586 %}
10587 
10588 // if src1 < src2, return -1 else return 0
10589 instruct cmpLTMask_reg_reg_Ex(iRegIdst dst, iRegIsrc src1, iRegIsrc src2) %{
10590   match(Set dst (CmpLTMask src1 src2));
10591   ins_cost(DEFAULT_COST*4);
10592 
10593   expand %{
10594     iRegLdst src1s;
10595     iRegLdst src2s;
10596     iRegLdst diff;
10597     convI2L_reg(src1s, src1); // Ensure proper sign extension.
10598     convI2L_reg(src2s, src2); // Ensure proper sign extension.
10599     subL_reg_reg(diff, src1s, src2s);
10600     // Need to consider >=33 bit result, therefore we need signmaskL.
10601     signmask64I_regL(dst, diff);
10602   %}
10603 %}
10604 
10605 instruct cmpLTMask_reg_immI0(iRegIdst dst, iRegIsrc src1, immI_0 src2) %{
10606   match(Set dst (CmpLTMask src1 src2)); // if src1 < src2, return -1 else return 0
10607   format %{ "SRAWI   $dst, $src1, $src2 \t// CmpLTMask" %}
10608   size(4);
10609   ins_encode %{
10610     __ srawi($dst$$Register, $src1$$Register, 0x1f);
10611   %}
10612   ins_pipe(pipe_class_default);
10613 %}
10614 
10615 //----------Arithmetic Conversion Instructions---------------------------------
10616 
10617 // Convert to Byte  -- nop
10618 // Convert to Short -- nop
10619 
10620 // Convert to Int
10621 
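// LShiftI 24 + RShiftI 24 converts byte to int.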
10622 instruct convB2I_reg(iRegIdst dst, iRegIsrc src, immI_24 amount) %{
10623   match(Set dst (RShiftI (LShiftI src amount) amount));
10624   format %{ "EXTSB   $dst, $src \t// byte->int" %}
10625   size(4);
10626   ins_encode %{
10627     __ extsb($dst$$Register, $src$$Register);
10628   %}
10629   ins_pipe(pipe_class_default);
10630 %}
10631 
10632 instruct extsh(iRegIdst dst, iRegIsrc src) %{
10633   effect(DEF dst, USE src);
10634 
10635   size(4);
10636   ins_encode %{
10637     __ extsh($dst$$Register, $src$$Register);
10638   %}
10639   ins_pipe(pipe_class_default);
10640 %}
10641 
10642 // LShiftI 16 + RShiftI 16 converts short to int.
10643 instruct convS2I_reg(iRegIdst dst, iRegIsrc src, immI_16 amount) %{
10644   match(Set dst (RShiftI (LShiftI src amount) amount));
10645   format %{ "EXTSH   $dst, $src \t// short->int" %}
10646   size(4);
10647   ins_encode %{
10648     __ extsh($dst$$Register, $src$$Register);
10649   %}
10650   ins_pipe(pipe_class_default);
10651 %}
10652 
10653 // ConvL2I + ConvI2L: Sign extend int in long register.
10654 instruct sxtI_L2L_reg(iRegLdst dst, iRegLsrc src) %{
10655   match(Set dst (ConvI2L (ConvL2I src)));
10656 
10657   format %{ "EXTSW   $dst, $src \t// long->long" %}
10658   size(4);
10659   ins_encode %{
10660     __ extsw($dst$$Register, $src$$Register);
10661   %}
10662   ins_pipe(pipe_class_default);
10663 %}
10664 
10665 instruct convL2I_reg(iRegIdst dst, iRegLsrc src) %{
10666   match(Set dst (ConvL2I src));
10667   format %{ "MR      $dst, $src \t// long->int" %}
10668   // variable size, 0 or 4
10669   ins_encode %{
10670     __ mr_if_needed($dst$$Register, $src$$Register);
10671   %}
10672   ins_pipe(pipe_class_default);
10673 %}
10674 
10675 instruct convD2IRaw_regD(regD dst, regD src) %{
10676   // no match-rule, false predicate
10677   effect(DEF dst, USE src);
10678   predicate(false);
10679 
10680   format %{ "FCTIWZ $dst, $src \t// convD2I, $src != NaN" %}
10681   size(4);
10682   ins_encode %{
10683     __ fctiwz($dst$$FloatRegister, $src$$FloatRegister);
10684   %}
10685   ins_pipe(pipe_class_default);
10686 %}
10687 
10688 instruct cmovI_bso_stackSlotL(iRegIdst dst, flagsRegSrc crx, stackSlotL src) %{
10689   // no match-rule, false predicate
10690   effect(DEF dst, USE crx, USE src);
10691   predicate(false);
10692 
10693   ins_variable_size_depending_on_alignment(true);
10694 
10695   format %{ "cmovI   $crx, $dst, $src" %}
10696   // Worst case is branch + move + stop, no stop without scheduler.
10697   size(8);
10698   ins_encode( enc_cmove_bso_stackSlotL(dst, crx, src) );
10699   ins_pipe(pipe_class_default);
10700 %}
10701 
10702 instruct cmovI_bso_reg(iRegIdst dst, flagsRegSrc crx, regD src) %{
10703   // no match-rule, false predicate
10704   effect(DEF dst, USE crx, USE src);
10705   predicate(false);
10706 
10707   ins_variable_size_depending_on_alignment(true);
10708 
10709   format %{ "cmovI   $crx, $dst, $src" %}
10710   // Worst case is branch + move + stop, no stop without scheduler.
10711   size(8);
10712   ins_encode( enc_cmove_bso_reg(dst, crx, src) );
10713   ins_pipe(pipe_class_default);
10714 %}
10715 
10716 instruct cmovI_bso_stackSlotL_conLvalue0_Ex(iRegIdst dst, flagsRegSrc crx, stackSlotL mem) %{
10717   // no match-rule, false predicate
10718   effect(DEF dst, USE crx, USE mem);
10719   predicate(false);
10720 
10721   format %{ "CmovI   $dst, $crx, $mem \t// postalloc expanded" %}
10722   postalloc_expand %{
10723     //
10724     // replaces
10725     //
10726     //   region  dst  crx  mem
10727     //    \       |    |   /
10728     //     dst=cmovI_bso_stackSlotL_conLvalue0
10729     //
10730     // with
10731     //
10732     //   region  dst
10733     //    \       /
10734     //     dst=loadConI16(0)
10735     //      |
10736     //      ^  region  dst  crx  mem
10737     //      |   \       |    |    /
10738     //      dst=cmovI_bso_stackSlotL
10739     //
10740 
10741     // Create new nodes.
10742     MachNode *m1 = new loadConI16Node();
10743     MachNode *m2 = new cmovI_bso_stackSlotLNode();
10744 
10745     // inputs for new nodes
10746     m1->add_req(n_region);
10747     m2->add_req(n_region, n_crx, n_mem);
10748 
10749     // precedences for new nodes
10750     m2->add_prec(m1);
10751 
10752     // operands for new nodes
10753     m1->_opnds[0] = op_dst;
10754     m1->_opnds[1] = new immI16Oper(0);
10755 
10756     m2->_opnds[0] = op_dst;
10757     m2->_opnds[1] = op_crx;
10758     m2->_opnds[2] = op_mem;
10759 
10760     // registers for new nodes
10761     ra_->set_pair(m1->_idx, ra_->get_reg_second(this), ra_->get_reg_first(this)); // dst
10762     ra_->set_pair(m2->_idx, ra_->get_reg_second(this), ra_->get_reg_first(this)); // dst
10763 
10764     // Insert new nodes.
10765     nodes->push(m1);
10766     nodes->push(m2);
10767   %}
10768 %}
10769 
10770 instruct cmovI_bso_reg_conLvalue0_Ex(iRegIdst dst, flagsRegSrc crx, regD src) %{
10771   // no match-rule, false predicate
10772   effect(DEF dst, USE crx, USE src);
10773   predicate(false);
10774 
10775   format %{ "CmovI   $dst, $crx, $src \t// postalloc expanded" %}
10776   postalloc_expand %{
10777     //
10778     // replaces
10779     //
10780     //   region  dst  crx  src
10781     //    \       |    |   /
10782     //     dst=cmovI_bso_reg_conLvalue0
10783     //
10784     // with
10785     //
10786     //   region  dst
10787     //    \       /
10788     //     dst=loadConI16(0)
10789     //      |
10790     //      ^  region  dst  crx  src
10791     //      |   \       |    |    /
10792     //      dst=cmovI_bso_reg
10793     //
10794 
10795     // Create new nodes.
10796     MachNode *m1 = new loadConI16Node();
10797     MachNode *m2 = new cmovI_bso_regNode();
10798 
10799     // inputs for new nodes
10800     m1->add_req(n_region);
10801     m2->add_req(n_region, n_crx, n_src);
10802 
10803     // precedences for new nodes
10804     m2->add_prec(m1);
10805 
10806     // operands for new nodes
10807     m1->_opnds[0] = op_dst;
10808     m1->_opnds[1] = new immI16Oper(0);
10809 
10810     m2->_opnds[0] = op_dst;
10811     m2->_opnds[1] = op_crx;
10812     m2->_opnds[2] = op_src;
10813 
10814     // registers for new nodes
10815     ra_->set_pair(m1->_idx, ra_->get_reg_second(this), ra_->get_reg_first(this)); // dst
10816     ra_->set_pair(m2->_idx, ra_->get_reg_second(this), ra_->get_reg_first(this)); // dst
10817 
10818     // Insert new nodes.
10819     nodes->push(m1);
10820     nodes->push(m2);
10821   %}
10822 %}
10823 
10824 // Double to Int conversion, NaN is mapped to 0.
10825 instruct convD2I_reg_ExEx(iRegIdst dst, regD src) %{
10826   match(Set dst (ConvD2I src));
10827   predicate(!VM_Version::has_mtfprd());
10828   ins_cost(DEFAULT_COST);
10829 
10830   expand %{
10831     regD tmpD;
10832     stackSlotL tmpS;
10833     flagsReg crx;
10834     cmpDUnordered_reg_reg(crx, src, src);               // Check whether src is NaN.
    convD2IRaw_regD(tmpD, src);                         // Convert double to int (speculated).
    moveD2L_reg_stack(tmpS, tmpD);                      // Store double to stack (speculated).
10837     cmovI_bso_stackSlotL_conLvalue0_Ex(dst, crx, tmpS); // Cmove based on NaN check.
10838   %}
10839 %}
10840 
10841 // Double to Int conversion, NaN is mapped to 0. Special version for Power8.
10842 instruct convD2I_reg_mffprd_ExEx(iRegIdst dst, regD src) %{
10843   match(Set dst (ConvD2I src));
10844   predicate(VM_Version::has_mtfprd());
10845   ins_cost(DEFAULT_COST);
10846 
10847   expand %{
10848     regD tmpD;
10849     flagsReg crx;
10850     cmpDUnordered_reg_reg(crx, src, src);               // Check whether src is NaN.
    convD2IRaw_regD(tmpD, src);                         // Convert double to int (speculated).
10852     cmovI_bso_reg_conLvalue0_Ex(dst, crx, tmpD);        // Cmove based on NaN check.
10853   %}
10854 %}
10855 
10856 instruct convF2IRaw_regF(regF dst, regF src) %{
10857   // no match-rule, false predicate
10858   effect(DEF dst, USE src);
10859   predicate(false);
10860 
10861   format %{ "FCTIWZ $dst, $src \t// convF2I, $src != NaN" %}
10862   size(4);
10863   ins_encode %{
10864     __ fctiwz($dst$$FloatRegister, $src$$FloatRegister);
10865   %}
10866   ins_pipe(pipe_class_default);
10867 %}
10868 
10869 // Float to Int conversion, NaN is mapped to 0.
10870 instruct convF2I_regF_ExEx(iRegIdst dst, regF src) %{
10871   match(Set dst (ConvF2I src));
10872   predicate(!VM_Version::has_mtfprd());
10873   ins_cost(DEFAULT_COST);
10874 
10875   expand %{
10876     regF tmpF;
10877     stackSlotL tmpS;
10878     flagsReg crx;
10879     cmpFUnordered_reg_reg(crx, src, src);               // Check whether src is NaN.
10880     convF2IRaw_regF(tmpF, src);                         // Convert float to int (speculated).
10881     moveF2L_reg_stack(tmpS, tmpF);                      // Store float to stack (speculated).
10882     cmovI_bso_stackSlotL_conLvalue0_Ex(dst, crx, tmpS); // Cmove based on NaN check.
10883   %}
10884 %}
10885 
10886 // Float to Int conversion, NaN is mapped to 0. Special version for Power8.
10887 instruct convF2I_regF_mffprd_ExEx(iRegIdst dst, regF src) %{
10888   match(Set dst (ConvF2I src));
10889   predicate(VM_Version::has_mtfprd());
10890   ins_cost(DEFAULT_COST);
10891 
10892   expand %{
10893     regF tmpF;
10894     flagsReg crx;
10895     cmpFUnordered_reg_reg(crx, src, src);               // Check whether src is NaN.
10896     convF2IRaw_regF(tmpF, src);                         // Convert float to int (speculated).
10897     cmovI_bso_reg_conLvalue0_Ex(dst, crx, tmpF);        // Cmove based on NaN check.
10898   %}
10899 %}
10900 
10901 // Convert to Long
10902 
10903 instruct convI2L_reg(iRegLdst dst, iRegIsrc src) %{
10904   match(Set dst (ConvI2L src));
10905   format %{ "EXTSW   $dst, $src \t// int->long" %}
10906   size(4);
10907   ins_encode %{
10908     __ extsw($dst$$Register, $src$$Register);
10909   %}
10910   ins_pipe(pipe_class_default);
10911 %}
10912 
10913 // Zero-extend: convert unsigned int to long (convUI2L).
10914 instruct zeroExtendL_regI(iRegLdst dst, iRegIsrc src, immL_32bits mask) %{
10915   match(Set dst (AndL (ConvI2L src) mask));
10916   ins_cost(DEFAULT_COST);
10917 
10918   format %{ "CLRLDI  $dst, $src, #32 \t// zero-extend int to long" %}
10919   size(4);
10920   ins_encode %{
10921     __ clrldi($dst$$Register, $src$$Register, 32);
10922   %}
10923   ins_pipe(pipe_class_default);
10924 %}
10925 
10926 // Zero-extend: convert unsigned int to long in long register.
10927 instruct zeroExtendL_regL(iRegLdst dst, iRegLsrc src, immL_32bits mask) %{
10928   match(Set dst (AndL src mask));
10929   ins_cost(DEFAULT_COST);
10930 
10931   format %{ "CLRLDI  $dst, $src, #32 \t// zero-extend int to long" %}
10932   size(4);
10933   ins_encode %{
10934     __ clrldi($dst$$Register, $src$$Register, 32);
10935   %}
10936   ins_pipe(pipe_class_default);
10937 %}
10938 
10939 instruct convF2LRaw_regF(regF dst, regF src) %{
10940   // no match-rule, false predicate
10941   effect(DEF dst, USE src);
10942   predicate(false);
10943 
10944   format %{ "FCTIDZ $dst, $src \t// convF2L, $src != NaN" %}
10945   size(4);
10946   ins_encode %{
10947     __ fctidz($dst$$FloatRegister, $src$$FloatRegister);
10948   %}
10949   ins_pipe(pipe_class_default);
10950 %}
10951 
10952 instruct cmovL_bso_stackSlotL(iRegLdst dst, flagsRegSrc crx, stackSlotL src) %{
10953   // no match-rule, false predicate
10954   effect(DEF dst, USE crx, USE src);
10955   predicate(false);
10956 
10957   ins_variable_size_depending_on_alignment(true);
10958 
10959   format %{ "cmovL   $crx, $dst, $src" %}
10960   // Worst case is branch + move + stop, no stop without scheduler.
10961   size(8);
10962   ins_encode( enc_cmove_bso_stackSlotL(dst, crx, src) );
10963   ins_pipe(pipe_class_default);
10964 %}
10965 
10966 instruct cmovL_bso_reg(iRegLdst dst, flagsRegSrc crx, regD src) %{
10967   // no match-rule, false predicate
10968   effect(DEF dst, USE crx, USE src);
10969   predicate(false);
10970 
10971   ins_variable_size_depending_on_alignment(true);
10972 
10973   format %{ "cmovL   $crx, $dst, $src" %}
10974   // Worst case is branch + move + stop, no stop without scheduler.
10975   size(8);
10976   ins_encode( enc_cmove_bso_reg(dst, crx, src) );
10977   ins_pipe(pipe_class_default);
10978 %}
10979 
10980 instruct cmovL_bso_stackSlotL_conLvalue0_Ex(iRegLdst dst, flagsRegSrc crx, stackSlotL mem) %{
10981   // no match-rule, false predicate
10982   effect(DEF dst, USE crx, USE mem);
10983   predicate(false);
10984 
10985   format %{ "CmovL   $dst, $crx, $mem \t// postalloc expanded" %}
10986   postalloc_expand %{
10987     //
10988     // replaces
10989     //
10990     //   region  dst  crx  mem
10991     //    \       |    |   /
10992     //     dst=cmovL_bso_stackSlotL_conLvalue0
10993     //
10994     // with
10995     //
10996     //   region  dst
10997     //    \       /
10998     //     dst=loadConL16(0)
10999     //      |
11000     //      ^  region  dst  crx  mem
11001     //      |   \       |    |    /
11002     //      dst=cmovL_bso_stackSlotL
11003     //
11004 
11005     // Create new nodes.
11006     MachNode *m1 = new loadConL16Node();
11007     MachNode *m2 = new cmovL_bso_stackSlotLNode();
11008 
11009     // inputs for new nodes
11010     m1->add_req(n_region);
11011     m2->add_req(n_region, n_crx, n_mem);
11012     m2->add_prec(m1);
11013 
11014     // operands for new nodes
11015     m1->_opnds[0] = op_dst;
11016     m1->_opnds[1] = new immL16Oper(0);
11017     m2->_opnds[0] = op_dst;
11018     m2->_opnds[1] = op_crx;
11019     m2->_opnds[2] = op_mem;
11020 
11021     // registers for new nodes
11022     ra_->set_pair(m1->_idx, ra_->get_reg_second(this), ra_->get_reg_first(this)); // dst
11023     ra_->set_pair(m2->_idx, ra_->get_reg_second(this), ra_->get_reg_first(this)); // dst
11024 
11025     // Insert new nodes.
11026     nodes->push(m1);
11027     nodes->push(m2);
11028   %}
11029 %}
11030 
11031 instruct cmovL_bso_reg_conLvalue0_Ex(iRegLdst dst, flagsRegSrc crx, regD src) %{
11032   // no match-rule, false predicate
11033   effect(DEF dst, USE crx, USE src);
11034   predicate(false);
11035 
11036   format %{ "CmovL   $dst, $crx, $src \t// postalloc expanded" %}
11037   postalloc_expand %{
11038     //
11039     // replaces
11040     //
11041     //   region  dst  crx  src
11042     //    \       |    |   /
11043     //     dst=cmovL_bso_reg_conLvalue0
11044     //
11045     // with
11046     //
11047     //   region  dst
11048     //    \       /
11049     //     dst=loadConL16(0)
11050     //      |
11051     //      ^  region  dst  crx  src
11052     //      |   \       |    |    /
11053     //      dst=cmovL_bso_reg
11054     //
11055 
11056     // Create new nodes.
11057     MachNode *m1 = new loadConL16Node();
11058     MachNode *m2 = new cmovL_bso_regNode();
11059 
11060     // inputs for new nodes
11061     m1->add_req(n_region);
11062     m2->add_req(n_region, n_crx, n_src);
11063     m2->add_prec(m1);
11064 
11065     // operands for new nodes
11066     m1->_opnds[0] = op_dst;
11067     m1->_opnds[1] = new immL16Oper(0);
11068     m2->_opnds[0] = op_dst;
11069     m2->_opnds[1] = op_crx;
11070     m2->_opnds[2] = op_src;
11071 
11072     // registers for new nodes
11073     ra_->set_pair(m1->_idx, ra_->get_reg_second(this), ra_->get_reg_first(this)); // dst
11074     ra_->set_pair(m2->_idx, ra_->get_reg_second(this), ra_->get_reg_first(this)); // dst
11075 
11076     // Insert new nodes.
11077     nodes->push(m1);
11078     nodes->push(m2);
11079   %}
11080 %}
11081 
11082 // Float to Long conversion, NaN is mapped to 0.
11083 instruct convF2L_reg_ExEx(iRegLdst dst, regF src) %{
11084   match(Set dst (ConvF2L src));
11085   predicate(!VM_Version::has_mtfprd());
11086   ins_cost(DEFAULT_COST);
11087 
11088   expand %{
11089     regF tmpF;
11090     stackSlotL tmpS;
11091     flagsReg crx;
11092     cmpFUnordered_reg_reg(crx, src, src);               // Check whether src is NaN.
11093     convF2LRaw_regF(tmpF, src);                         // Convert float to long (speculated).
11094     moveF2L_reg_stack(tmpS, tmpF);                      // Store float to stack (speculated).
11095     cmovL_bso_stackSlotL_conLvalue0_Ex(dst, crx, tmpS); // Cmove based on NaN check.
11096   %}
11097 %}
11098 
11099 // Float to Long conversion, NaN is mapped to 0. Special version for Power8.
11100 instruct convF2L_reg_mffprd_ExEx(iRegLdst dst, regF src) %{
11101   match(Set dst (ConvF2L src));
11102   predicate(VM_Version::has_mtfprd());
11103   ins_cost(DEFAULT_COST);
11104 
11105   expand %{
11106     regF tmpF;
11107     flagsReg crx;
11108     cmpFUnordered_reg_reg(crx, src, src);               // Check whether src is NaN.
11109     convF2LRaw_regF(tmpF, src);                         // Convert float to long (speculated).
11110     cmovL_bso_reg_conLvalue0_Ex(dst, crx, tmpF);        // Cmove based on NaN check.
11111   %}
11112 %}
11113 
11114 instruct convD2LRaw_regD(regD dst, regD src) %{
11115   // no match-rule, false predicate
11116   effect(DEF dst, USE src);
11117   predicate(false);
11118 
11119   format %{ "FCTIDZ $dst, $src \t// convD2L $src != NaN" %}
11120   size(4);
11121   ins_encode %{
11122     __ fctidz($dst$$FloatRegister, $src$$FloatRegister);
11123   %}
11124   ins_pipe(pipe_class_default);
11125 %}
11126 
11127 // Double to Long conversion, NaN is mapped to 0.
11128 instruct convD2L_reg_ExEx(iRegLdst dst, regD src) %{
11129   match(Set dst (ConvD2L src));
11130   predicate(!VM_Version::has_mtfprd());
11131   ins_cost(DEFAULT_COST);
11132 
11133   expand %{
11134     regD tmpD;
11135     stackSlotL tmpS;
11136     flagsReg crx;
11137     cmpDUnordered_reg_reg(crx, src, src);               // Check whether src is NaN.
    convD2LRaw_regD(tmpD, src);                         // Convert double to long (speculated).
    moveD2L_reg_stack(tmpS, tmpD);                      // Store double to stack (speculated).
11140     cmovL_bso_stackSlotL_conLvalue0_Ex(dst, crx, tmpS); // Cmove based on NaN check.
11141   %}
11142 %}
11143 
11144 // Double to Long conversion, NaN is mapped to 0. Special version for Power8.
11145 instruct convD2L_reg_mffprd_ExEx(iRegLdst dst, regD src) %{
11146   match(Set dst (ConvD2L src));
11147   predicate(VM_Version::has_mtfprd());
11148   ins_cost(DEFAULT_COST);
11149 
11150   expand %{
11151     regD tmpD;
11152     flagsReg crx;
11153     cmpDUnordered_reg_reg(crx, src, src);               // Check whether src is NaN.
    convD2LRaw_regD(tmpD, src);                         // Convert double to long (speculated).
11155     cmovL_bso_reg_conLvalue0_Ex(dst, crx, tmpD);        // Cmove based on NaN check.
11156   %}
11157 %}
11158 
11159 // Convert to Float
11160 
11161 // Placed here as needed in expand.
11162 instruct convL2DRaw_regD(regD dst, regD src) %{
11163   // no match-rule, false predicate
11164   effect(DEF dst, USE src);
11165   predicate(false);
11166 
11167   format %{ "FCFID $dst, $src \t// convL2D" %}
11168   size(4);
11169   ins_encode %{
11170     __ fcfid($dst$$FloatRegister, $src$$FloatRegister);
11171   %}
11172   ins_pipe(pipe_class_default);
11173 %}
11174 
11175 // Placed here as needed in expand.
11176 instruct convD2F_reg(regF dst, regD src) %{
11177   match(Set dst (ConvD2F src));
11178   format %{ "FRSP    $dst, $src \t// convD2F" %}
11179   size(4);
11180   ins_encode %{
11181     __ frsp($dst$$FloatRegister, $src$$FloatRegister);
11182   %}
11183   ins_pipe(pipe_class_default);
11184 %}
11185 
11186 // Integer to Float conversion.
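// Without mtfprd (pre-Power8) there is no direct GPR->FPR move, so the int
// value takes a round trip through a stack slot (see the mtfprd/mffprd note
// further up); FCFID/FRSP then perform the actual conversion to float.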
11187 instruct convI2F_ireg_Ex(regF dst, iRegIsrc src) %{
11188   match(Set dst (ConvI2F src));
11189   predicate(!VM_Version::has_fcfids());
11190   ins_cost(DEFAULT_COST);
11191 
11192   expand %{
11193     iRegLdst tmpL;
11194     stackSlotL tmpS;
11195     regD tmpD;
11196     regD tmpD2;
    convI2L_reg(tmpL, src);              // Sign-extend int to long.
11198     regL_to_stkL(tmpS, tmpL);            // Store long to stack.
11199     moveL2D_stack_reg(tmpD, tmpS);       // Load long into double register.
11200     convL2DRaw_regD(tmpD2, tmpD);        // Convert to double.
11201     convD2F_reg(dst, tmpD2);             // Convert double to float.
11202   %}
11203 %}
11204 
11205 instruct convL2FRaw_regF(regF dst, regD src) %{
11206   // no match-rule, false predicate
11207   effect(DEF dst, USE src);
11208   predicate(false);
11209 
11210   format %{ "FCFIDS $dst, $src \t// convL2F" %}
11211   size(4);
11212   ins_encode %{
11213     __ fcfids($dst$$FloatRegister, $src$$FloatRegister);
11214   %}
11215   ins_pipe(pipe_class_default);
11216 %}
11217 
11218 // Integer to Float conversion. Special version for Power7.
11219 instruct convI2F_ireg_fcfids_Ex(regF dst, iRegIsrc src) %{
11220   match(Set dst (ConvI2F src));
11221   predicate(VM_Version::has_fcfids() && !VM_Version::has_mtfprd());
11222   ins_cost(DEFAULT_COST);
11223 
11224   expand %{
11225     iRegLdst tmpL;
11226     stackSlotL tmpS;
11227     regD tmpD;
    convI2L_reg(tmpL, src);              // Sign-extend int to long.
11229     regL_to_stkL(tmpS, tmpL);            // Store long to stack.
11230     moveL2D_stack_reg(tmpD, tmpS);       // Load long into double register.
11231     convL2FRaw_regF(dst, tmpD);          // Convert to float.
11232   %}
11233 %}
11234 
11235 // Integer to Float conversion. Special version for Power8.
11236 instruct convI2F_ireg_mtfprd_Ex(regF dst, iRegIsrc src) %{
11237   match(Set dst (ConvI2F src));
11238   predicate(VM_Version::has_fcfids() && VM_Version::has_mtfprd());
11239   ins_cost(DEFAULT_COST);
11240 
11241   expand %{
11242     regD tmpD;
11243     moveI2D_reg(tmpD, src);
11244     convL2FRaw_regF(dst, tmpD);          // Convert to float.
11245   %}
11246 %}
11247 
11248 // L2F to avoid runtime call.
11249 instruct convL2F_ireg_fcfids_Ex(regF dst, iRegLsrc src) %{
11250   match(Set dst (ConvL2F src));
11251   predicate(VM_Version::has_fcfids() && !VM_Version::has_mtfprd());
11252   ins_cost(DEFAULT_COST);
11253 
11254   expand %{
11255     stackSlotL tmpS;
11256     regD tmpD;
11257     regL_to_stkL(tmpS, src);             // Store long to stack.
11258     moveL2D_stack_reg(tmpD, tmpS);       // Load long into double register.
11259     convL2FRaw_regF(dst, tmpD);          // Convert to float.
11260   %}
11261 %}
11262 
11263 // L2F to avoid runtime call.  Special version for Power8.
11264 instruct convL2F_ireg_mtfprd_Ex(regF dst, iRegLsrc src) %{
11265   match(Set dst (ConvL2F src));
11266   predicate(VM_Version::has_fcfids() && VM_Version::has_mtfprd());
11267   ins_cost(DEFAULT_COST);
11268 
11269   expand %{
11270     regD tmpD;
11271     moveL2D_reg(tmpD, src);
11272     convL2FRaw_regF(dst, tmpD);          // Convert to float.
11273   %}
11274 %}
11275 
11276 // Moved up as used in expand.
11277 //instruct convD2F_reg(regF dst, regD src) %{%}
11278 
11279 // Convert to Double
11280 
11281 // Integer to Double conversion.
11282 instruct convI2D_reg_Ex(regD dst, iRegIsrc src) %{
11283   match(Set dst (ConvI2D src));
11284   predicate(!VM_Version::has_mtfprd());
11285   ins_cost(DEFAULT_COST);
11286 
11287   expand %{
11288     iRegLdst tmpL;
11289     stackSlotL tmpS;
11290     regD tmpD;
    convI2L_reg(tmpL, src);              // Sign-extend int to long.
11292     regL_to_stkL(tmpS, tmpL);            // Store long to stack.
11293     moveL2D_stack_reg(tmpD, tmpS);       // Load long into double register.
11294     convL2DRaw_regD(dst, tmpD);          // Convert to double.
11295   %}
11296 %}
11297 
11298 // Integer to Double conversion. Special version for Power8.
11299 instruct convI2D_reg_mtfprd_Ex(regD dst, iRegIsrc src) %{
11300   match(Set dst (ConvI2D src));
11301   predicate(VM_Version::has_mtfprd());
11302   ins_cost(DEFAULT_COST);
11303 
11304   expand %{
11305     regD tmpD;
11306     moveI2D_reg(tmpD, src);
11307     convL2DRaw_regD(dst, tmpD);          // Convert to double.
11308   %}
11309 %}
11310 
11311 // Long to Double conversion
11312 instruct convL2D_reg_Ex(regD dst, stackSlotL src) %{
11313   match(Set dst (ConvL2D src));
11314   ins_cost(DEFAULT_COST + MEMORY_REF_COST);
11315 
11316   expand %{
11317     regD tmpD;
11318     moveL2D_stack_reg(tmpD, src);
11319     convL2DRaw_regD(dst, tmpD);
11320   %}
11321 %}
11322 
11323 // Long to Double conversion. Special version for Power8.
11324 instruct convL2D_reg_mtfprd_Ex(regD dst, iRegLsrc src) %{
11325   match(Set dst (ConvL2D src));
11326   predicate(VM_Version::has_mtfprd());
11327   ins_cost(DEFAULT_COST);
11328 
11329   expand %{
11330     regD tmpD;
11331     moveL2D_reg(tmpD, src);
11332     convL2DRaw_regD(dst, tmpD);          // Convert to double.
11333   %}
11334 %}
11335 
11336 instruct convF2D_reg(regD dst, regF src) %{
11337   match(Set dst (ConvF2D src));
11338   format %{ "FMR     $dst, $src \t// float->double" %}
11339   // variable size, 0 or 4
11340   ins_encode %{
11341     __ fmr_if_needed($dst$$FloatRegister, $src$$FloatRegister);
11342   %}
11343   ins_pipe(pipe_class_default);
11344 %}
11345 
11346 //----------Control Flow Instructions------------------------------------------
11347 // Compare Instructions
11348 
11349 // Compare Integers
11350 instruct cmpI_reg_reg(flagsReg crx, iRegIsrc src1, iRegIsrc src2) %{
11351   match(Set crx (CmpI src1 src2));
11352   size(4);
11353   format %{ "CMPW    $crx, $src1, $src2" %}
11354   ins_encode %{
11355     __ cmpw($crx$$CondRegister, $src1$$Register, $src2$$Register);
11356   %}
11357   ins_pipe(pipe_class_compare);
11358 %}
11359 
11360 instruct cmpI_reg_imm16(flagsReg crx, iRegIsrc src1, immI16 src2) %{
11361   match(Set crx (CmpI src1 src2));
11362   format %{ "CMPWI   $crx, $src1, $src2" %}
11363   size(4);
11364   ins_encode %{
11365     __ cmpwi($crx$$CondRegister, $src1$$Register, $src2$$constant);
11366   %}
11367   ins_pipe(pipe_class_compare);
11368 %}
11369 
11370 // (src1 & src2) == 0?
11371 instruct testI_reg_imm(flagsRegCR0 cr0, iRegIsrc src1, uimmI16 src2, immI_0 zero) %{
11372   match(Set cr0 (CmpI (AndI src1 src2) zero));
11373   // r0 is killed
11374   format %{ "ANDI    R0, $src1, $src2 \t// BTST int" %}
11375   size(4);
11376   ins_encode %{
11377     __ andi_(R0, $src1$$Register, $src2$$constant);
11378   %}
11379   ins_pipe(pipe_class_compare);
11380 %}
11381 
11382 instruct cmpL_reg_reg(flagsReg crx, iRegLsrc src1, iRegLsrc src2) %{
11383   match(Set crx (CmpL src1 src2));
11384   format %{ "CMPD    $crx, $src1, $src2" %}
11385   size(4);
11386   ins_encode %{
11387     __ cmpd($crx$$CondRegister, $src1$$Register, $src2$$Register);
11388   %}
11389   ins_pipe(pipe_class_compare);
11390 %}
11391 
11392 instruct cmpL_reg_imm16(flagsReg crx, iRegLsrc src1, immL16 src2) %{
11393   match(Set crx (CmpL src1 src2));
11394   format %{ "CMPDI   $crx, $src1, $src2" %}
11395   size(4);
11396   ins_encode %{
11397     __ cmpdi($crx$$CondRegister, $src1$$Register, $src2$$constant);
11398   %}
11399   ins_pipe(pipe_class_compare);
11400 %}
11401 
11402 // Added CmpUL for LoopPredicate.
11403 instruct cmpUL_reg_reg(flagsReg crx, iRegLsrc src1, iRegLsrc src2) %{
11404   match(Set crx (CmpUL src1 src2));
11405   format %{ "CMPLD   $crx, $src1, $src2" %}
11406   size(4);
11407   ins_encode %{
11408     __ cmpld($crx$$CondRegister, $src1$$Register, $src2$$Register);
11409   %}
11410   ins_pipe(pipe_class_compare);
11411 %}
11412 
11413 instruct cmpUL_reg_imm16(flagsReg crx, iRegLsrc src1, uimmL16 src2) %{
11414   match(Set crx (CmpUL src1 src2));
11415   format %{ "CMPLDI  $crx, $src1, $src2" %}
11416   size(4);
11417   ins_encode %{
11418     __ cmpldi($crx$$CondRegister, $src1$$Register, $src2$$constant);
11419   %}
11420   ins_pipe(pipe_class_compare);
11421 %}
11422 
11423 instruct testL_reg_reg(flagsRegCR0 cr0, iRegLsrc src1, iRegLsrc src2, immL_0 zero) %{
11424   match(Set cr0 (CmpL (AndL src1 src2) zero));
11425   // r0 is killed
11426   format %{ "AND     R0, $src1, $src2 \t// BTST long" %}
11427   size(4);
11428   ins_encode %{
11429     __ and_(R0, $src1$$Register, $src2$$Register);
11430   %}
11431   ins_pipe(pipe_class_compare);
11432 %}
11433 
11434 instruct testL_reg_imm(flagsRegCR0 cr0, iRegLsrc src1, uimmL16 src2, immL_0 zero) %{
11435   match(Set cr0 (CmpL (AndL src1 src2) zero));
11436   // r0 is killed
11437   format %{ "ANDI    R0, $src1, $src2 \t// BTST long" %}
11438   size(4);
11439   ins_encode %{
11440     __ andi_(R0, $src1$$Register, $src2$$constant);
11441   %}
11442   ins_pipe(pipe_class_compare);
11443 %}
11444 
11445 // Manifest a CmpL3 result in an integer register.
11446 instruct cmpL3_reg_reg(iRegIdst dst, iRegLsrc src1, iRegLsrc src2, flagsRegCR0 cr0) %{
11447   match(Set dst (CmpL3 src1 src2));
11448   effect(KILL cr0);
11449   ins_cost(DEFAULT_COST * 5);
11450   size((VM_Version::has_brw() ? 16 : 20));
11451 
11452   format %{ "cmpL3_reg_reg $dst, $src1, $src2" %}
11453 
11454   ins_encode %{
11455     __ cmpd(CCR0, $src1$$Register, $src2$$Register);
11456     __ set_cmp3($dst$$Register);
11457   %}
11458   ins_pipe(pipe_class_default);
11459 %}
11460 
11461 // Implicit range checks.
// A range check in the ideal graph has one of the following shapes:
11463 //  - (If le (CmpU length index)), (IfTrue  throw exception)
11464 //  - (If lt (CmpU index length)), (IfFalse throw exception)
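//
// For example (illustrative): a Java array access a[i] is guarded by an
// unsigned comparison of the index against the array length whose failing
// branch leads to an uncommon trap; with TrapBasedRangeChecks the instructs
// below implement that branch as a single trap instruction (TWI/TW).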
11465 //
11466 // Match range check 'If le (CmpU length index)'.
11467 instruct rangeCheck_iReg_uimm15(cmpOp cmp, iRegIsrc src_length, uimmI15 index, label labl) %{
11468   match(If cmp (CmpU src_length index));
11469   effect(USE labl);
11470   predicate(TrapBasedRangeChecks &&
11471             _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le &&
11472             PROB_UNLIKELY(_leaf->as_If()->_prob) >= PROB_ALWAYS &&
11473             (Matcher::branches_to_uncommon_trap(_leaf)));
11474 
11475   ins_is_TrapBasedCheckNode(true);
11476 
11477   format %{ "TWI     $index $cmp $src_length \t// RangeCheck => trap $labl" %}
11478   size(4);
11479   ins_encode %{
11480     if ($cmp$$cmpcode == 0x1 /* less_equal */) {
11481       __ trap_range_check_le($src_length$$Register, $index$$constant);
11482     } else {
11483       // Both successors are uncommon traps, probability is 0.
11484       // Node got flipped during fixup flow.
11485       assert($cmp$$cmpcode == 0x9, "must be greater");
11486       __ trap_range_check_g($src_length$$Register, $index$$constant);
11487     }
11488   %}
11489   ins_pipe(pipe_class_trap);
11490 %}
11491 
11492 // Match range check 'If lt (CmpU index length)'.
11493 instruct rangeCheck_iReg_iReg(cmpOp cmp, iRegIsrc src_index, iRegIsrc src_length, label labl) %{
11494   match(If cmp (CmpU src_index src_length));
11495   effect(USE labl);
11496   predicate(TrapBasedRangeChecks &&
11497             _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt &&
11498             _leaf->as_If()->_prob >= PROB_ALWAYS &&
11499             (Matcher::branches_to_uncommon_trap(_leaf)));
11500 
11501   ins_is_TrapBasedCheckNode(true);
11502 
11503   format %{ "TW      $src_index $cmp $src_length \t// RangeCheck => trap $labl" %}
11504   size(4);
11505   ins_encode %{
11506     if ($cmp$$cmpcode == 0x0 /* greater_equal */) {
11507       __ trap_range_check_ge($src_index$$Register, $src_length$$Register);
11508     } else {
11509       // Both successors are uncommon traps, probability is 0.
11510       // Node got flipped during fixup flow.
11511       assert($cmp$$cmpcode == 0x8, "must be less");
11512       __ trap_range_check_l($src_index$$Register, $src_length$$Register);
11513     }
11514   %}
11515   ins_pipe(pipe_class_trap);
11516 %}
11517 
11518 // Match range check 'If lt (CmpU index length)'.
11519 instruct rangeCheck_uimm15_iReg(cmpOp cmp, iRegIsrc src_index, uimmI15 length, label labl) %{
11520   match(If cmp (CmpU src_index length));
11521   effect(USE labl);
11522   predicate(TrapBasedRangeChecks &&
11523             _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt &&
11524             _leaf->as_If()->_prob >= PROB_ALWAYS &&
11525             (Matcher::branches_to_uncommon_trap(_leaf)));
11526 
11527   ins_is_TrapBasedCheckNode(true);
11528 
11529   format %{ "TWI     $src_index $cmp $length \t// RangeCheck => trap $labl" %}
11530   size(4);
11531   ins_encode %{
11532     if ($cmp$$cmpcode == 0x0 /* greater_equal */) {
11533       __ trap_range_check_ge($src_index$$Register, $length$$constant);
11534     } else {
11535       // Both successors are uncommon traps, probability is 0.
11536       // Node got flipped during fixup flow.
11537       assert($cmp$$cmpcode == 0x8, "must be less");
11538       __ trap_range_check_l($src_index$$Register, $length$$constant);
11539     }
11540   %}
11541   ins_pipe(pipe_class_trap);
11542 %}
11543 
11544 instruct compU_reg_reg(flagsReg crx, iRegIsrc src1, iRegIsrc src2) %{
11545   match(Set crx (CmpU src1 src2));
11546   format %{ "CMPLW   $crx, $src1, $src2 \t// unsigned" %}
11547   size(4);
11548   ins_encode %{
11549     __ cmplw($crx$$CondRegister, $src1$$Register, $src2$$Register);
11550   %}
11551   ins_pipe(pipe_class_compare);
11552 %}
11553 
11554 instruct compU_reg_uimm16(flagsReg crx, iRegIsrc src1, uimmI16 src2) %{
11555   match(Set crx (CmpU src1 src2));
11556   size(4);
11557   format %{ "CMPLWI  $crx, $src1, $src2" %}
11558   ins_encode %{
11559     __ cmplwi($crx$$CondRegister, $src1$$Register, $src2$$constant);
11560   %}
11561   ins_pipe(pipe_class_compare);
11562 %}
11563 
11564 // Implicit zero checks (more implicit null checks).
11565 // No constant pool entries required.
11566 instruct zeroCheckN_iReg_imm0(cmpOp cmp, iRegNsrc value, immN_0 zero, label labl) %{
11567   match(If cmp (CmpN value zero));
11568   effect(USE labl);
11569   predicate(TrapBasedNullChecks &&
11570             _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne &&
11571             _leaf->as_If()->_prob >= PROB_LIKELY_MAG(4) &&
11572             Matcher::branches_to_uncommon_trap(_leaf));
11573   ins_cost(1);
11574 
11575   ins_is_TrapBasedCheckNode(true);
11576 
11577   format %{ "TDI     $value $cmp $zero \t// ZeroCheckN => trap $labl" %}
11578   size(4);
11579   ins_encode %{
11580     if ($cmp$$cmpcode == 0xA) {
11581       __ trap_null_check($value$$Register);
11582     } else {
11583       // Both successors are uncommon traps, probability is 0.
11584       // Node got flipped during fixup flow.
11585       assert($cmp$$cmpcode == 0x2 , "must be equal(0xA) or notEqual(0x2)");
11586       __ trap_null_check($value$$Register, Assembler::traptoGreaterThanUnsigned);
11587     }
11588   %}
11589   ins_pipe(pipe_class_trap);
11590 %}
11591 
11592 // Compare narrow oops.
11593 instruct cmpN_reg_reg(flagsReg crx, iRegNsrc src1, iRegNsrc src2) %{
11594   match(Set crx (CmpN src1 src2));
11595 
11596   size(4);
11597   ins_cost(2);
11598   format %{ "CMPLW   $crx, $src1, $src2 \t// compressed ptr" %}
11599   ins_encode %{
11600     __ cmplw($crx$$CondRegister, $src1$$Register, $src2$$Register);
11601   %}
11602   ins_pipe(pipe_class_compare);
11603 %}
11604 
11605 instruct cmpN_reg_imm0(flagsReg crx, iRegNsrc src1, immN_0 src2) %{
11606   match(Set crx (CmpN src1 src2));
11607   // Make this more expensive than zeroCheckN_iReg_imm0.
11608   ins_cost(2);
11609 
11610   format %{ "CMPLWI  $crx, $src1, $src2 \t// compressed ptr" %}
11611   size(4);
11612   ins_encode %{
11613     __ cmplwi($crx$$CondRegister, $src1$$Register, $src2$$constant);
11614   %}
11615   ins_pipe(pipe_class_compare);
11616 %}
11617 
11618 // Implicit zero checks (more implicit null checks).
11619 // No constant pool entries required.
11620 instruct zeroCheckP_reg_imm0(cmpOp cmp, iRegP_N2P value, immP_0 zero, label labl) %{
11621   match(If cmp (CmpP value zero));
11622   effect(USE labl);
11623   predicate(TrapBasedNullChecks &&
11624             _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne &&
11625             _leaf->as_If()->_prob >= PROB_LIKELY_MAG(4) &&
11626             Matcher::branches_to_uncommon_trap(_leaf));
11627   ins_cost(1); // Should not be cheaper than zeroCheckN.
11628 
11629   ins_is_TrapBasedCheckNode(true);
11630 
11631   format %{ "TDI     $value $cmp $zero \t// ZeroCheckP => trap $labl" %}
11632   size(4);
11633   ins_encode %{
11634     if ($cmp$$cmpcode == 0xA) {
11635       __ trap_null_check($value$$Register);
11636     } else {
11637       // Both successors are uncommon traps, probability is 0.
11638       // Node got flipped during fixup flow.
11639       assert($cmp$$cmpcode == 0x2 , "must be equal(0xA) or notEqual(0x2)");
11640       __ trap_null_check($value$$Register, Assembler::traptoGreaterThanUnsigned);
11641     }
11642   %}
11643   ins_pipe(pipe_class_trap);
11644 %}
11645 
11646 // Compare Pointers
11647 instruct cmpP_reg_reg(flagsReg crx, iRegP_N2P src1, iRegP_N2P src2) %{
11648   match(Set crx (CmpP src1 src2));
11649   format %{ "CMPLD   $crx, $src1, $src2 \t// ptr" %}
11650   size(4);
11651   ins_encode %{
11652     __ cmpld($crx$$CondRegister, $src1$$Register, $src2$$Register);
11653   %}
11654   ins_pipe(pipe_class_compare);
11655 %}
11656 
11657 instruct cmpP_reg_null(flagsReg crx, iRegP_N2P src1, immP_0or1 src2) %{
11658   match(Set crx (CmpP src1 src2));
11659   format %{ "CMPLDI   $crx, $src1, $src2 \t// ptr" %}
11660   size(4);
11661   ins_encode %{
11662     __ cmpldi($crx$$CondRegister, $src1$$Register, (int)((short)($src2$$constant & 0xFFFF)));
11663   %}
11664   ins_pipe(pipe_class_compare);
11665 %}
11666 
11667 // Used in postalloc expand.
11668 instruct cmpP_reg_imm16(flagsReg crx, iRegPsrc src1, immL16 src2) %{
  // This match rule prevents reordering of the node before a safepoint.
  // This only makes sense if this instruction is used exclusively
  // for the expansion of EncodeP!
11672   match(Set crx (CmpP src1 src2));
11673   predicate(false);
11674 
11675   format %{ "CMPDI   $crx, $src1, $src2" %}
11676   size(4);
11677   ins_encode %{
11678     __ cmpdi($crx$$CondRegister, $src1$$Register, $src2$$constant);
11679   %}
11680   ins_pipe(pipe_class_compare);
11681 %}
11682 
11683 //----------Float Compares----------------------------------------------------
11684 
11685 instruct cmpFUnordered_reg_reg(flagsReg crx, regF src1, regF src2) %{
11686   // Needs matchrule, see cmpDUnordered.
11687   match(Set crx (CmpF src1 src2));
  // False predicate: not matched directly, only used via expands (see cmpF_reg_reg_Ex).
11689   predicate(false);
11690 
11691   format %{ "cmpFUrd $crx, $src1, $src2" %}
11692   size(4);
11693   ins_encode %{
11694     __ fcmpu($crx$$CondRegister, $src1$$FloatRegister, $src2$$FloatRegister);
11695   %}
11696   ins_pipe(pipe_class_default);
11697 %}
11698 
11699 instruct cmov_bns_less(flagsReg crx) %{
11700   // no match-rule, false predicate
11701   effect(DEF crx);
11702   predicate(false);
11703 
11704   ins_variable_size_depending_on_alignment(true);
11705 
11706   format %{ "cmov    $crx" %}
11707   // Worst case is branch + move + stop, no stop without scheduler.
11708   size(12);
11709   ins_encode %{
11710     Label done;
11711     __ bns($crx$$CondRegister, done);        // not unordered -> keep crx
11712     __ li(R0, 0);
11713     __ cmpwi($crx$$CondRegister, R0, 1);     // unordered -> set crx to 'less'
11714     __ bind(done);
11715   %}
11716   ins_pipe(pipe_class_default);
11717 %}
11718 
11719 // Compare floating, generate condition code.
11720 instruct cmpF_reg_reg_Ex(flagsReg crx, regF src1, regF src2) %{
  // FIXME: should we match '(If cmp (CmpF src1 src2))'?
11722   //
11723   // The following code sequence occurs a lot in mpegaudio:
11724   //
11725   // block BXX:
11726   // 0: instruct cmpFUnordered_reg_reg (cmpF_reg_reg-0):
11727   //    cmpFUrd CCR6, F11, F9
11728   // 4: instruct cmov_bns_less (cmpF_reg_reg-1):
11729   //    cmov CCR6
11730   // 8: instruct branchConSched:
11731   //    B_FARle CCR6, B56  P=0.500000 C=-1.000000
11732   match(Set crx (CmpF src1 src2));
11733   ins_cost(DEFAULT_COST+BRANCH_COST);
11734 
11735   format %{ "CmpF    $crx, $src1, $src2 \t// postalloc expanded" %}
11736   postalloc_expand %{
11737     //
11738     // replaces
11739     //
11740     //   region  src1  src2
11741     //    \       |     |
11742     //     crx=cmpF_reg_reg
11743     //
11744     // with
11745     //
11746     //   region  src1  src2
11747     //    \       |     |
11748     //     crx=cmpFUnordered_reg_reg
11749     //      |
11750     //      ^  region
11751     //      |   \
11752     //      crx=cmov_bns_less
11753     //
11754 
11755     // Create new nodes.
11756     MachNode *m1 = new cmpFUnordered_reg_regNode();
11757     MachNode *m2 = new cmov_bns_lessNode();
11758 
11759     // inputs for new nodes
11760     m1->add_req(n_region, n_src1, n_src2);
11761     m2->add_req(n_region);
11762     m2->add_prec(m1);
11763 
11764     // operands for new nodes
11765     m1->_opnds[0] = op_crx;
11766     m1->_opnds[1] = op_src1;
11767     m1->_opnds[2] = op_src2;
11768     m2->_opnds[0] = op_crx;
11769 
11770     // registers for new nodes
11771     ra_->set_pair(m1->_idx, ra_->get_reg_second(this), ra_->get_reg_first(this)); // crx
11772     ra_->set_pair(m2->_idx, ra_->get_reg_second(this), ra_->get_reg_first(this)); // crx
11773 
11774     // Insert new nodes.
11775     nodes->push(m1);
11776     nodes->push(m2);
11777   %}
11778 %}
11779 
11780 // Compare float, generate -1,0,1
11781 instruct cmpF3_reg_reg(iRegIdst dst, regF src1, regF src2, flagsRegCR0 cr0) %{
11782   match(Set dst (CmpF3 src1 src2));
11783   effect(KILL cr0);
11784   ins_cost(DEFAULT_COST * 6);
11785   size((VM_Version::has_brw() ? 20 : 24));
11786 
11787   format %{ "cmpF3_reg_reg $dst, $src1, $src2" %}
11788 
11789   ins_encode %{
11790     __ fcmpu(CCR0, $src1$$FloatRegister, $src2$$FloatRegister);
11791     __ set_cmpu3($dst$$Register, true); // C2 requires unordered to get treated like less
11792   %}
11793   ins_pipe(pipe_class_default);
11794 %}
11795 
11796 instruct cmpDUnordered_reg_reg(flagsReg crx, regD src1, regD src2) %{
  // Needs a match rule so that the ideal opcode is Cmp. This makes GCM place the
  // node right before the conditional move that uses it.
  // In the jck test api/java_awt/geom/QuadCurve2DFloat/index.html#SetCurveTesttestCase7,
  // compilation of java.awt.geom.RectangularShape::getBounds()Ljava/awt/Rectangle
  // crashed in register allocation where the flags register between cmpDUnordered and a
  // conditional move was supposed to be spilled.
11803   match(Set crx (CmpD src1 src2));
11804   // False predicate, shall not be matched.
11805   predicate(false);
11806 
11807   format %{ "cmpFUrd $crx, $src1, $src2" %}
11808   size(4);
11809   ins_encode %{
11810     __ fcmpu($crx$$CondRegister, $src1$$FloatRegister, $src2$$FloatRegister);
11811   %}
11812   ins_pipe(pipe_class_default);
11813 %}
11814 
11815 instruct cmpD_reg_reg_Ex(flagsReg crx, regD src1, regD src2) %{
11816   match(Set crx (CmpD src1 src2));
11817   ins_cost(DEFAULT_COST+BRANCH_COST);
11818 
11819   format %{ "CmpD    $crx, $src1, $src2 \t// postalloc expanded" %}
11820   postalloc_expand %{
11821     //
11822     // replaces
11823     //
11824     //   region  src1  src2
11825     //    \       |     |
11826     //     crx=cmpD_reg_reg
11827     //
11828     // with
11829     //
11830     //   region  src1  src2
11831     //    \       |     |
11832     //     crx=cmpDUnordered_reg_reg
11833     //      |
11834     //      ^  region
11835     //      |   \
11836     //      crx=cmov_bns_less
11837     //
11838 
11839     // create new nodes
11840     MachNode *m1 = new cmpDUnordered_reg_regNode();
11841     MachNode *m2 = new cmov_bns_lessNode();
11842 
11843     // inputs for new nodes
11844     m1->add_req(n_region, n_src1, n_src2);
11845     m2->add_req(n_region);
11846     m2->add_prec(m1);
11847 
11848     // operands for new nodes
11849     m1->_opnds[0] = op_crx;
11850     m1->_opnds[1] = op_src1;
11851     m1->_opnds[2] = op_src2;
11852     m2->_opnds[0] = op_crx;
11853 
11854     // registers for new nodes
11855     ra_->set_pair(m1->_idx, ra_->get_reg_second(this), ra_->get_reg_first(this)); // crx
11856     ra_->set_pair(m2->_idx, ra_->get_reg_second(this), ra_->get_reg_first(this)); // crx
11857 
11858     // Insert new nodes.
11859     nodes->push(m1);
11860     nodes->push(m2);
11861   %}
11862 %}
11863 
11864 // Compare double, generate -1,0,1
11865 instruct cmpD3_reg_reg(iRegIdst dst, regD src1, regD src2, flagsRegCR0 cr0) %{
11866   match(Set dst (CmpD3 src1 src2));
11867   effect(KILL cr0);
11868   ins_cost(DEFAULT_COST * 6);
11869   size((VM_Version::has_brw() ? 20 : 24));
11870 
11871   format %{ "cmpD3_reg_reg $dst, $src1, $src2" %}
11872 
11873   ins_encode %{
11874     __ fcmpu(CCR0, $src1$$FloatRegister, $src2$$FloatRegister);
11875     __ set_cmpu3($dst$$Register, true); // C2 requires unordered to get treated like less
11876   %}
11877   ins_pipe(pipe_class_default);
11878 %}
11879 
11880 // Compare char
11881 instruct cmprb_Digit_reg_reg(iRegIdst dst, iRegIsrc src1, iRegIsrc src2, flagsReg crx) %{
11882   match(Set dst (Digit src1));
11883   effect(TEMP src2, TEMP crx);
11884   ins_cost(3 * DEFAULT_COST);
11885 
11886   format %{ "LI      $src2, 0x3930\n\t"
11887             "CMPRB   $crx, 0, $src1, $src2\n\t"
11888             "SETB    $dst, $crx" %}
11889   size(12);
11890   ins_encode %{
11891     // 0x30: 0, 0x39: 9
11892     __ li($src2$$Register, 0x3930);
    // Compare src1 with the range 0x30 to 0x39.
11894     __ cmprb($crx$$CondRegister, 0, $src1$$Register, $src2$$Register);
11895     __ setb($dst$$Register, $crx$$CondRegister);
11896   %}
11897   ins_pipe(pipe_class_default);
11898 %}
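
// Note on the CMPRB-based character class checks above and below (a sketch,
// assuming the ISA 3.0 "compare ranged byte" encoding): src2 packs each byte
// range as a pair, lower bound in the even byte and upper bound in the odd
// byte; with the L field set to 1 a second range occupies the next halfword.
// For the Digit case the constant 0x3930 therefore encodes the single range
// '0' (0x30) .. '9' (0x39), roughly equivalent to this C check:
//
//   bool is_digit(unsigned char c) { return c >= 0x30 && c <= 0x39; }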
11899 
11900 instruct cmprb_LowerCase_reg_reg(iRegIdst dst, iRegIsrc src1, iRegIsrc src2, flagsReg crx) %{
11901   match(Set dst (LowerCase src1));
11902   effect(TEMP src2, TEMP crx);
11903   ins_cost(12 * DEFAULT_COST);
11904 
11905   format %{ "LI      $src2, 0x7A61\n\t"
11906             "CMPRB   $crx, 0, $src1, $src2\n\t"
11907             "BGT     $crx, done\n\t"
11908             "LIS     $src2, (signed short)0xF6DF\n\t"
11909             "ORI     $src2, $src2, 0xFFF8\n\t"
11910             "CMPRB   $crx, 1, $src1, $src2\n\t"
11911             "BGT     $crx, done\n\t"
11912             "LIS     $src2, (signed short)0xAAB5\n\t"
11913             "ORI     $src2, $src2, 0xBABA\n\t"
11914             "INSRDI  $src2, $src2, 32, 0\n\t"
11915             "CMPEQB  $crx, 1, $src1, $src2\n"
11916             "done:\n\t"
11917             "SETB    $dst, $crx" %}
11918 
11919   size(48);
11920   ins_encode %{
11921     Label done;
11922     // 0x61: a, 0x7A: z
11923     __ li($src2$$Register, 0x7A61);
    // Compare src1 with the range 0x61 to 0x7A.
11925     __ cmprb($crx$$CondRegister, 0, $src1$$Register, $src2$$Register);
11926     __ bgt($crx$$CondRegister, done);
11927 
    // 0xDF: sharp s, 0xFF: y with diaeresis; 0xF7 (division sign) is not a lower case letter.
11929     __ lis($src2$$Register, (signed short)0xF6DF);
11930     __ ori($src2$$Register, $src2$$Register, 0xFFF8);
11931     // compare src1 with ranges 0xDF to 0xF6 and 0xF8 to 0xFF
11932     __ cmprb($crx$$CondRegister, 1, $src1$$Register, $src2$$Register);
11933     __ bgt($crx$$CondRegister, done);
11934 
11935     // 0xAA: feminine ordinal indicator
11936     // 0xB5: micro sign
11937     // 0xBA: masculine ordinal indicator
11938     __ lis($src2$$Register, (signed short)0xAAB5);
11939     __ ori($src2$$Register, $src2$$Register, 0xBABA);
11940     __ insrdi($src2$$Register, $src2$$Register, 32, 0);
11941     // compare src1 with 0xAA, 0xB5, and 0xBA
11942     __ cmpeqb($crx$$CondRegister, $src1$$Register, $src2$$Register);
11943 
11944     __ bind(done);
11945     __ setb($dst$$Register, $crx$$CondRegister);
11946   %}
11947   ins_pipe(pipe_class_default);
11948 %}
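
// In the LowerCase check above, CMPEQB compares the byte in src1 for equality
// against each byte of src2. INSRDI replicates 0xAAB5BABA into both word
// halves, so the set effectively tested is {0xAA, 0xB5, 0xBA}; in C terms
// (illustrative): c == 0xAA || c == 0xB5 || c == 0xBA.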
11949 
11950 instruct cmprb_UpperCase_reg_reg(iRegIdst dst, iRegIsrc src1, iRegIsrc src2, flagsReg crx) %{
11951   match(Set dst (UpperCase src1));
11952   effect(TEMP src2, TEMP crx);
11953   ins_cost(7 * DEFAULT_COST);
11954 
11955   format %{ "LI      $src2, 0x5A41\n\t"
11956             "CMPRB   $crx, 0, $src1, $src2\n\t"
11957             "BGT     $crx, done\n\t"
11958             "LIS     $src2, (signed short)0xD6C0\n\t"
11959             "ORI     $src2, $src2, 0xDED8\n\t"
11960             "CMPRB   $crx, 1, $src1, $src2\n"
11961             "done:\n\t"
11962             "SETB    $dst, $crx" %}
11963 
11964   size(28);
11965   ins_encode %{
11966     Label done;
11967     // 0x41: A, 0x5A: Z
11968     __ li($src2$$Register, 0x5A41);
    // Compare src1 with the range 0x41 to 0x5A.
11970     __ cmprb($crx$$CondRegister, 0, $src1$$Register, $src2$$Register);
11971     __ bgt($crx$$CondRegister, done);
11972 
    // 0xC0: A with grave, 0xDE: capital thorn; 0xD7 (multiplication sign) is not an upper case letter.
11974     __ lis($src2$$Register, (signed short)0xD6C0);
11975     __ ori($src2$$Register, $src2$$Register, 0xDED8);
11976     // compare src1 with ranges 0xC0 to 0xD6 and 0xD8 to 0xDE
11977     __ cmprb($crx$$CondRegister, 1, $src1$$Register, $src2$$Register);
11978 
11979     __ bind(done);
11980     __ setb($dst$$Register, $crx$$CondRegister);
11981   %}
11982   ins_pipe(pipe_class_default);
11983 %}
11984 
11985 instruct cmprb_Whitespace_reg_reg(iRegIdst dst, iRegIsrc src1, iRegIsrc src2, flagsReg crx) %{
11986   match(Set dst (Whitespace src1));
11987   predicate(PowerArchitecturePPC64 <= 9);
11988   effect(TEMP src2, TEMP crx);
11989   ins_cost(4 * DEFAULT_COST);
11990 
11991   format %{ "LI      $src2, 0x0D09\n\t"
            "ADDIS   $src2, $src2, 0x201C\n\t"
11993             "CMPRB   $crx, 1, $src1, $src2\n\t"
11994             "SETB    $dst, $crx" %}
11995   size(16);
11996   ins_encode %{
11997     // 0x09 to 0x0D, 0x1C to 0x20
11998     __ li($src2$$Register, 0x0D09);
    __ addis($src2$$Register, $src2$$Register, 0x201C);
12000     // compare src with ranges 0x09 to 0x0D and 0x1C to 0x20
12001     __ cmprb($crx$$CondRegister, 1, $src1$$Register, $src2$$Register);
12002     __ setb($dst$$Register, $crx$$CondRegister);
12003   %}
12004   ins_pipe(pipe_class_default);
12005 %}
12006 
12007 // Power 10 version, using prefixed addi to load 32-bit constant
12008 instruct cmprb_Whitespace_reg_reg_prefixed(iRegIdst dst, iRegIsrc src1, iRegIsrc src2, flagsReg crx) %{
12009   match(Set dst (Whitespace src1));
12010   predicate(PowerArchitecturePPC64 >= 10);
12011   effect(TEMP src2, TEMP crx);
12012   ins_cost(3 * DEFAULT_COST);
12013 
12014   format %{ "PLI     $src2, 0x201C0D09\n\t"
12015             "CMPRB   $crx, 1, $src1, $src2\n\t"
12016             "SETB    $dst, $crx" %}
12017   size(16);
12018   ins_encode %{
12019     // 0x09 to 0x0D, 0x1C to 0x20
12020     assert( ((intptr_t)(__ pc()) & 0x3c) != 0x3c, "Bad alignment for prefixed instruction at " INTPTR_FORMAT, (intptr_t)(__ pc()));
12021     __ pli($src2$$Register, 0x201C0D09);
12022     // compare src with ranges 0x09 to 0x0D and 0x1C to 0x20
12023     __ cmprb($crx$$CondRegister, 1, $src1$$Register, $src2$$Register);
12024     __ setb($dst$$Register, $crx$$CondRegister);
12025   %}
12026   ins_pipe(pipe_class_default);
12027   ins_alignment(2);
12028 %}
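
// The assert in the encoding above guards the prefixed PLI: a prefixed
// instruction must not cross a 64-byte boundary, so the check rejects a pc in
// the last word slot before such a boundary ((pc & 0x3c) == 0x3c), and
// ins_alignment(2), presumably two instruction words (8 bytes), requests a
// placement that satisfies this.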
12029 
12030 //----------Branches---------------------------------------------------------
12031 // Jump
12032 
12033 // Direct Branch.
12034 instruct branch(label labl) %{
12035   match(Goto);
12036   effect(USE labl);
12037   ins_cost(BRANCH_COST);
12038 
12039   format %{ "B       $labl" %}
12040   size(4);
12041   ins_encode %{
12042      Label d;    // dummy
12043      __ bind(d);
12044      Label* p = $labl$$label;
12045      // `p' is `NULL' when this encoding class is used only to
12046      // determine the size of the encoded instruction.
12047      Label& l = (NULL == p)? d : *(p);
12048      __ b(l);
12049   %}
12050   ins_pipe(pipe_class_default);
12051 %}
12052 
12053 // Conditional Near Branch
12054 instruct branchCon(cmpOp cmp, flagsRegSrc crx, label lbl) %{
12055   // Same match rule as `branchConFar'.
12056   match(If cmp crx);
12057   effect(USE lbl);
12058   ins_cost(BRANCH_COST);
12059 
12060   // If set to 1 this indicates that the current instruction is a
12061   // short variant of a long branch. This avoids using this
12062   // instruction in first-pass matching. It will then only be used in
12063   // the `Shorten_branches' pass.
12064   ins_short_branch(1);
12065 
12066   format %{ "B$cmp     $crx, $lbl" %}
12067   size(4);
12068   ins_encode( enc_bc(crx, cmp, lbl) );
12069   ins_pipe(pipe_class_default);
12070 %}
12071 
// This is used when the ppc64 `bc' instruction cannot reach far
// enough, so we emit a far branch here, which is more expensive.
//
12075 //
12076 // Conditional Far Branch
12077 instruct branchConFar(cmpOp cmp, flagsRegSrc crx, label lbl) %{
12078   // Same match rule as `branchCon'.
12079   match(If cmp crx);
12080   effect(USE crx, USE lbl);
12081   // Higher cost than `branchCon'.
12082   ins_cost(5*BRANCH_COST);
12083 
12084   // This is not a short variant of a branch, but the long variant.
12085   ins_short_branch(0);
12086 
12087   format %{ "B_FAR$cmp $crx, $lbl" %}
12088   size(8);
12089   ins_encode( enc_bc_far(crx, cmp, lbl) );
12090   ins_pipe(pipe_class_default);
12091 %}
12092 
12093 instruct branchLoopEnd(cmpOp cmp, flagsRegSrc crx, label labl) %{
12094   match(CountedLoopEnd cmp crx);
12095   effect(USE labl);
12096   ins_cost(BRANCH_COST);
12097 
12098   // short variant.
12099   ins_short_branch(1);
12100 
12101   format %{ "B$cmp     $crx, $labl \t// counted loop end" %}
12102   size(4);
12103   ins_encode( enc_bc(crx, cmp, labl) );
12104   ins_pipe(pipe_class_default);
12105 %}
12106 
12107 instruct branchLoopEndFar(cmpOp cmp, flagsRegSrc crx, label labl) %{
12108   match(CountedLoopEnd cmp crx);
12109   effect(USE labl);
12110   ins_cost(BRANCH_COST);
12111 
12112   // Long variant.
12113   ins_short_branch(0);
12114 
12115   format %{ "B_FAR$cmp $crx, $labl \t// counted loop end" %}
12116   size(8);
12117   ins_encode( enc_bc_far(crx, cmp, labl) );
12118   ins_pipe(pipe_class_default);
12119 %}
12120 
12121 // ============================================================================
12122 // Java runtime operations, intrinsics and other complex operations.
12123 
// The second, slow half of a subtype check. Scan the subklass's secondary superklass
// array for an instance of the superklass. Set a hidden internal cache on a
// hit (the cache is checked with exposed code in gen_subtype_check()). Returns
// nonzero for a miss and zero for a hit. The encoding ALSO sets flags.
//
// GL TODO: Improve this.
// - result should not be a TEMP
// - Add a match rule, as on SPARC, to avoid the additional Cmp.
12132 instruct partialSubtypeCheck(iRegPdst result, iRegP_N2P subklass, iRegP_N2P superklass,
12133                              iRegPdst tmp_klass, iRegPdst tmp_arrayptr) %{
12134   match(Set result (PartialSubtypeCheck subklass superklass));
12135   effect(TEMP_DEF result, TEMP tmp_klass, TEMP tmp_arrayptr);
12136   ins_cost(DEFAULT_COST*10);
12137 
12138   format %{ "PartialSubtypeCheck $result = ($subklass instanceOf $superklass) tmp: $tmp_klass, $tmp_arrayptr" %}
12139   ins_encode %{
12140     __ check_klass_subtype_slow_path($subklass$$Register, $superklass$$Register, $tmp_arrayptr$$Register,
12141                                      $tmp_klass$$Register, NULL, $result$$Register);
12142   %}
12143   ins_pipe(pipe_class_default);
12144 %}
12145 
12146 // inlined locking and unlocking
12147 
12148 instruct cmpFastLock(flagsRegCR0 crx, iRegPdst oop, iRegPdst box, iRegPdst tmp1, iRegPdst tmp2) %{
12149   match(Set crx (FastLock oop box));
12150   effect(TEMP tmp1, TEMP tmp2);
12151   predicate(!Compile::current()->use_rtm());
12152 
12153   format %{ "FASTLOCK  $oop, $box, $tmp1, $tmp2" %}
12154   ins_encode %{
12155     __ compiler_fast_lock_object($crx$$CondRegister, $oop$$Register, $box$$Register,
12156                                  $tmp1$$Register, $tmp2$$Register, /*tmp3*/ R0);
12157     // If locking was successful, crx should indicate 'EQ'.
12158     // The compiler generates a branch to the runtime call to
12159     // _complete_monitor_locking_Java for the case where crx is 'NE'.
12160   %}
12161   ins_pipe(pipe_class_compare);
12162 %}
12163 
// Separate version for RTM (transactional memory). Uses a bound register for box to enable USE_KILL.
12165 instruct cmpFastLock_tm(flagsReg crx, iRegPdst oop, rarg2RegP box, iRegPdst tmp1, iRegPdst tmp2, iRegPdst tmp3) %{
12166   match(Set crx (FastLock oop box));
12167   effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, USE_KILL box);
12168   predicate(Compile::current()->use_rtm());
12169 
12170   format %{ "FASTLOCK  $oop, $box, $tmp1, $tmp2, $tmp3 (TM)" %}
12171   ins_encode %{
12172     __ compiler_fast_lock_object($crx$$CondRegister, $oop$$Register, $box$$Register,
12173                                  $tmp1$$Register, $tmp2$$Register, $tmp3$$Register,
12174                                  _rtm_counters, _stack_rtm_counters,
12175                                  ((Method*)(ra_->C->method()->constant_encoding()))->method_data(),
12176                                  /*RTM*/ true, ra_->C->profile_rtm());
12177     // If locking was successful, crx should indicate 'EQ'.
12178     // The compiler generates a branch to the runtime call to
12179     // _complete_monitor_locking_Java for the case where crx is 'NE'.
12180   %}
12181   ins_pipe(pipe_class_compare);
12182 %}
12183 
12184 instruct cmpFastUnlock(flagsRegCR0 crx, iRegPdst oop, iRegPdst box, iRegPdst tmp1, iRegPdst tmp2, iRegPdst tmp3) %{
12185   match(Set crx (FastUnlock oop box));
12186   effect(TEMP tmp1, TEMP tmp2, TEMP tmp3);
12187   predicate(!Compile::current()->use_rtm());
12188 
12189   format %{ "FASTUNLOCK  $oop, $box, $tmp1, $tmp2" %}
12190   ins_encode %{
12191     __ compiler_fast_unlock_object($crx$$CondRegister, $oop$$Register, $box$$Register,
12192                                    $tmp1$$Register, $tmp2$$Register, $tmp3$$Register,
12193                                    false);
12194     // If unlocking was successful, crx should indicate 'EQ'.
12195     // The compiler generates a branch to the runtime call to
12196     // _complete_monitor_unlocking_Java for the case where crx is 'NE'.
12197   %}
12198   ins_pipe(pipe_class_compare);
12199 %}
12200 
12201 instruct cmpFastUnlock_tm(flagsReg crx, iRegPdst oop, iRegPdst box, iRegPdst tmp1, iRegPdst tmp2, iRegPdst tmp3) %{
12202   match(Set crx (FastUnlock oop box));
12203   effect(TEMP tmp1, TEMP tmp2, TEMP tmp3);
12204   predicate(Compile::current()->use_rtm());
12205 
12206   format %{ "FASTUNLOCK  $oop, $box, $tmp1, $tmp2 (TM)" %}
12207   ins_encode %{
12208     __ compiler_fast_unlock_object($crx$$CondRegister, $oop$$Register, $box$$Register,
12209                                    $tmp1$$Register, $tmp2$$Register, $tmp3$$Register,
12210                                    /*RTM*/ true);
12211     // If unlocking was successful, crx should indicate 'EQ'.
12212     // The compiler generates a branch to the runtime call to
12213     // _complete_monitor_unlocking_Java for the case where crx is 'NE'.
12214   %}
12215   ins_pipe(pipe_class_compare);
12216 %}
12217 
12218 // Align address.
12219 instruct align_addr(iRegPdst dst, iRegPsrc src, immLnegpow2 mask) %{
12220   match(Set dst (CastX2P (AndL (CastP2X src) mask)));
12221 
12222   format %{ "ANDDI   $dst, $src, $mask \t// next aligned address" %}
12223   size(4);
12224   ins_encode %{
12225     __ clrrdi($dst$$Register, $src$$Register, log2i_exact(-(julong)$mask$$constant));
12226   %}
12227   ins_pipe(pipe_class_default);
12228 %}
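
// Worked example for align_addr (illustrative values): with mask = -16
// (0x...FFF0), log2i_exact(-(julong)mask) = log2i_exact(16) = 4, so CLRRDI
// clears the low four bits of src; the result equals src & -16, i.e. src with
// its low four address bits cleared.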
12229 
12230 // Array size computation.
12231 instruct array_size(iRegLdst dst, iRegPsrc end, iRegPsrc start) %{
12232   match(Set dst (SubL (CastP2X end) (CastP2X start)));
12233 
12234   format %{ "SUB     $dst, $end, $start \t// array size in bytes" %}
12235   size(4);
12236   ins_encode %{
12237     __ subf($dst$$Register, $start$$Register, $end$$Register);
12238   %}
12239   ins_pipe(pipe_class_default);
12240 %}
12241 
12242 // Clear-array with constant short array length. The versions below can use dcbz with cnt > 30.
12243 instruct inlineCallClearArrayShort(immLmax30 cnt, rarg2RegP base, Universe dummy, regCTR ctr) %{
12244   match(Set dummy (ClearArray cnt base));
12245   effect(USE_KILL base, KILL ctr);
12246   ins_cost(2 * MEMORY_REF_COST);
12247 
12248   format %{ "ClearArray $cnt, $base" %}
12249   ins_encode %{
12250     __ clear_memory_constlen($base$$Register, $cnt$$constant, R0); // kills base, R0
12251   %}
12252   ins_pipe(pipe_class_default);
12253 %}
12254 
12255 // Clear-array with constant large array length.
12256 instruct inlineCallClearArrayLarge(immL cnt, rarg2RegP base, Universe dummy, iRegLdst tmp, regCTR ctr) %{
12257   match(Set dummy (ClearArray cnt base));
12258   effect(USE_KILL base, TEMP tmp, KILL ctr);
12259   ins_cost(3 * MEMORY_REF_COST);
12260 
12261   format %{ "ClearArray $cnt, $base \t// KILL $tmp" %}
12262   ins_encode %{
12263     __ clear_memory_doubleword($base$$Register, $tmp$$Register, R0, $cnt$$constant); // kills base, R0
12264   %}
12265   ins_pipe(pipe_class_default);
12266 %}
12267 
12268 // Clear-array with dynamic array length.
12269 instruct inlineCallClearArray(rarg1RegL cnt, rarg2RegP base, Universe dummy, regCTR ctr) %{
12270   match(Set dummy (ClearArray cnt base));
12271   effect(USE_KILL cnt, USE_KILL base, KILL ctr);
12272   ins_cost(4 * MEMORY_REF_COST);
12273 
12274   format %{ "ClearArray $cnt, $base" %}
12275   ins_encode %{
12276     __ clear_memory_doubleword($base$$Register, $cnt$$Register, R0); // kills cnt, base, R0
12277   %}
12278   ins_pipe(pipe_class_default);
12279 %}
12280 
12281 instruct string_compareL(rarg1RegP str1, rarg2RegP str2, rarg3RegI cnt1, rarg4RegI cnt2, iRegIdst result,
12282                          iRegIdst tmp, regCTR ctr, flagsRegCR0 cr0) %{
12283   predicate(((StrCompNode*)n)->encoding() == StrIntrinsicNode::LL);
12284   match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
12285   effect(TEMP_DEF result, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL ctr, KILL cr0, TEMP tmp);
12286   ins_cost(300);
12287   format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result \t// KILL $tmp" %}
12288   ins_encode %{
12289     __ string_compare($str1$$Register, $str2$$Register,
12290                       $cnt1$$Register, $cnt2$$Register,
12291                       $tmp$$Register,
12292                       $result$$Register, StrIntrinsicNode::LL);
12293   %}
12294   ins_pipe(pipe_class_default);
12295 %}
12296 
12297 instruct string_compareU(rarg1RegP str1, rarg2RegP str2, rarg3RegI cnt1, rarg4RegI cnt2, iRegIdst result,
12298                          iRegIdst tmp, regCTR ctr, flagsRegCR0 cr0) %{
12299   predicate(((StrCompNode*)n)->encoding() == StrIntrinsicNode::UU);
12300   match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
12301   effect(TEMP_DEF result, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL ctr, KILL cr0, TEMP tmp);
12302   ins_cost(300);
12303   format %{ "String Compare char[] $str1,$cnt1,$str2,$cnt2 -> $result \t// KILL $tmp" %}
12304   ins_encode %{
12305     __ string_compare($str1$$Register, $str2$$Register,
12306                       $cnt1$$Register, $cnt2$$Register,
12307                       $tmp$$Register,
12308                       $result$$Register, StrIntrinsicNode::UU);
12309   %}
12310   ins_pipe(pipe_class_default);
12311 %}
12312 
12313 instruct string_compareLU(rarg1RegP str1, rarg2RegP str2, rarg3RegI cnt1, rarg4RegI cnt2, iRegIdst result,
12314                           iRegIdst tmp, regCTR ctr, flagsRegCR0 cr0) %{
12315   predicate(((StrCompNode*)n)->encoding() == StrIntrinsicNode::LU);
12316   match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
12317   effect(TEMP_DEF result, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL ctr, KILL cr0, TEMP tmp);
12318   ins_cost(300);
12319   format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result \t// KILL $tmp" %}
12320   ins_encode %{
12321     __ string_compare($str1$$Register, $str2$$Register,
12322                       $cnt1$$Register, $cnt2$$Register,
12323                       $tmp$$Register,
12324                       $result$$Register, StrIntrinsicNode::LU);
12325   %}
12326   ins_pipe(pipe_class_default);
12327 %}
12328 
12329 instruct string_compareUL(rarg1RegP str1, rarg2RegP str2, rarg3RegI cnt1, rarg4RegI cnt2, iRegIdst result,
12330                           iRegIdst tmp, regCTR ctr, flagsRegCR0 cr0) %{
12331   predicate(((StrCompNode*)n)->encoding() == StrIntrinsicNode::UL);
12332   match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
12333   effect(TEMP_DEF result, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL ctr, KILL cr0, TEMP tmp);
12334   ins_cost(300);
12335   format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result \t// KILL $tmp" %}
12336   ins_encode %{
12337     __ string_compare($str2$$Register, $str1$$Register,
12338                       $cnt2$$Register, $cnt1$$Register,
12339                       $tmp$$Register,
12340                       $result$$Register, StrIntrinsicNode::UL);
12341   %}
12342   ins_pipe(pipe_class_default);
12343 %}
12344 
12345 instruct string_equalsL(rarg1RegP str1, rarg2RegP str2, rarg3RegI cnt, iRegIdst result,
12346                         iRegIdst tmp, regCTR ctr, flagsRegCR0 cr0) %{
12347   predicate(((StrEqualsNode*)n)->encoding() == StrIntrinsicNode::LL);
12348   match(Set result (StrEquals (Binary str1 str2) cnt));
12349   effect(TEMP_DEF result, USE_KILL str1, USE_KILL str2, USE_KILL cnt, TEMP tmp, KILL ctr, KILL cr0);
12350   ins_cost(300);
12351   format %{ "String Equals byte[] $str1,$str2,$cnt -> $result \t// KILL $tmp" %}
12352   ins_encode %{
12353     __ array_equals(false, $str1$$Register, $str2$$Register,
12354                     $cnt$$Register, $tmp$$Register,
12355                     $result$$Register, true /* byte */);
12356   %}
12357   ins_pipe(pipe_class_default);
12358 %}
12359 
12360 instruct string_equalsU(rarg1RegP str1, rarg2RegP str2, rarg3RegI cnt, iRegIdst result,
12361                         iRegIdst tmp, regCTR ctr, flagsRegCR0 cr0) %{
12362   predicate(((StrEqualsNode*)n)->encoding() == StrIntrinsicNode::UU);
12363   match(Set result (StrEquals (Binary str1 str2) cnt));
12364   effect(TEMP_DEF result, USE_KILL str1, USE_KILL str2, USE_KILL cnt, TEMP tmp, KILL ctr, KILL cr0);
12365   ins_cost(300);
12366   format %{ "String Equals char[]  $str1,$str2,$cnt -> $result \t// KILL $tmp" %}
12367   ins_encode %{
12368     __ array_equals(false, $str1$$Register, $str2$$Register,
12369                     $cnt$$Register, $tmp$$Register,
12370                     $result$$Register, false /* byte */);
12371   %}
12372   ins_pipe(pipe_class_default);
12373 %}
12374 
12375 instruct array_equalsB(rarg1RegP ary1, rarg2RegP ary2, iRegIdst result,
12376                        iRegIdst tmp1, iRegIdst tmp2, regCTR ctr, flagsRegCR0 cr0, flagsRegCR0 cr1) %{
12377   predicate(((AryEqNode*)n)->encoding() == StrIntrinsicNode::LL);
12378   match(Set result (AryEq ary1 ary2));
12379   effect(TEMP_DEF result, USE_KILL ary1, USE_KILL ary2, TEMP tmp1, TEMP tmp2, KILL ctr, KILL cr0, KILL cr1);
12380   ins_cost(300);
12381   format %{ "Array Equals $ary1,$ary2 -> $result \t// KILL $tmp1,$tmp2" %}
12382   ins_encode %{
12383     __ array_equals(true, $ary1$$Register, $ary2$$Register,
12384                     $tmp1$$Register, $tmp2$$Register,
12385                     $result$$Register, true /* byte */);
12386   %}
12387   ins_pipe(pipe_class_default);
12388 %}
12389 
12390 instruct array_equalsC(rarg1RegP ary1, rarg2RegP ary2, iRegIdst result,
12391                        iRegIdst tmp1, iRegIdst tmp2, regCTR ctr, flagsRegCR0 cr0, flagsRegCR0 cr1) %{
12392   predicate(((AryEqNode*)n)->encoding() == StrIntrinsicNode::UU);
12393   match(Set result (AryEq ary1 ary2));
12394   effect(TEMP_DEF result, USE_KILL ary1, USE_KILL ary2, TEMP tmp1, TEMP tmp2, KILL ctr, KILL cr0, KILL cr1);
12395   ins_cost(300);
12396   format %{ "Array Equals $ary1,$ary2 -> $result \t// KILL $tmp1,$tmp2" %}
12397   ins_encode %{
12398     __ array_equals(true, $ary1$$Register, $ary2$$Register,
12399                     $tmp1$$Register, $tmp2$$Register,
12400                     $result$$Register, false /* byte */);
12401   %}
12402   ins_pipe(pipe_class_default);
12403 %}
12404 
12405 instruct indexOf_imm1_char_U(iRegIdst result, iRegPsrc haystack, iRegIsrc haycnt,
12406                              immP needleImm, immL offsetImm, immI_1 needlecntImm,
12407                              iRegIdst tmp1, iRegIdst tmp2,
12408                              flagsRegCR0 cr0, flagsRegCR1 cr1, regCTR ctr) %{
12409   match(Set result (StrIndexOf (Binary haystack haycnt) (Binary (AddP needleImm offsetImm) needlecntImm)));
12410   effect(TEMP tmp1, TEMP tmp2, KILL cr0, KILL cr1, KILL ctr);
12411   // Required for EA: check if it is still a type_array.
12412   predicate(((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UU);
12413   ins_cost(150);
12414 
12415   format %{ "String IndexOf CSCL1 $haystack[0..$haycnt], $needleImm+$offsetImm[0..$needlecntImm]"
12416             "-> $result \t// KILL $haycnt, $tmp1, $tmp2, $cr0, $cr1" %}
12417 
12418   ins_encode %{
12419     immPOper *needleOper = (immPOper *)$needleImm;
12420     const TypeOopPtr *t = needleOper->type()->isa_oopptr();
12421     ciTypeArray* needle_values = t->const_oop()->as_type_array();  // Pointer to live char *
12422     jchar chr;
12423 #ifdef VM_LITTLE_ENDIAN
12424     chr = (((jchar)(unsigned char)needle_values->element_value(1).as_byte()) << 8) |
12425            ((jchar)(unsigned char)needle_values->element_value(0).as_byte());
12426 #else
12427     chr = (((jchar)(unsigned char)needle_values->element_value(0).as_byte()) << 8) |
12428            ((jchar)(unsigned char)needle_values->element_value(1).as_byte());
12429 #endif
12430     __ string_indexof_char($result$$Register,
12431                            $haystack$$Register, $haycnt$$Register,
12432                            R0, chr,
12433                            $tmp1$$Register, $tmp2$$Register, false /*is_byte*/);
12434   %}
12435   ins_pipe(pipe_class_compare);
12436 %}
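
// The jchar above is assembled from the two constant needle bytes in memory
// order: on a little-endian VM byte 0 supplies the low half of the UTF-16
// code unit, on big-endian the high half. The resulting 16-bit value is the
// character string_indexof_char scans for.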
12437 
12438 instruct indexOf_imm1_char_L(iRegIdst result, iRegPsrc haystack, iRegIsrc haycnt,
12439                              immP needleImm, immL offsetImm, immI_1 needlecntImm,
12440                              iRegIdst tmp1, iRegIdst tmp2,
12441                              flagsRegCR0 cr0, flagsRegCR1 cr1, regCTR ctr) %{
12442   match(Set result (StrIndexOf (Binary haystack haycnt) (Binary (AddP needleImm offsetImm) needlecntImm)));
12443   effect(TEMP tmp1, TEMP tmp2, KILL cr0, KILL cr1, KILL ctr);
12444   // Required for EA: check if it is still a type_array.
12445   predicate(((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::LL);
12446   ins_cost(150);
12447 
12448   format %{ "String IndexOf CSCL1 $haystack[0..$haycnt], $needleImm+$offsetImm[0..$needlecntImm]"
12449             "-> $result \t// KILL $haycnt, $tmp1, $tmp2, $cr0, $cr1" %}
12450 
12451   ins_encode %{
12452     immPOper *needleOper = (immPOper *)$needleImm;
12453     const TypeOopPtr *t = needleOper->type()->isa_oopptr();
12454     ciTypeArray* needle_values = t->const_oop()->as_type_array();  // Pointer to live char *
12455     jchar chr = (jchar)needle_values->element_value(0).as_byte();
12456     __ string_indexof_char($result$$Register,
12457                            $haystack$$Register, $haycnt$$Register,
12458                            R0, chr,
12459                            $tmp1$$Register, $tmp2$$Register, true /*is_byte*/);
12460   %}
12461   ins_pipe(pipe_class_compare);
12462 %}
12463 
12464 instruct indexOf_imm1_char_UL(iRegIdst result, iRegPsrc haystack, iRegIsrc haycnt,
12465                               immP needleImm, immL offsetImm, immI_1 needlecntImm,
12466                               iRegIdst tmp1, iRegIdst tmp2,
12467                               flagsRegCR0 cr0, flagsRegCR1 cr1, regCTR ctr) %{
12468   match(Set result (StrIndexOf (Binary haystack haycnt) (Binary (AddP needleImm offsetImm) needlecntImm)));
12469   effect(TEMP tmp1, TEMP tmp2, KILL cr0, KILL cr1, KILL ctr);
12470   // Required for EA: check if it is still a type_array.
12471   predicate(((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UL);
12472   ins_cost(150);
12473 
12474   format %{ "String IndexOf CSCL1 $haystack[0..$haycnt], $needleImm+$offsetImm[0..$needlecntImm]"
12475             "-> $result \t// KILL $haycnt, $tmp1, $tmp2, $cr0, $cr1" %}
12476 
12477   ins_encode %{
12478     immPOper *needleOper = (immPOper *)$needleImm;
12479     const TypeOopPtr *t = needleOper->type()->isa_oopptr();
12480     ciTypeArray* needle_values = t->const_oop()->as_type_array();  // Pointer to live char *
12481     jchar chr = (jchar)needle_values->element_value(0).as_byte();
12482     __ string_indexof_char($result$$Register,
12483                            $haystack$$Register, $haycnt$$Register,
12484                            R0, chr,
12485                            $tmp1$$Register, $tmp2$$Register, false /*is_byte*/);
12486   %}
12487   ins_pipe(pipe_class_compare);
12488 %}
12489 
12490 instruct indexOf_imm1_U(iRegIdst result, iRegPsrc haystack, iRegIsrc haycnt,
12491                         rscratch2RegP needle, immI_1 needlecntImm,
12492                         iRegIdst tmp1, iRegIdst tmp2,
12493                         flagsRegCR0 cr0, flagsRegCR1 cr1, regCTR ctr) %{
12494   match(Set result (StrIndexOf (Binary haystack haycnt) (Binary needle needlecntImm)));
12495   effect(USE_KILL needle, TEMP tmp1, TEMP tmp2, KILL cr0, KILL cr1, KILL ctr);
12496   // Required for EA: check if it is still a type_array.
12497   predicate(((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UU &&
12498             n->in(3)->in(1)->bottom_type()->is_aryptr()->const_oop() &&
12499             n->in(3)->in(1)->bottom_type()->is_aryptr()->const_oop()->is_type_array());
12500   ins_cost(180);
12501 
12502   format %{ "String IndexOf SCL1 $haystack[0..$haycnt], $needle[0..$needlecntImm]"
12503             " -> $result \t// KILL $haycnt, $needle, $tmp1, $tmp2, $cr0, $cr1" %}
12504   ins_encode %{
12505     Node *ndl = in(operand_index($needle));  // The node that defines needle.
12506     ciTypeArray* needle_values = ndl->bottom_type()->is_aryptr()->const_oop()->as_type_array();
12507     guarantee(needle_values, "sanity");
12508     jchar chr;
12509 #ifdef VM_LITTLE_ENDIAN
12510     chr = (((jchar)(unsigned char)needle_values->element_value(1).as_byte()) << 8) |
12511            ((jchar)(unsigned char)needle_values->element_value(0).as_byte());
12512 #else
12513     chr = (((jchar)(unsigned char)needle_values->element_value(0).as_byte()) << 8) |
12514            ((jchar)(unsigned char)needle_values->element_value(1).as_byte());
12515 #endif
12516     __ string_indexof_char($result$$Register,
12517                            $haystack$$Register, $haycnt$$Register,
12518                            R0, chr,
12519                            $tmp1$$Register, $tmp2$$Register, false /*is_byte*/);
12520   %}
12521   ins_pipe(pipe_class_compare);
12522 %}
12523 
12524 instruct indexOf_imm1_L(iRegIdst result, iRegPsrc haystack, iRegIsrc haycnt,
12525                         rscratch2RegP needle, immI_1 needlecntImm,
12526                         iRegIdst tmp1, iRegIdst tmp2,
12527                         flagsRegCR0 cr0, flagsRegCR1 cr1, regCTR ctr) %{
12528   match(Set result (StrIndexOf (Binary haystack haycnt) (Binary needle needlecntImm)));
12529   effect(USE_KILL needle, TEMP tmp1, TEMP tmp2, KILL cr0, KILL cr1, KILL ctr);
12530   // Required for EA: check if it is still a type_array.
12531   predicate(((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::LL &&
12532             n->in(3)->in(1)->bottom_type()->is_aryptr()->const_oop() &&
12533             n->in(3)->in(1)->bottom_type()->is_aryptr()->const_oop()->is_type_array());
12534   ins_cost(180);
12535 
12536   format %{ "String IndexOf SCL1 $haystack[0..$haycnt], $needle[0..$needlecntImm]"
12537             " -> $result \t// KILL $haycnt, $needle, $tmp1, $tmp2, $cr0, $cr1" %}
12538   ins_encode %{
12539     Node *ndl = in(operand_index($needle));  // The node that defines needle.
12540     ciTypeArray* needle_values = ndl->bottom_type()->is_aryptr()->const_oop()->as_type_array();
12541     guarantee(needle_values, "sanity");
12542     jchar chr = (jchar)needle_values->element_value(0).as_byte();
12543     __ string_indexof_char($result$$Register,
12544                            $haystack$$Register, $haycnt$$Register,
12545                            R0, chr,
12546                            $tmp1$$Register, $tmp2$$Register, true /*is_byte*/);
12547   %}
12548   ins_pipe(pipe_class_compare);
12549 %}
12550 
12551 instruct indexOf_imm1_UL(iRegIdst result, iRegPsrc haystack, iRegIsrc haycnt,
12552                          rscratch2RegP needle, immI_1 needlecntImm,
12553                          iRegIdst tmp1, iRegIdst tmp2,
12554                          flagsRegCR0 cr0, flagsRegCR1 cr1, regCTR ctr) %{
12555   match(Set result (StrIndexOf (Binary haystack haycnt) (Binary needle needlecntImm)));
12556   effect(USE_KILL needle, TEMP tmp1, TEMP tmp2, KILL cr0, KILL cr1, KILL ctr);
12557   // Required for EA: check if it is still a type_array.
12558   predicate(((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UL &&
12559             n->in(3)->in(1)->bottom_type()->is_aryptr()->const_oop() &&
12560             n->in(3)->in(1)->bottom_type()->is_aryptr()->const_oop()->is_type_array());
12561   ins_cost(180);
12562 
12563   format %{ "String IndexOf SCL1 $haystack[0..$haycnt], $needle[0..$needlecntImm]"
12564             " -> $result \t// KILL $haycnt, $needle, $tmp1, $tmp2, $cr0, $cr1" %}
12565   ins_encode %{
12566     Node *ndl = in(operand_index($needle));  // The node that defines needle.
12567     ciTypeArray* needle_values = ndl->bottom_type()->is_aryptr()->const_oop()->as_type_array();
12568     guarantee(needle_values, "sanity");
12569     jchar chr = (jchar)needle_values->element_value(0).as_byte();
12570     __ string_indexof_char($result$$Register,
12571                            $haystack$$Register, $haycnt$$Register,
12572                            R0, chr,
12573                            $tmp1$$Register, $tmp2$$Register, false /*is_byte*/);
12574   %}
12575   ins_pipe(pipe_class_compare);
12576 %}
12577 
12578 instruct indexOfChar_U(iRegIdst result, iRegPsrc haystack, iRegIsrc haycnt,
12579                        iRegIsrc ch, iRegIdst tmp1, iRegIdst tmp2,
12580                        flagsRegCR0 cr0, flagsRegCR1 cr1, regCTR ctr) %{
12581   match(Set result (StrIndexOfChar (Binary haystack haycnt) ch));
12582   effect(TEMP tmp1, TEMP tmp2, KILL cr0, KILL cr1, KILL ctr);
12583   predicate(((StrIndexOfCharNode*)n)->encoding() == StrIntrinsicNode::U);
12584   ins_cost(180);
12585 
12586   format %{ "StringUTF16 IndexOfChar $haystack[0..$haycnt], $ch"
12587             " -> $result \t// KILL $haycnt, $tmp1, $tmp2, $cr0, $cr1" %}
12588   ins_encode %{
12589     __ string_indexof_char($result$$Register,
12590                            $haystack$$Register, $haycnt$$Register,
12591                            $ch$$Register, 0 /* this is not used if the character is already in a register */,
12592                            $tmp1$$Register, $tmp2$$Register, false /*is_byte*/);
12593   %}
12594   ins_pipe(pipe_class_compare);
12595 %}
12596 
12597 instruct indexOfChar_L(iRegIdst result, iRegPsrc haystack, iRegIsrc haycnt,
12598                        iRegIsrc ch, iRegIdst tmp1, iRegIdst tmp2,
12599                        flagsRegCR0 cr0, flagsRegCR1 cr1, regCTR ctr) %{
12600   match(Set result (StrIndexOfChar (Binary haystack haycnt) ch));
12601   effect(TEMP tmp1, TEMP tmp2, KILL cr0, KILL cr1, KILL ctr);
12602   predicate(((StrIndexOfCharNode*)n)->encoding() == StrIntrinsicNode::L);
12603   ins_cost(180);
12604 
12605   format %{ "StringLatin1 IndexOfChar $haystack[0..$haycnt], $ch"
12606             " -> $result \t// KILL $haycnt, $tmp1, $tmp2, $cr0, $cr1" %}
12607   ins_encode %{
12608     __ string_indexof_char($result$$Register,
12609                            $haystack$$Register, $haycnt$$Register,
12610                            $ch$$Register, 0 /* this is not used if the character is already in a register */,
12611                            $tmp1$$Register, $tmp2$$Register, true /*is_byte*/);
12612   %}
12613   ins_pipe(pipe_class_compare);
12614 %}
12615 
12616 instruct indexOf_imm_U(iRegIdst result, iRegPsrc haystack, rscratch1RegI haycnt,
12617                        iRegPsrc needle, uimmI15 needlecntImm,
12618                        iRegIdst tmp1, iRegIdst tmp2, iRegIdst tmp3, iRegIdst tmp4, iRegIdst tmp5,
12619                        flagsRegCR0 cr0, flagsRegCR1 cr1, flagsRegCR6 cr6, regCTR ctr) %{
12620   match(Set result (StrIndexOf (Binary haystack haycnt) (Binary needle needlecntImm)));
12621   effect(USE_KILL haycnt, /* better: TDEF haycnt, */ TEMP_DEF result,
12622          TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, TEMP tmp5, KILL cr0, KILL cr1, KILL cr6, KILL ctr);
12623   // Required for EA: check if it is still a type_array.
12624   predicate(((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UU &&
12625             n->in(3)->in(1)->bottom_type()->is_aryptr()->const_oop() &&
12626             n->in(3)->in(1)->bottom_type()->is_aryptr()->const_oop()->is_type_array());
12627   ins_cost(250);
12628 
12629   format %{ "String IndexOf SCL $haystack[0..$haycnt], $needle[0..$needlecntImm]"
12630             " -> $result \t// KILL $haycnt, $tmp1, $tmp2, $tmp3, $tmp4, $tmp5, $cr0, $cr1" %}
12631   ins_encode %{
12632     Node *ndl = in(operand_index($needle));  // The node that defines needle.
12633     ciTypeArray* needle_values = ndl->bottom_type()->is_aryptr()->const_oop()->as_type_array();
12634 
12635     __ string_indexof($result$$Register,
12636                       $haystack$$Register, $haycnt$$Register,
12637                       $needle$$Register, needle_values, $tmp5$$Register, $needlecntImm$$constant,
12638                       $tmp1$$Register, $tmp2$$Register, $tmp3$$Register, $tmp4$$Register, StrIntrinsicNode::UU);
12639   %}
12640   ins_pipe(pipe_class_compare);
12641 %}
12642 
12643 instruct indexOf_imm_L(iRegIdst result, iRegPsrc haystack, rscratch1RegI haycnt,
12644                        iRegPsrc needle, uimmI15 needlecntImm,
12645                        iRegIdst tmp1, iRegIdst tmp2, iRegIdst tmp3, iRegIdst tmp4, iRegIdst tmp5,
12646                        flagsRegCR0 cr0, flagsRegCR1 cr1, flagsRegCR6 cr6, regCTR ctr) %{
12647   match(Set result (StrIndexOf (Binary haystack haycnt) (Binary needle needlecntImm)));
12648   effect(USE_KILL haycnt, /* better: TDEF haycnt, */ TEMP_DEF result,
12649          TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, TEMP tmp5, KILL cr0, KILL cr1, KILL cr6, KILL ctr);
12650   // Required for EA: check if it is still a type_array.
12651   predicate(((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::LL &&
12652             n->in(3)->in(1)->bottom_type()->is_aryptr()->const_oop() &&
12653             n->in(3)->in(1)->bottom_type()->is_aryptr()->const_oop()->is_type_array());
12654   ins_cost(250);
12655 
12656   format %{ "String IndexOf SCL $haystack[0..$haycnt], $needle[0..$needlecntImm]"
12657             " -> $result \t// KILL $haycnt, $tmp1, $tmp2, $tmp3, $tmp4, $tmp5, $cr0, $cr1" %}
12658   ins_encode %{
12659     Node *ndl = in(operand_index($needle));  // The node that defines needle.
12660     ciTypeArray* needle_values = ndl->bottom_type()->is_aryptr()->const_oop()->as_type_array();
12661 
12662     __ string_indexof($result$$Register,
12663                       $haystack$$Register, $haycnt$$Register,
12664                       $needle$$Register, needle_values, $tmp5$$Register, $needlecntImm$$constant,
12665                       $tmp1$$Register, $tmp2$$Register, $tmp3$$Register, $tmp4$$Register, StrIntrinsicNode::LL);
12666   %}
12667   ins_pipe(pipe_class_compare);
12668 %}
12669 
12670 instruct indexOf_imm_UL(iRegIdst result, iRegPsrc haystack, rscratch1RegI haycnt,
12671                         iRegPsrc needle, uimmI15 needlecntImm,
12672                         iRegIdst tmp1, iRegIdst tmp2, iRegIdst tmp3, iRegIdst tmp4, iRegIdst tmp5,
12673                         flagsRegCR0 cr0, flagsRegCR1 cr1, flagsRegCR6 cr6, regCTR ctr) %{
12674   match(Set result (StrIndexOf (Binary haystack haycnt) (Binary needle needlecntImm)));
12675   effect(USE_KILL haycnt, /* better: TDEF haycnt, */ TEMP_DEF result,
12676          TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, TEMP tmp5, KILL cr0, KILL cr1, KILL cr6, KILL ctr);
12677   // Required for EA: check if it is still a type_array.
12678   predicate(((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UL &&
12679             n->in(3)->in(1)->bottom_type()->is_aryptr()->const_oop() &&
12680             n->in(3)->in(1)->bottom_type()->is_aryptr()->const_oop()->is_type_array());
12681   ins_cost(250);
12682 
12683   format %{ "String IndexOf SCL $haystack[0..$haycnt], $needle[0..$needlecntImm]"
12684             " -> $result \t// KILL $haycnt, $tmp1, $tmp2, $tmp3, $tmp4, $tmp5, $cr0, $cr1" %}
12685   ins_encode %{
12686     Node *ndl = in(operand_index($needle));  // The node that defines needle.
12687     ciTypeArray* needle_values = ndl->bottom_type()->is_aryptr()->const_oop()->as_type_array();
12688 
12689     __ string_indexof($result$$Register,
12690                       $haystack$$Register, $haycnt$$Register,
12691                       $needle$$Register, needle_values, $tmp5$$Register, $needlecntImm$$constant,
12692                       $tmp1$$Register, $tmp2$$Register, $tmp3$$Register, $tmp4$$Register, StrIntrinsicNode::UL);
12693   %}
12694   ins_pipe(pipe_class_compare);
12695 %}
12696 
12697 instruct indexOf_U(iRegIdst result, iRegPsrc haystack, rscratch1RegI haycnt, iRegPsrc needle, rscratch2RegI needlecnt,
12698                    iRegLdst tmp1, iRegLdst tmp2, iRegLdst tmp3, iRegLdst tmp4,
12699                    flagsRegCR0 cr0, flagsRegCR1 cr1, flagsRegCR6 cr6, regCTR ctr) %{
12700   match(Set result (StrIndexOf (Binary haystack haycnt) (Binary needle needlecnt)));
12701   effect(USE_KILL haycnt, USE_KILL needlecnt, /*better: TDEF haycnt, TDEF needlecnt,*/
12702          TEMP_DEF result,
12703          TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, KILL cr0, KILL cr1, KILL cr6, KILL ctr);
12704   predicate(((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UU);
12705   ins_cost(300);
12706 
12707   format %{ "String IndexOf $haystack[0..$haycnt], $needle[0..$needlecnt]"
12708              " -> $result \t// KILL $haycnt, $needlecnt, $tmp1, $tmp2, $tmp3, $tmp4, $cr0, $cr1" %}
12709   ins_encode %{
12710     __ string_indexof($result$$Register,
12711                       $haystack$$Register, $haycnt$$Register,
12712                       $needle$$Register, NULL, $needlecnt$$Register, 0,  // needlecnt not constant.
12713                       $tmp1$$Register, $tmp2$$Register, $tmp3$$Register, $tmp4$$Register, StrIntrinsicNode::UU);
12714   %}
12715   ins_pipe(pipe_class_compare);
12716 %}
12717 
12718 instruct indexOf_L(iRegIdst result, iRegPsrc haystack, rscratch1RegI haycnt, iRegPsrc needle, rscratch2RegI needlecnt,
12719                    iRegLdst tmp1, iRegLdst tmp2, iRegLdst tmp3, iRegLdst tmp4,
12720                    flagsRegCR0 cr0, flagsRegCR1 cr1, flagsRegCR6 cr6, regCTR ctr) %{
12721   match(Set result (StrIndexOf (Binary haystack haycnt) (Binary needle needlecnt)));
12722   effect(USE_KILL haycnt, USE_KILL needlecnt, /*better: TDEF haycnt, TDEF needlecnt,*/
12723          TEMP_DEF result,
12724          TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, KILL cr0, KILL cr1, KILL cr6, KILL ctr);
12725   predicate(((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::LL);
12726   ins_cost(300);
12727 
12728   format %{ "String IndexOf $haystack[0..$haycnt], $needle[0..$needlecnt]"
12729              " -> $result \t// KILL $haycnt, $needlecnt, $tmp1, $tmp2, $tmp3, $tmp4, $cr0, $cr1" %}
12730   ins_encode %{
12731     __ string_indexof($result$$Register,
12732                       $haystack$$Register, $haycnt$$Register,
12733                       $needle$$Register, NULL, $needlecnt$$Register, 0,  // needlecnt not constant.
12734                       $tmp1$$Register, $tmp2$$Register, $tmp3$$Register, $tmp4$$Register, StrIntrinsicNode::LL);
12735   %}
12736   ins_pipe(pipe_class_compare);
12737 %}
12738 
12739 instruct indexOf_UL(iRegIdst result, iRegPsrc haystack, rscratch1RegI haycnt, iRegPsrc needle, rscratch2RegI needlecnt,
12740                     iRegLdst tmp1, iRegLdst tmp2, iRegLdst tmp3, iRegLdst tmp4,
12741                     flagsRegCR0 cr0, flagsRegCR1 cr1, flagsRegCR6 cr6, regCTR ctr) %{
12742   match(Set result (StrIndexOf (Binary haystack haycnt) (Binary needle needlecnt)));
12743   effect(USE_KILL haycnt, USE_KILL needlecnt, /*better: TDEF haycnt, TDEF needlecnt,*/
12744          TEMP_DEF result,
12745          TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, KILL cr0, KILL cr1, KILL cr6, KILL ctr);
12746   predicate(((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UL);
12747   ins_cost(300);
12748 
12749   format %{ "String IndexOf $haystack[0..$haycnt], $needle[0..$needlecnt]"
12750              " -> $result \t// KILL $haycnt, $needlecnt, $tmp1, $tmp2, $tmp3, $tmp4, $cr0, $cr1" %}
12751   ins_encode %{
12752     __ string_indexof($result$$Register,
12753                       $haystack$$Register, $haycnt$$Register,
12754                       $needle$$Register, NULL, $needlecnt$$Register, 0,  // needlecnt not constant.
12755                       $tmp1$$Register, $tmp2$$Register, $tmp3$$Register, $tmp4$$Register, StrIntrinsicNode::UL);
12756   %}
12757   ins_pipe(pipe_class_compare);
12758 %}
12759 
12760 // char[] to byte[] compression
12761 instruct string_compress(rarg1RegP src, rarg2RegP dst, iRegIsrc len, iRegIdst result, iRegLdst tmp1,
12762                          iRegLdst tmp2, iRegLdst tmp3, iRegLdst tmp4, iRegLdst tmp5, regCTR ctr, flagsRegCR0 cr0) %{
12763   match(Set result (StrCompressedCopy src (Binary dst len)));
12764   effect(TEMP_DEF result, TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, TEMP tmp5,
12765          USE_KILL src, USE_KILL dst, KILL ctr, KILL cr0);
12766   ins_cost(300);
12767   format %{ "String Compress $src,$dst,$len -> $result \t// KILL $tmp1, $tmp2, $tmp3, $tmp4, $tmp5" %}
12768   ins_encode %{
12769     Label Lskip, Ldone;
12770     __ li($result$$Register, 0);
12771     __ string_compress_16($src$$Register, $dst$$Register, $len$$Register, $tmp1$$Register,
12772                           $tmp2$$Register, $tmp3$$Register, $tmp4$$Register, $tmp5$$Register, Ldone);
12773     __ rldicl_($tmp1$$Register, $len$$Register, 0, 64-3); // Remaining characters.
12774     __ beq(CCR0, Lskip);
12775     __ string_compress($src$$Register, $dst$$Register, $tmp1$$Register, $tmp2$$Register, Ldone);
12776     __ bind(Lskip);
12777     __ mr($result$$Register, $len$$Register);
12778     __ bind(Ldone);
12779   %}
12780   ins_pipe(pipe_class_default);
12781 %}
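
// In the string_compress encoding above, rldicl_(tmp1, len, 0, 64-3) keeps
// only the low three bits of len (len & 7), i.e. the characters left over for
// the scalar tail loop once the bulk string_compress_16 loop is done.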
12782 
12783 // byte[] to char[] inflation
12784 instruct string_inflate(Universe dummy, rarg1RegP src, rarg2RegP dst, iRegIsrc len, iRegLdst tmp1,
12785                         iRegLdst tmp2, iRegLdst tmp3, iRegLdst tmp4, iRegLdst tmp5, regCTR ctr, flagsRegCR0 cr0) %{
12786   match(Set dummy (StrInflatedCopy src (Binary dst len)));
12787   effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, TEMP tmp5, USE_KILL src, USE_KILL dst, KILL ctr, KILL cr0);
12788   ins_cost(300);
12789   format %{ "String Inflate $src,$dst,$len \t// KILL $tmp1, $tmp2, $tmp3, $tmp4, $tmp5" %}
12790   ins_encode %{
12791     Label Ldone;
12792     __ string_inflate_16($src$$Register, $dst$$Register, $len$$Register, $tmp1$$Register,
12793                          $tmp2$$Register, $tmp3$$Register, $tmp4$$Register, $tmp5$$Register);
12794     __ rldicl_($tmp1$$Register, $len$$Register, 0, 64-3); // Remaining characters.
12795     __ beq(CCR0, Ldone);
12796     __ string_inflate($src$$Register, $dst$$Register, $tmp1$$Register, $tmp2$$Register);
12797     __ bind(Ldone);
12798   %}
12799   ins_pipe(pipe_class_default);
12800 %}
12801 
12802 // StringCoding.java intrinsics
12803 instruct count_positives(iRegPsrc ary1, iRegIsrc len, iRegIdst result, iRegLdst tmp1, iRegLdst tmp2,
12804                          regCTR ctr, flagsRegCR0 cr0)
12805 %{
12806   match(Set result (CountPositives ary1 len));
12807   effect(TEMP_DEF result, TEMP tmp1, TEMP tmp2, KILL ctr, KILL cr0);
12808   ins_cost(300);
12809   format %{ "count positives byte[] $ary1,$len -> $result \t// KILL $tmp1, $tmp2" %}
12810   ins_encode %{
12811     __ count_positives($ary1$$Register, $len$$Register, $result$$Register,
12812                        $tmp1$$Register, $tmp2$$Register);
12813   %}
12814   ins_pipe(pipe_class_default);
12815 %}
12816 
12817 // encode char[] to byte[] in ISO_8859_1
12818 instruct encode_iso_array(rarg1RegP src, rarg2RegP dst, iRegIsrc len, iRegIdst result, iRegLdst tmp1,
12819                           iRegLdst tmp2, iRegLdst tmp3, iRegLdst tmp4, iRegLdst tmp5, regCTR ctr, flagsRegCR0 cr0) %{
12820   predicate(!((EncodeISOArrayNode*)n)->is_ascii());
12821   match(Set result (EncodeISOArray src (Binary dst len)));
12822   effect(TEMP_DEF result, TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, TEMP tmp5,
12823          USE_KILL src, USE_KILL dst, KILL ctr, KILL cr0);
12824   ins_cost(300);
12825   format %{ "Encode iso array $src,$dst,$len -> $result \t// KILL $tmp1, $tmp2, $tmp3, $tmp4, $tmp5" %}
12826   ins_encode %{
12827     __ encode_iso_array($src$$Register, $dst$$Register, $len$$Register, $tmp1$$Register, $tmp2$$Register,
12828                         $tmp3$$Register, $tmp4$$Register, $tmp5$$Register, $result$$Register, false);
12829   %}
12830   ins_pipe(pipe_class_default);
12831 %}
12832 
12833 // encode char[] to byte[] in ASCII
12834 instruct encode_ascii_array(rarg1RegP src, rarg2RegP dst, iRegIsrc len, iRegIdst result, iRegLdst tmp1,
12835                           iRegLdst tmp2, iRegLdst tmp3, iRegLdst tmp4, iRegLdst tmp5, regCTR ctr, flagsRegCR0 cr0) %{
12836   predicate(((EncodeISOArrayNode*)n)->is_ascii());
12837   match(Set result (EncodeISOArray src (Binary dst len)));
12838   effect(TEMP_DEF result, TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, TEMP tmp5,
12839          USE_KILL src, USE_KILL dst, KILL ctr, KILL cr0);
12840   ins_cost(300);
12841   format %{ "Encode ascii array $src,$dst,$len -> $result \t// KILL $tmp1, $tmp2, $tmp3, $tmp4, $tmp5" %}
12842   ins_encode %{
12843     __ encode_iso_array($src$$Register, $dst$$Register, $len$$Register, $tmp1$$Register, $tmp2$$Register,
12844                         $tmp3$$Register, $tmp4$$Register, $tmp5$$Register, $result$$Register, true);
12845   %}
12846   ins_pipe(pipe_class_default);
12847 %}
12848 
12849 
12850 //---------- Min/Max Instructions ---------------------------------------------
12851 
12852 instruct minI_reg_reg_Ex(iRegIdst dst, iRegIsrc src1, iRegIsrc src2) %{
12853   match(Set dst (MinI src1 src2));
12854   ins_cost(DEFAULT_COST*6);
12855 
12856   expand %{
12857     iRegLdst src1s;
12858     iRegLdst src2s;
12859     iRegLdst diff;
12860     iRegLdst sm;
12861     iRegLdst doz; // difference or zero
12862     convI2L_reg(src1s, src1); // Ensure proper sign extension.
12863     convI2L_reg(src2s, src2); // Ensure proper sign extension.
12864     subL_reg_reg(diff, src2s, src1s);
    // The difference may need more than 32 bits, therefore we need signmaskL.
12866     signmask64L_regL(sm, diff);
12867     andL_reg_reg(doz, diff, sm); // <=0
12868     addI_regL_regL(dst, doz, src1s);
12869   %}
12870 %}
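
// A rough C sketch of the branch-free min computed by the expansion above
// (variable names are illustrative):
//
//   long d   = (long)src2 - (long)src1;  // sign-extended operands
//   long doz = d & (d >> 63);            // "difference or zero": d if d < 0, else 0
//   int  res = (int)((long)src1 + doz);  // src2 if src2 < src1, otherwise src1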
12871 
12872 instruct minI_reg_reg_isel(iRegIdst dst, iRegIsrc src1, iRegIsrc src2, flagsRegCR0 cr0) %{
12873   match(Set dst (MinI src1 src2));
12874   effect(KILL cr0);
12875   predicate(VM_Version::has_isel());
12876   ins_cost(DEFAULT_COST*2);
12877 
12878   ins_encode %{
12879     __ cmpw(CCR0, $src1$$Register, $src2$$Register);
12880     __ isel($dst$$Register, CCR0, Assembler::less, /*invert*/false, $src1$$Register, $src2$$Register);
12881   %}
12882   ins_pipe(pipe_class_default);
12883 %}
12884 
12885 instruct maxI_reg_reg_Ex(iRegIdst dst, iRegIsrc src1, iRegIsrc src2) %{
12886   match(Set dst (MaxI src1 src2));
12887   ins_cost(DEFAULT_COST*6);
12888 
12889   expand %{
12890     iRegLdst src1s;
12891     iRegLdst src2s;
12892     iRegLdst diff;
12893     iRegLdst sm;
12894     iRegLdst doz; // difference or zero
12895     convI2L_reg(src1s, src1); // Ensure proper sign extension.
12896     convI2L_reg(src2s, src2); // Ensure proper sign extension.
12897     subL_reg_reg(diff, src2s, src1s);
    // The difference may need more than 32 bits, therefore we need signmaskL.
12899     signmask64L_regL(sm, diff);
12900     andcL_reg_reg(doz, diff, sm); // >=0
12901     addI_regL_regL(dst, doz, src1s);
12902   %}
12903 %}
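
// The max expansion mirrors the min case, but uses andc so that
// doz = d & ~(d >> 63) keeps the difference only when it is non-negative;
// src1 + doz then yields src2 when src2 > src1 and src1 otherwise.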
12904 
12905 instruct maxI_reg_reg_isel(iRegIdst dst, iRegIsrc src1, iRegIsrc src2, flagsRegCR0 cr0) %{
12906   match(Set dst (MaxI src1 src2));
12907   effect(KILL cr0);
12908   predicate(VM_Version::has_isel());
12909   ins_cost(DEFAULT_COST*2);
12910 
12911   ins_encode %{
12912     __ cmpw(CCR0, $src1$$Register, $src2$$Register);
12913     __ isel($dst$$Register, CCR0, Assembler::greater, /*invert*/false, $src1$$Register, $src2$$Register);
12914   %}
12915   ins_pipe(pipe_class_default);
12916 %}
12917 
12918 //---------- Population Count Instructions ------------------------------------
12919 
12920 // Popcnt for Power7.
12921 instruct popCountI(iRegIdst dst, iRegIsrc src) %{
12922   match(Set dst (PopCountI src));
12923   predicate(UsePopCountInstruction && VM_Version::has_popcntw());
12924   ins_cost(DEFAULT_COST);
12925 
12926   format %{ "POPCNTW $dst, $src" %}
12927   size(4);
12928   ins_encode %{
12929     __ popcntw($dst$$Register, $src$$Register);
12930   %}
12931   ins_pipe(pipe_class_default);
12932 %}
12933 
12934 // Popcnt for Power7.
12935 instruct popCountL(iRegIdst dst, iRegLsrc src) %{
12936   predicate(UsePopCountInstruction && VM_Version::has_popcntw());
12937   match(Set dst (PopCountL src));
12938   ins_cost(DEFAULT_COST);
12939 
12940   format %{ "POPCNTD $dst, $src" %}
12941   size(4);
12942   ins_encode %{
12943     __ popcntd($dst$$Register, $src$$Register);
12944   %}
12945   ins_pipe(pipe_class_default);
12946 %}
12947 
12948 instruct countLeadingZerosI(iRegIdst dst, iRegIsrc src) %{
12949   match(Set dst (CountLeadingZerosI src));
12950   predicate(UseCountLeadingZerosInstructionsPPC64);  // See Matcher::match_rule_supported.
12951   ins_cost(DEFAULT_COST);
12952 
12953   format %{ "CNTLZW  $dst, $src" %}
12954   size(4);
12955   ins_encode %{
12956     __ cntlzw($dst$$Register, $src$$Register);
12957   %}
12958   ins_pipe(pipe_class_default);
12959 %}
12960 
12961 instruct countLeadingZerosL(iRegIdst dst, iRegLsrc src) %{
12962   match(Set dst (CountLeadingZerosL src));
12963   predicate(UseCountLeadingZerosInstructionsPPC64);  // See Matcher::match_rule_supported.
12964   ins_cost(DEFAULT_COST);
12965 
12966   format %{ "CNTLZD  $dst, $src" %}
12967   size(4);
12968   ins_encode %{
12969     __ cntlzd($dst$$Register, $src$$Register);
12970   %}
12971   ins_pipe(pipe_class_default);
12972 %}
12973 
12974 instruct countLeadingZerosP(iRegIdst dst, iRegPsrc src) %{
12975   // no match-rule, false predicate
12976   effect(DEF dst, USE src);
12977   predicate(false);
12978 
12979   format %{ "CNTLZD  $dst, $src" %}
12980   size(4);
12981   ins_encode %{
12982     __ cntlzd($dst$$Register, $src$$Register);
12983   %}
12984   ins_pipe(pipe_class_default);
12985 %}
12986 
12987 instruct countTrailingZerosI_Ex(iRegIdst dst, iRegIsrc src) %{
12988   match(Set dst (CountTrailingZerosI src));
12989   predicate(UseCountLeadingZerosInstructionsPPC64 && !UseCountTrailingZerosInstructionsPPC64);
12990   ins_cost(DEFAULT_COST);
12991 
12992   expand %{
12993     immI16 imm1 %{ (int)-1 %}
12994     immI16 imm2 %{ (int)32 %}
12995     immI_minus1 m1 %{ -1 %}
12996     iRegIdst tmpI1;
12997     iRegIdst tmpI2;
12998     iRegIdst tmpI3;
12999     addI_reg_imm16(tmpI1, src, imm1);
13000     andcI_reg_reg(tmpI2, src, m1, tmpI1);
13001     countLeadingZerosI(tmpI3, tmpI2);
13002     subI_imm16_reg(dst, imm2, tmpI3);
13003   %}
13004 %}
13005 
13006 instruct countTrailingZerosI_cnttzw(iRegIdst dst, iRegIsrc src) %{
13007   match(Set dst (CountTrailingZerosI src));
13008   predicate(UseCountTrailingZerosInstructionsPPC64);
13009   ins_cost(DEFAULT_COST);
13010 
13011   format %{ "CNTTZW  $dst, $src" %}
13012   size(4);
13013   ins_encode %{
13014     __ cnttzw($dst$$Register, $src$$Register);
13015   %}
13016   ins_pipe(pipe_class_default);
13017 %}
13018 
13019 instruct countTrailingZerosL_Ex(iRegIdst dst, iRegLsrc src) %{
13020   match(Set dst (CountTrailingZerosL src));
13021   predicate(UseCountLeadingZerosInstructionsPPC64 && !UseCountTrailingZerosInstructionsPPC64);
13022   ins_cost(DEFAULT_COST);
13023 
13024   expand %{
13025     immL16 imm1 %{ (long)-1 %}
13026     immI16 imm2 %{ (int)64 %}
13027     iRegLdst tmpL1;
13028     iRegLdst tmpL2;
13029     iRegIdst tmpL3;
13030     addL_reg_imm16(tmpL1, src, imm1);
13031     andcL_reg_reg(tmpL2, tmpL1, src);
13032     countLeadingZerosL(tmpL3, tmpL2);
13033     subI_imm16_reg(dst, imm2, tmpL3);
13034  %}
13035 %}
13036 
13037 instruct countTrailingZerosL_cnttzd(iRegIdst dst, iRegLsrc src) %{
13038   match(Set dst (CountTrailingZerosL src));
13039   predicate(UseCountTrailingZerosInstructionsPPC64);
13040   ins_cost(DEFAULT_COST);
13041 
13042   format %{ "CNTTZD  $dst, $src" %}
13043   size(4);
13044   ins_encode %{
13045     __ cnttzd($dst$$Register, $src$$Register);
13046   %}
13047   ins_pipe(pipe_class_default);
13048 %}
13049 
13050 // Expand nodes for byte_reverse_int.
13051 instruct insrwi_a(iRegIdst dst, iRegIsrc src, immI16 pos, immI16 shift) %{
13052   effect(DEF dst, USE src, USE pos, USE shift);
13053   predicate(false);
13054 
13055   format %{ "INSRWI  $dst, $src, $pos, $shift" %}
13056   size(4);
13057   ins_encode %{
13058     __ insrwi($dst$$Register, $src$$Register, $shift$$constant, $pos$$constant);
13059   %}
13060   ins_pipe(pipe_class_default);
13061 %}
13062 
13063 // As insrwi_a, but with USE_DEF.
13064 instruct insrwi(iRegIdst dst, iRegIsrc src, immI16 pos, immI16 shift) %{
13065   effect(USE_DEF dst, USE src, USE pos, USE shift);
13066   predicate(false);
13067 
13068   format %{ "INSRWI  $dst, $src, $pos, $shift" %}
13069   size(4);
13070   ins_encode %{
13071     __ insrwi($dst$$Register, $src$$Register, $shift$$constant, $pos$$constant);
13072   %}
13073   ins_pipe(pipe_class_default);
13074 %}
13075 
// Just slightly faster than the Java implementation.
13077 instruct bytes_reverse_int_Ex(iRegIdst dst, iRegIsrc src) %{
13078   match(Set dst (ReverseBytesI src));
13079   predicate(!UseByteReverseInstructions);
13080   ins_cost(7*DEFAULT_COST);
13081 
13082   expand %{
13083     immI16 imm24 %{ (int) 24 %}
13084     immI16 imm16 %{ (int) 16 %}
13085     immI16  imm8 %{ (int)  8 %}
13086     immI16  imm4 %{ (int)  4 %}
13087     immI16  imm0 %{ (int)  0 %}
13088     iRegLdst tmpI1;
13089     iRegLdst tmpI2;
13090     iRegLdst tmpI3;
13091 
13092     urShiftI_reg_imm(tmpI1, src, imm24);
13093     insrwi_a(dst, tmpI1, imm24, imm8);
13094     urShiftI_reg_imm(tmpI2, src, imm16);
13095     insrwi(dst, tmpI2, imm8, imm16);
13096     urShiftI_reg_imm(tmpI3, src, imm8);
13097     insrwi(dst, tmpI3, imm8, imm8);
13098     insrwi(dst, src, imm0, imm8);
13099   %}
13100 %}
13101 
13102 instruct bytes_reverse_int_vec(iRegIdst dst, iRegIsrc src, vecX tmpV) %{
13103   match(Set dst (ReverseBytesI src));
13104   predicate(UseVectorByteReverseInstructionsPPC64);
13105   effect(TEMP tmpV);
13106   ins_cost(DEFAULT_COST*3);
13107   size(12);
13108   format %{ "MTVSRWZ $tmpV, $src\n"
13109             "\tXXBRW   $tmpV, $tmpV\n"
13110             "\tMFVSRWZ $dst, $tmpV" %}
13111 
13112   ins_encode %{
13113     __ mtvsrwz($tmpV$$VectorSRegister, $src$$Register);
13114     __ xxbrw($tmpV$$VectorSRegister, $tmpV$$VectorSRegister);
13115     __ mfvsrwz($dst$$Register, $tmpV$$VectorSRegister);
13116   %}
13117   ins_pipe(pipe_class_default);
13118 %}
13119 
13120 instruct bytes_reverse_int(iRegIdst dst, iRegIsrc src) %{
13121   match(Set dst (ReverseBytesI src));
13122   predicate(UseByteReverseInstructions);
13123   ins_cost(DEFAULT_COST);
13124   size(4);
13125 
13126   format %{ "BRW  $dst, $src" %}
13127 
13128   ins_encode %{
13129     __ brw($dst$$Register, $src$$Register);
13130   %}
13131   ins_pipe(pipe_class_default);
13132 %}
13133 
13134 instruct bytes_reverse_long_Ex(iRegLdst dst, iRegLsrc src) %{
13135   match(Set dst (ReverseBytesL src));
13136   predicate(!UseByteReverseInstructions);
13137   ins_cost(15*DEFAULT_COST);
13138 
13139   expand %{
13140     immI16 imm56 %{ (int) 56 %}
13141     immI16 imm48 %{ (int) 48 %}
13142     immI16 imm40 %{ (int) 40 %}
13143     immI16 imm32 %{ (int) 32 %}
13144     immI16 imm24 %{ (int) 24 %}
13145     immI16 imm16 %{ (int) 16 %}
13146     immI16  imm8 %{ (int)  8 %}
13147     immI16  imm0 %{ (int)  0 %}
13148     iRegLdst tmpL1;
13149     iRegLdst tmpL2;
13150     iRegLdst tmpL3;
13151     iRegLdst tmpL4;
13152     iRegLdst tmpL5;
13153     iRegLdst tmpL6;
13154 
13155                                         // src   : |a|b|c|d|e|f|g|h|
13156     rldicl(tmpL1, src, imm8, imm24);    // tmpL1 : | | | |e|f|g|h|a|
13157     rldicl(tmpL2, tmpL1, imm32, imm24); // tmpL2 : | | | |a| | | |e|
13158     rldicl(tmpL3, tmpL2, imm32, imm0);  // tmpL3 : | | | |e| | | |a|
13159     rldicl(tmpL1, src, imm16, imm24);   // tmpL1 : | | | |f|g|h|a|b|
13160     rldicl(tmpL2, tmpL1, imm32, imm24); // tmpL2 : | | | |b| | | |f|
13161     rldicl(tmpL4, tmpL2, imm40, imm0);  // tmpL4 : | | |f| | | |b| |
13162     orL_reg_reg(tmpL5, tmpL3, tmpL4);   // tmpL5 : | | |f|e| | |b|a|
13163     rldicl(tmpL1, src, imm24, imm24);   // tmpL1 : | | | |g|h|a|b|c|
13164     rldicl(tmpL2, tmpL1, imm32, imm24); // tmpL2 : | | | |c| | | |g|
13165     rldicl(tmpL3, tmpL2, imm48, imm0);  // tmpL3 : | |g| | | |c| | |
13166     rldicl(tmpL1, src, imm32, imm24);   // tmpL1 : | | | |h|a|b|c|d|
13167     rldicl(tmpL2, tmpL1, imm32, imm24); // tmpL2 : | | | |d| | | |h|
13168     rldicl(tmpL4, tmpL2, imm56, imm0);  // tmpL4 : |h| | | |d| | | |
13169     orL_reg_reg(tmpL6, tmpL3, tmpL4);   // tmpL6 : |h|g| | |d|c| | |
13170     orL_reg_reg(dst, tmpL5, tmpL6);     // dst   : |h|g|f|e|d|c|b|a|
13171   %}
13172 %}
13173 
13174 instruct bytes_reverse_long_vec(iRegLdst dst, iRegLsrc src, vecX tmpV) %{
13175   match(Set dst (ReverseBytesL src));
13176   predicate(UseVectorByteReverseInstructionsPPC64);
13177   effect(TEMP tmpV);
13178   ins_cost(DEFAULT_COST*3);
13179   size(12);
13180   format %{ "MTVSRD  $tmpV, $src\n"
13181             "\tXXBRD   $tmpV, $tmpV\n"
13182             "\tMFVSRD  $dst, $tmpV" %}
13183 
13184   ins_encode %{
13185     __ mtvsrd($tmpV$$VectorSRegister, $src$$Register);
13186     __ xxbrd($tmpV$$VectorSRegister, $tmpV$$VectorSRegister);
13187     __ mfvsrd($dst$$Register, $tmpV$$VectorSRegister);
13188   %}
13189   ins_pipe(pipe_class_default);
13190 %}
13191 
13192 instruct bytes_reverse_long(iRegLdst dst, iRegLsrc src) %{
13193   match(Set dst (ReverseBytesL src));
13194   predicate(UseByteReverseInstructions);
13195   ins_cost(DEFAULT_COST);
13196   size(4);
13197 
13198   format %{ "BRD  $dst, $src" %}
13199 
13200   ins_encode %{
13201     __ brd($dst$$Register, $src$$Register);
13202   %}
13203   ins_pipe(pipe_class_default);
13204 %}
13205 
13206 instruct bytes_reverse_ushort_Ex(iRegIdst dst, iRegIsrc src) %{
13207   match(Set dst (ReverseBytesUS src));
13208   predicate(!UseByteReverseInstructions);
13209   ins_cost(2*DEFAULT_COST);
13210 
13211   expand %{
13212     immI16  imm16 %{ (int) 16 %}
13213     immI16   imm8 %{ (int)  8 %}
13214 
13215     urShiftI_reg_imm(dst, src, imm8);
13216     insrwi(dst, src, imm16, imm8);
13217   %}
13218 %}
13219 
13220 instruct bytes_reverse_ushort(iRegIdst dst, iRegIsrc src) %{
13221   match(Set dst (ReverseBytesUS src));
13222   predicate(UseByteReverseInstructions);
13223   ins_cost(DEFAULT_COST);
13224   size(4);
13225 
13226   format %{ "BRH  $dst, $src" %}
13227 
13228   ins_encode %{
13229     __ brh($dst$$Register, $src$$Register);
13230   %}
13231   ins_pipe(pipe_class_default);
13232 %}
13233 
13234 instruct bytes_reverse_short_Ex(iRegIdst dst, iRegIsrc src) %{
13235   match(Set dst (ReverseBytesS src));
13236   predicate(!UseByteReverseInstructions);
13237   ins_cost(3*DEFAULT_COST);
13238 
13239   expand %{
13240     immI16  imm16 %{ (int) 16 %}
13241     immI16   imm8 %{ (int)  8 %}
13242     iRegLdst tmpI1;
13243 
13244     urShiftI_reg_imm(tmpI1, src, imm8);
13245     insrwi(tmpI1, src, imm16, imm8);
13246     extsh(dst, tmpI1);
13247   %}
13248 %}
13249 
13250 instruct bytes_reverse_short(iRegIdst dst, iRegIsrc src) %{
13251   match(Set dst (ReverseBytesS src));
13252   predicate(UseByteReverseInstructions);
13253   ins_cost(DEFAULT_COST);
13254   size(8);
13255 
13256   format %{ "BRH   $dst, $src\n\t"
13257             "EXTSH $dst, $dst" %}
13258 
13259   ins_encode %{
13260     __ brh($dst$$Register, $src$$Register);
13261     __ extsh($dst$$Register, $dst$$Register);
13262   %}
13263   ins_pipe(pipe_class_default);
13264 %}
13265 
13266 // Load Integer reversed byte order
13267 instruct loadI_reversed(iRegIdst dst, indirect mem) %{
13268   match(Set dst (ReverseBytesI (LoadI mem)));
13269   predicate(n->in(1)->as_Load()->is_unordered() || followed_by_acquire(n->in(1)));
13270   ins_cost(MEMORY_REF_COST);
13271 
13272   size(4);
13273   ins_encode %{
13274     __ lwbrx($dst$$Register, $mem$$Register);
13275   %}
13276   ins_pipe(pipe_class_default);
13277 %}
13278 
13279 instruct loadI_reversed_acquire(iRegIdst dst, indirect mem) %{
13280   match(Set dst (ReverseBytesI (LoadI mem)));
13281   ins_cost(2 * MEMORY_REF_COST);
13282 
13283   size(12);
13284   ins_encode %{
13285     __ lwbrx($dst$$Register, $mem$$Register);
13286     __ twi_0($dst$$Register);
13287     __ isync();
13288   %}
13289   ins_pipe(pipe_class_default);
13290 %}
13291 
13292 // Load Long - aligned and reversed
13293 instruct loadL_reversed(iRegLdst dst, indirect mem) %{
13294   match(Set dst (ReverseBytesL (LoadL mem)));
13295   predicate(VM_Version::has_ldbrx() && (n->in(1)->as_Load()->is_unordered() || followed_by_acquire(n->in(1))));
13296   ins_cost(MEMORY_REF_COST);
13297 
13298   size(4);
13299   ins_encode %{
13300     __ ldbrx($dst$$Register, $mem$$Register);
13301   %}
13302   ins_pipe(pipe_class_default);
13303 %}
13304 
13305 instruct loadL_reversed_acquire(iRegLdst dst, indirect mem) %{
13306   match(Set dst (ReverseBytesL (LoadL mem)));
13307   predicate(VM_Version::has_ldbrx());
13308   ins_cost(2 * MEMORY_REF_COST);
13309 
13310   size(12);
13311   ins_encode %{
13312     __ ldbrx($dst$$Register, $mem$$Register);
13313     __ twi_0($dst$$Register);
13314     __ isync();
13315   %}
13316   ins_pipe(pipe_class_default);
13317 %}
13318 
13319 // Load unsigned short / char reversed byte order
13320 instruct loadUS_reversed(iRegIdst dst, indirect mem) %{
13321   match(Set dst (ReverseBytesUS (LoadUS mem)));
13322   predicate(n->in(1)->as_Load()->is_unordered() || followed_by_acquire(n->in(1)));
13323   ins_cost(MEMORY_REF_COST);
13324 
13325   size(4);
13326   ins_encode %{
13327     __ lhbrx($dst$$Register, $mem$$Register);
13328   %}
13329   ins_pipe(pipe_class_default);
13330 %}
13331 
13332 instruct loadUS_reversed_acquire(iRegIdst dst, indirect mem) %{
13333   match(Set dst (ReverseBytesUS (LoadUS mem)));
13334   ins_cost(2 * MEMORY_REF_COST);
13335 
13336   size(12);
13337   ins_encode %{
13338     __ lhbrx($dst$$Register, $mem$$Register);
13339     __ twi_0($dst$$Register);
13340     __ isync();
13341   %}
13342   ins_pipe(pipe_class_default);
13343 %}
13344 
13345 // Load short reversed byte order
13346 instruct loadS_reversed(iRegIdst dst, indirect mem) %{
13347   match(Set dst (ReverseBytesS (LoadS mem)));
13348   predicate(n->in(1)->as_Load()->is_unordered() || followed_by_acquire(n->in(1)));
13349   ins_cost(MEMORY_REF_COST + DEFAULT_COST);
13350 
13351   size(8);
13352   ins_encode %{
13353     __ lhbrx($dst$$Register, $mem$$Register);
13354     __ extsh($dst$$Register, $dst$$Register);
13355   %}
13356   ins_pipe(pipe_class_default);
13357 %}
13358 
13359 instruct loadS_reversed_acquire(iRegIdst dst, indirect mem) %{
13360   match(Set dst (ReverseBytesS (LoadS mem)));
13361   ins_cost(2 * MEMORY_REF_COST + DEFAULT_COST);
13362 
13363   size(16);
13364   ins_encode %{
13365     __ lhbrx($dst$$Register, $mem$$Register);
13366     __ twi_0($dst$$Register);
13367     __ extsh($dst$$Register, $dst$$Register);
13368     __ isync();
13369   %}
13370   ins_pipe(pipe_class_default);
13371 %}
13372 
13373 // Store Integer reversed byte order
13374 instruct storeI_reversed(iRegIsrc src, indirect mem) %{
13375   match(Set mem (StoreI mem (ReverseBytesI src)));
13376   ins_cost(MEMORY_REF_COST);
13377 
13378   size(4);
13379   ins_encode %{
13380     __ stwbrx($src$$Register, $mem$$Register);
13381   %}
13382   ins_pipe(pipe_class_default);
13383 %}
13384 
13385 // Store Long reversed byte order
13386 instruct storeL_reversed(iRegLsrc src, indirect mem) %{
13387   match(Set mem (StoreL mem (ReverseBytesL src)));
13388   predicate(VM_Version::has_stdbrx());
13389   ins_cost(MEMORY_REF_COST);
13390 
13391   size(4);
13392   ins_encode %{
13393     __ stdbrx($src$$Register, $mem$$Register);
13394   %}
13395   ins_pipe(pipe_class_default);
13396 %}
13397 
13398 // Store unsigned short / char reversed byte order
13399 instruct storeUS_reversed(iRegIsrc src, indirect mem) %{
13400   match(Set mem (StoreC mem (ReverseBytesUS src)));
13401   ins_cost(MEMORY_REF_COST);
13402 
13403   size(4);
13404   ins_encode %{
13405     __ sthbrx($src$$Register, $mem$$Register);
13406   %}
13407   ins_pipe(pipe_class_default);
13408 %}
13409 
13410 // Store short reversed byte order
13411 instruct storeS_reversed(iRegIsrc src, indirect mem) %{
13412   match(Set mem (StoreC mem (ReverseBytesS src)));
13413   ins_cost(MEMORY_REF_COST);
13414 
13415   size(4);
13416   ins_encode %{
13417     __ sthbrx($src$$Register, $mem$$Register);
13418   %}
13419   ins_pipe(pipe_class_default);
13420 %}
13421 
13422 instruct mtvsrwz(vecX temp1, iRegIsrc src) %{
13423   effect(DEF temp1, USE src);
13424 
13425   format %{ "MTVSRWZ $temp1, $src \t// Move to 16-byte register" %}
13426   size(4);
13427   ins_encode %{
13428     __ mtvsrwz($temp1$$VectorSRegister, $src$$Register);
13429   %}
13430   ins_pipe(pipe_class_default);
13431 %}
13432 
13433 instruct xxspltw(vecX dst, vecX src, immI8 imm1) %{
13434   effect(DEF dst, USE src, USE imm1);
13435 
13436   format %{ "XXSPLTW $dst, $src, $imm1 \t// Splat word" %}
13437   size(4);
13438   ins_encode %{
13439     __ xxspltw($dst$$VectorSRegister, $src$$VectorSRegister, $imm1$$constant);
13440   %}
13441   ins_pipe(pipe_class_default);
13442 %}
13443 
13444 instruct xscvdpspn_regF(vecX dst, regF src) %{
13445   effect(DEF dst, USE src);
13446 
13447   format %{ "XSCVDPSPN $dst, $src \t// Convert scalar single precision to vector single precision" %}
13448   size(4);
13449   ins_encode %{
13450     __ xscvdpspn($dst$$VectorSRegister, $src$$FloatRegister->to_vsr());
13451   %}
13452   ins_pipe(pipe_class_default);
13453 %}
13454 
13455 //---------- Replicate Vector Instructions ------------------------------------
13456 
13457 // Insrdi does replicate if src == dst.
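// E.g. repl32 on a register whose low word is 0x11223344 copies that word
// into the high word, giving 0x1122334411223344.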
13458 instruct repl32(iRegLdst dst) %{
13459   predicate(false);
13460   effect(USE_DEF dst);
13461 
13462   format %{ "INSRDI  $dst, #0, $dst, #32 \t// replicate" %}
13463   size(4);
13464   ins_encode %{
13465     __ insrdi($dst$$Register, $dst$$Register, 32, 0);
13466   %}
13467   ins_pipe(pipe_class_default);
13468 %}
13469 
13470 // Insrdi does replicate if src == dst.
13471 instruct repl48(iRegLdst dst) %{
13472   predicate(false);
13473   effect(USE_DEF dst);
13474 
13475   format %{ "INSRDI  $dst, #0, $dst, #48 \t// replicate" %}
13476   size(4);
13477   ins_encode %{
13478     __ insrdi($dst$$Register, $dst$$Register, 48, 0);
13479   %}
13480   ins_pipe(pipe_class_default);
13481 %}
13482 
13483 // Insrdi does replicate if src == dst.
13484 instruct repl56(iRegLdst dst) %{
13485   predicate(false);
13486   effect(USE_DEF dst);
13487 
13488   format %{ "INSRDI  $dst, #0, $dst, #56 \t// replicate" %}
13489   size(4);
13490   ins_encode %{
13491     __ insrdi($dst$$Register, $dst$$Register, 56, 0);
13492   %}
13493   ins_pipe(pipe_class_default);
13494 %}
13495 
13496 instruct repl8B_reg_Ex(iRegLdst dst, iRegIsrc src) %{
13497   match(Set dst (ReplicateB src));
13498   predicate(n->as_Vector()->length() == 8);
13499   expand %{
13500     moveReg(dst, src);
13501     repl56(dst);
13502     repl48(dst);
13503     repl32(dst);
13504   %}
13505 %}
13506 
13507 instruct repl8B_immI0(iRegLdst dst, immI_0 zero) %{
13508   match(Set dst (ReplicateB zero));
13509   predicate(n->as_Vector()->length() == 8);
13510   format %{ "LI      $dst, #0 \t// replicate8B" %}
13511   size(4);
13512   ins_encode %{
13513     __ li($dst$$Register, (int)((short)($zero$$constant & 0xFFFF)));
13514   %}
13515   ins_pipe(pipe_class_default);
13516 %}
13517 
13518 instruct repl8B_immIminus1(iRegLdst dst, immI_minus1 src) %{
13519   match(Set dst (ReplicateB src));
13520   predicate(n->as_Vector()->length() == 8);
13521   format %{ "LI      $dst, #-1 \t// replicate8B" %}
13522   size(4);
13523   ins_encode %{
13524     __ li($dst$$Register, (int)((short)($src$$constant & 0xFFFF)));
13525   %}
13526   ins_pipe(pipe_class_default);
13527 %}
13528 
13529 instruct repl16B_reg_Ex(vecX dst, iRegIsrc src) %{
13530   match(Set dst (ReplicateB src));
13531   predicate(n->as_Vector()->length() == 16);
13532 
13533   expand %{
13534     iRegLdst tmpL;
13535     vecX tmpV;
13536     immI8  imm1 %{ (int)  1 %}
13537     moveReg(tmpL, src);
13538     repl56(tmpL);
13539     repl48(tmpL);
13540     mtvsrwz(tmpV, tmpL);
13541     xxspltw(dst, tmpV, imm1);
13542   %}
13543 %}
13544 
13545 instruct repl16B_immI0(vecX dst, immI_0 zero) %{
13546   match(Set dst (ReplicateB zero));
13547   predicate(n->as_Vector()->length() == 16);
13548 
13549   format %{ "XXLXOR      $dst, $zero \t// replicate16B" %}
13550   size(4);
13551   ins_encode %{
13552     __ xxlxor($dst$$VectorSRegister, $dst$$VectorSRegister, $dst$$VectorSRegister);
13553   %}
13554   ins_pipe(pipe_class_default);
13555 %}
13556 
13557 instruct repl16B_immIminus1(vecX dst, immI_minus1 src) %{
13558   match(Set dst (ReplicateB src));
13559   predicate(n->as_Vector()->length() == 16);
13560 
13561   format %{ "XXLEQV      $dst, $src \t// replicate16B" %}
13562   size(4);
13563   ins_encode %{
13564     __ xxleqv($dst$$VectorSRegister, $dst$$VectorSRegister, $dst$$VectorSRegister);
13565   %}
13566   ins_pipe(pipe_class_default);
13567 %}
13568 
13569 instruct repl4S_reg_Ex(iRegLdst dst, iRegIsrc src) %{
13570   match(Set dst (ReplicateS src));
13571   predicate(n->as_Vector()->length() == 4);
13572   expand %{
13573     moveReg(dst, src);
13574     repl48(dst);
13575     repl32(dst);
13576   %}
13577 %}
13578 
13579 instruct repl4S_immI0(iRegLdst dst, immI_0 zero) %{
13580   match(Set dst (ReplicateS zero));
13581   predicate(n->as_Vector()->length() == 4);
13582   format %{ "LI      $dst, #0 \t// replicate4S" %}
13583   size(4);
13584   ins_encode %{
13585     __ li($dst$$Register, (int)((short)($zero$$constant & 0xFFFF)));
13586   %}
13587   ins_pipe(pipe_class_default);
13588 %}
13589 
13590 instruct repl4S_immIminus1(iRegLdst dst, immI_minus1 src) %{
13591   match(Set dst (ReplicateS src));
13592   predicate(n->as_Vector()->length() == 4);
13593   format %{ "LI      $dst, -1 \t// replicate4S" %}
13594   size(4);
13595   ins_encode %{
13596     __ li($dst$$Register, (int)((short)($src$$constant & 0xFFFF)));
13597   %}
13598   ins_pipe(pipe_class_default);
13599 %}
13600 
13601 instruct repl8S_reg_Ex(vecX dst, iRegIsrc src) %{
13602   match(Set dst (ReplicateS src));
13603   predicate(n->as_Vector()->length() == 8);
13604 
13605   expand %{
13606     iRegLdst tmpL;
13607     vecX tmpV;
13608     immI8  zero %{ (int)  0 %}
13609     moveReg(tmpL, src);
13610     repl48(tmpL);
13611     repl32(tmpL);
13612     mtvsrd(tmpV, tmpL);
13613     xxpermdi(dst, tmpV, tmpV, zero);
13614   %}
13615 %}
13616 
13617 instruct repl8S_immI0(vecX dst, immI_0 zero) %{
13618   match(Set dst (ReplicateS zero));
13619   predicate(n->as_Vector()->length() == 8);
13620 
13621   format %{ "XXLXOR      $dst, $zero \t// replicate8S" %}
13622   size(4);
13623   ins_encode %{
13624     __ xxlxor($dst$$VectorSRegister, $dst$$VectorSRegister, $dst$$VectorSRegister);
13625   %}
13626   ins_pipe(pipe_class_default);
13627 %}
13628 
13629 instruct repl8S_immIminus1(vecX dst, immI_minus1 src) %{
13630   match(Set dst (ReplicateS src));
13631   predicate(n->as_Vector()->length() == 8);
13632 
13633   format %{ "XXLEQV      $dst, $src \t// replicate8S" %}
13634   size(4);
13635   ins_encode %{
13636     __ xxleqv($dst$$VectorSRegister, $dst$$VectorSRegister, $dst$$VectorSRegister);
13637   %}
13638   ins_pipe(pipe_class_default);
13639 %}
13640 
13641 instruct repl2I_reg_Ex(iRegLdst dst, iRegIsrc src) %{
13642   match(Set dst (ReplicateI src));
13643   predicate(n->as_Vector()->length() == 2);
13644   ins_cost(2 * DEFAULT_COST);
13645   expand %{
13646     moveReg(dst, src);
13647     repl32(dst);
13648   %}
13649 %}
13650 
13651 instruct repl2I_immI0(iRegLdst dst, immI_0 zero) %{
13652   match(Set dst (ReplicateI zero));
13653   predicate(n->as_Vector()->length() == 2);
13654   format %{ "LI      $dst, #0 \t// replicate2I" %}
13655   size(4);
13656   ins_encode %{
13657     __ li($dst$$Register, (int)((short)($zero$$constant & 0xFFFF)));
13658   %}
13659   ins_pipe(pipe_class_default);
13660 %}
13661 
13662 instruct repl2I_immIminus1(iRegLdst dst, immI_minus1 src) %{
13663   match(Set dst (ReplicateI src));
13664   predicate(n->as_Vector()->length() == 2);
13665   format %{ "LI      $dst, -1 \t// replicate2I" %}
13666   size(4);
13667   ins_encode %{
13668     __ li($dst$$Register, (int)((short)($src$$constant & 0xFFFF)));
13669   %}
13670   ins_pipe(pipe_class_default);
13671 %}
13672 
13673 instruct repl4I_reg_Ex(vecX dst, iRegIsrc src) %{
13674   match(Set dst (ReplicateI src));
13675   predicate(n->as_Vector()->length() == 4);
13676   ins_cost(2 * DEFAULT_COST);
13677 
13678   expand %{
13679     iRegLdst tmpL;
13680     vecX tmpV;
13681     immI8  zero %{ (int)  0 %}
13682     moveReg(tmpL, src);
13683     repl32(tmpL);
13684     mtvsrd(tmpV, tmpL);
13685     xxpermdi(dst, tmpV, tmpV, zero);
13686   %}
13687 %}
13688 
13689 instruct repl4I_immI0(vecX dst, immI_0 zero) %{
13690   match(Set dst (ReplicateI zero));
13691   predicate(n->as_Vector()->length() == 4);
13692 
13693   format %{ "XXLXOR      $dst, $zero \t// replicate4I" %}
13694   size(4);
13695   ins_encode %{
13696     __ xxlxor($dst$$VectorSRegister, $dst$$VectorSRegister, $dst$$VectorSRegister);
13697   %}
13698   ins_pipe(pipe_class_default);
13699 %}
13700 
13701 instruct repl4I_immIminus1(vecX dst, immI_minus1 src) %{
13702   match(Set dst (ReplicateI src));
13703   predicate(n->as_Vector()->length() == 4);
13704 
13705   format %{ "XXLEQV      $dst, $dst, $dst \t// replicate4I" %}
13706   size(4);
13707   ins_encode %{
13708     __ xxleqv($dst$$VectorSRegister, $dst$$VectorSRegister, $dst$$VectorSRegister);
13709   %}
13710   ins_pipe(pipe_class_default);
13711 %}
13712 
13713 // Move float to int register via stack, replicate.
13714 instruct repl2F_reg_Ex(iRegLdst dst, regF src) %{
13715   match(Set dst (ReplicateF src));
13716   predicate(n->as_Vector()->length() == 2);
13717   ins_cost(2 * MEMORY_REF_COST + DEFAULT_COST);
13718   expand %{
13719     stackSlotL tmpS;
13720     iRegIdst tmpI;
13721     moveF2I_reg_stack(tmpS, src);   // Move float to stack.
13722     moveF2I_stack_reg(tmpI, tmpS);  // Move stack to int reg.
13723     moveReg(dst, tmpI);             // Move int to long reg.
13724     repl32(dst);                    // Replicate bitpattern.
13725   %}
13726 %}
13727 
13728 // Replicate scalar constant to packed float values in Double register
13729 instruct repl2F_immF_Ex(iRegLdst dst, immF src) %{
13730   match(Set dst (ReplicateF src));
13731   predicate(n->as_Vector()->length() == 2);
13732   ins_cost(5 * DEFAULT_COST);
13733 
13734   format %{ "LD      $dst, offset, $constanttablebase\t// load replicated float $src $src from table, postalloc expanded" %}
13735   postalloc_expand( postalloc_expand_load_replF_constant(dst, src, constanttablebase) );
13736 %}
13737 
13738 // Replicate scalar zero constant to packed float values in Double register
13739 instruct repl2F_immF0(iRegLdst dst, immF_0 zero) %{
13740   match(Set dst (ReplicateF zero));
13741   predicate(n->as_Vector()->length() == 2);
13742 
13743   format %{ "LI      $dst, #0 \t// replicate2F" %}
13744   ins_encode %{
13745     __ li($dst$$Register, 0x0);
13746   %}
13747   ins_pipe(pipe_class_default);
13748 %}
13749 
13750 
13751 //----------Vector Arithmetic Instructions--------------------------------------
13752 
13753 // Vector Addition Instructions
13754 
13755 instruct vadd16B_reg(vecX dst, vecX src1, vecX src2) %{
13756   match(Set dst (AddVB src1 src2));
13757   predicate(n->as_Vector()->length() == 16);
13758   format %{ "VADDUBM  $dst,$src1,$src2\t// add packed16B" %}
13759   size(4);
13760   ins_encode %{
13761     __ vaddubm($dst$$VectorSRegister->to_vr(), $src1$$VectorSRegister->to_vr(), $src2$$VectorSRegister->to_vr());
13762   %}
13763   ins_pipe(pipe_class_default);
13764 %}
13765 
13766 instruct vadd8S_reg(vecX dst, vecX src1, vecX src2) %{
13767   match(Set dst (AddVS src1 src2));
13768   predicate(n->as_Vector()->length() == 8);
13769   format %{ "VADDUHM  $dst,$src1,$src2\t// add packed8S" %}
13770   size(4);
13771   ins_encode %{
13772     __ vadduhm($dst$$VectorSRegister->to_vr(), $src1$$VectorSRegister->to_vr(), $src2$$VectorSRegister->to_vr());
13773   %}
13774   ins_pipe(pipe_class_default);
13775 %}
13776 
13777 instruct vadd4I_reg(vecX dst, vecX src1, vecX src2) %{
13778   match(Set dst (AddVI src1 src2));
13779   predicate(n->as_Vector()->length() == 4);
13780   format %{ "VADDUWM  $dst,$src1,$src2\t// add packed4I" %}
13781   size(4);
13782   ins_encode %{
13783     __ vadduwm($dst$$VectorSRegister->to_vr(), $src1$$VectorSRegister->to_vr(), $src2$$VectorSRegister->to_vr());
13784   %}
13785   ins_pipe(pipe_class_default);
13786 %}
13787 
13788 instruct vadd4F_reg(vecX dst, vecX src1, vecX src2) %{
13789   match(Set dst (AddVF src1 src2));
13790   predicate(n->as_Vector()->length() == 4);
13791   format %{ "VADDFP  $dst,$src1,$src2\t// add packed4F" %}
13792   size(4);
13793   ins_encode %{
13794     __ vaddfp($dst$$VectorSRegister->to_vr(), $src1$$VectorSRegister->to_vr(), $src2$$VectorSRegister->to_vr());
13795   %}
13796   ins_pipe(pipe_class_default);
13797 %}
13798 
13799 instruct vadd2L_reg(vecX dst, vecX src1, vecX src2) %{
13800   match(Set dst (AddVL src1 src2));
13801   predicate(n->as_Vector()->length() == 2);
13802   format %{ "VADDUDM  $dst,$src1,$src2\t// add packed2L" %}
13803   size(4);
13804   ins_encode %{
13805     __ vaddudm($dst$$VectorSRegister->to_vr(), $src1$$VectorSRegister->to_vr(), $src2$$VectorSRegister->to_vr());
13806   %}
13807   ins_pipe(pipe_class_default);
13808 %}
13809 
13810 instruct vadd2D_reg(vecX dst, vecX src1, vecX src2) %{
13811   match(Set dst (AddVD src1 src2));
13812   predicate(n->as_Vector()->length() == 2);
13813   format %{ "XVADDDP  $dst,$src1,$src2\t// add packed2D" %}
13814   size(4);
13815   ins_encode %{
13816     __ xvadddp($dst$$VectorSRegister, $src1$$VectorSRegister, $src2$$VectorSRegister);
13817   %}
13818   ins_pipe(pipe_class_default);
13819 %}
13820 
13821 // Vector Subtraction Instructions
13822 
13823 instruct vsub16B_reg(vecX dst, vecX src1, vecX src2) %{
13824   match(Set dst (SubVB src1 src2));
13825   predicate(n->as_Vector()->length() == 16);
13826   format %{ "VSUBUBM  $dst,$src1,$src2\t// sub packed16B" %}
13827   size(4);
13828   ins_encode %{
13829     __ vsububm($dst$$VectorSRegister->to_vr(), $src1$$VectorSRegister->to_vr(), $src2$$VectorSRegister->to_vr());
13830   %}
13831   ins_pipe(pipe_class_default);
13832 %}
13833 
13834 instruct vsub8S_reg(vecX dst, vecX src1, vecX src2) %{
13835   match(Set dst (SubVS src1 src2));
13836   predicate(n->as_Vector()->length() == 8);
13837   format %{ "VSUBUHM  $dst,$src1,$src2\t// sub packed8S" %}
13838   size(4);
13839   ins_encode %{
13840     __ vsubuhm($dst$$VectorSRegister->to_vr(), $src1$$VectorSRegister->to_vr(), $src2$$VectorSRegister->to_vr());
13841   %}
13842   ins_pipe(pipe_class_default);
13843 %}
13844 
13845 instruct vsub4I_reg(vecX dst, vecX src1, vecX src2) %{
13846   match(Set dst (SubVI src1 src2));
13847   predicate(n->as_Vector()->length() == 4);
13848   format %{ "VSUBUWM  $dst,$src1,$src2\t// sub packed4I" %}
13849   size(4);
13850   ins_encode %{
13851     __ vsubuwm($dst$$VectorSRegister->to_vr(), $src1$$VectorSRegister->to_vr(), $src2$$VectorSRegister->to_vr());
13852   %}
13853   ins_pipe(pipe_class_default);
13854 %}
13855 
13856 instruct vsub4F_reg(vecX dst, vecX src1, vecX src2) %{
13857   match(Set dst (SubVF src1 src2));
13858   predicate(n->as_Vector()->length() == 4);
13859   format %{ "VSUBFP  $dst,$src1,$src2\t// sub packed4F" %}
13860   size(4);
13861   ins_encode %{
13862     __ vsubfp($dst$$VectorSRegister->to_vr(), $src1$$VectorSRegister->to_vr(), $src2$$VectorSRegister->to_vr());
13863   %}
13864   ins_pipe(pipe_class_default);
13865 %}
13866 
13867 instruct vsub2L_reg(vecX dst, vecX src1, vecX src2) %{
13868   match(Set dst (SubVL src1 src2));
13869   predicate(n->as_Vector()->length() == 2);
13870   format %{ "VSUBUDM  $dst,$src1,$src2\t// sub packed2L" %}
13871   size(4);
13872   ins_encode %{
13873     __ vsubudm($dst$$VectorSRegister->to_vr(), $src1$$VectorSRegister->to_vr(), $src2$$VectorSRegister->to_vr());
13874   %}
13875   ins_pipe(pipe_class_default);
13876 %}
13877 
13878 instruct vsub2D_reg(vecX dst, vecX src1, vecX src2) %{
13879   match(Set dst (SubVD src1 src2));
13880   predicate(n->as_Vector()->length() == 2);
13881   format %{ "XVSUBDP  $dst,$src1,$src2\t// sub packed2D" %}
13882   size(4);
13883   ins_encode %{
13884     __ xvsubdp($dst$$VectorSRegister, $src1$$VectorSRegister, $src2$$VectorSRegister);
13885   %}
13886   ins_pipe(pipe_class_default);
13887 %}
13888 
13889 // Vector Multiplication Instructions
13890 
13891 instruct vmul8S_reg(vecX dst, vecX src1, vecX src2, vecX tmp) %{
13892   match(Set dst (MulVS src1 src2));
13893   predicate(n->as_Vector()->length() == 8);
13894   effect(TEMP tmp);
13895   format %{ "VSPLTISH  $tmp,0\t// mul packed8S" %}
13896   format %{ "VMLADDUHM  $dst,$src1,$src2\t// mul packed8S" %}
13897   size(8);
13898   ins_encode %{
13899     __ vspltish($tmp$$VectorSRegister->to_vr(), 0);
13900     __ vmladduhm($dst$$VectorSRegister->to_vr(), $src1$$VectorSRegister->to_vr(), $src2$$VectorSRegister->to_vr(), $tmp$$VectorSRegister->to_vr());
13901   %}
13902   ins_pipe(pipe_class_default);
13903 %}
13904 
13905 instruct vmul4I_reg(vecX dst, vecX src1, vecX src2) %{
13906   match(Set dst (MulVI src1 src2));
13907   predicate(n->as_Vector()->length() == 4);
13908   format %{ "VMULUWM  $dst,$src1,$src2\t// mul packed4I" %}
13909   size(4);
13910   ins_encode %{
13911     __ vmuluwm($dst$$VectorSRegister->to_vr(), $src1$$VectorSRegister->to_vr(), $src2$$VectorSRegister->to_vr());
13912   %}
13913   ins_pipe(pipe_class_default);
13914 %}
13915 
13916 instruct vmul4F_reg(vecX dst, vecX src1, vecX src2) %{
13917   match(Set dst (MulVF src1 src2));
13918   predicate(n->as_Vector()->length() == 4);
13919   format %{ "XVMULSP  $dst,$src1,$src2\t// mul packed4F" %}
13920   size(4);
13921   ins_encode %{
13922     __ xvmulsp($dst$$VectorSRegister, $src1$$VectorSRegister, $src2$$VectorSRegister);
13923   %}
13924   ins_pipe(pipe_class_default);
13925 %}
13926 
13927 instruct vmul2D_reg(vecX dst, vecX src1, vecX src2) %{
13928   match(Set dst (MulVD src1 src2));
13929   predicate(n->as_Vector()->length() == 2);
13930   format %{ "XVMULDP  $dst,$src1,$src2\t// mul packed2D" %}
13931   size(4);
13932   ins_encode %{
13933     __ xvmuldp($dst$$VectorSRegister, $src1$$VectorSRegister, $src2$$VectorSRegister);
13934   %}
13935   ins_pipe(pipe_class_default);
13936 %}
13937 
13938 // Vector Division Instructions
13939 
13940 instruct vdiv4F_reg(vecX dst, vecX src1, vecX src2) %{
13941   match(Set dst (DivVF src1 src2));
13942   predicate(n->as_Vector()->length() == 4);
13943   format %{ "XVDIVSP  $dst,$src1,$src2\t// div packed4F" %}
13944   size(4);
13945   ins_encode %{
13946     __ xvdivsp($dst$$VectorSRegister, $src1$$VectorSRegister, $src2$$VectorSRegister);
13947   %}
13948   ins_pipe(pipe_class_default);
13949 %}
13950 
13951 instruct vdiv2D_reg(vecX dst, vecX src1, vecX src2) %{
13952   match(Set dst (DivVD src1 src2));
13953   predicate(n->as_Vector()->length() == 2);
13954   format %{ "XVDIVDP  $dst,$src1,$src2\t// div packed2D" %}
13955   size(4);
13956   ins_encode %{
13957     __ xvdivdp($dst$$VectorSRegister, $src1$$VectorSRegister, $src2$$VectorSRegister);
13958   %}
13959   ins_pipe(pipe_class_default);
13960 %}
13961 
13962 // Vector Absolute Instructions
13963 
13964 instruct vabs4F_reg(vecX dst, vecX src) %{
13965   match(Set dst (AbsVF src));
13966   predicate(n->as_Vector()->length() == 4);
13967   format %{ "XVABSSP $dst,$src\t// absolute packed4F" %}
13968   size(4);
13969   ins_encode %{
13970     __ xvabssp($dst$$VectorSRegister, $src$$VectorSRegister);
13971   %}
13972   ins_pipe(pipe_class_default);
13973 %}
13974 
13975 instruct vabs2D_reg(vecX dst, vecX src) %{
13976   match(Set dst (AbsVD src));
13977   predicate(n->as_Vector()->length() == 2);
13978   format %{ "XVABSDP $dst,$src\t// absolute packed2D" %}
13979   size(4);
13980   ins_encode %{
13981     __ xvabsdp($dst$$VectorSRegister, $src$$VectorSRegister);
13982   %}
13983   ins_pipe(pipe_class_default);
13984 %}
13985 
13986 // Round Instructions
13987 instruct roundD_reg(regD dst, regD src, immI8 rmode) %{
13988   match(Set dst (RoundDoubleMode src rmode));
13989   format %{ "RoundDoubleMode $src,$rmode" %}
13990   size(4);
13991   ins_encode %{
13992     switch ($rmode$$constant) {
13993       case RoundDoubleModeNode::rmode_rint:
13994         __ xvrdpic($dst$$FloatRegister->to_vsr(), $src$$FloatRegister->to_vsr());
13995         break;
13996       case RoundDoubleModeNode::rmode_floor:
13997         __ frim($dst$$FloatRegister, $src$$FloatRegister);
13998         break;
13999       case RoundDoubleModeNode::rmode_ceil:
14000         __ frip($dst$$FloatRegister, $src$$FloatRegister);
14001         break;
14002       default:
14003         ShouldNotReachHere();
14004     }
14005   %}
14006   ins_pipe(pipe_class_default);
14007 %}
14008 
14009 // Vector Round Instructions
14010 instruct vround2D_reg(vecX dst, vecX src, immI8 rmode) %{
14011   match(Set dst (RoundDoubleModeV src rmode));
14012   predicate(n->as_Vector()->length() == 2);
14013   format %{ "RoundDoubleModeV $src,$rmode" %}
14014   size(4);
14015   ins_encode %{
14016     switch ($rmode$$constant) {
14017       case RoundDoubleModeNode::rmode_rint:
14018         __ xvrdpic($dst$$VectorSRegister, $src$$VectorSRegister);
14019         break;
14020       case RoundDoubleModeNode::rmode_floor:
14021         __ xvrdpim($dst$$VectorSRegister, $src$$VectorSRegister);
14022         break;
14023       case RoundDoubleModeNode::rmode_ceil:
14024         __ xvrdpip($dst$$VectorSRegister, $src$$VectorSRegister);
14025         break;
14026       default:
14027         ShouldNotReachHere();
14028     }
14029   %}
14030   ins_pipe(pipe_class_default);
14031 %}
14032 
14033 // Vector Negate Instructions
14034 
14035 instruct vneg4F_reg(vecX dst, vecX src) %{
14036   match(Set dst (NegVF src));
14037   predicate(n->as_Vector()->length() == 4);
14038   format %{ "XVNEGSP $dst,$src\t// negate packed4F" %}
14039   size(4);
14040   ins_encode %{
14041     __ xvnegsp($dst$$VectorSRegister, $src$$VectorSRegister);
14042   %}
14043   ins_pipe(pipe_class_default);
14044 %}
14045 
14046 instruct vneg2D_reg(vecX dst, vecX src) %{
14047   match(Set dst (NegVD src));
14048   predicate(n->as_Vector()->length() == 2);
14049   format %{ "XVNEGDP $dst,$src\t// negate packed2D" %}
14050   size(4);
14051   ins_encode %{
14052     __ xvnegdp($dst$$VectorSRegister, $src$$VectorSRegister);
14053   %}
14054   ins_pipe(pipe_class_default);
14055 %}
14056 
14057 // Vector Square Root Instructions
14058 
14059 instruct vsqrt4F_reg(vecX dst, vecX src) %{
14060   match(Set dst (SqrtVF src));
14061   predicate(n->as_Vector()->length() == 4);
14062   format %{ "XVSQRTSP $dst,$src\t// sqrt packed4F" %}
14063   size(4);
14064   ins_encode %{
14065     __ xvsqrtsp($dst$$VectorSRegister, $src$$VectorSRegister);
14066   %}
14067   ins_pipe(pipe_class_default);
14068 %}
14069 
14070 instruct vsqrt2D_reg(vecX dst, vecX src) %{
14071   match(Set dst (SqrtVD src));
14072   predicate(n->as_Vector()->length() == 2);
14073   format %{ "XVSQRTDP  $dst,$src\t// sqrt packed2D" %}
14074   size(4);
14075   ins_encode %{
14076     __ xvsqrtdp($dst$$VectorSRegister, $src$$VectorSRegister);
14077   %}
14078   ins_pipe(pipe_class_default);
14079 %}
14080 
14081 // Vector Population Count Instructions
14082 
14083 instruct vpopcnt_reg(vecX dst, vecX src) %{
14084   match(Set dst (PopCountVI src));
14085   format %{ "VPOPCNT $dst,$src\t// pop count packed" %}
14086   size(4);
14087   ins_encode %{
14088     BasicType bt = Matcher::vector_element_basic_type(this);
14089     switch (bt) {
14090       case T_BYTE:
14091         __ vpopcntb($dst$$VectorSRegister->to_vr(), $src$$VectorSRegister->to_vr());
14092         break;
14093       case T_SHORT:
14094         __ vpopcnth($dst$$VectorSRegister->to_vr(), $src$$VectorSRegister->to_vr());
14095         break;
14096       case T_INT:
14097         __ vpopcntw($dst$$VectorSRegister->to_vr(), $src$$VectorSRegister->to_vr());
14098         break;
14099       case T_LONG:
14100         __ vpopcntd($dst$$VectorSRegister->to_vr(), $src$$VectorSRegister->to_vr());
14101         break;
14102       default:
14103         ShouldNotReachHere();
14104     }
14105   %}
14106   ins_pipe(pipe_class_default);
14107 %}
14108 
14109 // --------------------------------- FMA --------------------------------------
14110 // dst + src1 * src2
14111 instruct vfma4F(vecX dst, vecX src1, vecX src2) %{
14112   match(Set dst (FmaVF dst (Binary src1 src2)));
14113   predicate(n->as_Vector()->length() == 4);
14114 
14115   format %{ "XVMADDASP   $dst, $src1, $src2" %}
14116 
14117   size(4);
14118   ins_encode %{
14119     __ xvmaddasp($dst$$VectorSRegister, $src1$$VectorSRegister, $src2$$VectorSRegister);
14120   %}
14121   ins_pipe(pipe_class_default);
14122 %}
14123 
14124 // dst - src1 * src2
14125 instruct vfma4F_neg1(vecX dst, vecX src1, vecX src2) %{
14126   match(Set dst (FmaVF dst (Binary (NegVF src1) src2)));
14127   match(Set dst (FmaVF dst (Binary src1 (NegVF src2))));
14128   predicate(n->as_Vector()->length() == 4);
14129 
14130   format %{ "XVNMSUBASP   $dst, $src1, $src2" %}
14131 
14132   size(4);
14133   ins_encode %{
14134     __ xvnmsubasp($dst$$VectorSRegister, $src1$$VectorSRegister, $src2$$VectorSRegister);
14135   %}
14136   ins_pipe(pipe_class_default);
14137 %}
14138 
14139 // - dst + src1 * src2
14140 instruct vfma4F_neg2(vecX dst, vecX src1, vecX src2) %{
14141   match(Set dst (FmaVF (NegVF dst) (Binary src1 src2)));
14142   predicate(n->as_Vector()->length() == 4);
14143 
14144   format %{ "XVMSUBASP   $dst, $src1, $src2" %}
14145 
14146   size(4);
14147   ins_encode %{
14148     __ xvmsubasp($dst$$VectorSRegister, $src1$$VectorSRegister, $src2$$VectorSRegister);
14149   %}
14150   ins_pipe(pipe_class_default);
14151 %}
14152 
14153 // dst + src1 * src2
14154 instruct vfma2D(vecX dst, vecX src1, vecX src2) %{
14155   match(Set dst (FmaVD  dst (Binary src1 src2)));
14156   predicate(n->as_Vector()->length() == 2);
14157 
14158   format %{ "XVMADDADP   $dst, $src1, $src2" %}
14159 
14160   size(4);
14161   ins_encode %{
14162     __ xvmaddadp($dst$$VectorSRegister, $src1$$VectorSRegister, $src2$$VectorSRegister);
14163   %}
14164   ins_pipe(pipe_class_default);
14165 %}
14166 
14167 // dst - src1 * src2
14168 instruct vfma2D_neg1(vecX dst, vecX src1, vecX src2) %{
14169   match(Set dst (FmaVD  dst (Binary (NegVD src1) src2)));
14170   match(Set dst (FmaVD  dst (Binary src1 (NegVD src2))));
14171   predicate(n->as_Vector()->length() == 2);
14172 
14173   format %{ "XVNMSUBADP   $dst, $src1, $src2" %}
14174 
14175   size(4);
14176   ins_encode %{
14177     __ xvnmsubadp($dst$$VectorSRegister, $src1$$VectorSRegister, $src2$$VectorSRegister);
14178   %}
14179   ins_pipe(pipe_class_default);
14180 %}
14181 
14182 // - dst + src1 * src2
14183 instruct vfma2D_neg2(vecX dst, vecX src1, vecX src2) %{
14184   match(Set dst (FmaVD (NegVD dst) (Binary src1 src2)));
14185   predicate(n->as_Vector()->length() == 2);
14186 
14187   format %{ "XVMSUBADP   $dst, $src1, $src2" %}
14188 
14189   size(4);
14190   ins_encode %{
14191     __ xvmsubadp($dst$$VectorSRegister, $src1$$VectorSRegister, $src2$$VectorSRegister);
14192   %}
14193   ins_pipe(pipe_class_default);
14194 %}
14195 
14196 //----------Overflow Math Instructions-----------------------------------------
14197 
14198 // Note that we have to make sure that XER.SO is reset before using overflow instructions.
14199 // Simple Overflow operations can be matched by very few instructions (e.g. addExact: xor, and_, bc).
// Seems like only Long intrinsics have an advantage. (The only expensive one is OverflowMulL.)
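// The LI/MTXER pair in the encodings below clears XER.SO before the
// overflow-setting operation: XER.SO is sticky and the record forms copy it
// into CR0, so a stale summary-overflow bit would otherwise be reported as
// an overflow.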
14201 
14202 instruct overflowAddL_reg_reg(flagsRegCR0 cr0, iRegLsrc op1, iRegLsrc op2) %{
14203   match(Set cr0 (OverflowAddL op1 op2));
14204 
14205   format %{ "add_    $op1, $op2\t# overflow check long" %}
14206   ins_encode %{
14207     __ li(R0, 0);
14208     __ mtxer(R0); // clear XER.SO
14209     __ addo_(R0, $op1$$Register, $op2$$Register);
14210   %}
14211   ins_pipe(pipe_class_default);
14212 %}
14213 
14214 instruct overflowSubL_reg_reg(flagsRegCR0 cr0, iRegLsrc op1, iRegLsrc op2) %{
14215   match(Set cr0 (OverflowSubL op1 op2));
14216 
14217   format %{ "subfo_  R0, $op2, $op1\t# overflow check long" %}
14218   ins_encode %{
14219     __ li(R0, 0);
14220     __ mtxer(R0); // clear XER.SO
14221     __ subfo_(R0, $op2$$Register, $op1$$Register);
14222   %}
14223   ins_pipe(pipe_class_default);
14224 %}
14225 
14226 instruct overflowNegL_reg(flagsRegCR0 cr0, immL_0 zero, iRegLsrc op2) %{
14227   match(Set cr0 (OverflowSubL zero op2));
14228 
14229   format %{ "nego_   R0, $op2\t# overflow check long" %}
14230   ins_encode %{
14231     __ li(R0, 0);
14232     __ mtxer(R0); // clear XER.SO
14233     __ nego_(R0, $op2$$Register);
14234   %}
14235   ins_pipe(pipe_class_default);
14236 %}
14237 
14238 instruct overflowMulL_reg_reg(flagsRegCR0 cr0, iRegLsrc op1, iRegLsrc op2) %{
14239   match(Set cr0 (OverflowMulL op1 op2));
14240 
14241   format %{ "mulldo_ R0, $op1, $op2\t# overflow check long" %}
14242   ins_encode %{
14243     __ li(R0, 0);
14244     __ mtxer(R0); // clear XER.SO
14245     __ mulldo_(R0, $op1$$Register, $op2$$Register);
14246   %}
14247   ins_pipe(pipe_class_default);
14248 %}
14249 
14250 instruct repl4F_reg_Ex(vecX dst, regF src) %{
14251   match(Set dst (ReplicateF src));
14252   predicate(n->as_Vector()->length() == 4);
14253   ins_cost(DEFAULT_COST);
14254   expand %{
14255     vecX tmpV;
14256     immI8  zero %{ (int)  0 %}
14257 
14258     xscvdpspn_regF(tmpV, src);
14259     xxspltw(dst, tmpV, zero);
14260   %}
14261 %}
14262 
14263 instruct repl4F_immF_Ex(vecX dst, immF src, iRegLdst tmp) %{
14264   match(Set dst (ReplicateF src));
14265   predicate(n->as_Vector()->length() == 4);
14266   effect(TEMP tmp);
14267   ins_cost(10 * DEFAULT_COST);
14268 
14269   postalloc_expand( postalloc_expand_load_replF_constant_vsx(dst, src, constanttablebase, tmp) );
14270 %}
14271 
14272 instruct repl4F_immF0(vecX dst, immF_0 zero) %{
14273   match(Set dst (ReplicateF zero));
14274   predicate(n->as_Vector()->length() == 4);
14275 
14276   format %{ "XXLXOR      $dst, $zero \t// replicate4F" %}
14277   ins_encode %{
14278     __ xxlxor($dst$$VectorSRegister, $dst$$VectorSRegister, $dst$$VectorSRegister);
14279   %}
14280   ins_pipe(pipe_class_default);
14281 %}
14282 
14283 instruct repl2D_reg_Ex(vecX dst, regD src) %{
14284   match(Set dst (ReplicateD src));
14285   predicate(n->as_Vector()->length() == 2);
14286 
14287   format %{ "XXPERMDI      $dst, $src, $src, 0 \t// Splat doubleword" %}
14288   size(4);
14289   ins_encode %{
14290     __ xxpermdi($dst$$VectorSRegister, $src$$FloatRegister->to_vsr(), $src$$FloatRegister->to_vsr(), 0);
14291   %}
14292   ins_pipe(pipe_class_default);
14293 %}
14294 
14295 instruct repl2D_immD0(vecX dst, immD_0 zero) %{
14296   match(Set dst (ReplicateD zero));
14297   predicate(n->as_Vector()->length() == 2);
14298 
14299   format %{ "XXLXOR      $dst, $zero \t// replicate2D" %}
14300   size(4);
14301   ins_encode %{
14302     __ xxlxor($dst$$VectorSRegister, $dst$$VectorSRegister, $dst$$VectorSRegister);
14303   %}
14304   ins_pipe(pipe_class_default);
14305 %}
14306 
14307 instruct mtvsrd(vecX dst, iRegLsrc src) %{
14308   predicate(false);
14309   effect(DEF dst, USE src);
14310 
14311   format %{ "MTVSRD      $dst, $src \t// Move to 16-byte register" %}
14312   size(4);
14313   ins_encode %{
14314     __ mtvsrd($dst$$VectorSRegister, $src$$Register);
14315   %}
14316   ins_pipe(pipe_class_default);
14317 %}
14318 
14319 instruct xxspltd(vecX dst, vecX src, immI8 zero) %{
14320   effect(DEF dst, USE src, USE zero);
14321 
14322   format %{ "XXSPLATD      $dst, $src, $zero \t// Splat doubleword" %}
14323   size(4);
14324   ins_encode %{
14325     __ xxpermdi($dst$$VectorSRegister, $src$$VectorSRegister, $src$$VectorSRegister, $zero$$constant);
14326   %}
14327   ins_pipe(pipe_class_default);
14328 %}
14329 
14330 instruct xxpermdi(vecX dst, vecX src1, vecX src2, immI8 zero) %{
14331   effect(DEF dst, USE src1, USE src2, USE zero);
14332 
14333   format %{ "XXPERMDI      $dst, $src1, $src2, $zero \t// Splat doubleword" %}
14334   size(4);
14335   ins_encode %{
14336     __ xxpermdi($dst$$VectorSRegister, $src1$$VectorSRegister, $src2$$VectorSRegister, $zero$$constant);
14337   %}
14338   ins_pipe(pipe_class_default);
14339 %}
14340 
14341 instruct repl2L_reg_Ex(vecX dst, iRegLsrc src) %{
14342   match(Set dst (ReplicateL src));
14343   predicate(n->as_Vector()->length() == 2);
14344   expand %{
14345     vecX tmpV;
14346     immI8  zero %{ (int)  0 %}
14347     mtvsrd(tmpV, src);
14348     xxpermdi(dst, tmpV, tmpV, zero);
14349   %}
14350 %}
14351 
14352 instruct repl2L_immI0(vecX dst, immI_0 zero) %{
14353   match(Set dst (ReplicateL zero));
14354   predicate(n->as_Vector()->length() == 2);
14355 
14356   format %{ "XXLXOR      $dst, $zero \t// replicate2L" %}
14357   size(4);
14358   ins_encode %{
14359     __ xxlxor($dst$$VectorSRegister, $dst$$VectorSRegister, $dst$$VectorSRegister);
14360   %}
14361   ins_pipe(pipe_class_default);
14362 %}
14363 
14364 instruct repl2L_immIminus1(vecX dst, immI_minus1 src) %{
14365   match(Set dst (ReplicateL src));
14366   predicate(n->as_Vector()->length() == 2);
14367 
14368   format %{ "XXLEQV      $dst, $src \t// replicate2L" %}
14369   size(4);
14370   ins_encode %{
14371     __ xxleqv($dst$$VectorSRegister, $dst$$VectorSRegister, $dst$$VectorSRegister);
14372   %}
14373   ins_pipe(pipe_class_default);
14374 %}
14375 
14376 // ============================================================================
14377 // Safepoint Instruction
14378 
14379 instruct safePoint_poll(iRegPdst poll) %{
14380   match(SafePoint poll);
14381 
  // Adding the effect that R0 is killed caused problems; the effect no
  // longer needs to be mentioned anyway, since R0 is not contained in
  // any reg_class.
14385 
14386   format %{ "LD      R0, #0, $poll \t// Safepoint poll for GC" %}
14387   size(4);
14388   ins_encode( enc_poll(0x0, poll) );
14389   ins_pipe(pipe_class_default);
14390 %}
14391 
14392 // ============================================================================
14393 // Call Instructions
14394 
14395 // Call Java Static Instruction
14396 
14397 source %{
14398 
14399 #include "runtime/continuation.hpp"
14400 
14401 %}
14402 
14403 // Schedulable version of call static node.
14404 instruct CallStaticJavaDirect(method meth) %{
14405   match(CallStaticJava);
14406   effect(USE meth);
14407   ins_cost(CALL_COST);
14408 
14409   ins_num_consts(3 /* up to 3 patchable constants: inline cache, 2 call targets. */);
14410 
14411   format %{ "CALL,static $meth \t// ==> " %}
14412   size((Continuations::enabled() ? 8 : 4));
14413   ins_encode( enc_java_static_call(meth) );
14414   ins_pipe(pipe_class_call);
14415 %}
14416 
14417 // Call Java Dynamic Instruction
14418 
14419 // Used by postalloc expand of CallDynamicJavaDirectSchedEx (actual call).
14420 // Loading of IC was postalloc expanded. The nodes loading the IC are reachable
14421 // via fields ins_field_load_ic_hi_node and ins_field_load_ic_node.
14422 // The call destination must still be placed in the constant pool.
14423 instruct CallDynamicJavaDirectSched(method meth) %{
14424   match(CallDynamicJava); // To get all the data fields we need ...
14425   effect(USE meth);
14426   predicate(false);       // ... but never match.
14427 
14428   ins_field_load_ic_hi_node(loadConL_hiNode*);
14429   ins_field_load_ic_node(loadConLNode*);
14430   ins_num_consts(1 /* 1 patchable constant: call destination */);
14431 
14432   format %{ "BL        \t// dynamic $meth ==> " %}
14433   size((Continuations::enabled() ? 8 : 4));
14434   ins_encode( enc_java_dynamic_call_sched(meth) );
14435   ins_pipe(pipe_class_call);
14436 %}
14437 
14438 // Schedulable (i.e. postalloc expanded) version of call dynamic java.
14439 // We use postalloc expanded calls if we use inline caches
14440 // and do not update method data.
14441 //
14442 // This instruction has two constants: inline cache (IC) and call destination.
14443 // Loading the inline cache will be postalloc expanded, thus leaving a call with
14444 // one constant.
14445 instruct CallDynamicJavaDirectSched_Ex(method meth) %{
14446   match(CallDynamicJava);
14447   effect(USE meth);
14448   predicate(UseInlineCaches);
14449   ins_cost(CALL_COST);
14450 
14451   ins_num_consts(2 /* 2 patchable constants: inline cache, call destination. */);
14452 
14453   format %{ "CALL,dynamic $meth \t// postalloc expanded" %}
14454   postalloc_expand( postalloc_expand_java_dynamic_call_sched(meth, constanttablebase) );
14455 %}
14456 
14457 // Compound version of call dynamic java
14458 // We use postalloc expanded calls if we use inline caches
14459 // and do not update method data.
14460 instruct CallDynamicJavaDirect(method meth) %{
14461   match(CallDynamicJava);
14462   effect(USE meth);
14463   predicate(!UseInlineCaches);
14464   ins_cost(CALL_COST);
14465 
14466   // Enc_java_to_runtime_call needs up to 4 constants (method data oop).
14467   ins_num_consts(4);
14468 
14469   format %{ "CALL,dynamic $meth \t// ==> " %}
14470   ins_encode( enc_java_dynamic_call(meth, constanttablebase) );
14471   ins_pipe(pipe_class_call);
14472 %}
14473 
14474 // Call Runtime Instruction
14475 
14476 instruct CallRuntimeDirect(method meth) %{
14477   match(CallRuntime);
14478   effect(USE meth);
14479   ins_cost(CALL_COST);
14480 
14481   // Enc_java_to_runtime_call needs up to 3 constants: call target,
14482   // env for callee, C-toc.
14483   ins_num_consts(3);
14484 
14485   format %{ "CALL,runtime" %}
14486   ins_encode( enc_java_to_runtime_call(meth) );
14487   ins_pipe(pipe_class_call);
14488 %}
14489 
14490 // Call Leaf
14491 
14492 // Used by postalloc expand of CallLeafDirect_Ex (mtctr).
14493 instruct CallLeafDirect_mtctr(iRegLdst dst, iRegLsrc src) %{
14494   effect(DEF dst, USE src);
14495 
14496   ins_num_consts(1);
14497 
14498   format %{ "MTCTR   $src" %}
14499   size(4);
14500   ins_encode( enc_leaf_call_mtctr(src) );
14501   ins_pipe(pipe_class_default);
14502 %}
14503 
14504 // Used by postalloc expand of CallLeafDirect_Ex (actual call).
14505 instruct CallLeafDirect(method meth) %{
  match(CallLeaf);   // To get all the data fields we need ...
14507   effect(USE meth);
14508   predicate(false);  // but never match.
14509 
14510   format %{ "BCTRL     \t// leaf call $meth ==> " %}
14511   size((Continuations::enabled() ? 8 : 4));
14512   ins_encode %{
14513     __ bctrl();
14514     __ post_call_nop();
14515   %}
14516   ins_pipe(pipe_class_call);
14517 %}
14518 
14519 // postalloc expand of CallLeafDirect.
14520 // Load address to call from TOC, then bl to it.
14521 instruct CallLeafDirect_Ex(method meth) %{
14522   match(CallLeaf);
14523   effect(USE meth);
14524   ins_cost(CALL_COST);
14525 
14526   // Postalloc_expand_java_to_runtime_call needs up to 3 constants: call target,
14527   // env for callee, C-toc.
14528   ins_num_consts(3);
14529 
14530   format %{ "CALL,runtime leaf $meth \t// postalloc expanded" %}
14531   postalloc_expand( postalloc_expand_java_to_runtime_call(meth, constanttablebase) );
14532 %}
14533 
14534 // Call runtime without safepoint - same as CallLeaf.
14535 // postalloc expand of CallLeafNoFPDirect.
14536 // Load address to call from TOC, then bl to it.
14537 instruct CallLeafNoFPDirect_Ex(method meth) %{
14538   match(CallLeafNoFP);
14539   effect(USE meth);
14540   ins_cost(CALL_COST);
14541 
14542   // Enc_java_to_runtime_call needs up to 3 constants: call target,
14543   // env for callee, C-toc.
14544   ins_num_consts(3);
14545 
14546   format %{ "CALL,runtime leaf nofp $meth \t// postalloc expanded" %}
14547   postalloc_expand( postalloc_expand_java_to_runtime_call(meth, constanttablebase) );
14548 %}
14549 
14550 // Tail Call; Jump from runtime stub to Java code.
14551 // Also known as an 'interprocedural jump'.
14552 // Target of jump will eventually return to caller.
14553 // TailJump below removes the return address.
14554 instruct TailCalljmpInd(iRegPdstNoScratch jump_target, inline_cache_regP method_ptr) %{
14555   match(TailCall jump_target method_ptr);
14556   ins_cost(CALL_COST);
14557 
14558   format %{ "MTCTR   $jump_target \t// $method_ptr holds method\n\t"
14559             "BCTR         \t// tail call" %}
14560   size(8);
14561   ins_encode %{
14562     __ mtctr($jump_target$$Register);
14563     __ bctr();
14564   %}
14565   ins_pipe(pipe_class_call);
14566 %}
14567 
14568 // Return Instruction
14569 instruct Ret() %{
14570   match(Return);
14571   format %{ "BLR      \t// branch to link register" %}
14572   size(4);
14573   ins_encode %{
14574     // LR is restored in MachEpilogNode. Just do the RET here.
14575     __ blr();
14576   %}
14577   ins_pipe(pipe_class_default);
14578 %}
14579 
14580 // Tail Jump; remove the return address; jump to target.
14581 // TailCall above leaves the return address around.
14582 // TailJump is used in only one place, the rethrow_Java stub (fancy_jump=2).
14583 // ex_oop (Exception Oop) is needed in %o0 at the jump. As there would be a
14584 // "restore" before this instruction (in Epilogue), we need to materialize it
14585 // in %i0.
14586 instruct tailjmpInd(iRegPdstNoScratch jump_target, rarg1RegP ex_oop) %{
14587   match(TailJump jump_target ex_oop);
14588   ins_cost(CALL_COST);
14589 
14590   format %{ "LD      R4_ARG2 = LR\n\t"
14591             "MTCTR   $jump_target\n\t"
14592             "BCTR     \t// TailJump, exception oop: $ex_oop" %}
14593   size(12);
14594   ins_encode %{
14595     __ ld(R4_ARG2/* issuing pc */, _abi0(lr), R1_SP);
14596     __ mtctr($jump_target$$Register);
14597     __ bctr();
14598   %}
14599   ins_pipe(pipe_class_call);
14600 %}
14601 
14602 // Create exception oop: created by stack-crawling runtime code.
14603 // Created exception is now available to this handler, and is setup
14604 // just prior to jumping to this handler. No code emitted.
14605 instruct CreateException(rarg1RegP ex_oop) %{
14606   match(Set ex_oop (CreateEx));
14607   ins_cost(0);
14608 
14609   format %{ " -- \t// exception oop; no code emitted" %}
14610   size(0);
14611   ins_encode( /*empty*/ );
14612   ins_pipe(pipe_class_default);
14613 %}
14614 
14615 // Rethrow exception: The exception oop will come in the first
14616 // argument position. Then JUMP (not call) to the rethrow stub code.
14617 instruct RethrowException() %{
14618   match(Rethrow);
14619   ins_cost(CALL_COST);
14620 
14621   format %{ "Jmp     rethrow_stub" %}
14622   ins_encode %{
14623     cbuf.set_insts_mark();
14624     __ b64_patchable((address)OptoRuntime::rethrow_stub(), relocInfo::runtime_call_type);
14625   %}
14626   ins_pipe(pipe_class_call);
14627 %}
14628 
14629 // Die now.
14630 instruct ShouldNotReachHere() %{
14631   match(Halt);
14632   ins_cost(CALL_COST);
14633 
14634   format %{ "ShouldNotReachHere" %}
14635   ins_encode %{
14636     if (is_reachable()) {
14637       __ stop(_halt_reason);
14638     }
14639   %}
14640   ins_pipe(pipe_class_default);
14641 %}
14642 
// This name is KNOWN by the ADLC and cannot be changed. The ADLC
// forces a 'TypeRawPtr::BOTTOM' output type for this node.
// Get a DEF on threadRegP with no cost and no encoding; use
// 'ins_should_rematerialize(true)' to avoid spilling.
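// No code is needed because on PPC64 the dedicated thread register
// (R16_thread) permanently holds the current thread.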
14647 instruct tlsLoadP(threadRegP dst) %{
14648   match(Set dst (ThreadLocal));
14649   ins_cost(0);
14650 
14651   ins_should_rematerialize(true);
14652 
14653   format %{ " -- \t// $dst=Thread::current(), empty" %}
14654   size(0);
14655   ins_encode( /*empty*/ );
14656   ins_pipe(pipe_class_empty);
14657 %}
14658 
14659 //---Some PPC specific nodes---------------------------------------------------
14660 
14661 // Stop a group.
14662 instruct endGroup() %{
14663   ins_cost(0);
14664 
14665   ins_is_nop(true);
14666 
14667   format %{ "End Bundle (ori r1, r1, 0)" %}
14668   size(4);
14669   ins_encode %{
14670     __ endgroup();
14671   %}
14672   ins_pipe(pipe_class_default);
14673 %}
14674 
14675 // Nop instructions
14676 
14677 instruct fxNop() %{
14678   ins_cost(0);
14679 
14680   ins_is_nop(true);
14681 
14682   format %{ "fxNop" %}
14683   size(4);
14684   ins_encode %{
14685     __ nop();
14686   %}
14687   ins_pipe(pipe_class_default);
14688 %}
14689 
14690 instruct fpNop0() %{
14691   ins_cost(0);
14692 
14693   ins_is_nop(true);
14694 
14695   format %{ "fpNop0" %}
14696   size(4);
14697   ins_encode %{
14698     __ fpnop0();
14699   %}
14700   ins_pipe(pipe_class_default);
14701 %}
14702 
14703 instruct fpNop1() %{
14704   ins_cost(0);
14705 
14706   ins_is_nop(true);
14707 
14708   format %{ "fpNop1" %}
14709   size(4);
14710   ins_encode %{
14711     __ fpnop1();
14712   %}
14713   ins_pipe(pipe_class_default);
14714 %}
14715 
instruct brNop0() %{
  ins_cost(0);

  ins_is_nop(true);

  format %{ "brNop0" %}
  size(4);
  ins_encode %{
    __ brnop0();
  %}
  ins_pipe(pipe_class_default);
%}
14726 
14727 instruct brNop1() %{
14728   ins_cost(0);
14729 
14730   ins_is_nop(true);
14731 
14732   format %{ "brNop1" %}
14733   size(4);
14734   ins_encode %{
14735     __ brnop1();
14736   %}
14737   ins_pipe(pipe_class_default);
14738 %}
14739 
14740 instruct brNop2() %{
14741   ins_cost(0);
14742 
14743   ins_is_nop(true);
14744 
14745   format %{ "brNop2" %}
14746   size(4);
14747   ins_encode %{
14748     __ brnop2();
14749   %}
14750   ins_pipe(pipe_class_default);
14751 %}
14752 
instruct cacheWB(indirect addr) %{
14755   match(CacheWB addr);
14756 
14757   ins_cost(100);
14758   format %{ "cache writeback, address = $addr" %}
14759   ins_encode %{
14760     assert($addr->index_position() < 0, "should be");
14761     assert($addr$$disp == 0, "should be");
14762     __ cache_wb(Address($addr$$base$$Register));
14763   %}
14764   ins_pipe(pipe_class_default);
14765 %}
14766 
instruct cacheWBPreSync() %{
14769   match(CacheWBPreSync);
14770 
14771   ins_cost(0);
14772   format %{ "cache writeback presync" %}
14773   ins_encode %{
14774     __ cache_wbsync(true);
14775   %}
14776   ins_pipe(pipe_class_default);
14777 %}
14778 
instruct cacheWBPostSync() %{
14781   match(CacheWBPostSync);
14782 
14783   ins_cost(100);
14784   format %{ "cache writeback postsync" %}
14785   ins_encode %{
14786     __ cache_wbsync(false);
14787   %}
14788   ins_pipe(pipe_class_default);
14789 %}
14790 
14791 //----------PEEPHOLE RULES-----------------------------------------------------
// These must follow all instruction definitions as they use the names
// defined in the instruction definitions.
14794 //
// peepmatch ( root_instr_name [preceding_instruction]* );
14796 //
// peepconstraint ( instruction_number.operand_name relational_op
//                  instruction_number.operand_name [, ...] );
14800 // // instruction numbers are zero-based using left to right order in peepmatch
14801 //
14802 // peepreplace ( instr_name ( [instruction_number.operand_name]* ) );
14803 // // provide an instruction_number.operand_name for each operand that appears
14804 // // in the replacement instruction's match rule
14805 //
14806 // ---------VM FLAGS---------------------------------------------------------
14807 //
14808 // All peephole optimizations can be turned off using -XX:-OptoPeephole
14809 //
14810 // Each peephole rule is given an identifying number starting with zero and
14811 // increasing by one in the order seen by the parser. An individual peephole
14812 // can be enabled, and all others disabled, by using -XX:OptoPeepholeAt=#
14813 // on the command-line.
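//
// Example (illustrative only; these are develop flags in current sources, so
// a debug build of the VM may be required):
//   java -XX:-OptoPeephole ...       // turn off all peephole rules
//   java -XX:OptoPeepholeAt=2 ...    // apply only peephole rule number 2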
14814 //
14815 // ---------CURRENT LIMITATIONS----------------------------------------------
14816 //
14817 // Only match adjacent instructions in same basic block
14818 // Only equality constraints
14819 // Only constraints between operands, not (0.dest_reg == EAX_enc)
14820 // Only one replacement instruction
14821 //
14822 // ---------EXAMPLE----------------------------------------------------------
14823 //
14824 // // pertinent parts of existing instructions in architecture description
14825 // instruct movI(eRegI dst, eRegI src) %{
14826 //   match(Set dst (CopyI src));
14827 // %}
14828 //
14829 // instruct incI_eReg(eRegI dst, immI1 src, eFlagsReg cr) %{
14830 //   match(Set dst (AddI dst src));
14831 //   effect(KILL cr);
14832 // %}
14833 //
14834 // // Change (inc mov) to lea
14835 // peephole %{
14836 //   // increment preceded by register-register move
14837 //   peepmatch ( incI_eReg movI );
14838 //   // require that the destination register of the increment
14839 //   // match the destination register of the move
14840 //   peepconstraint ( 0.dst == 1.dst );
14841 //   // construct a replacement instruction that sets
14842 //   // the destination to ( move's source register + one )
14843 //   peepreplace ( leaI_eReg_immI( 0.dst 1.src 0.src ) );
14844 // %}
14845 //
// The implementation no longer uses movX instructions since the
// machine-independent system no longer uses CopyX nodes.
14848 //
14849 // peephole %{
14850 //   peepmatch ( incI_eReg movI );
14851 //   peepconstraint ( 0.dst == 1.dst );
14852 //   peepreplace ( leaI_eReg_immI( 0.dst 1.src 0.src ) );
14853 // %}
14854 //
14855 // peephole %{
14856 //   peepmatch ( decI_eReg movI );
14857 //   peepconstraint ( 0.dst == 1.dst );
14858 //   peepreplace ( leaI_eReg_immI( 0.dst 1.src 0.src ) );
14859 // %}
14860 //
14861 // peephole %{
14862 //   peepmatch ( addI_eReg_imm movI );
14863 //   peepconstraint ( 0.dst == 1.dst );
14864 //   peepreplace ( leaI_eReg_immI( 0.dst 1.src 0.src ) );
14865 // %}
14866 //
14867 // peephole %{
14868 //   peepmatch ( addP_eReg_imm movP );
14869 //   peepconstraint ( 0.dst == 1.dst );
14870 //   peepreplace ( leaP_eReg_immI( 0.dst 1.src 0.src ) );
14871 // %}
14872 
14873 // // Change load of spilled value to only a spill
14874 // instruct storeI(memory mem, eRegI src) %{
14875 //   match(Set mem (StoreI mem src));
14876 // %}
14877 //
14878 // instruct loadI(eRegI dst, memory mem) %{
14879 //   match(Set dst (LoadI mem));
14880 // %}
14881 //
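// The peepholes below implement the "load of spilled value" pattern: when a
// load immediately follows a store of the same register to the same memory
// location, the load/store pair is replaced by the store alone.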
14882 peephole %{
14883   peepmatch ( loadI storeI );
14884   peepconstraint ( 1.src == 0.dst, 1.mem == 0.mem );
14885   peepreplace ( storeI( 1.mem 1.mem 1.src ) );
14886 %}
14887 
14888 peephole %{
14889   peepmatch ( loadL storeL );
14890   peepconstraint ( 1.src == 0.dst, 1.mem == 0.mem );
14891   peepreplace ( storeL( 1.mem 1.mem 1.src ) );
14892 %}
14893 
14894 peephole %{
14895   peepmatch ( loadP storeP );
  peepconstraint ( 1.src == 0.dst, 1.mem == 0.mem );
  peepreplace ( storeP( 1.mem 1.mem 1.src ) );
14898 %}
14899 
14900 //----------SMARTSPILL RULES---------------------------------------------------
// These must follow all instruction definitions as they use the names
// defined in the instruction definitions.