1 //
    2 // Copyright (c) 2011, 2021, Oracle and/or its affiliates. All rights reserved.
    3 // Copyright (c) 2012, 2021 SAP SE. All rights reserved.
    4 // DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
    5 //
    6 // This code is free software; you can redistribute it and/or modify it
    7 // under the terms of the GNU General Public License version 2 only, as
    8 // published by the Free Software Foundation.
    9 //
   10 // This code is distributed in the hope that it will be useful, but WITHOUT
   11 // ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
   12 // FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
   13 // version 2 for more details (a copy is included in the LICENSE file that
   14 // accompanied this code).
   15 //
   16 // You should have received a copy of the GNU General Public License version
   17 // 2 along with this work; if not, write to the Free Software Foundation,
   18 // Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
   19 //
   20 // Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
   21 // or visit www.oracle.com if you need additional information or have any
   22 // questions.
   23 //
   24 //
   25 
   26 //
   27 // PPC64 Architecture Description File
   28 //
   29 
   30 //----------REGISTER DEFINITION BLOCK------------------------------------------
   31 // This information is used by the matcher and the register allocator to
   32 // describe individual registers and classes of registers within the target
   33 // architecture.
   34 register %{
   35 //----------Architecture Description Register Definitions----------------------
   36 // General Registers
   37 // "reg_def"  name (register save type, C convention save type,
   38 //                  ideal register type, encoding);
   39 //
   40 // Register Save Types:
   41 //
   42 //   NS  = No-Save:     The register allocator assumes that these registers
   43 //                      can be used without saving upon entry to the method, &
   44 //                      that they do not need to be saved at call sites.
   45 //
   46 //   SOC = Save-On-Call: The register allocator assumes that these registers
   47 //                      can be used without saving upon entry to the method,
   48 //                      but that they must be saved at call sites.
   49 //                      These are called "volatiles" on ppc.
   50 //
   51 //   SOE = Save-On-Entry: The register allocator assumes that these registers
   52 //                      must be saved before using them upon entry to the
   53 //                      method, but they do not need to be saved at call
   54 //                      sites.
   55 //                      These are called "nonvolatiles" on ppc.
   56 //
   57 //   AS  = Always-Save:   The register allocator assumes that these registers
   58 //                      must be saved before using them upon entry to the
   59 //                      method, & that they must be saved at call sites.
   60 //
   61 // Ideal Register Type is used to determine how to save & restore a
   62 // register. Op_RegI will get spilled with LoadI/StoreI, Op_RegP will get
   63 // spilled with LoadP/StoreP. If the register supports both, use Op_RegI.
   64 //
   65 // The encoding number is the actual bit-pattern placed into the opcodes.
   66 //
   67 // PPC64 register definitions, based on the 64-bit PowerPC ELF ABI
   68 // Supplement Version 1.7 as of 2003-10-29.
   69 //
   70 // For each 64-bit register we must define two registers: the register
   71 // itself, e.g. R3, and a corresponding virtual 32-bit 'half',
   72 // e.g. R3_H, which is needed by the allocator but is not used
   73 // for stores, loads, etc.
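      // For example, the 64-bit register R3 is described to the allocator by the
      // following pair (these lines appear verbatim in the integer section below):
      //   reg_def R3   ( SOC, SOC, Op_RegI,  3, R3->as_VMReg()         );  // v   iarg1 & iret
      //   reg_def R3_H ( SOC, SOC, Op_RegI, 99, R3->as_VMReg()->next() );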
   74 
   75 // ----------------------------
   76 // Integer/Long Registers
   77 // ----------------------------
   78 
   79   // PPC64 has 32 64-bit integer registers.
   80 
   81   // types: v = volatile, nv = non-volatile, s = system
   82   reg_def R0   ( SOC, SOC, Op_RegI,  0, R0->as_VMReg()         );  // v   used in prologs
   83   reg_def R0_H ( SOC, SOC, Op_RegI, 99, R0->as_VMReg()->next() );
   84   reg_def R1   ( NS,  NS,  Op_RegI,  1, R1->as_VMReg()         );  // s   SP
   85   reg_def R1_H ( NS,  NS,  Op_RegI, 99, R1->as_VMReg()->next() );
   86   reg_def R2   ( SOC, SOC, Op_RegI,  2, R2->as_VMReg()         );  // v   TOC
   87   reg_def R2_H ( SOC, SOC, Op_RegI, 99, R2->as_VMReg()->next() );
   88   reg_def R3   ( SOC, SOC, Op_RegI,  3, R3->as_VMReg()         );  // v   iarg1 & iret
   89   reg_def R3_H ( SOC, SOC, Op_RegI, 99, R3->as_VMReg()->next() );
   90   reg_def R4   ( SOC, SOC, Op_RegI,  4, R4->as_VMReg()         );  // v   iarg2
   91   reg_def R4_H ( SOC, SOC, Op_RegI, 99, R4->as_VMReg()->next() );
   92   reg_def R5   ( SOC, SOC, Op_RegI,  5, R5->as_VMReg()         );  // v   iarg3
   93   reg_def R5_H ( SOC, SOC, Op_RegI, 99, R5->as_VMReg()->next() );
   94   reg_def R6   ( SOC, SOC, Op_RegI,  6, R6->as_VMReg()         );  // v   iarg4
   95   reg_def R6_H ( SOC, SOC, Op_RegI, 99, R6->as_VMReg()->next() );
   96   reg_def R7   ( SOC, SOC, Op_RegI,  7, R7->as_VMReg()         );  // v   iarg5
   97   reg_def R7_H ( SOC, SOC, Op_RegI, 99, R7->as_VMReg()->next() );
   98   reg_def R8   ( SOC, SOC, Op_RegI,  8, R8->as_VMReg()         );  // v   iarg6
   99   reg_def R8_H ( SOC, SOC, Op_RegI, 99, R8->as_VMReg()->next() );
  100   reg_def R9   ( SOC, SOC, Op_RegI,  9, R9->as_VMReg()         );  // v   iarg7
  101   reg_def R9_H ( SOC, SOC, Op_RegI, 99, R9->as_VMReg()->next() );
  102   reg_def R10  ( SOC, SOC, Op_RegI, 10, R10->as_VMReg()        );  // v   iarg8
  103   reg_def R10_H( SOC, SOC, Op_RegI, 99, R10->as_VMReg()->next());
  104   reg_def R11  ( SOC, SOC, Op_RegI, 11, R11->as_VMReg()        );  // v   ENV / scratch
  105   reg_def R11_H( SOC, SOC, Op_RegI, 99, R11->as_VMReg()->next());
  106   reg_def R12  ( SOC, SOC, Op_RegI, 12, R12->as_VMReg()        );  // v   scratch
  107   reg_def R12_H( SOC, SOC, Op_RegI, 99, R12->as_VMReg()->next());
  108   reg_def R13  ( NS,  NS,  Op_RegI, 13, R13->as_VMReg()        );  // s   system thread id
  109   reg_def R13_H( NS,  NS,  Op_RegI, 99, R13->as_VMReg()->next());
  110   reg_def R14  ( SOC, SOE, Op_RegI, 14, R14->as_VMReg()        );  // nv
  111   reg_def R14_H( SOC, SOE, Op_RegI, 99, R14->as_VMReg()->next());
  112   reg_def R15  ( SOC, SOE, Op_RegI, 15, R15->as_VMReg()        );  // nv
  113   reg_def R15_H( SOC, SOE, Op_RegI, 99, R15->as_VMReg()->next());
  114   reg_def R16  ( SOC, SOE, Op_RegI, 16, R16->as_VMReg()        );  // nv
  115   reg_def R16_H( SOC, SOE, Op_RegI, 99, R16->as_VMReg()->next());
  116   reg_def R17  ( SOC, SOE, Op_RegI, 17, R17->as_VMReg()        );  // nv
  117   reg_def R17_H( SOC, SOE, Op_RegI, 99, R17->as_VMReg()->next());
  118   reg_def R18  ( SOC, SOE, Op_RegI, 18, R18->as_VMReg()        );  // nv
  119   reg_def R18_H( SOC, SOE, Op_RegI, 99, R18->as_VMReg()->next());
  120   reg_def R19  ( SOC, SOE, Op_RegI, 19, R19->as_VMReg()        );  // nv
  121   reg_def R19_H( SOC, SOE, Op_RegI, 99, R19->as_VMReg()->next());
  122   reg_def R20  ( SOC, SOE, Op_RegI, 20, R20->as_VMReg()        );  // nv
  123   reg_def R20_H( SOC, SOE, Op_RegI, 99, R20->as_VMReg()->next());
  124   reg_def R21  ( SOC, SOE, Op_RegI, 21, R21->as_VMReg()        );  // nv
  125   reg_def R21_H( SOC, SOE, Op_RegI, 99, R21->as_VMReg()->next());
  126   reg_def R22  ( SOC, SOE, Op_RegI, 22, R22->as_VMReg()        );  // nv
  127   reg_def R22_H( SOC, SOE, Op_RegI, 99, R22->as_VMReg()->next());
  128   reg_def R23  ( SOC, SOE, Op_RegI, 23, R23->as_VMReg()        );  // nv
  129   reg_def R23_H( SOC, SOE, Op_RegI, 99, R23->as_VMReg()->next());
  130   reg_def R24  ( SOC, SOE, Op_RegI, 24, R24->as_VMReg()        );  // nv
  131   reg_def R24_H( SOC, SOE, Op_RegI, 99, R24->as_VMReg()->next());
  132   reg_def R25  ( SOC, SOE, Op_RegI, 25, R25->as_VMReg()        );  // nv
  133   reg_def R25_H( SOC, SOE, Op_RegI, 99, R25->as_VMReg()->next());
  134   reg_def R26  ( SOC, SOE, Op_RegI, 26, R26->as_VMReg()        );  // nv
  135   reg_def R26_H( SOC, SOE, Op_RegI, 99, R26->as_VMReg()->next());
  136   reg_def R27  ( SOC, SOE, Op_RegI, 27, R27->as_VMReg()        );  // nv
  137   reg_def R27_H( SOC, SOE, Op_RegI, 99, R27->as_VMReg()->next());
  138   reg_def R28  ( SOC, SOE, Op_RegI, 28, R28->as_VMReg()        );  // nv
  139   reg_def R28_H( SOC, SOE, Op_RegI, 99, R28->as_VMReg()->next());
  140   reg_def R29  ( SOC, SOE, Op_RegI, 29, R29->as_VMReg()        );  // nv
  141   reg_def R29_H( SOC, SOE, Op_RegI, 99, R29->as_VMReg()->next());
  142   reg_def R30  ( SOC, SOE, Op_RegI, 30, R30->as_VMReg()        );  // nv
  143   reg_def R30_H( SOC, SOE, Op_RegI, 99, R30->as_VMReg()->next());
  144   reg_def R31  ( SOC, SOE, Op_RegI, 31, R31->as_VMReg()        );  // nv
  145   reg_def R31_H( SOC, SOE, Op_RegI, 99, R31->as_VMReg()->next());
  146 
  147 
  148 // ----------------------------
  149 // Float/Double Registers
  150 // ----------------------------
  151 
  152   // Double Registers
  153   // The rules of ADL require that double registers be defined in pairs.
  154   // Each pair must be two 32-bit values, but not necessarily a pair of
  155   // single float registers. In each pair, ADLC-assigned register numbers
  156   // must be adjacent, with the lower number even. Finally, when the
  157   // CPU stores such a register pair to memory, the word associated with
  158   // the lower ADLC-assigned number must be stored to the lower address.
  159 
  160   // PPC64 has 32 64-bit floating-point registers. Each can store a single
  161   // or double precision floating-point value.
  162 
  163   // types: v = volatile, nv = non-volatile, s = system
  164   reg_def F0   ( SOC, SOC, Op_RegF,  0, F0->as_VMReg()         );  // v   scratch
  165   reg_def F0_H ( SOC, SOC, Op_RegF, 99, F0->as_VMReg()->next() );
  166   reg_def F1   ( SOC, SOC, Op_RegF,  1, F1->as_VMReg()         );  // v   farg1 & fret
  167   reg_def F1_H ( SOC, SOC, Op_RegF, 99, F1->as_VMReg()->next() );
  168   reg_def F2   ( SOC, SOC, Op_RegF,  2, F2->as_VMReg()         );  // v   farg2
  169   reg_def F2_H ( SOC, SOC, Op_RegF, 99, F2->as_VMReg()->next() );
  170   reg_def F3   ( SOC, SOC, Op_RegF,  3, F3->as_VMReg()         );  // v   farg3
  171   reg_def F3_H ( SOC, SOC, Op_RegF, 99, F3->as_VMReg()->next() );
  172   reg_def F4   ( SOC, SOC, Op_RegF,  4, F4->as_VMReg()         );  // v   farg4
  173   reg_def F4_H ( SOC, SOC, Op_RegF, 99, F4->as_VMReg()->next() );
  174   reg_def F5   ( SOC, SOC, Op_RegF,  5, F5->as_VMReg()         );  // v   farg5
  175   reg_def F5_H ( SOC, SOC, Op_RegF, 99, F5->as_VMReg()->next() );
  176   reg_def F6   ( SOC, SOC, Op_RegF,  6, F6->as_VMReg()         );  // v   farg6
  177   reg_def F6_H ( SOC, SOC, Op_RegF, 99, F6->as_VMReg()->next() );
  178   reg_def F7   ( SOC, SOC, Op_RegF,  7, F7->as_VMReg()         );  // v   farg7
  179   reg_def F7_H ( SOC, SOC, Op_RegF, 99, F7->as_VMReg()->next() );
  180   reg_def F8   ( SOC, SOC, Op_RegF,  8, F8->as_VMReg()         );  // v   farg8
  181   reg_def F8_H ( SOC, SOC, Op_RegF, 99, F8->as_VMReg()->next() );
  182   reg_def F9   ( SOC, SOC, Op_RegF,  9, F9->as_VMReg()         );  // v   farg9
  183   reg_def F9_H ( SOC, SOC, Op_RegF, 99, F9->as_VMReg()->next() );
  184   reg_def F10  ( SOC, SOC, Op_RegF, 10, F10->as_VMReg()        );  // v   farg10
  185   reg_def F10_H( SOC, SOC, Op_RegF, 99, F10->as_VMReg()->next());
  186   reg_def F11  ( SOC, SOC, Op_RegF, 11, F11->as_VMReg()        );  // v   farg11
  187   reg_def F11_H( SOC, SOC, Op_RegF, 99, F11->as_VMReg()->next());
  188   reg_def F12  ( SOC, SOC, Op_RegF, 12, F12->as_VMReg()        );  // v   farg12
  189   reg_def F12_H( SOC, SOC, Op_RegF, 99, F12->as_VMReg()->next());
  190   reg_def F13  ( SOC, SOC, Op_RegF, 13, F13->as_VMReg()        );  // v   farg13
  191   reg_def F13_H( SOC, SOC, Op_RegF, 99, F13->as_VMReg()->next());
  192   reg_def F14  ( SOC, SOE, Op_RegF, 14, F14->as_VMReg()        );  // nv
  193   reg_def F14_H( SOC, SOE, Op_RegF, 99, F14->as_VMReg()->next());
  194   reg_def F15  ( SOC, SOE, Op_RegF, 15, F15->as_VMReg()        );  // nv
  195   reg_def F15_H( SOC, SOE, Op_RegF, 99, F15->as_VMReg()->next());
  196   reg_def F16  ( SOC, SOE, Op_RegF, 16, F16->as_VMReg()        );  // nv
  197   reg_def F16_H( SOC, SOE, Op_RegF, 99, F16->as_VMReg()->next());
  198   reg_def F17  ( SOC, SOE, Op_RegF, 17, F17->as_VMReg()        );  // nv
  199   reg_def F17_H( SOC, SOE, Op_RegF, 99, F17->as_VMReg()->next());
  200   reg_def F18  ( SOC, SOE, Op_RegF, 18, F18->as_VMReg()        );  // nv
  201   reg_def F18_H( SOC, SOE, Op_RegF, 99, F18->as_VMReg()->next());
  202   reg_def F19  ( SOC, SOE, Op_RegF, 19, F19->as_VMReg()        );  // nv
  203   reg_def F19_H( SOC, SOE, Op_RegF, 99, F19->as_VMReg()->next());
  204   reg_def F20  ( SOC, SOE, Op_RegF, 20, F20->as_VMReg()        );  // nv
  205   reg_def F20_H( SOC, SOE, Op_RegF, 99, F20->as_VMReg()->next());
  206   reg_def F21  ( SOC, SOE, Op_RegF, 21, F21->as_VMReg()        );  // nv
  207   reg_def F21_H( SOC, SOE, Op_RegF, 99, F21->as_VMReg()->next());
  208   reg_def F22  ( SOC, SOE, Op_RegF, 22, F22->as_VMReg()        );  // nv
  209   reg_def F22_H( SOC, SOE, Op_RegF, 99, F22->as_VMReg()->next());
  210   reg_def F23  ( SOC, SOE, Op_RegF, 23, F23->as_VMReg()        );  // nv
  211   reg_def F23_H( SOC, SOE, Op_RegF, 99, F23->as_VMReg()->next());
  212   reg_def F24  ( SOC, SOE, Op_RegF, 24, F24->as_VMReg()        );  // nv
  213   reg_def F24_H( SOC, SOE, Op_RegF, 99, F24->as_VMReg()->next());
  214   reg_def F25  ( SOC, SOE, Op_RegF, 25, F25->as_VMReg()        );  // nv
  215   reg_def F25_H( SOC, SOE, Op_RegF, 99, F25->as_VMReg()->next());
  216   reg_def F26  ( SOC, SOE, Op_RegF, 26, F26->as_VMReg()        );  // nv
  217   reg_def F26_H( SOC, SOE, Op_RegF, 99, F26->as_VMReg()->next());
  218   reg_def F27  ( SOC, SOE, Op_RegF, 27, F27->as_VMReg()        );  // nv
  219   reg_def F27_H( SOC, SOE, Op_RegF, 99, F27->as_VMReg()->next());
  220   reg_def F28  ( SOC, SOE, Op_RegF, 28, F28->as_VMReg()        );  // nv
  221   reg_def F28_H( SOC, SOE, Op_RegF, 99, F28->as_VMReg()->next());
  222   reg_def F29  ( SOC, SOE, Op_RegF, 29, F29->as_VMReg()        );  // nv
  223   reg_def F29_H( SOC, SOE, Op_RegF, 99, F29->as_VMReg()->next());
  224   reg_def F30  ( SOC, SOE, Op_RegF, 30, F30->as_VMReg()        );  // nv
  225   reg_def F30_H( SOC, SOE, Op_RegF, 99, F30->as_VMReg()->next());
  226   reg_def F31  ( SOC, SOE, Op_RegF, 31, F31->as_VMReg()        );  // nv
  227   reg_def F31_H( SOC, SOE, Op_RegF, 99, F31->as_VMReg()->next());
  228 
  229 // ----------------------------
  230 // Special Registers
  231 // ----------------------------
  232 
  233 // Condition Codes Flag Registers
  234 
  235   // PPC64 has 8 condition code "registers" which are all contained
  236   // in the CR register.
  237 
  238   // types: v = volatile, nv = non-volatile, s = system
  239   reg_def CCR0(SOC, SOC, Op_RegFlags, 0, CCR0->as_VMReg());  // v
  240   reg_def CCR1(SOC, SOC, Op_RegFlags, 1, CCR1->as_VMReg());  // v
  241   reg_def CCR2(SOC, SOC, Op_RegFlags, 2, CCR2->as_VMReg());  // nv
  242   reg_def CCR3(SOC, SOC, Op_RegFlags, 3, CCR3->as_VMReg());  // nv
  243   reg_def CCR4(SOC, SOC, Op_RegFlags, 4, CCR4->as_VMReg());  // nv
  244   reg_def CCR5(SOC, SOC, Op_RegFlags, 5, CCR5->as_VMReg());  // v
  245   reg_def CCR6(SOC, SOC, Op_RegFlags, 6, CCR6->as_VMReg());  // v
  246   reg_def CCR7(SOC, SOC, Op_RegFlags, 7, CCR7->as_VMReg());  // v
  247 
  248   // Special registers of PPC64
  249 
  250   reg_def SR_XER(    SOC, SOC, Op_RegP, 0, SR_XER->as_VMReg());     // v
  251   reg_def SR_LR(     SOC, SOC, Op_RegP, 1, SR_LR->as_VMReg());      // v
  252   reg_def SR_CTR(    SOC, SOC, Op_RegP, 2, SR_CTR->as_VMReg());     // v
  253   reg_def SR_VRSAVE( SOC, SOC, Op_RegP, 3, SR_VRSAVE->as_VMReg());  // v
  254   reg_def SR_SPEFSCR(SOC, SOC, Op_RegP, 4, SR_SPEFSCR->as_VMReg()); // v
  255   reg_def SR_PPR(    SOC, SOC, Op_RegP, 5, SR_PPR->as_VMReg());     // v
  256 
  257 // ----------------------------
  258 // Vector-Scalar Registers
  259 // ----------------------------
  260   reg_def VSR0 ( SOC, SOC, Op_VecX, 0, NULL);
  261   reg_def VSR1 ( SOC, SOC, Op_VecX, 1, NULL);
  262   reg_def VSR2 ( SOC, SOC, Op_VecX, 2, NULL);
  263   reg_def VSR3 ( SOC, SOC, Op_VecX, 3, NULL);
  264   reg_def VSR4 ( SOC, SOC, Op_VecX, 4, NULL);
  265   reg_def VSR5 ( SOC, SOC, Op_VecX, 5, NULL);
  266   reg_def VSR6 ( SOC, SOC, Op_VecX, 6, NULL);
  267   reg_def VSR7 ( SOC, SOC, Op_VecX, 7, NULL);
  268   reg_def VSR8 ( SOC, SOC, Op_VecX, 8, NULL);
  269   reg_def VSR9 ( SOC, SOC, Op_VecX, 9, NULL);
  270   reg_def VSR10 ( SOC, SOC, Op_VecX, 10, NULL);
  271   reg_def VSR11 ( SOC, SOC, Op_VecX, 11, NULL);
  272   reg_def VSR12 ( SOC, SOC, Op_VecX, 12, NULL);
  273   reg_def VSR13 ( SOC, SOC, Op_VecX, 13, NULL);
  274   reg_def VSR14 ( SOC, SOC, Op_VecX, 14, NULL);
  275   reg_def VSR15 ( SOC, SOC, Op_VecX, 15, NULL);
  276   reg_def VSR16 ( SOC, SOC, Op_VecX, 16, NULL);
  277   reg_def VSR17 ( SOC, SOC, Op_VecX, 17, NULL);
  278   reg_def VSR18 ( SOC, SOC, Op_VecX, 18, NULL);
  279   reg_def VSR19 ( SOC, SOC, Op_VecX, 19, NULL);
  280   reg_def VSR20 ( SOC, SOC, Op_VecX, 20, NULL);
  281   reg_def VSR21 ( SOC, SOC, Op_VecX, 21, NULL);
  282   reg_def VSR22 ( SOC, SOC, Op_VecX, 22, NULL);
  283   reg_def VSR23 ( SOC, SOC, Op_VecX, 23, NULL);
  284   reg_def VSR24 ( SOC, SOC, Op_VecX, 24, NULL);
  285   reg_def VSR25 ( SOC, SOC, Op_VecX, 25, NULL);
  286   reg_def VSR26 ( SOC, SOC, Op_VecX, 26, NULL);
  287   reg_def VSR27 ( SOC, SOC, Op_VecX, 27, NULL);
  288   reg_def VSR28 ( SOC, SOC, Op_VecX, 28, NULL);
  289   reg_def VSR29 ( SOC, SOC, Op_VecX, 29, NULL);
  290   reg_def VSR30 ( SOC, SOC, Op_VecX, 30, NULL);
  291   reg_def VSR31 ( SOC, SOC, Op_VecX, 31, NULL);
  292   reg_def VSR32 ( SOC, SOC, Op_VecX, 32, NULL);
  293   reg_def VSR33 ( SOC, SOC, Op_VecX, 33, NULL);
  294   reg_def VSR34 ( SOC, SOC, Op_VecX, 34, NULL);
  295   reg_def VSR35 ( SOC, SOC, Op_VecX, 35, NULL);
  296   reg_def VSR36 ( SOC, SOC, Op_VecX, 36, NULL);
  297   reg_def VSR37 ( SOC, SOC, Op_VecX, 37, NULL);
  298   reg_def VSR38 ( SOC, SOC, Op_VecX, 38, NULL);
  299   reg_def VSR39 ( SOC, SOC, Op_VecX, 39, NULL);
  300   reg_def VSR40 ( SOC, SOC, Op_VecX, 40, NULL);
  301   reg_def VSR41 ( SOC, SOC, Op_VecX, 41, NULL);
  302   reg_def VSR42 ( SOC, SOC, Op_VecX, 42, NULL);
  303   reg_def VSR43 ( SOC, SOC, Op_VecX, 43, NULL);
  304   reg_def VSR44 ( SOC, SOC, Op_VecX, 44, NULL);
  305   reg_def VSR45 ( SOC, SOC, Op_VecX, 45, NULL);
  306   reg_def VSR46 ( SOC, SOC, Op_VecX, 46, NULL);
  307   reg_def VSR47 ( SOC, SOC, Op_VecX, 47, NULL);
  308   reg_def VSR48 ( SOC, SOC, Op_VecX, 48, NULL);
  309   reg_def VSR49 ( SOC, SOC, Op_VecX, 49, NULL);
  310   reg_def VSR50 ( SOC, SOC, Op_VecX, 50, NULL);
  311   reg_def VSR51 ( SOC, SOC, Op_VecX, 51, NULL);
  312   reg_def VSR52 ( SOC, SOC, Op_VecX, 52, NULL);
  313   reg_def VSR53 ( SOC, SOC, Op_VecX, 53, NULL);
  314   reg_def VSR54 ( SOC, SOC, Op_VecX, 54, NULL);
  315   reg_def VSR55 ( SOC, SOC, Op_VecX, 55, NULL);
  316   reg_def VSR56 ( SOC, SOC, Op_VecX, 56, NULL);
  317   reg_def VSR57 ( SOC, SOC, Op_VecX, 57, NULL);
  318   reg_def VSR58 ( SOC, SOC, Op_VecX, 58, NULL);
  319   reg_def VSR59 ( SOC, SOC, Op_VecX, 59, NULL);
  320   reg_def VSR60 ( SOC, SOC, Op_VecX, 60, NULL);
  321   reg_def VSR61 ( SOC, SOC, Op_VecX, 61, NULL);
  322   reg_def VSR62 ( SOC, SOC, Op_VecX, 62, NULL);
  323   reg_def VSR63 ( SOC, SOC, Op_VecX, 63, NULL);
  324 
  325 // ----------------------------
  326 // Specify priority of register selection within phases of register
  327 // allocation. Highest priority is first. A useful heuristic is to
  328 // give registers a low priority when they are required by machine
  329 // instructions, like EAX and EDX on I486, and choose no-save registers
  330 // before save-on-call, & save-on-call before save-on-entry. Registers
  331 // which participate in fixed calling sequences should come last.
  332 // Registers which are used as pairs must fall on an even boundary.
  333 
  334 // It's worth about 1% on SPEC geomean to get this right.
  335 
  336 // Chunk0, chunk1, and chunk2 form the MachRegisterNumbers enumeration
  337 // in adGlobals_ppc.hpp which defines the <register>_num values, e.g.
  338 // R3_num. Therefore, R3_num may not be (and in reality is not)
  339 // the same as R3->encoding()! Furthermore, we cannot make any
  340 // assumptions on ordering, e.g. R3_num may be less than R2_num.
  341 // Additionally, the function
  342 //   static enum RC rc_class(OptoReg::Name reg )
  343 // maps a given <register>_num value to its chunk type (except for flags)
  344 // and its current implementation relies on chunk0 and chunk1 having a
  345 // size of 64 each.
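      // As a rough sketch (the authoritative rc_class implementation lives further
      // down in this file), the mapping essentially decodes the chunk layout above:
      //   if (reg < 64)      return rc_int;    // chunk0: the 64 integer register halves
      //   if (reg < 64 + 64) return rc_float;  // chunk1: the 64 float register halves
      //   ...                                  // otherwise flags, vector-scalar regs or stack slots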
  346 
  347 // If you change this allocation class, please have a look at the
  348 // default values for the parameters RoundRobinIntegerRegIntervalStart
  349 // and RoundRobinFloatRegIntervalStart
  350 
  351 alloc_class chunk0 (
  352   // Chunk0 contains *all* 64 integer register halves.
  353 
  354   // "non-volatile" registers
  355   R14, R14_H,
  356   R15, R15_H,
  357   R17, R17_H,
  358   R18, R18_H,
  359   R19, R19_H,
  360   R20, R20_H,
  361   R21, R21_H,
  362   R22, R22_H,
  363   R23, R23_H,
  364   R24, R24_H,
  365   R25, R25_H,
  366   R26, R26_H,
  367   R27, R27_H,
  368   R28, R28_H,
  369   R29, R29_H,
  370   R30, R30_H,
  371   R31, R31_H,
  372 
  373   // scratch/special registers
  374   R11, R11_H,
  375   R12, R12_H,
  376 
  377   // argument registers
  378   R10, R10_H,
  379   R9,  R9_H,
  380   R8,  R8_H,
  381   R7,  R7_H,
  382   R6,  R6_H,
  383   R5,  R5_H,
  384   R4,  R4_H,
  385   R3,  R3_H,
  386 
  387   // special registers, not available for allocation
  388   R16, R16_H,     // R16_thread
  389   R13, R13_H,     // system thread id
  390   R2,  R2_H,      // may be used for TOC
  391   R1,  R1_H,      // SP
  392   R0,  R0_H       // R0 (scratch)
  393 );
  394 
  395 // If you change this allocation class, please have a look at the
  396 // default values for the parameters RoundRobinIntegerRegIntervalStart
  397 // and RoundRobinFloatRegIntervalStart
  398 
  399 alloc_class chunk1 (
  400   // Chunk1 contains *all* 64 floating-point register halves.
  401 
  402   // scratch register
  403   F0,  F0_H,
  404 
  405   // argument registers
  406   F13, F13_H,
  407   F12, F12_H,
  408   F11, F11_H,
  409   F10, F10_H,
  410   F9,  F9_H,
  411   F8,  F8_H,
  412   F7,  F7_H,
  413   F6,  F6_H,
  414   F5,  F5_H,
  415   F4,  F4_H,
  416   F3,  F3_H,
  417   F2,  F2_H,
  418   F1,  F1_H,
  419 
  420   // non-volatile registers
  421   F14, F14_H,
  422   F15, F15_H,
  423   F16, F16_H,
  424   F17, F17_H,
  425   F18, F18_H,
  426   F19, F19_H,
  427   F20, F20_H,
  428   F21, F21_H,
  429   F22, F22_H,
  430   F23, F23_H,
  431   F24, F24_H,
  432   F25, F25_H,
  433   F26, F26_H,
  434   F27, F27_H,
  435   F28, F28_H,
  436   F29, F29_H,
  437   F30, F30_H,
  438   F31, F31_H
  439 );
  440 
  441 alloc_class chunk2 (
  442   // Chunk2 contains *all* 8 condition code registers.
  443 
  444   CCR0,
  445   CCR1,
  446   CCR2,
  447   CCR3,
  448   CCR4,
  449   CCR5,
  450   CCR6,
  451   CCR7
  452 );
  453 
  454 alloc_class chunk3 (
  455   VSR0,
  456   VSR1,
  457   VSR2,
  458   VSR3,
  459   VSR4,
  460   VSR5,
  461   VSR6,
  462   VSR7,
  463   VSR8,
  464   VSR9,
  465   VSR10,
  466   VSR11,
  467   VSR12,
  468   VSR13,
  469   VSR14,
  470   VSR15,
  471   VSR16,
  472   VSR17,
  473   VSR18,
  474   VSR19,
  475   VSR20,
  476   VSR21,
  477   VSR22,
  478   VSR23,
  479   VSR24,
  480   VSR25,
  481   VSR26,
  482   VSR27,
  483   VSR28,
  484   VSR29,
  485   VSR30,
  486   VSR31,
  487   VSR32,
  488   VSR33,
  489   VSR34,
  490   VSR35,
  491   VSR36,
  492   VSR37,
  493   VSR38,
  494   VSR39,
  495   VSR40,
  496   VSR41,
  497   VSR42,
  498   VSR43,
  499   VSR44,
  500   VSR45,
  501   VSR46,
  502   VSR47,
  503   VSR48,
  504   VSR49,
  505   VSR50,
  506   VSR51,
  507   VSR52,
  508   VSR53,
  509   VSR54,
  510   VSR55,
  511   VSR56,
  512   VSR57,
  513   VSR58,
  514   VSR59,
  515   VSR60,
  516   VSR61,
  517   VSR62,
  518   VSR63
  519 );
  520 
  521 alloc_class chunk4 (
  522   // special registers
  523   // These registers are not allocated, but used for nodes generated by postalloc expand.
  524   SR_XER,
  525   SR_LR,
  526   SR_CTR,
  527   SR_VRSAVE,
  528   SR_SPEFSCR,
  529   SR_PPR
  530 );
  531 
  532 //-------Architecture Description Register Classes-----------------------
  533 
  534 // Several register classes are automatically defined based upon
  535 // information in this architecture description.
  536 
  537 // 1) reg_class inline_cache_reg           ( as defined in frame section )
  538 // 2) reg_class stack_slots( /* one chunk of stack-based "registers" */ )
  539 //
  540 
  541 // ----------------------------
  542 // 32 Bit Register Classes
  543 // ----------------------------
  544 
  545 // We specify registers twice, once as read/write, and once read-only.
  546 // We use the read-only registers for source operands. With this, we
  547 // can include preset read-only registers in this class, as a hard-coded
  548 // '0'-register. (We used to simulate this on ppc.)
  549 
  550 // 32 bit registers that can be read and written, i.e. these registers
  551 // can be dest (or src) of normal instructions.
  552 reg_class bits32_reg_rw(
  553 /*R0*/              // R0
  554 /*R1*/              // SP
  555   R2,               // TOC
  556   R3,
  557   R4,
  558   R5,
  559   R6,
  560   R7,
  561   R8,
  562   R9,
  563   R10,
  564   R11,
  565   R12,
  566 /*R13*/             // system thread id
  567   R14,
  568   R15,
  569 /*R16*/             // R16_thread
  570   R17,
  571   R18,
  572   R19,
  573   R20,
  574   R21,
  575   R22,
  576   R23,
  577   R24,
  578   R25,
  579   R26,
  580   R27,
  581   R28,
  582 /*R29,*/             // global TOC
  583   R30,
  584   R31
  585 );
  586 
  587 // 32 bit registers that can only be read, i.e. these registers can
  588 // only be src of all instructions.
  589 reg_class bits32_reg_ro(
  590 /*R0*/              // R0
  591 /*R1*/              // SP
  592   R2,               // TOC
  593   R3,
  594   R4,
  595   R5,
  596   R6,
  597   R7,
  598   R8,
  599   R9,
  600   R10,
  601   R11,
  602   R12,
  603 /*R13*/             // system thread id
  604   R14,
  605   R15,
  606 /*R16*/             // R16_thread
  607   R17,
  608   R18,
  609   R19,
  610   R20,
  611   R21,
  612   R22,
  613   R23,
  614   R24,
  615   R25,
  616   R26,
  617   R27,
  618   R28,
  619 /*R29,*/
  620   R30,
  621   R31
  622 );
  623 
  624 reg_class rscratch1_bits32_reg(R11);
  625 reg_class rscratch2_bits32_reg(R12);
  626 reg_class rarg1_bits32_reg(R3);
  627 reg_class rarg2_bits32_reg(R4);
  628 reg_class rarg3_bits32_reg(R5);
  629 reg_class rarg4_bits32_reg(R6);
  630 
  631 // ----------------------------
  632 // 64 Bit Register Classes
  633 // ----------------------------
  634 // 64-bit build means 64-bit pointers means hi/lo pairs
  635 
  636 reg_class rscratch1_bits64_reg(R11_H, R11);
  637 reg_class rscratch2_bits64_reg(R12_H, R12);
  638 reg_class rarg1_bits64_reg(R3_H, R3);
  639 reg_class rarg2_bits64_reg(R4_H, R4);
  640 reg_class rarg3_bits64_reg(R5_H, R5);
  641 reg_class rarg4_bits64_reg(R6_H, R6);
  642 // Thread register, 'written' by tlsLoadP, see there.
  643 reg_class thread_bits64_reg(R16_H, R16);
  644 
  645 reg_class r19_bits64_reg(R19_H, R19);
  646 
  647 // 64 bit registers that can be read and written, i.e. these registers
  648 // can be dest (or src) of normal instructions.
  649 reg_class bits64_reg_rw(
  650 /*R0_H,  R0*/     // R0
  651 /*R1_H,  R1*/     // SP
  652   R2_H,  R2,      // TOC
  653   R3_H,  R3,
  654   R4_H,  R4,
  655   R5_H,  R5,
  656   R6_H,  R6,
  657   R7_H,  R7,
  658   R8_H,  R8,
  659   R9_H,  R9,
  660   R10_H, R10,
  661   R11_H, R11,
  662   R12_H, R12,
  663 /*R13_H, R13*/   // system thread id
  664   R14_H, R14,
  665   R15_H, R15,
  666 /*R16_H, R16*/   // R16_thread
  667   R17_H, R17,
  668   R18_H, R18,
  669   R19_H, R19,
  670   R20_H, R20,
  671   R21_H, R21,
  672   R22_H, R22,
  673   R23_H, R23,
  674   R24_H, R24,
  675   R25_H, R25,
  676   R26_H, R26,
  677   R27_H, R27,
  678   R28_H, R28,
  679 /*R29_H, R29,*/
  680   R30_H, R30,
  681   R31_H, R31
  682 );
  683 
  684 // 64 bit registers excluding r2, r11 and r12.
  685 // Used to hold the TOC to avoid collisions with expanded LeafCall which uses
  686 // r2, r11 and r12 internally.
  687 reg_class bits64_reg_leaf_call(
  688 /*R0_H,  R0*/     // R0
  689 /*R1_H,  R1*/     // SP
  690 /*R2_H,  R2*/     // TOC
  691   R3_H,  R3,
  692   R4_H,  R4,
  693   R5_H,  R5,
  694   R6_H,  R6,
  695   R7_H,  R7,
  696   R8_H,  R8,
  697   R9_H,  R9,
  698   R10_H, R10,
  699 /*R11_H, R11*/
  700 /*R12_H, R12*/
  701 /*R13_H, R13*/   // system thread id
  702   R14_H, R14,
  703   R15_H, R15,
  704 /*R16_H, R16*/   // R16_thread
  705   R17_H, R17,
  706   R18_H, R18,
  707   R19_H, R19,
  708   R20_H, R20,
  709   R21_H, R21,
  710   R22_H, R22,
  711   R23_H, R23,
  712   R24_H, R24,
  713   R25_H, R25,
  714   R26_H, R26,
  715   R27_H, R27,
  716   R28_H, R28,
  717 /*R29_H, R29,*/
  718   R30_H, R30,
  719   R31_H, R31
  720 );
  721 
  722 // Used to hold the TOC to avoid collisions with expanded DynamicCall
  723 // which uses r19 as inline cache internally and expanded LeafCall which uses
  724 // r2, r11 and r12 internally.
  725 reg_class bits64_constant_table_base(
  726 /*R0_H,  R0*/     // R0
  727 /*R1_H,  R1*/     // SP
  728 /*R2_H,  R2*/     // TOC
  729   R3_H,  R3,
  730   R4_H,  R4,
  731   R5_H,  R5,
  732   R6_H,  R6,
  733   R7_H,  R7,
  734   R8_H,  R8,
  735   R9_H,  R9,
  736   R10_H, R10,
  737 /*R11_H, R11*/
  738 /*R12_H, R12*/
  739 /*R13_H, R13*/   // system thread id
  740   R14_H, R14,
  741   R15_H, R15,
  742 /*R16_H, R16*/   // R16_thread
  743   R17_H, R17,
  744   R18_H, R18,
  745 /*R19_H, R19*/
  746   R20_H, R20,
  747   R21_H, R21,
  748   R22_H, R22,
  749   R23_H, R23,
  750   R24_H, R24,
  751   R25_H, R25,
  752   R26_H, R26,
  753   R27_H, R27,
  754   R28_H, R28,
  755 /*R29_H, R29,*/
  756   R30_H, R30,
  757   R31_H, R31
  758 );
  759 
  760 // 64 bit registers that can only be read, i.e. these registers can
  761 // only be src of all instructions.
  762 reg_class bits64_reg_ro(
  763 /*R0_H,  R0*/     // R0
  764   R1_H,  R1,
  765   R2_H,  R2,       // TOC
  766   R3_H,  R3,
  767   R4_H,  R4,
  768   R5_H,  R5,
  769   R6_H,  R6,
  770   R7_H,  R7,
  771   R8_H,  R8,
  772   R9_H,  R9,
  773   R10_H, R10,
  774   R11_H, R11,
  775   R12_H, R12,
  776 /*R13_H, R13*/   // system thread id
  777   R14_H, R14,
  778   R15_H, R15,
  779   R16_H, R16,    // R16_thread
  780   R17_H, R17,
  781   R18_H, R18,
  782   R19_H, R19,
  783   R20_H, R20,
  784   R21_H, R21,
  785   R22_H, R22,
  786   R23_H, R23,
  787   R24_H, R24,
  788   R25_H, R25,
  789   R26_H, R26,
  790   R27_H, R27,
  791   R28_H, R28,
  792 /*R29_H, R29,*/ // TODO: let allocator handle TOC!!
  793   R30_H, R30,
  794   R31_H, R31
  795 );
  796 
  797 
  798 // ----------------------------
  799 // Special Class for Condition Code Flags Register
  800 
  801 reg_class int_flags(
  802 /*CCR0*/             // scratch
  803 /*CCR1*/             // scratch
  804 /*CCR2*/             // nv!
  805 /*CCR3*/             // nv!
  806 /*CCR4*/             // nv!
  807   CCR5,
  808   CCR6,
  809   CCR7
  810 );
  811 
  812 reg_class int_flags_ro(
  813   CCR0,
  814   CCR1,
  815   CCR2,
  816   CCR3,
  817   CCR4,
  818   CCR5,
  819   CCR6,
  820   CCR7
  821 );
  822 
  823 reg_class int_flags_CR0(CCR0);
  824 reg_class int_flags_CR1(CCR1);
  825 reg_class int_flags_CR6(CCR6);
  826 reg_class ctr_reg(SR_CTR);
  827 
  828 // ----------------------------
  829 // Float Register Classes
  830 // ----------------------------
  831 
  832 reg_class flt_reg(
  833   F0,
  834   F1,
  835   F2,
  836   F3,
  837   F4,
  838   F5,
  839   F6,
  840   F7,
  841   F8,
  842   F9,
  843   F10,
  844   F11,
  845   F12,
  846   F13,
  847   F14,              // nv!
  848   F15,              // nv!
  849   F16,              // nv!
  850   F17,              // nv!
  851   F18,              // nv!
  852   F19,              // nv!
  853   F20,              // nv!
  854   F21,              // nv!
  855   F22,              // nv!
  856   F23,              // nv!
  857   F24,              // nv!
  858   F25,              // nv!
  859   F26,              // nv!
  860   F27,              // nv!
  861   F28,              // nv!
  862   F29,              // nv!
  863   F30,              // nv!
  864   F31               // nv!
  865 );
  866 
  867 // Double precision float registers have virtual `high halves' that
  868 // are needed by the allocator.
  869 reg_class dbl_reg(
  870   F0,  F0_H,
  871   F1,  F1_H,
  872   F2,  F2_H,
  873   F3,  F3_H,
  874   F4,  F4_H,
  875   F5,  F5_H,
  876   F6,  F6_H,
  877   F7,  F7_H,
  878   F8,  F8_H,
  879   F9,  F9_H,
  880   F10, F10_H,
  881   F11, F11_H,
  882   F12, F12_H,
  883   F13, F13_H,
  884   F14, F14_H,    // nv!
  885   F15, F15_H,    // nv!
  886   F16, F16_H,    // nv!
  887   F17, F17_H,    // nv!
  888   F18, F18_H,    // nv!
  889   F19, F19_H,    // nv!
  890   F20, F20_H,    // nv!
  891   F21, F21_H,    // nv!
  892   F22, F22_H,    // nv!
  893   F23, F23_H,    // nv!
  894   F24, F24_H,    // nv!
  895   F25, F25_H,    // nv!
  896   F26, F26_H,    // nv!
  897   F27, F27_H,    // nv!
  898   F28, F28_H,    // nv!
  899   F29, F29_H,    // nv!
  900   F30, F30_H,    // nv!
  901   F31, F31_H     // nv!
  902 );
  903 
  904 // ----------------------------
  905 // Vector-Scalar Register Class
  906 // ----------------------------
  907 
  908 reg_class vs_reg(
  909   // Attention: only these are saved & restored at a safepoint by RegisterSaver.
  910   VSR32,
  911   VSR33,
  912   VSR34,
  913   VSR35,
  914   VSR36,
  915   VSR37,
  916   VSR38,
  917   VSR39,
  918   VSR40,
  919   VSR41,
  920   VSR42,
  921   VSR43,
  922   VSR44,
  923   VSR45,
  924   VSR46,
  925   VSR47,
  926   VSR48,
  927   VSR49,
  928   VSR50,
  929   VSR51
  930   // VSR52-VSR63 // nv!
  931 );
  932 
  933  %}
  934 
  935 //----------DEFINITION BLOCK---------------------------------------------------
  936 // Define name --> value mappings to inform the ADLC of an integer valued name
  937 // Current support includes integer values in the range [0, 0x7FFFFFFF]
  938 // Format:
  939 //        int_def  <name>         ( <int_value>, <expression>);
  940 // Generated Code in ad_<arch>.hpp
  941 //        #define  <name>   (<expression>)
  942 //        // value == <int_value>
  943 // Generated code in ad_<arch>.cpp adlc_verification()
  944 //        assert( <name> == <int_value>, "Expect (<expression>) to equal <int_value>");
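      //        For example, the DEFAULT_COST entry below is expected to yield
      //            #define  DEFAULT_COST   (100)
      //        in ad_ppc.hpp, plus the matching assert in adlc_verification().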
  945 //
  946 definitions %{
  947   // The default cost (of an ALU instruction).
  948   int_def DEFAULT_COST_LOW        (     30,      30);
  949   int_def DEFAULT_COST            (    100,     100);
  950   int_def HUGE_COST               (1000000, 1000000);
  951 
  952   // Memory refs
  953   int_def MEMORY_REF_COST_LOW     (    200, DEFAULT_COST * 2);
  954   int_def MEMORY_REF_COST         (    300, DEFAULT_COST * 3);
  955 
  956   // Branches are even more expensive.
  957   int_def BRANCH_COST             (    900, DEFAULT_COST * 9);
  958   int_def CALL_COST               (   1300, DEFAULT_COST * 13);
  959 %}
  960 
  961 
  962 //----------SOURCE BLOCK-------------------------------------------------------
  963 // This is a block of C++ code which provides values, functions, and
  964 // definitions necessary in the rest of the architecture description.
  965 source_hpp %{
  966   // Header information of the source block.
  967   // Method declarations/definitions which are used outside
  968   // the ad-scope can conveniently be defined here.
  969   //
  970   // To keep related declarations/definitions/uses close together,
  971   // we switch between source %{ }% and source_hpp %{ }% freely as needed.
  972 
  973 #include "opto/convertnode.hpp"
  974 
  975   // Returns true if Node n is followed by a MemBar node that
  976   // will do an acquire. If so, this node must not do the acquire
  977   // operation.
  978   bool followed_by_acquire(const Node *n);
  979 %}
  980 
  981 source %{
  982 
  983 #include "oops/klass.inline.hpp"
  984 
  985 void PhaseOutput::pd_perform_mach_node_analysis() {
  986 }
  987 
  988 int MachNode::pd_alignment_required() const {
  989   return 1;
  990 }
  991 
  992 int MachNode::compute_padding(int current_offset) const {
  993   return 0;
  994 }
  995 
  996 // Should the matcher clone input 'm' of node 'n'?
  997 bool Matcher::pd_clone_node(Node* n, Node* m, Matcher::MStack& mstack) {
  998   return false;
  999 }
 1000 
 1001 // Should the Matcher clone shifts on addressing modes, expecting them
 1002 // to be subsumed into complex addressing expressions or compute them
 1003 // into registers?
 1004 bool Matcher::pd_clone_address_expressions(AddPNode* m, Matcher::MStack& mstack, VectorSet& address_visited) {
 1005   return clone_base_plus_offset_address(m, mstack, address_visited);
 1006 }
 1007 
 1008 // Optimize load-acquire.
 1009 //
 1010 // Check if the acquire is unnecessary due to a following operation that does
 1011 // an acquire anyway.
 1012 // Walk the pattern:
 1013 //
 1014 //      n: Load.acq
 1015 //           |
 1016 //      MemBarAcquire
 1017 //       |         |
 1018 //  Proj(ctrl)  Proj(mem)
 1019 //       |         |
 1020 //   MemBarRelease/Volatile
 1021 //
 1022 bool followed_by_acquire(const Node *load) {
 1023   assert(load->is_Load(), "So far implemented only for loads.");
 1024 
 1025   // Find MemBarAcquire.
 1026   const Node *mba = NULL;
 1027   for (DUIterator_Fast imax, i = load->fast_outs(imax); i < imax; i++) {
 1028     const Node *out = load->fast_out(i);
 1029     if (out->Opcode() == Op_MemBarAcquire) {
 1030       if (out->in(0) == load) continue; // Skip control edge, membar should be found via precedence edge.
 1031       mba = out;
 1032       break;
 1033     }
 1034   }
 1035   if (!mba) return false;
 1036 
 1037   // Find following MemBar node.
 1038   //
 1039   // The following node must be reachable by control AND memory
 1040   // edges to ensure no other operations are in between the two nodes.
 1041   //
 1042   // So first get the Proj node, mem_proj, to use it to iterate forward.
 1043   Node *mem_proj = NULL;
 1044   for (DUIterator_Fast imax, i = mba->fast_outs(imax); i < imax; i++) {
 1045     mem_proj = mba->fast_out(i);      // Runs out of bounds and asserts if Proj not found.
 1046     assert(mem_proj->is_Proj(), "only projections here");
 1047     ProjNode *proj = mem_proj->as_Proj();
 1048     if (proj->_con == TypeFunc::Memory &&
 1049         !Compile::current()->node_arena()->contains(mem_proj)) // Unmatched old-space only
 1050       break;
 1051   }
 1052   assert(mem_proj->as_Proj()->_con == TypeFunc::Memory, "Graph broken");
 1053 
 1054   // Search MemBar behind Proj. If there are other memory operations
 1055   // behind the Proj we lost.
 1056   for (DUIterator_Fast jmax, j = mem_proj->fast_outs(jmax); j < jmax; j++) {
 1057     Node *x = mem_proj->fast_out(j);
 1058     // Proj might have an edge to a store or load node which precedes the membar.
 1059     if (x->is_Mem()) return false;
 1060 
 1061     // On PPC64 release and volatile are implemented by an instruction
 1062     // that also has acquire semantics. I.e. there is no need for an
 1063     // acquire before these.
 1064     int xop = x->Opcode();
 1065     if (xop == Op_MemBarRelease || xop == Op_MemBarVolatile) {
 1066       // Make sure we're not missing Call/Phi/MergeMem by checking
 1067       // control edges. The control edge must directly lead back
 1068       // to the MemBarAcquire
 1069       Node *ctrl_proj = x->in(0);
 1070       if (ctrl_proj->is_Proj() && ctrl_proj->in(0) == mba) {
 1071         return true;
 1072       }
 1073     }
 1074   }
 1075 
 1076   return false;
 1077 }
 1078 
 1079 #define __ _masm.
 1080 
 1081 // Tertiary op of a LoadP or StoreP encoding.
 1082 #define REGP_OP true
 1083 
 1084 // ****************************************************************************
 1085 
 1086 // REQUIRED FUNCTIONALITY
 1087 
 1088 // !!!!! Special hack to get all types of calls to specify the byte offset
 1089 //       from the start of the call to the point where the return address
 1090 //       will point.
 1091 
 1092 // PPC port: Removed use of lazy constant construct.
 1093 
 1094 int MachCallStaticJavaNode::ret_addr_offset() {
 1095   // It's only a single branch-and-link instruction.
 1096   return 4;
 1097 }
 1098 
 1099 int MachCallDynamicJavaNode::ret_addr_offset() {
 1100   // Offset is 4 with postalloc expanded calls (bl is one instruction). We use
 1101   // postalloc expanded calls if we use inline caches and do not update method data.
 1102   if (UseInlineCaches) return 4;
 1103 
 1104   int vtable_index = this->_vtable_index;
 1105   if (vtable_index < 0) {
 1106     // Must be invalid_vtable_index, not nonvirtual_vtable_index.
 1107     assert(vtable_index == Method::invalid_vtable_index, "correct sentinel value");
 1108     return 12;
 1109   } else {
 1110     return 24 + MacroAssembler::instr_size_for_decode_klass_not_null();
 1111   }
 1112 }
 1113 
 1114 int MachCallRuntimeNode::ret_addr_offset() {
 1115   if (rule() == CallRuntimeDirect_rule) {
 1116     // CallRuntimeDirectNode uses call_c.
 1117 #if defined(ABI_ELFv2)
 1118     return 28;
 1119 #else
 1120     return 40;
 1121 #endif
 1122   }
 1123   assert(rule() == CallLeafDirect_rule, "unexpected node with rule %u", rule());
 1124   // CallLeafDirectNode uses bl.
 1125   return 4;
 1126 }
 1127 
 1128 int MachCallNativeNode::ret_addr_offset() {
 1129   Unimplemented();
 1130   return -1;
 1131 }
 1132 
 1133 //=============================================================================
 1134 
 1135 // condition code conversions
 1136 
 1137 static int cc_to_boint(int cc) {
 1138   return Assembler::bcondCRbiIs0 | (cc & 8);
 1139 }
 1140 
 1141 static int cc_to_inverse_boint(int cc) {
 1142   return Assembler::bcondCRbiIs0 | (8-(cc & 8));
 1143 }
 1144 
 1145 static int cc_to_biint(int cc, int flags_reg) {
 1146   return (flags_reg << 2) | (cc & 3);
 1147 }
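      // Worked example (a sketch): if cc encodes "equal" with bit 3 set and its low
      // two bits selecting the EQ bit (value 2), then for CCR0 (flags_reg == 0):
      //   cc_to_biint(cc, 0)      == 2                            // BI: CR0's EQ bit
      //   cc_to_boint(cc)         == Assembler::bcondCRbiIs0 | 8  // branch if the CR bit is 1
      //   cc_to_inverse_boint(cc) == Assembler::bcondCRbiIs0      // branch if the CR bit is 0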
 1148 
 1149 //=============================================================================
 1150 
 1151 // Compute padding required for nodes which need alignment. The padding
 1152 // is the number of bytes (not instructions) which will be inserted before
 1153 // the instruction. The padding must match the size of a NOP instruction.
 1154 
 1155 // Add nop if a prefixed (two-word) instruction is going to cross a 64-byte boundary.
 1156 // (See Section 1.6 of Power ISA Version 3.1)
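      // For example, at current_offset == 60 the prefix word would occupy bytes 60..63
      // and the suffix word would start at the 64-byte boundary, so BytesPerInstWord (4)
      // bytes of nop padding are requested; at other offsets no padding is needed.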
 1157 static int compute_prefix_padding(int current_offset) {
 1158   assert(PowerArchitecturePPC64 >= 10 && (CodeEntryAlignment & 63) == 0,
 1159          "Code buffer must be aligned to a multiple of 64 bytes");
 1160   if (is_aligned(current_offset + BytesPerInstWord, 64)) {
 1161     return BytesPerInstWord;
 1162   }
 1163   return 0;
 1164 }
 1165 
 1166 int loadConI32Node::compute_padding(int current_offset) const {
 1167   return compute_prefix_padding(current_offset);
 1168 }
 1169 
 1170 int loadConL34Node::compute_padding(int current_offset) const {
 1171   return compute_prefix_padding(current_offset);
 1172 }
 1173 
 1174 int addI_reg_imm32Node::compute_padding(int current_offset) const {
 1175   return compute_prefix_padding(current_offset);
 1176 }
 1177 
 1178 int addL_reg_imm34Node::compute_padding(int current_offset) const {
 1179   return compute_prefix_padding(current_offset);
 1180 }
 1181 
 1182 int addP_reg_imm34Node::compute_padding(int current_offset) const {
 1183   return compute_prefix_padding(current_offset);
 1184 }
 1185 
 1186 int cmprb_Whitespace_reg_reg_prefixedNode::compute_padding(int current_offset) const {
 1187   return compute_prefix_padding(current_offset);
 1188 }
 1189 
 1190 
 1191 //=============================================================================
 1192 
 1193 // Emit an interrupt that is caught by the debugger (for debugging the compiler).
 1194 void emit_break(CodeBuffer &cbuf) {
 1195   C2_MacroAssembler _masm(&cbuf);
 1196   __ illtrap();
 1197 }
 1198 
 1199 #ifndef PRODUCT
 1200 void MachBreakpointNode::format(PhaseRegAlloc *ra_, outputStream *st) const {
 1201   st->print("BREAKPOINT");
 1202 }
 1203 #endif
 1204 
 1205 void MachBreakpointNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
 1206   emit_break(cbuf);
 1207 }
 1208 
 1209 uint MachBreakpointNode::size(PhaseRegAlloc *ra_) const {
 1210   return MachNode::size(ra_);
 1211 }
 1212 
 1213 //=============================================================================
 1214 
 1215 void emit_nop(CodeBuffer &cbuf) {
 1216   C2_MacroAssembler _masm(&cbuf);
 1217   __ nop();
 1218 }
 1219 
 1220 static inline void emit_long(CodeBuffer &cbuf, int value) {
 1221   *((int*)(cbuf.insts_end())) = value;
 1222   cbuf.set_insts_end(cbuf.insts_end() + BytesPerInstWord);
 1223 }
 1224 
 1225 //=============================================================================
 1226 
 1227 %} // interrupt source
 1228 
 1229 source_hpp %{ // Header information of the source block.
 1230 
 1231 //--------------------------------------------------------------
 1232 //---<  Used for optimization in Compile::Shorten_branches  >---
 1233 //--------------------------------------------------------------
 1234 
 1235 class C2_MacroAssembler;
 1236 
 1237 class CallStubImpl {
 1238 
 1239  public:
 1240 
 1241   // Emit call stub, compiled java to interpreter.
 1242   static void emit_trampoline_stub(C2_MacroAssembler &_masm, int destination_toc_offset, int insts_call_instruction_offset);
 1243 
 1244   // Size of call trampoline stub.
 1245   // This doesn't need to be accurate to the byte, but it
 1246   // must be larger than or equal to the real size of the stub.
 1247   static uint size_call_trampoline() {
 1248     return MacroAssembler::trampoline_stub_size;
 1249   }
 1250 
 1251   // number of relocations needed by a call trampoline stub
 1252   static uint reloc_call_trampoline() {
 1253     return 5;
 1254   }
 1255 
 1256 };
 1257 
 1258 %} // end source_hpp
 1259 
 1260 source %{
 1261 
 1262 // Emit a trampoline stub for a call to a target which is too far away.
 1263 //
 1264 // code sequences:
 1265 //
 1266 // call-site:
 1267 //   branch-and-link to <destination> or <trampoline stub>
 1268 //
 1269 // Related trampoline stub for this call-site in the stub section:
 1270 //   load the call target from the constant pool
 1271 //   branch via CTR (LR/link still points to the call-site above)
 1272 
 1273 void CallStubImpl::emit_trampoline_stub(C2_MacroAssembler &_masm, int destination_toc_offset, int insts_call_instruction_offset) {
 1274   address stub = __ emit_trampoline_stub(destination_toc_offset, insts_call_instruction_offset);
 1275   if (stub == NULL) {
 1276     ciEnv::current()->record_out_of_memory_failure();
 1277   }
 1278 }
 1279 
 1280 //=============================================================================
 1281 
 1282 // Emit an inline branch-and-link call and a related trampoline stub.
 1283 //
 1284 // code sequences:
 1285 //
 1286 // call-site:
 1287 //   branch-and-link to <destination> or <trampoline stub>
 1288 //
 1289 // Related trampoline stub for this call-site in the stub section:
 1290 //   load the call target from the constant pool
 1291 //   branch via CTR (LR/link still points to the call-site above)
 1292 //
 1293 
 1294 typedef struct {
 1295   int insts_call_instruction_offset;
 1296   int ret_addr_offset;
 1297 } EmitCallOffsets;
 1298 
 1299 // Emit a branch-and-link instruction that branches to a trampoline.
 1300 // - Remember the offset of the branch-and-link instruction.
 1301 // - Add a relocation at the branch-and-link instruction.
 1302 // - Emit a branch-and-link.
 1303 // - Remember the return pc offset.
 1304 EmitCallOffsets emit_call_with_trampoline_stub(C2_MacroAssembler &_masm, address entry_point, relocInfo::relocType rtype) {
 1305   EmitCallOffsets offsets = { -1, -1 };
 1306   const int start_offset = __ offset();
 1307   offsets.insts_call_instruction_offset = __ offset();
 1308 
 1309   // No entry point given, use the current pc.
 1310   if (entry_point == NULL) entry_point = __ pc();
 1311 
 1312   // Put the entry point as a constant into the constant pool.
 1313   const address entry_point_toc_addr   = __ address_constant(entry_point, RelocationHolder::none);
 1314   if (entry_point_toc_addr == NULL) {
 1315     ciEnv::current()->record_out_of_memory_failure();
 1316     return offsets;
 1317   }
 1318   const int     entry_point_toc_offset = __ offset_to_method_toc(entry_point_toc_addr);
 1319 
 1320   // Emit the trampoline stub which will be related to the branch-and-link below.
 1321   CallStubImpl::emit_trampoline_stub(_masm, entry_point_toc_offset, offsets.insts_call_instruction_offset);
 1322   if (ciEnv::current()->failing()) { return offsets; } // Code cache may be full.
 1323   __ relocate(rtype);
 1324 
 1325   // Note: At this point we do not have the address of the trampoline
 1326   // stub, and the entry point might be too far away for bl, so __ pc()
 1327   // serves as dummy and the bl will be patched later.
 1328   __ bl((address) __ pc());
 1329 
 1330   offsets.ret_addr_offset = __ offset() - start_offset;
 1331 
 1332   return offsets;
 1333 }
 1334 
 1335 //=============================================================================
 1336 
 1337 // Factory for creating loadConL* nodes for large/small constant pool.
 1338 
 1339 static inline jlong replicate_immF(float con) {
 1340   // Replicate float con 2 times and pack into vector.
 1341   int val = *((int*)&con);
 1342   jlong lval = val;
 1343   lval = (lval << 32) | (lval & 0xFFFFFFFFl);
 1344   return lval;
 1345 }
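      // For example, replicate_immF(1.0f) returns 0x3f8000003f800000: the 32-bit
      // pattern of 1.0f (0x3f800000) is placed in both halves of the jlong.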
 1346 
 1347 //=============================================================================
 1348 
 1349 const RegMask& MachConstantBaseNode::_out_RegMask = BITS64_CONSTANT_TABLE_BASE_mask();
 1350 int ConstantTable::calculate_table_base_offset() const {
 1351   return 0;  // absolute addressing, no offset
 1352 }
 1353 
 1354 bool MachConstantBaseNode::requires_postalloc_expand() const { return true; }
 1355 void MachConstantBaseNode::postalloc_expand(GrowableArray <Node *> *nodes, PhaseRegAlloc *ra_) {
 1356   iRegPdstOper *op_dst = new iRegPdstOper();
 1357   MachNode *m1 = new loadToc_hiNode();
 1358   MachNode *m2 = new loadToc_loNode();
 1359 
 1360   m1->add_req(NULL);
 1361   m2->add_req(NULL, m1);
 1362   m1->_opnds[0] = op_dst;
 1363   m2->_opnds[0] = op_dst;
 1364   m2->_opnds[1] = op_dst;
 1365   ra_->set_pair(m1->_idx, ra_->get_reg_second(this), ra_->get_reg_first(this));
 1366   ra_->set_pair(m2->_idx, ra_->get_reg_second(this), ra_->get_reg_first(this));
 1367   nodes->push(m1);
 1368   nodes->push(m2);
 1369 }
 1370 
 1371 void MachConstantBaseNode::emit(CodeBuffer& cbuf, PhaseRegAlloc* ra_) const {
 1372   // Is postalloc expanded.
 1373   ShouldNotReachHere();
 1374 }
 1375 
 1376 uint MachConstantBaseNode::size(PhaseRegAlloc* ra_) const {
 1377   return 0;
 1378 }
 1379 
 1380 #ifndef PRODUCT
 1381 void MachConstantBaseNode::format(PhaseRegAlloc* ra_, outputStream* st) const {
 1382   st->print("-- \t// MachConstantBaseNode (empty encoding)");
 1383 }
 1384 #endif
 1385 
 1386 //=============================================================================
 1387 
 1388 #ifndef PRODUCT
 1389 void MachPrologNode::format(PhaseRegAlloc *ra_, outputStream *st) const {
 1390   Compile* C = ra_->C;
 1391   const long framesize = C->output()->frame_slots() << LogBytesPerInt;
 1392 
 1393   st->print("PROLOG\n\t");
 1394   if (C->output()->need_stack_bang(framesize)) {
 1395     st->print("stack_overflow_check\n\t");
 1396   }
 1397 
 1398   if (!false /* TODO: PPC port C->is_frameless_method()*/) {
 1399     st->print("save return pc\n\t");
 1400     st->print("push frame %ld\n\t", -framesize);
 1401   }
 1402 
 1403   if (C->stub_function() == NULL) {
 1404     st->print("nmethod entry barrier\n\t");
 1405   }
 1406 }
 1407 #endif
 1408 
 1409 void MachPrologNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
 1410   Compile* C = ra_->C;
 1411   C2_MacroAssembler _masm(&cbuf);
 1412 
 1413   const long framesize = C->output()->frame_size_in_bytes();
 1414   assert(framesize % (2 * wordSize) == 0, "must preserve 2*wordSize alignment");
 1415 
 1416   const bool method_is_frameless      = false /* TODO: PPC port C->is_frameless_method()*/;
 1417 
 1418   const Register return_pc            = R20; // Must match return_addr() in frame section.
 1419   const Register callers_sp           = R21;
 1420   const Register push_frame_temp      = R22;
 1421   const Register toc_temp             = R23;
 1422   assert_different_registers(R11, return_pc, callers_sp, push_frame_temp, toc_temp);
 1423 
 1424   if (method_is_frameless) {
 1425     // Add nop at beginning of all frameless methods to prevent any
 1426     // oop instructions from getting overwritten by make_not_entrant
 1427     // (patching attempt would fail).
 1428     __ nop();
 1429   } else {
 1430     // Get return pc.
 1431     __ mflr(return_pc);
 1432   }
 1433 
 1434   if (C->clinit_barrier_on_entry()) {
 1435     assert(!C->method()->holder()->is_not_initialized(), "initialization should have been started");
 1436 
 1437     Label L_skip_barrier;
 1438     Register klass = toc_temp;
 1439 
 1440     // Notify OOP recorder (don't need the relocation)
 1441     AddressLiteral md = __ constant_metadata_address(C->method()->holder()->constant_encoding());
 1442     __ load_const_optimized(klass, md.value(), R0);
 1443     __ clinit_barrier(klass, R16_thread, &L_skip_barrier /*L_fast_path*/);
 1444 
 1445     __ load_const_optimized(klass, SharedRuntime::get_handle_wrong_method_stub(), R0);
 1446     __ mtctr(klass);
 1447     __ bctr();
 1448 
 1449     __ bind(L_skip_barrier);
 1450   }
 1451 
 1452   // Calls to C2R adapters often do not accept exceptional returns.
 1453   // We require that their callers must bang for them. But be
 1454   // careful, because some VM calls (such as call site linkage) can
 1455   // use several kilobytes of stack. But the stack safety zone should
 1456   // account for that. See bugs 4446381, 4468289, 4497237.
 1457 
 1458   int bangsize = C->output()->bang_size_in_bytes();
 1459   assert(bangsize >= framesize || bangsize <= 0, "stack bang size incorrect");
 1460   if (C->output()->need_stack_bang(bangsize)) {
 1461     // Unfortunately we cannot use the function provided in
 1462     // assembler.cpp as we have to emulate the pipes. So I had to
 1463     // insert the code of generate_stack_overflow_check(), see
 1464     // assembler.cpp for some illuminative comments.
 1465     const int page_size = os::vm_page_size();
 1466     int bang_end = StackOverflow::stack_shadow_zone_size();
 1467 
 1468     // This is how far the previous frame's stack banging extended.
 1469     const int bang_end_safe = bang_end;
 1470 
 1471     if (bangsize > page_size) {
 1472       bang_end += bangsize;
 1473     }
 1474 
 1475     int bang_offset = bang_end_safe;
 1476 
 1477     while (bang_offset <= bang_end) {
 1478       // Need at least one stack bang at end of shadow zone.
 1479 
 1480       // Again I had to copy code, this time from assembler_ppc.cpp,
 1481       // bang_stack_with_offset - see there for comments.
 1482 
 1483       // Stack grows down, caller passes positive offset.
 1484       assert(bang_offset > 0, "must bang with positive offset");
 1485 
 1486       long stdoffset = -bang_offset;
 1487 
 1488       if (Assembler::is_simm(stdoffset, 16)) {
 1489         // Signed 16 bit offset, a simple std is ok.
 1490         if (UseLoadInstructionsForStackBangingPPC64) {
 1491           __ ld(R0,  (int)(signed short)stdoffset, R1_SP);
 1492         } else {
 1493           __ std(R0, (int)(signed short)stdoffset, R1_SP);
 1494         }
 1495       } else if (Assembler::is_simm(stdoffset, 31)) {
 1496         // Use largeoffset calculations for addis & ld/std.
 1497         const int hi = MacroAssembler::largeoffset_si16_si16_hi(stdoffset);
 1498         const int lo = MacroAssembler::largeoffset_si16_si16_lo(stdoffset);
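        // Illustrative example (hypothetical value): for stdoffset = -0x14000
        // the split satisfies (hi << 16) + lo == stdoffset with lo a signed
        // 16-bit value, giving hi = -1 and lo = -0x4000. The addis below then
        // forms tmp = SP - 0x10000 and the ld/std accesses SP - 0x14000.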
 1499 
 1500         Register tmp = R11;
 1501         __ addis(tmp, R1_SP, hi);
 1502         if (UseLoadInstructionsForStackBangingPPC64) {
 1503           __ ld(R0, lo, tmp);
 1504         } else {
 1505           __ std(R0, lo, tmp);
 1506         }
 1507       } else {
 1508         ShouldNotReachHere();
 1509       }
 1510 
 1511       bang_offset += page_size;
 1512     }
 1513     // R11 trashed
 1514   } // C->output()->need_stack_bang(framesize)
 1515 
 1516   unsigned int bytes = (unsigned int)framesize;
 1517   long offset = Assembler::align_addr(bytes, frame::alignment_in_bytes);
 1518   ciMethod *currMethod = C->method();
 1519 
 1520   if (!method_is_frameless) {
 1521     // Get callers sp.
 1522     __ mr(callers_sp, R1_SP);
 1523 
 1524     // Push method's frame, modifies SP.
 1525     assert(Assembler::is_uimm(framesize, 32U), "wrong type");
 1526     // The ABI is already accounted for in 'framesize' via the
 1527     // 'out_preserve' area.
 1528     Register tmp = push_frame_temp;
 1529     // Had to insert code of push_frame((unsigned int)framesize, push_frame_temp).
 1530     if (Assembler::is_simm(-offset, 16)) {
 1531       __ stdu(R1_SP, -offset, R1_SP);
 1532     } else {
 1533       long x = -offset;
 1534       // Had to insert load_const(tmp, -offset).
 1535       __ lis( tmp, (int)((signed short)(((x >> 32) & 0xffff0000) >> 16)));
 1536       __ ori( tmp, tmp, ((x >> 32) & 0x0000ffff));
 1537       __ sldi(tmp, tmp, 32);
 1538       __ oris(tmp, tmp, (x & 0xffff0000) >> 16);
 1539       __ ori( tmp, tmp, (x & 0x0000ffff));
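      // Illustrative example (hypothetical value): for x = -0x18000 the five
      // instructions above assemble the constant 16 bits at a time:
      //   lis  tmp, -1          -> tmp = 0xFFFFFFFFFFFF0000
      //   ori  tmp, tmp, 0xFFFF -> tmp = 0xFFFFFFFFFFFFFFFF
      //   sldi tmp, tmp, 32     -> tmp = 0xFFFFFFFF00000000
      //   oris tmp, tmp, 0xFFFE -> tmp = 0xFFFFFFFFFFFE0000
      //   ori  tmp, tmp, 0x8000 -> tmp = 0xFFFFFFFFFFFE8000 == -0x18000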
 1540 
 1541       __ stdux(R1_SP, R1_SP, tmp);
 1542     }
 1543   }
 1544 #if 0 // TODO: PPC port
 1545   // For testing large constant pools, emit a lot of constants to constant pool.
 1546   // "Randomize" const_size.
 1547   if (ConstantsALot) {
 1548     const int num_consts = const_size();
 1549     for (int i = 0; i < num_consts; i++) {
 1550       __ long_constant(0xB0B5B00BBABE);
 1551     }
 1552   }
 1553 #endif
 1554   if (!method_is_frameless) {
 1555     // Save return pc.
 1556     __ std(return_pc, _abi0(lr), callers_sp);
 1557   }
 1558 
 1559   if (C->stub_function() == NULL) {
 1560     BarrierSetAssembler* bs = BarrierSet::barrier_set()->barrier_set_assembler();
 1561     bs->nmethod_entry_barrier(&_masm, push_frame_temp);
 1562   }
 1563 
 1564   C->output()->set_frame_complete(cbuf.insts_size());
 1565 }
 1566 
 1567 uint MachPrologNode::size(PhaseRegAlloc *ra_) const {
  // Variable size. Determine dynamically.
 1569   return MachNode::size(ra_);
 1570 }
 1571 
 1572 int MachPrologNode::reloc() const {
 1573   // Return number of relocatable values contained in this instruction.
 1574   return 1; // 1 reloc entry for load_const(toc).
 1575 }
 1576 
 1577 //=============================================================================
 1578 
 1579 #ifndef PRODUCT
 1580 void MachEpilogNode::format(PhaseRegAlloc *ra_, outputStream *st) const {
 1581   Compile* C = ra_->C;
 1582 
 1583   st->print("EPILOG\n\t");
 1584   st->print("restore return pc\n\t");
 1585   st->print("pop frame\n\t");
 1586 
 1587   if (do_polling() && C->is_method_compilation()) {
 1588     st->print("safepoint poll\n\t");
 1589   }
 1590 }
 1591 #endif
 1592 
 1593 void MachEpilogNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
 1594   Compile* C = ra_->C;
 1595   C2_MacroAssembler _masm(&cbuf);
 1596 
 1597   const long framesize = ((long)C->output()->frame_slots()) << LogBytesPerInt;
 1598   assert(framesize >= 0, "negative frame-size?");
 1599 
 1600   const bool method_needs_polling = do_polling() && C->is_method_compilation();
 1601   const bool method_is_frameless  = false /* TODO: PPC port C->is_frameless_method()*/;
 1602   const Register return_pc        = R31;  // Must survive C-call to enable_stack_reserved_zone().
 1603   const Register temp             = R12;
 1604 
 1605   if (!method_is_frameless) {
 1606     // Restore return pc relative to callers' sp.
 1607     __ ld(return_pc, ((int)framesize) + _abi0(lr), R1_SP);
 1608     // Move return pc to LR.
 1609     __ mtlr(return_pc);
 1610     // Pop frame (fixed frame-size).
 1611     __ addi(R1_SP, R1_SP, (int)framesize);
 1612   }
 1613 
 1614   if (StackReservedPages > 0 && C->has_reserved_stack_access()) {
 1615     __ reserved_stack_check(return_pc);
 1616   }
 1617 
 1618   if (method_needs_polling) {
 1619     Label dummy_label;
 1620     Label* code_stub = &dummy_label;
 1621     if (!UseSIGTRAP && !C->output()->in_scratch_emit_size()) {
 1622       code_stub = &C->output()->safepoint_poll_table()->add_safepoint(__ offset());
 1623       __ relocate(relocInfo::poll_return_type);
 1624     }
 1625     __ safepoint_poll(*code_stub, temp, true /* at_return */, true /* in_nmethod */);
 1626   }
 1627 }
 1628 
 1629 uint MachEpilogNode::size(PhaseRegAlloc *ra_) const {
 1630   // Variable size. Determine dynamically.
 1631   return MachNode::size(ra_);
 1632 }
 1633 
 1634 int MachEpilogNode::reloc() const {
 1635   // Return number of relocatable values contained in this instruction.
 1636   return 1; // 1 for load_from_polling_page.
 1637 }
 1638 
 1639 const Pipeline * MachEpilogNode::pipeline() const {
 1640   return MachNode::pipeline_class();
 1641 }
 1642 
 1643 // =============================================================================
 1644 
 1645 // Figure out which register class each belongs in: rc_int, rc_float, rc_vs or
 1646 // rc_stack.
 1647 enum RC { rc_bad, rc_int, rc_float, rc_vs, rc_stack };
 1648 
 1649 static enum RC rc_class(OptoReg::Name reg) {
 1650   // Return the register class for the given register. The given register
 1651   // reg is a <register>_num value, which is an index into the MachRegisterNumbers
 1652   // enumeration in adGlobals_ppc.hpp.
 1653 
 1654   if (reg == OptoReg::Bad) return rc_bad;
 1655 
 1656   // We have 64 integer register halves, starting at index 0.
 1657   if (reg < 64) return rc_int;
 1658 
 1659   // We have 64 floating-point register halves, starting at index 64.
 1660   if (reg < 64+64) return rc_float;
 1661 
 1662   // We have 64 vector-scalar registers, starting at index 128.
 1663   if (reg < 64+64+64) return rc_vs;
 1664 
 1665   // Between float regs & stack are the flags regs.
 1666   assert(OptoReg::is_stack(reg) || reg < 64+64+64, "blow up if spilling flags");
 1667 
 1668   return rc_stack;
 1669 }
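// For illustration (indices per the layout described above): an OptoReg index
// of 5 is a GPR half and maps to rc_int, 70 maps to rc_float, 130 maps to
// rc_vs, and a spilled value gets a stack-slot index, which maps to rc_stack.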
 1670 
 1671 static int ld_st_helper(CodeBuffer *cbuf, const char *op_str, uint opcode, int reg, int offset,
 1672                         bool do_print, Compile* C, outputStream *st) {
 1673 
 1674   assert(opcode == Assembler::LD_OPCODE   ||
 1675          opcode == Assembler::STD_OPCODE  ||
 1676          opcode == Assembler::LWZ_OPCODE  ||
 1677          opcode == Assembler::STW_OPCODE  ||
 1678          opcode == Assembler::LFD_OPCODE  ||
 1679          opcode == Assembler::STFD_OPCODE ||
 1680          opcode == Assembler::LFS_OPCODE  ||
 1681          opcode == Assembler::STFS_OPCODE,
 1682          "opcode not supported");
 1683 
 1684   if (cbuf) {
 1685     int d =
 1686       (Assembler::LD_OPCODE == opcode || Assembler::STD_OPCODE == opcode) ?
 1687         Assembler::ds(offset+0 /* TODO: PPC port C->frame_slots_sp_bias_in_bytes()*/)
 1688       : Assembler::d1(offset+0 /* TODO: PPC port C->frame_slots_sp_bias_in_bytes()*/); // Makes no difference in opt build.
 1689     emit_long(*cbuf, opcode | Assembler::rt(Matcher::_regEncode[reg]) | d | Assembler::ra(R1_SP));
 1690   }
 1691 #ifndef PRODUCT
 1692   else if (do_print) {
 1693     st->print("%-7s %s, [R1_SP + #%d+%d] \t// spill copy",
 1694               op_str,
 1695               Matcher::regName[reg],
 1696               offset, 0 /* TODO: PPC port C->frame_slots_sp_bias_in_bytes()*/);
 1697   }
 1698 #endif
 1699   return 4; // size
 1700 }
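// Note (general PPC64 encoding background, not specific to this file): ld/std
// use the DS instruction form, whose byte displacement must be a multiple of 4
// because its low two bits extend the opcode, whereas lwz/stw and the FP
// loads/stores use the D form, which accepts any signed 16-bit displacement.
// E.g. a spill offset of 24 can be encoded by either form, an offset of 26
// only by the D form.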
 1701 
 1702 uint MachSpillCopyNode::implementation(CodeBuffer *cbuf, PhaseRegAlloc *ra_, bool do_size, outputStream *st) const {
 1703   Compile* C = ra_->C;
 1704 
 1705   // Get registers to move.
 1706   OptoReg::Name src_hi = ra_->get_reg_second(in(1));
 1707   OptoReg::Name src_lo = ra_->get_reg_first(in(1));
 1708   OptoReg::Name dst_hi = ra_->get_reg_second(this);
 1709   OptoReg::Name dst_lo = ra_->get_reg_first(this);
 1710 
 1711   enum RC src_hi_rc = rc_class(src_hi);
 1712   enum RC src_lo_rc = rc_class(src_lo);
 1713   enum RC dst_hi_rc = rc_class(dst_hi);
 1714   enum RC dst_lo_rc = rc_class(dst_lo);
 1715 
 1716   assert(src_lo != OptoReg::Bad && dst_lo != OptoReg::Bad, "must move at least 1 register");
 1717   if (src_hi != OptoReg::Bad)
 1718     assert((src_lo&1)==0 && src_lo+1==src_hi &&
 1719            (dst_lo&1)==0 && dst_lo+1==dst_hi,
 1720            "expected aligned-adjacent pairs");
 1721   // Generate spill code!
 1722   int size = 0;
 1723 
 1724   if (src_lo == dst_lo && src_hi == dst_hi)
 1725     return size;            // Self copy, no move.
 1726 
 1727   if (bottom_type()->isa_vect() != NULL && ideal_reg() == Op_VecX) {
 1728     // Memory->Memory Spill.
 1729     if (src_lo_rc == rc_stack && dst_lo_rc == rc_stack) {
 1730       int src_offset = ra_->reg2offset(src_lo);
 1731       int dst_offset = ra_->reg2offset(dst_lo);
 1732       if (cbuf) {
 1733         C2_MacroAssembler _masm(cbuf);
 1734         __ ld(R0, src_offset, R1_SP);
 1735         __ std(R0, dst_offset, R1_SP);
 1736         __ ld(R0, src_offset+8, R1_SP);
 1737         __ std(R0, dst_offset+8, R1_SP);
 1738       }
 1739       size += 16;
 1740     }
 1741     // VectorSRegister->Memory Spill.
 1742     else if (src_lo_rc == rc_vs && dst_lo_rc == rc_stack) {
 1743       VectorSRegister Rsrc = as_VectorSRegister(Matcher::_regEncode[src_lo]);
 1744       int dst_offset = ra_->reg2offset(dst_lo);
 1745       if (cbuf) {
 1746         C2_MacroAssembler _masm(cbuf);
 1747         __ addi(R0, R1_SP, dst_offset);
 1748         __ stxvd2x(Rsrc, R0);
 1749       }
 1750       size += 8;
 1751     }
 1752     // Memory->VectorSRegister Spill.
 1753     else if (src_lo_rc == rc_stack && dst_lo_rc == rc_vs) {
 1754       VectorSRegister Rdst = as_VectorSRegister(Matcher::_regEncode[dst_lo]);
 1755       int src_offset = ra_->reg2offset(src_lo);
 1756       if (cbuf) {
 1757         C2_MacroAssembler _masm(cbuf);
 1758         __ addi(R0, R1_SP, src_offset);
 1759         __ lxvd2x(Rdst, R0);
 1760       }
 1761       size += 8;
 1762     }
 1763     // VectorSRegister->VectorSRegister.
 1764     else if (src_lo_rc == rc_vs && dst_lo_rc == rc_vs) {
 1765       VectorSRegister Rsrc = as_VectorSRegister(Matcher::_regEncode[src_lo]);
 1766       VectorSRegister Rdst = as_VectorSRegister(Matcher::_regEncode[dst_lo]);
 1767       if (cbuf) {
 1768         C2_MacroAssembler _masm(cbuf);
 1769         __ xxlor(Rdst, Rsrc, Rsrc);
 1770       }
 1771       size += 4;
 1772     }
 1773     else {
 1774       ShouldNotReachHere(); // No VSR spill.
 1775     }
 1776     return size;
 1777   }
 1778 
 1779   // --------------------------------------
 1780   // Memory->Memory Spill. Use R0 to hold the value.
 1781   if (src_lo_rc == rc_stack && dst_lo_rc == rc_stack) {
 1782     int src_offset = ra_->reg2offset(src_lo);
 1783     int dst_offset = ra_->reg2offset(dst_lo);
 1784     if (src_hi != OptoReg::Bad) {
 1785       assert(src_hi_rc==rc_stack && dst_hi_rc==rc_stack,
 1786              "expected same type of move for high parts");
 1787       size += ld_st_helper(cbuf, "LD  ", Assembler::LD_OPCODE,  R0_num, src_offset, !do_size, C, st);
 1788       if (!cbuf && !do_size) st->print("\n\t");
 1789       size += ld_st_helper(cbuf, "STD ", Assembler::STD_OPCODE, R0_num, dst_offset, !do_size, C, st);
 1790     } else {
 1791       size += ld_st_helper(cbuf, "LWZ ", Assembler::LWZ_OPCODE, R0_num, src_offset, !do_size, C, st);
 1792       if (!cbuf && !do_size) st->print("\n\t");
 1793       size += ld_st_helper(cbuf, "STW ", Assembler::STW_OPCODE, R0_num, dst_offset, !do_size, C, st);
 1794     }
 1795     return size;
 1796   }
 1797 
 1798   // --------------------------------------
 1799   // Check for float->int copy; requires a trip through memory.
 1800   if (src_lo_rc == rc_float && dst_lo_rc == rc_int) {
 1801     Unimplemented();
 1802   }
 1803 
 1804   // --------------------------------------
 1805   // Check for integer reg-reg copy.
 1806   if (src_lo_rc == rc_int && dst_lo_rc == rc_int) {
 1807       Register Rsrc = as_Register(Matcher::_regEncode[src_lo]);
 1808       Register Rdst = as_Register(Matcher::_regEncode[dst_lo]);
 1809       size = (Rsrc != Rdst) ? 4 : 0;
 1810 
 1811       if (cbuf) {
 1812         C2_MacroAssembler _masm(cbuf);
 1813         if (size) {
 1814           __ mr(Rdst, Rsrc);
 1815         }
 1816       }
 1817 #ifndef PRODUCT
 1818       else if (!do_size) {
 1819         if (size) {
 1820           st->print("%-7s %s, %s \t// spill copy", "MR", Matcher::regName[dst_lo], Matcher::regName[src_lo]);
 1821         } else {
 1822           st->print("%-7s %s, %s \t// spill copy", "MR-NOP", Matcher::regName[dst_lo], Matcher::regName[src_lo]);
 1823         }
 1824       }
 1825 #endif
 1826       return size;
 1827   }
 1828 
 1829   // Check for integer store.
 1830   if (src_lo_rc == rc_int && dst_lo_rc == rc_stack) {
 1831     int dst_offset = ra_->reg2offset(dst_lo);
 1832     if (src_hi != OptoReg::Bad) {
 1833       assert(src_hi_rc==rc_int && dst_hi_rc==rc_stack,
 1834              "expected same type of move for high parts");
 1835       size += ld_st_helper(cbuf, "STD ", Assembler::STD_OPCODE, src_lo, dst_offset, !do_size, C, st);
 1836     } else {
 1837       size += ld_st_helper(cbuf, "STW ", Assembler::STW_OPCODE, src_lo, dst_offset, !do_size, C, st);
 1838     }
 1839     return size;
 1840   }
 1841 
 1842   // Check for integer load.
 1843   if (dst_lo_rc == rc_int && src_lo_rc == rc_stack) {
 1844     int src_offset = ra_->reg2offset(src_lo);
 1845     if (src_hi != OptoReg::Bad) {
 1846       assert(dst_hi_rc==rc_int && src_hi_rc==rc_stack,
 1847              "expected same type of move for high parts");
 1848       size += ld_st_helper(cbuf, "LD  ", Assembler::LD_OPCODE, dst_lo, src_offset, !do_size, C, st);
 1849     } else {
 1850       size += ld_st_helper(cbuf, "LWZ ", Assembler::LWZ_OPCODE, dst_lo, src_offset, !do_size, C, st);
 1851     }
 1852     return size;
 1853   }
 1854 
 1855   // Check for float reg-reg copy.
 1856   if (src_lo_rc == rc_float && dst_lo_rc == rc_float) {
 1857     if (cbuf) {
 1858       C2_MacroAssembler _masm(cbuf);
 1859       FloatRegister Rsrc = as_FloatRegister(Matcher::_regEncode[src_lo]);
 1860       FloatRegister Rdst = as_FloatRegister(Matcher::_regEncode[dst_lo]);
 1861       __ fmr(Rdst, Rsrc);
 1862     }
 1863 #ifndef PRODUCT
 1864     else if (!do_size) {
 1865       st->print("%-7s %s, %s \t// spill copy", "FMR", Matcher::regName[dst_lo], Matcher::regName[src_lo]);
 1866     }
 1867 #endif
 1868     return 4;
 1869   }
 1870 
 1871   // Check for float store.
 1872   if (src_lo_rc == rc_float && dst_lo_rc == rc_stack) {
 1873     int dst_offset = ra_->reg2offset(dst_lo);
 1874     if (src_hi != OptoReg::Bad) {
 1875       assert(src_hi_rc==rc_float && dst_hi_rc==rc_stack,
 1876              "expected same type of move for high parts");
 1877       size += ld_st_helper(cbuf, "STFD", Assembler::STFD_OPCODE, src_lo, dst_offset, !do_size, C, st);
 1878     } else {
 1879       size += ld_st_helper(cbuf, "STFS", Assembler::STFS_OPCODE, src_lo, dst_offset, !do_size, C, st);
 1880     }
 1881     return size;
 1882   }
 1883 
 1884   // Check for float load.
 1885   if (dst_lo_rc == rc_float && src_lo_rc == rc_stack) {
 1886     int src_offset = ra_->reg2offset(src_lo);
 1887     if (src_hi != OptoReg::Bad) {
 1888       assert(dst_hi_rc==rc_float && src_hi_rc==rc_stack,
 1889              "expected same type of move for high parts");
 1890       size += ld_st_helper(cbuf, "LFD ", Assembler::LFD_OPCODE, dst_lo, src_offset, !do_size, C, st);
 1891     } else {
 1892       size += ld_st_helper(cbuf, "LFS ", Assembler::LFS_OPCODE, dst_lo, src_offset, !do_size, C, st);
 1893     }
 1894     return size;
 1895   }
 1896 
 1897   // --------------------------------------------------------------------
 1898   // Check for hi bits still needing moving. Only happens for misaligned
 1899   // arguments to native calls.
 1900   if (src_hi == dst_hi)
 1901     return size;               // Self copy; no move.
 1902 
 1903   assert(src_hi_rc != rc_bad && dst_hi_rc != rc_bad, "src_hi & dst_hi cannot be Bad");
 1904   ShouldNotReachHere(); // Unimplemented
 1905   return 0;
 1906 }
 1907 
 1908 #ifndef PRODUCT
 1909 void MachSpillCopyNode::format(PhaseRegAlloc *ra_, outputStream *st) const {
 1910   if (!ra_)
 1911     st->print("N%d = SpillCopy(N%d)", _idx, in(1)->_idx);
 1912   else
 1913     implementation(NULL, ra_, false, st);
 1914 }
 1915 #endif
 1916 
 1917 void MachSpillCopyNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
 1918   implementation(&cbuf, ra_, false, NULL);
 1919 }
 1920 
 1921 uint MachSpillCopyNode::size(PhaseRegAlloc *ra_) const {
 1922   return implementation(NULL, ra_, true, NULL);
 1923 }
 1924 
 1925 #ifndef PRODUCT
 1926 void MachNopNode::format(PhaseRegAlloc *ra_, outputStream *st) const {
 1927   st->print("NOP \t// %d nops to pad for loops or prefixed instructions.", _count);
 1928 }
 1929 #endif
 1930 
 1931 void MachNopNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *) const {
 1932   C2_MacroAssembler _masm(&cbuf);
 1933   // _count contains the number of nops needed for padding.
 1934   for (int i = 0; i < _count; i++) {
 1935     __ nop();
 1936   }
 1937 }
 1938 
 1939 uint MachNopNode::size(PhaseRegAlloc *ra_) const {
 1940   return _count * 4;
 1941 }
 1942 
 1943 #ifndef PRODUCT
 1944 void BoxLockNode::format(PhaseRegAlloc *ra_, outputStream *st) const {
 1945   int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
 1946   char reg_str[128];
 1947   ra_->dump_register(this, reg_str);
 1948   st->print("ADDI    %s, SP, %d \t// box node", reg_str, offset);
 1949 }
 1950 #endif
 1951 
 1952 void BoxLockNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
 1953   C2_MacroAssembler _masm(&cbuf);
 1954 
 1955   int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
 1956   int reg    = ra_->get_encode(this);
 1957 
 1958   if (Assembler::is_simm(offset, 16)) {
 1959     __ addi(as_Register(reg), R1, offset);
 1960   } else {
 1961     ShouldNotReachHere();
 1962   }
 1963 }
 1964 
 1965 uint BoxLockNode::size(PhaseRegAlloc *ra_) const {
 1966   // BoxLockNode is not a MachNode, so we can't just call MachNode::size(ra_).
 1967   return 4;
 1968 }
 1969 
 1970 #ifndef PRODUCT
 1971 void MachUEPNode::format(PhaseRegAlloc *ra_, outputStream *st) const {
 1972   st->print_cr("---- MachUEPNode ----");
 1973   st->print_cr("...");
 1974 }
 1975 #endif
 1976 
 1977 void MachUEPNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
 1978   // This is the unverified entry point.
 1979   C2_MacroAssembler _masm(&cbuf);
 1980 
 1981   // Inline_cache contains a klass.
 1982   Register ic_klass       = as_Register(Matcher::inline_cache_reg_encode());
 1983   Register receiver_klass = R12_scratch2;  // tmp
 1984 
 1985   assert_different_registers(ic_klass, receiver_klass, R11_scratch1, R3_ARG1);
 1986   assert(R11_scratch1 == R11, "need prologue scratch register");
 1987 
 1988   // Check for NULL argument if we don't have implicit null checks.
 1989   if (!ImplicitNullChecks || !os::zero_page_read_protected()) {
 1990     if (TrapBasedNullChecks) {
 1991       __ trap_null_check(R3_ARG1);
 1992     } else {
 1993       Label valid;
 1994       __ cmpdi(CCR0, R3_ARG1, 0);
 1995       __ bne_predict_taken(CCR0, valid);
 1996       // We have a null argument, branch to ic_miss_stub.
 1997       __ b64_patchable((address)SharedRuntime::get_ic_miss_stub(),
 1998                            relocInfo::runtime_call_type);
 1999       __ bind(valid);
 2000     }
 2001   }
 2002   // Assume argument is not NULL, load klass from receiver.
 2003   __ load_klass(receiver_klass, R3_ARG1);
 2004 
 2005   if (TrapBasedICMissChecks) {
 2006     __ trap_ic_miss_check(receiver_klass, ic_klass);
 2007   } else {
 2008     Label valid;
 2009     __ cmpd(CCR0, receiver_klass, ic_klass);
 2010     __ beq_predict_taken(CCR0, valid);
 2011     // We have an unexpected klass, branch to ic_miss_stub.
 2012     __ b64_patchable((address)SharedRuntime::get_ic_miss_stub(),
 2013                          relocInfo::runtime_call_type);
 2014     __ bind(valid);
 2015   }
 2016 
 2017   // Argument is valid and klass is as expected, continue.
 2018 }
 2019 
 2020 uint MachUEPNode::size(PhaseRegAlloc *ra_) const {
 2021   // Variable size. Determine dynamically.
 2022   return MachNode::size(ra_);
 2023 }
 2024 
 2025 //=============================================================================
 2026 
 2027 %} // interrupt source
 2028 
 2029 source_hpp %{ // Header information of the source block.
 2030 
 2031 class HandlerImpl {
 2032 
 2033  public:
 2034 
 2035   static int emit_exception_handler(CodeBuffer &cbuf);
 2036   static int emit_deopt_handler(CodeBuffer& cbuf);
 2037 
 2038   static uint size_exception_handler() {
 2039     // The exception_handler is a b64_patchable.
 2040     return MacroAssembler::b64_patchable_size;
 2041   }
 2042 
 2043   static uint size_deopt_handler() {
 2044     // The deopt_handler is a bl64_patchable.
 2045     return MacroAssembler::bl64_patchable_size;
 2046   }
 2047 
 2048 };
 2049 
 2050 class Node::PD {
 2051 public:
 2052   enum NodeFlags {
 2053     _last_flag = Node::_last_flag
 2054   };
 2055 };
 2056 
 2057 %} // end source_hpp
 2058 
 2059 source %{
 2060 
 2061 int HandlerImpl::emit_exception_handler(CodeBuffer &cbuf) {
 2062   C2_MacroAssembler _masm(&cbuf);
 2063 
 2064   address base = __ start_a_stub(size_exception_handler());
 2065   if (base == NULL) return 0; // CodeBuffer::expand failed
 2066 
 2067   int offset = __ offset();
 2068   __ b64_patchable((address)OptoRuntime::exception_blob()->content_begin(),
 2069                        relocInfo::runtime_call_type);
 2070   assert(__ offset() - offset == (int)size_exception_handler(), "must be fixed size");
 2071   __ end_a_stub();
 2072 
 2073   return offset;
 2074 }
 2075 
 2076 // The deopt_handler is like the exception handler, but it calls to
 2077 // the deoptimization blob instead of jumping to the exception blob.
 2078 int HandlerImpl::emit_deopt_handler(CodeBuffer& cbuf) {
 2079   C2_MacroAssembler _masm(&cbuf);
 2080 
 2081   address base = __ start_a_stub(size_deopt_handler());
 2082   if (base == NULL) return 0; // CodeBuffer::expand failed
 2083 
 2084   int offset = __ offset();
 2085   __ bl64_patchable((address)SharedRuntime::deopt_blob()->unpack(),
 2086                         relocInfo::runtime_call_type);
 2087   assert(__ offset() - offset == (int) size_deopt_handler(), "must be fixed size");
 2088   __ end_a_stub();
 2089 
 2090   return offset;
 2091 }
 2092 
 2093 //=============================================================================
 2094 
 2095 // Use a frame slots bias for frameless methods if accessing the stack.
 2096 static int frame_slots_bias(int reg_enc, PhaseRegAlloc* ra_) {
 2097   if (as_Register(reg_enc) == R1_SP) {
 2098     return 0; // TODO: PPC port ra_->C->frame_slots_sp_bias_in_bytes();
 2099   }
 2100   return 0;
 2101 }
 2102 
 2103 const bool Matcher::match_rule_supported(int opcode) {
 2104   if (!has_match_rule(opcode)) {
 2105     return false; // no match rule present
 2106   }
 2107 
 2108   switch (opcode) {
 2109     case Op_SqrtD:
 2110       return VM_Version::has_fsqrt();
 2111     case Op_RoundDoubleMode:
 2112       return VM_Version::has_vsx();
 2113     case Op_CountLeadingZerosI:
 2114     case Op_CountLeadingZerosL:
 2115       return UseCountLeadingZerosInstructionsPPC64;
 2116     case Op_CountTrailingZerosI:
 2117     case Op_CountTrailingZerosL:
 2118       return (UseCountLeadingZerosInstructionsPPC64 || UseCountTrailingZerosInstructionsPPC64);
 2119     case Op_PopCountI:
 2120     case Op_PopCountL:
 2121       return (UsePopCountInstruction && VM_Version::has_popcntw());
 2122 
 2123     case Op_AddVB:
 2124     case Op_AddVS:
 2125     case Op_AddVI:
 2126     case Op_AddVF:
 2127     case Op_AddVD:
 2128     case Op_SubVB:
 2129     case Op_SubVS:
 2130     case Op_SubVI:
 2131     case Op_SubVF:
 2132     case Op_SubVD:
 2133     case Op_MulVS:
 2134     case Op_MulVF:
 2135     case Op_MulVD:
 2136     case Op_DivVF:
 2137     case Op_DivVD:
 2138     case Op_AbsVF:
 2139     case Op_AbsVD:
 2140     case Op_NegVF:
 2141     case Op_NegVD:
 2142     case Op_SqrtVF:
 2143     case Op_SqrtVD:
 2144     case Op_AddVL:
 2145     case Op_SubVL:
 2146     case Op_MulVI:
 2147     case Op_RoundDoubleModeV:
 2148       return SuperwordUseVSX;
 2149     case Op_PopCountVI:
 2150       return (SuperwordUseVSX && UsePopCountInstruction);
 2151     case Op_FmaVF:
 2152     case Op_FmaVD:
 2153       return (SuperwordUseVSX && UseFMA);
 2154 
 2155     case Op_Digit:
 2156       return vmIntrinsics::is_intrinsic_available(vmIntrinsics::_isDigit);
 2157     case Op_LowerCase:
 2158       return vmIntrinsics::is_intrinsic_available(vmIntrinsics::_isLowerCase);
 2159     case Op_UpperCase:
 2160       return vmIntrinsics::is_intrinsic_available(vmIntrinsics::_isUpperCase);
 2161     case Op_Whitespace:
 2162       return vmIntrinsics::is_intrinsic_available(vmIntrinsics::_isWhitespace);
 2163 
 2164     case Op_CacheWB:
 2165     case Op_CacheWBPreSync:
 2166     case Op_CacheWBPostSync:
 2167       return VM_Version::supports_data_cache_line_flush();
 2168   }
 2169 
  return true; // By default, match rules are supported.
 2171 }
 2172 
 2173 const bool Matcher::match_rule_supported_vector(int opcode, int vlen, BasicType bt) {
 2174   if (!match_rule_supported(opcode) || !vector_size_supported(bt, vlen)) {
 2175     return false;
 2176   }
  return true; // By default, match rules are supported.
 2178 }
 2179 
 2180 const bool Matcher::match_rule_supported_vector_masked(int opcode, int vlen, BasicType bt) {
 2181   return false;
 2182 }
 2183 
 2184 const RegMask* Matcher::predicate_reg_mask(void) {
 2185   return NULL;
 2186 }
 2187 
 2188 const TypeVect* Matcher::predicate_reg_type(const Type* elemTy, int length) {
 2189   return NULL;
 2190 }
 2191 
 2192 // Vector calling convention not yet implemented.
 2193 const bool Matcher::supports_vector_calling_convention(void) {
 2194   return false;
 2195 }
 2196 
 2197 OptoRegPair Matcher::vector_return_value(uint ideal_reg) {
 2198   Unimplemented();
 2199   return OptoRegPair(0, 0);
 2200 }
 2201 
 2202 // Vector width in bytes.
 2203 const int Matcher::vector_width_in_bytes(BasicType bt) {
 2204   if (SuperwordUseVSX) {
 2205     assert(MaxVectorSize == 16, "");
 2206     return 16;
 2207   } else {
 2208     assert(MaxVectorSize == 8, "");
 2209     return 8;
 2210   }
 2211 }
 2212 
 2213 // Vector ideal reg.
 2214 const uint Matcher::vector_ideal_reg(int size) {
 2215   if (SuperwordUseVSX) {
 2216     assert(MaxVectorSize == 16 && size == 16, "");
 2217     return Op_VecX;
 2218   } else {
 2219     assert(MaxVectorSize == 8 && size == 8, "");
 2220     return Op_RegL;
 2221   }
 2222 }
 2223 
 2224 // Limits on vector size (number of elements) loaded into vector.
 2225 const int Matcher::max_vector_size(const BasicType bt) {
 2226   assert(is_java_primitive(bt), "only primitive type vectors");
 2227   return vector_width_in_bytes(bt)/type2aelembytes(bt);
 2228 }
 2229 
 2230 const int Matcher::min_vector_size(const BasicType bt) {
 2231   return max_vector_size(bt); // Same as max.
 2232 }
 2233 
 2234 const int Matcher::scalable_vector_reg_size(const BasicType bt) {
 2235   return -1;
 2236 }
 2237 
 2238 // RETURNS: whether this branch offset is short enough that a short
 2239 // branch can be used.
 2240 //
 2241 // If the platform does not provide any short branch variants, then
 2242 // this method should return `false' for offset 0.
 2243 //
// `Compile::Fill_buffer' will decide on the basis of this information
 2245 // whether to do the pass `Compile::Shorten_branches' at all.
 2246 //
// And `Compile::Shorten_branches' will decide on the basis of this
 2248 // information whether to replace particular branch sites by short
 2249 // ones.
 2250 bool Matcher::is_short_branch_offset(int rule, int br_size, int offset) {
 2251   // Is the offset within the range of a ppc64 pc relative branch?
 2252   bool b;
 2253 
 2254   const int safety_zone = 3 * BytesPerInstWord;
 2255   b = Assembler::is_simm((offset<0 ? offset-safety_zone : offset+safety_zone),
 2256                          29 - 16 + 1 + 2);
 2257   return b;
 2258 }
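// Worked example (illustrative): the width 29 - 16 + 1 + 2 == 16 corresponds
// to the 14-bit BD field of a conditional branch (instruction bits 16..29)
// plus its two implicit zero bits, i.e. a signed 16-bit byte displacement of
// about +/-32 KB. With the 12-byte safety zone an offset of 32000 is checked
// as 32012 and still qualifies as short, while 32760 is checked as 32772 and
// does not.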
 2259 
 2260 /* TODO: PPC port
 2261 // Make a new machine dependent decode node (with its operands).
 2262 MachTypeNode *Matcher::make_decode_node() {
 2263   assert(CompressedOops::base() == NULL && CompressedOops::shift() == 0,
 2264          "This method is only implemented for unscaled cOops mode so far");
 2265   MachTypeNode *decode = new decodeN_unscaledNode();
 2266   decode->set_opnd_array(0, new iRegPdstOper());
 2267   decode->set_opnd_array(1, new iRegNsrcOper());
 2268   return decode;
 2269 }
 2270 */
 2271 
 2272 MachOper* Matcher::pd_specialize_generic_vector_operand(MachOper* original_opnd, uint ideal_reg, bool is_temp) {
 2273   ShouldNotReachHere(); // generic vector operands not supported
 2274   return NULL;
 2275 }
 2276 
 2277 bool Matcher::is_reg2reg_move(MachNode* m) {
 2278   ShouldNotReachHere();  // generic vector operands not supported
 2279   return false;
 2280 }
 2281 
 2282 bool Matcher::is_generic_vector(MachOper* opnd)  {
 2283   ShouldNotReachHere();  // generic vector operands not supported
 2284   return false;
 2285 }
 2286 
 2287 // Constants for c2c and c calling conventions.
 2288 
 2289 const MachRegisterNumbers iarg_reg[8] = {
 2290   R3_num, R4_num, R5_num, R6_num,
 2291   R7_num, R8_num, R9_num, R10_num
 2292 };
 2293 
 2294 const MachRegisterNumbers farg_reg[13] = {
 2295   F1_num, F2_num, F3_num, F4_num,
 2296   F5_num, F6_num, F7_num, F8_num,
 2297   F9_num, F10_num, F11_num, F12_num,
 2298   F13_num
 2299 };
 2300 
 2301 const MachRegisterNumbers vsarg_reg[64] = {
 2302   VSR0_num, VSR1_num, VSR2_num, VSR3_num,
 2303   VSR4_num, VSR5_num, VSR6_num, VSR7_num,
 2304   VSR8_num, VSR9_num, VSR10_num, VSR11_num,
 2305   VSR12_num, VSR13_num, VSR14_num, VSR15_num,
 2306   VSR16_num, VSR17_num, VSR18_num, VSR19_num,
 2307   VSR20_num, VSR21_num, VSR22_num, VSR23_num,
  VSR24_num, VSR25_num, VSR26_num, VSR27_num,
 2309   VSR28_num, VSR29_num, VSR30_num, VSR31_num,
 2310   VSR32_num, VSR33_num, VSR34_num, VSR35_num,
 2311   VSR36_num, VSR37_num, VSR38_num, VSR39_num,
 2312   VSR40_num, VSR41_num, VSR42_num, VSR43_num,
 2313   VSR44_num, VSR45_num, VSR46_num, VSR47_num,
 2314   VSR48_num, VSR49_num, VSR50_num, VSR51_num,
 2315   VSR52_num, VSR53_num, VSR54_num, VSR55_num,
 2316   VSR56_num, VSR57_num, VSR58_num, VSR59_num,
 2317   VSR60_num, VSR61_num, VSR62_num, VSR63_num
 2318 };
 2319 
 2320 const int num_iarg_registers = sizeof(iarg_reg) / sizeof(iarg_reg[0]);
 2321 
 2322 const int num_farg_registers = sizeof(farg_reg) / sizeof(farg_reg[0]);
 2323 
 2324 const int num_vsarg_registers = sizeof(vsarg_reg) / sizeof(vsarg_reg[0]);
 2325 
 2326 // Return whether or not this register is ever used as an argument. This
 2327 // function is used on startup to build the trampoline stubs in generateOptoStub.
 2328 // Registers not mentioned will be killed by the VM call in the trampoline, and
// arguments in those registers will not be available to the callee.
 2330 bool Matcher::can_be_java_arg(int reg) {
 2331   // We return true for all registers contained in iarg_reg[] and
 2332   // farg_reg[] and their virtual halves.
 2333   // We must include the virtual halves in order to get STDs and LDs
 2334   // instead of STWs and LWs in the trampoline stubs.
 2335 
 2336   if (   reg == R3_num  || reg == R3_H_num
 2337       || reg == R4_num  || reg == R4_H_num
 2338       || reg == R5_num  || reg == R5_H_num
 2339       || reg == R6_num  || reg == R6_H_num
 2340       || reg == R7_num  || reg == R7_H_num
 2341       || reg == R8_num  || reg == R8_H_num
 2342       || reg == R9_num  || reg == R9_H_num
 2343       || reg == R10_num || reg == R10_H_num)
 2344     return true;
 2345 
 2346   if (   reg == F1_num  || reg == F1_H_num
 2347       || reg == F2_num  || reg == F2_H_num
 2348       || reg == F3_num  || reg == F3_H_num
 2349       || reg == F4_num  || reg == F4_H_num
 2350       || reg == F5_num  || reg == F5_H_num
 2351       || reg == F6_num  || reg == F6_H_num
 2352       || reg == F7_num  || reg == F7_H_num
 2353       || reg == F8_num  || reg == F8_H_num
 2354       || reg == F9_num  || reg == F9_H_num
 2355       || reg == F10_num || reg == F10_H_num
 2356       || reg == F11_num || reg == F11_H_num
 2357       || reg == F12_num || reg == F12_H_num
 2358       || reg == F13_num || reg == F13_H_num)
 2359     return true;
 2360 
 2361   return false;
 2362 }
 2363 
 2364 bool Matcher::is_spillable_arg(int reg) {
 2365   return can_be_java_arg(reg);
 2366 }
 2367 
 2368 uint Matcher::int_pressure_limit()
 2369 {
 2370   return (INTPRESSURE == -1) ? 26 : INTPRESSURE;
 2371 }
 2372 
 2373 uint Matcher::float_pressure_limit()
 2374 {
 2375   return (FLOATPRESSURE == -1) ? 28 : FLOATPRESSURE;
 2376 }
 2377 
 2378 bool Matcher::use_asm_for_ldiv_by_con(jlong divisor) {
 2379   return false;
 2380 }
 2381 
 2382 // Register for DIVI projection of divmodI.
 2383 RegMask Matcher::divI_proj_mask() {
 2384   ShouldNotReachHere();
 2385   return RegMask();
 2386 }
 2387 
 2388 // Register for MODI projection of divmodI.
 2389 RegMask Matcher::modI_proj_mask() {
 2390   ShouldNotReachHere();
 2391   return RegMask();
 2392 }
 2393 
 2394 // Register for DIVL projection of divmodL.
 2395 RegMask Matcher::divL_proj_mask() {
 2396   ShouldNotReachHere();
 2397   return RegMask();
 2398 }
 2399 
 2400 // Register for MODL projection of divmodL.
 2401 RegMask Matcher::modL_proj_mask() {
 2402   ShouldNotReachHere();
 2403   return RegMask();
 2404 }
 2405 
 2406 const RegMask Matcher::method_handle_invoke_SP_save_mask() {
 2407   return RegMask();
 2408 }
 2409 
 2410 %}
 2411 
 2412 //----------ENCODING BLOCK-----------------------------------------------------
 2413 // This block specifies the encoding classes used by the compiler to output
 2414 // byte streams. Encoding classes are parameterized macros used by
 2415 // Machine Instruction Nodes in order to generate the bit encoding of the
 2416 // instruction. Operands specify their base encoding interface with the
// interface keyword. Four interfaces are currently supported:
// REG_INTER, CONST_INTER, MEMORY_INTER, & COND_INTER. REG_INTER causes an
 2419 // operand to generate a function which returns its register number when
 2420 // queried. CONST_INTER causes an operand to generate a function which
 2421 // returns the value of the constant when queried. MEMORY_INTER causes an
 2422 // operand to generate four functions which return the Base Register, the
 2423 // Index Register, the Scale Value, and the Offset Value of the operand when
 2424 // queried. COND_INTER causes an operand to generate six functions which
 2425 // return the encoding code (ie - encoding bits for the instruction)
 2426 // associated with each basic boolean condition for a conditional instruction.
 2427 //
// Instructions specify two basic values for encoding. They use the
// ins_encode keyword to specify their encoding classes (which must be
// a sequence of enc_class names, and their parameters, specified in
// the encoding block), and they use the opcode keyword to specify, in
// order, their primary, secondary, and tertiary opcode. Only the opcode
// sections which a particular instruction needs for encoding need to be
// specified. A function is also available to check whether a constant
// displacement is an oop.
 2436 encode %{
 2437   enc_class enc_unimplemented %{
 2438     C2_MacroAssembler _masm(&cbuf);
 2439     __ unimplemented("Unimplemented mach node encoding in AD file.", 13);
 2440   %}
 2441 
 2442   enc_class enc_untested %{
 2443 #ifdef ASSERT
 2444     C2_MacroAssembler _masm(&cbuf);
 2445     __ untested("Untested mach node encoding in AD file.");
 2446 #else
 2447 #endif
 2448   %}
 2449 
 2450   enc_class enc_lbz(iRegIdst dst, memory mem) %{
 2451     C2_MacroAssembler _masm(&cbuf);
 2452     int Idisp = $mem$$disp + frame_slots_bias($mem$$base, ra_);
 2453     __ lbz($dst$$Register, Idisp, $mem$$base$$Register);
 2454   %}
 2455 
 2456   // Load acquire.
 2457   enc_class enc_lbz_ac(iRegIdst dst, memory mem) %{
 2458     C2_MacroAssembler _masm(&cbuf);
 2459     int Idisp = $mem$$disp + frame_slots_bias($mem$$base, ra_);
 2460     __ lbz($dst$$Register, Idisp, $mem$$base$$Register);
 2461     __ twi_0($dst$$Register);
 2462     __ isync();
 2463   %}
 2464 
 2465   enc_class enc_lhz(iRegIdst dst, memory mem) %{
 2466 
 2467     C2_MacroAssembler _masm(&cbuf);
 2468     int Idisp = $mem$$disp + frame_slots_bias($mem$$base, ra_);
 2469     __ lhz($dst$$Register, Idisp, $mem$$base$$Register);
 2470   %}
 2471 
 2472   // Load acquire.
 2473   enc_class enc_lhz_ac(iRegIdst dst, memory mem) %{
 2474 
 2475     C2_MacroAssembler _masm(&cbuf);
 2476     int Idisp = $mem$$disp + frame_slots_bias($mem$$base, ra_);
 2477     __ lhz($dst$$Register, Idisp, $mem$$base$$Register);
 2478     __ twi_0($dst$$Register);
 2479     __ isync();
 2480   %}
 2481 
 2482   enc_class enc_lwz(iRegIdst dst, memory mem) %{
 2483 
 2484     C2_MacroAssembler _masm(&cbuf);
 2485     int Idisp = $mem$$disp + frame_slots_bias($mem$$base, ra_);
 2486     __ lwz($dst$$Register, Idisp, $mem$$base$$Register);
 2487   %}
 2488 
 2489   // Load acquire.
 2490   enc_class enc_lwz_ac(iRegIdst dst, memory mem) %{
 2491 
 2492     C2_MacroAssembler _masm(&cbuf);
 2493     int Idisp = $mem$$disp + frame_slots_bias($mem$$base, ra_);
 2494     __ lwz($dst$$Register, Idisp, $mem$$base$$Register);
 2495     __ twi_0($dst$$Register);
 2496     __ isync();
 2497   %}
 2498 
 2499   enc_class enc_ld(iRegLdst dst, memoryAlg4 mem) %{
 2500     C2_MacroAssembler _masm(&cbuf);
 2501     int Idisp = $mem$$disp + frame_slots_bias($mem$$base, ra_);
 2502     // Operand 'ds' requires 4-alignment.
 2503     assert((Idisp & 0x3) == 0, "unaligned offset");
 2504     __ ld($dst$$Register, Idisp, $mem$$base$$Register);
 2505   %}
 2506 
 2507   // Load acquire.
 2508   enc_class enc_ld_ac(iRegLdst dst, memoryAlg4 mem) %{
 2509     C2_MacroAssembler _masm(&cbuf);
 2510     int Idisp = $mem$$disp + frame_slots_bias($mem$$base, ra_);
 2511     // Operand 'ds' requires 4-alignment.
 2512     assert((Idisp & 0x3) == 0, "unaligned offset");
 2513     __ ld($dst$$Register, Idisp, $mem$$base$$Register);
 2514     __ twi_0($dst$$Register);
 2515     __ isync();
 2516   %}
 2517 
 2518   enc_class enc_lfd(RegF dst, memory mem) %{
 2519     C2_MacroAssembler _masm(&cbuf);
 2520     int Idisp = $mem$$disp + frame_slots_bias($mem$$base, ra_);
 2521     __ lfd($dst$$FloatRegister, Idisp, $mem$$base$$Register);
 2522   %}
 2523 
 2524   enc_class enc_load_long_constL(iRegLdst dst, immL src, iRegLdst toc) %{
 2525 
 2526     C2_MacroAssembler _masm(&cbuf);
 2527     int toc_offset = 0;
 2528 
 2529     address const_toc_addr;
 2530     // Create a non-oop constant, no relocation needed.
 2531     // If it is an IC, it has a virtual_call_Relocation.
 2532     const_toc_addr = __ long_constant((jlong)$src$$constant);
 2533     if (const_toc_addr == NULL) {
 2534       ciEnv::current()->record_out_of_memory_failure();
 2535       return;
 2536     }
 2537 
 2538     // Get the constant's TOC offset.
 2539     toc_offset = __ offset_to_method_toc(const_toc_addr);
 2540 
 2541     // Keep the current instruction offset in mind.
 2542     ((loadConLNode*)this)->_cbuf_insts_offset = __ offset();
 2543 
 2544     __ ld($dst$$Register, toc_offset, $toc$$Register);
 2545   %}
 2546 
 2547   enc_class enc_load_long_constL_hi(iRegLdst dst, iRegLdst toc, immL src) %{
 2548 
 2549     C2_MacroAssembler _masm(&cbuf);
 2550 
 2551     if (!ra_->C->output()->in_scratch_emit_size()) {
 2552       address const_toc_addr;
 2553       // Create a non-oop constant, no relocation needed.
 2554       // If it is an IC, it has a virtual_call_Relocation.
 2555       const_toc_addr = __ long_constant((jlong)$src$$constant);
 2556       if (const_toc_addr == NULL) {
 2557         ciEnv::current()->record_out_of_memory_failure();
 2558         return;
 2559       }
 2560 
 2561       // Get the constant's TOC offset.
 2562       const int toc_offset = __ offset_to_method_toc(const_toc_addr);
 2563       // Store the toc offset of the constant.
 2564       ((loadConL_hiNode*)this)->_const_toc_offset = toc_offset;
 2565 
 2566       // Also keep the current instruction offset in mind.
 2567       ((loadConL_hiNode*)this)->_cbuf_insts_offset = __ offset();
 2568     }
 2569 
 2570     __ addis($dst$$Register, $toc$$Register, MacroAssembler::largeoffset_si16_si16_hi(_const_toc_offset));
 2571   %}
 2572 
 2573 %} // encode
 2574 
 2575 source %{
 2576 
 2577 typedef struct {
 2578   loadConL_hiNode *_large_hi;
 2579   loadConL_loNode *_large_lo;
 2580   loadConLNode    *_small;
 2581   MachNode        *_last;
 2582 } loadConLNodesTuple;
 2583 
 2584 loadConLNodesTuple loadConLNodesTuple_create(PhaseRegAlloc *ra_, Node *toc, immLOper *immSrc,
 2585                                              OptoReg::Name reg_second, OptoReg::Name reg_first) {
 2586   loadConLNodesTuple nodes;
 2587 
 2588   const bool large_constant_pool = true; // TODO: PPC port C->cfg()->_consts_size > 4000;
 2589   if (large_constant_pool) {
 2590     // Create new nodes.
 2591     loadConL_hiNode *m1 = new loadConL_hiNode();
 2592     loadConL_loNode *m2 = new loadConL_loNode();
 2593 
 2594     // inputs for new nodes
 2595     m1->add_req(NULL, toc);
 2596     m2->add_req(NULL, m1);
 2597 
 2598     // operands for new nodes
 2599     m1->_opnds[0] = new iRegLdstOper(); // dst
 2600     m1->_opnds[1] = immSrc;             // src
 2601     m1->_opnds[2] = new iRegPdstOper(); // toc
 2602     m2->_opnds[0] = new iRegLdstOper(); // dst
 2603     m2->_opnds[1] = immSrc;             // src
 2604     m2->_opnds[2] = new iRegLdstOper(); // base
 2605 
 2606     // Initialize ins_attrib TOC fields.
 2607     m1->_const_toc_offset = -1;
 2608     m2->_const_toc_offset_hi_node = m1;
 2609 
 2610     // Initialize ins_attrib instruction offset.
 2611     m1->_cbuf_insts_offset = -1;
 2612 
 2613     // register allocation for new nodes
 2614     ra_->set_pair(m1->_idx, reg_second, reg_first);
 2615     ra_->set_pair(m2->_idx, reg_second, reg_first);
 2616 
 2617     // Create result.
 2618     nodes._large_hi = m1;
 2619     nodes._large_lo = m2;
 2620     nodes._small = NULL;
 2621     nodes._last = nodes._large_lo;
 2622     assert(m2->bottom_type()->isa_long(), "must be long");
 2623   } else {
 2624     loadConLNode *m2 = new loadConLNode();
 2625 
 2626     // inputs for new nodes
 2627     m2->add_req(NULL, toc);
 2628 
 2629     // operands for new nodes
 2630     m2->_opnds[0] = new iRegLdstOper(); // dst
 2631     m2->_opnds[1] = immSrc;             // src
 2632     m2->_opnds[2] = new iRegPdstOper(); // toc
 2633 
 2634     // Initialize ins_attrib instruction offset.
 2635     m2->_cbuf_insts_offset = -1;
 2636 
 2637     // register allocation for new nodes
 2638     ra_->set_pair(m2->_idx, reg_second, reg_first);
 2639 
 2640     // Create result.
 2641     nodes._large_hi = NULL;
 2642     nodes._large_lo = NULL;
 2643     nodes._small = m2;
 2644     nodes._last = nodes._small;
 2645     assert(m2->bottom_type()->isa_long(), "must be long");
 2646   }
 2647 
 2648   return nodes;
 2649 }
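// For illustration, the two shapes produced above correspond roughly to the
// following code (offsets are hypothetical):
//   large constant pool:  addis Rdst, Rtoc, hi16(toc_offset)
//                         ld    Rdst, lo16(toc_offset)(Rdst)
//   small constant pool:  ld    Rdst, toc_offset(Rtoc)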
 2650 
 2651 typedef struct {
 2652   loadConL_hiNode *_large_hi;
 2653   loadConL_loNode *_large_lo;
 2654   mtvsrdNode      *_moved;
 2655   xxspltdNode     *_replicated;
 2656   loadConLNode    *_small;
 2657   MachNode        *_last;
 2658 } loadConLReplicatedNodesTuple;
 2659 
 2660 loadConLReplicatedNodesTuple loadConLReplicatedNodesTuple_create(Compile *C, PhaseRegAlloc *ra_, Node *toc, immLOper *immSrc,
 2661                                                  vecXOper *dst, immI_0Oper *zero,
 2662                                                  OptoReg::Name reg_second, OptoReg::Name reg_first,
 2663                                                  OptoReg::Name reg_vec_second, OptoReg::Name reg_vec_first) {
 2664   loadConLReplicatedNodesTuple nodes;
 2665 
 2666   const bool large_constant_pool = true; // TODO: PPC port C->cfg()->_consts_size > 4000;
 2667   if (large_constant_pool) {
 2668     // Create new nodes.
 2669     loadConL_hiNode *m1 = new  loadConL_hiNode();
 2670     loadConL_loNode *m2 = new  loadConL_loNode();
 2671     mtvsrdNode *m3 = new  mtvsrdNode();
 2672     xxspltdNode *m4 = new  xxspltdNode();
 2673 
 2674     // inputs for new nodes
 2675     m1->add_req(NULL, toc);
 2676     m2->add_req(NULL, m1);
 2677     m3->add_req(NULL, m2);
 2678     m4->add_req(NULL, m3);
 2679 
 2680     // operands for new nodes
 2681     m1->_opnds[0] = new  iRegLdstOper(); // dst
 2682     m1->_opnds[1] = immSrc;              // src
 2683     m1->_opnds[2] = new  iRegPdstOper(); // toc
 2684 
 2685     m2->_opnds[0] = new  iRegLdstOper(); // dst
 2686     m2->_opnds[1] = immSrc;              // src
 2687     m2->_opnds[2] = new  iRegLdstOper(); // base
 2688 
 2689     m3->_opnds[0] = new  vecXOper();     // dst
 2690     m3->_opnds[1] = new  iRegLdstOper(); // src
 2691 
 2692     m4->_opnds[0] = new  vecXOper();     // dst
 2693     m4->_opnds[1] = new  vecXOper();     // src
 2694     m4->_opnds[2] = zero;
 2695 
 2696     // Initialize ins_attrib TOC fields.
 2697     m1->_const_toc_offset = -1;
 2698     m2->_const_toc_offset_hi_node = m1;
 2699 
 2700     // Initialize ins_attrib instruction offset.
 2701     m1->_cbuf_insts_offset = -1;
 2702 
 2703     // register allocation for new nodes
 2704     ra_->set_pair(m1->_idx, reg_second, reg_first);
 2705     ra_->set_pair(m2->_idx, reg_second, reg_first);
 2706     ra_->set1(m3->_idx, reg_second);
 2707     ra_->set2(m3->_idx, reg_vec_first);
 2708     ra_->set_pair(m4->_idx, reg_vec_second, reg_vec_first);
 2709 
 2710     // Create result.
 2711     nodes._large_hi = m1;
 2712     nodes._large_lo = m2;
 2713     nodes._moved = m3;
 2714     nodes._replicated = m4;
 2715     nodes._small = NULL;
 2716     nodes._last = nodes._replicated;
 2717     assert(m2->bottom_type()->isa_long(), "must be long");
 2718   } else {
 2719     loadConLNode *m2 = new  loadConLNode();
 2720     mtvsrdNode *m3 = new  mtvsrdNode();
 2721     xxspltdNode *m4 = new  xxspltdNode();
 2722 
 2723     // inputs for new nodes
 2724     m2->add_req(NULL, toc);
 2725 
 2726     // operands for new nodes
 2727     m2->_opnds[0] = new  iRegLdstOper(); // dst
 2728     m2->_opnds[1] = immSrc;              // src
 2729     m2->_opnds[2] = new  iRegPdstOper(); // toc
 2730 
 2731     m3->_opnds[0] = new  vecXOper();     // dst
 2732     m3->_opnds[1] = new  iRegLdstOper(); // src
 2733 
 2734     m4->_opnds[0] = new  vecXOper();     // dst
 2735     m4->_opnds[1] = new  vecXOper();     // src
 2736     m4->_opnds[2] = zero;
 2737 
 2738     // Initialize ins_attrib instruction offset.
 2739     m2->_cbuf_insts_offset = -1;
 2740     ra_->set1(m3->_idx, reg_second);
 2741     ra_->set2(m3->_idx, reg_vec_first);
 2742     ra_->set_pair(m4->_idx, reg_vec_second, reg_vec_first);
 2743 
 2744     // register allocation for new nodes
 2745     ra_->set_pair(m2->_idx, reg_second, reg_first);
 2746 
 2747     // Create result.
 2748     nodes._large_hi = NULL;
 2749     nodes._large_lo = NULL;
 2750     nodes._small = m2;
 2751     nodes._moved = m3;
 2752     nodes._replicated = m4;
 2753     nodes._last = nodes._replicated;
 2754     assert(m2->bottom_type()->isa_long(), "must be long");
 2755   }
 2756 
 2757   return nodes;
 2758 }
 2759 
 2760 %} // source
 2761 
 2762 encode %{
 2763   // Postalloc expand emitter for loading a long constant from the method's TOC.
  // Enc_class needed as constanttablebase is not supported by postalloc
 2765   // expand.
 2766   enc_class postalloc_expand_load_long_constant(iRegLdst dst, immL src, iRegLdst toc) %{
 2767     // Create new nodes.
 2768     loadConLNodesTuple loadConLNodes =
 2769       loadConLNodesTuple_create(ra_, n_toc, op_src,
 2770                                 ra_->get_reg_second(this), ra_->get_reg_first(this));
 2771 
 2772     // Push new nodes.
 2773     if (loadConLNodes._large_hi) nodes->push(loadConLNodes._large_hi);
 2774     if (loadConLNodes._last)     nodes->push(loadConLNodes._last);
 2775 
 2776     // some asserts
 2777     assert(nodes->length() >= 1, "must have created at least 1 node");
 2778     assert(loadConLNodes._last->bottom_type()->isa_long(), "must be long");
 2779   %}
 2780 
 2781   enc_class enc_load_long_constP(iRegLdst dst, immP src, iRegLdst toc) %{
 2782 
 2783     C2_MacroAssembler _masm(&cbuf);
 2784     int toc_offset = 0;
 2785 
 2786     intptr_t val = $src$$constant;
 2787     relocInfo::relocType constant_reloc = $src->constant_reloc();  // src
 2788     address const_toc_addr;
 2789     if (constant_reloc == relocInfo::oop_type) {
 2790       // Create an oop constant and a corresponding relocation.
 2791       AddressLiteral a = __ allocate_oop_address((jobject)val);
 2792       const_toc_addr = __ address_constant((address)a.value(), RelocationHolder::none);
 2793       __ relocate(a.rspec());
 2794     } else if (constant_reloc == relocInfo::metadata_type) {
 2795       AddressLiteral a = __ constant_metadata_address((Metadata *)val);
 2796       const_toc_addr = __ address_constant((address)a.value(), RelocationHolder::none);
 2797       __ relocate(a.rspec());
 2798     } else {
 2799       // Create a non-oop constant, no relocation needed.
 2800       const_toc_addr = __ long_constant((jlong)$src$$constant);
 2801     }
 2802 
 2803     if (const_toc_addr == NULL) {
 2804       ciEnv::current()->record_out_of_memory_failure();
 2805       return;
 2806     }
 2807     // Get the constant's TOC offset.
 2808     toc_offset = __ offset_to_method_toc(const_toc_addr);
 2809 
 2810     __ ld($dst$$Register, toc_offset, $toc$$Register);
 2811   %}
 2812 
 2813   enc_class enc_load_long_constP_hi(iRegLdst dst, immP src, iRegLdst toc) %{
 2814 
 2815     C2_MacroAssembler _masm(&cbuf);
 2816     if (!ra_->C->output()->in_scratch_emit_size()) {
 2817       intptr_t val = $src$$constant;
 2818       relocInfo::relocType constant_reloc = $src->constant_reloc();  // src
 2819       address const_toc_addr;
 2820       if (constant_reloc == relocInfo::oop_type) {
 2821         // Create an oop constant and a corresponding relocation.
 2822         AddressLiteral a = __ allocate_oop_address((jobject)val);
 2823         const_toc_addr = __ address_constant((address)a.value(), RelocationHolder::none);
 2824         __ relocate(a.rspec());
 2825       } else if (constant_reloc == relocInfo::metadata_type) {
 2826         AddressLiteral a = __ constant_metadata_address((Metadata *)val);
 2827         const_toc_addr = __ address_constant((address)a.value(), RelocationHolder::none);
 2828         __ relocate(a.rspec());
 2829       } else {  // non-oop pointers, e.g. card mark base, heap top
 2830         // Create a non-oop constant, no relocation needed.
 2831         const_toc_addr = __ long_constant((jlong)$src$$constant);
 2832       }
 2833 
 2834       if (const_toc_addr == NULL) {
 2835         ciEnv::current()->record_out_of_memory_failure();
 2836         return;
 2837       }
 2838       // Get the constant's TOC offset.
 2839       const int toc_offset = __ offset_to_method_toc(const_toc_addr);
 2840       // Store the toc offset of the constant.
 2841       ((loadConP_hiNode*)this)->_const_toc_offset = toc_offset;
 2842     }
 2843 
 2844     __ addis($dst$$Register, $toc$$Register, MacroAssembler::largeoffset_si16_si16_hi(_const_toc_offset));
 2845   %}
 2846 
 2847   // Postalloc expand emitter for loading a ptr constant from the method's TOC.
  // Enc_class needed as constanttablebase is not supported by postalloc
 2849   // expand.
 2850   enc_class postalloc_expand_load_ptr_constant(iRegPdst dst, immP src, iRegLdst toc) %{
 2851     const bool large_constant_pool = true; // TODO: PPC port C->cfg()->_consts_size > 4000;
 2852     if (large_constant_pool) {
 2853       // Create new nodes.
 2854       loadConP_hiNode *m1 = new loadConP_hiNode();
 2855       loadConP_loNode *m2 = new loadConP_loNode();
 2856 
 2857       // inputs for new nodes
 2858       m1->add_req(NULL, n_toc);
 2859       m2->add_req(NULL, m1);
 2860 
 2861       // operands for new nodes
 2862       m1->_opnds[0] = new iRegPdstOper(); // dst
 2863       m1->_opnds[1] = op_src;             // src
 2864       m1->_opnds[2] = new iRegPdstOper(); // toc
 2865       m2->_opnds[0] = new iRegPdstOper(); // dst
 2866       m2->_opnds[1] = op_src;             // src
 2867       m2->_opnds[2] = new iRegLdstOper(); // base
 2868 
 2869       // Initialize ins_attrib TOC fields.
 2870       m1->_const_toc_offset = -1;
 2871       m2->_const_toc_offset_hi_node = m1;
 2872 
 2873       // Register allocation for new nodes.
 2874       ra_->set_pair(m1->_idx, ra_->get_reg_second(this), ra_->get_reg_first(this));
 2875       ra_->set_pair(m2->_idx, ra_->get_reg_second(this), ra_->get_reg_first(this));
 2876 
 2877       nodes->push(m1);
 2878       nodes->push(m2);
 2879       assert(m2->bottom_type()->isa_ptr(), "must be ptr");
 2880     } else {
 2881       loadConPNode *m2 = new loadConPNode();
 2882 
 2883       // inputs for new nodes
 2884       m2->add_req(NULL, n_toc);
 2885 
 2886       // operands for new nodes
 2887       m2->_opnds[0] = new iRegPdstOper(); // dst
 2888       m2->_opnds[1] = op_src;             // src
 2889       m2->_opnds[2] = new iRegPdstOper(); // toc
 2890 
 2891       // Register allocation for new nodes.
 2892       ra_->set_pair(m2->_idx, ra_->get_reg_second(this), ra_->get_reg_first(this));
 2893 
 2894       nodes->push(m2);
 2895       assert(m2->bottom_type()->isa_ptr(), "must be ptr");
 2896     }
 2897   %}
 2898 
  // Enc_class needed as constanttablebase is not supported by postalloc
 2900   // expand.
 2901   enc_class postalloc_expand_load_float_constant(regF dst, immF src, iRegLdst toc) %{
 2902     bool large_constant_pool = true; // TODO: PPC port C->cfg()->_consts_size > 4000;
 2903 
 2904     MachNode *m2;
 2905     if (large_constant_pool) {
 2906       m2 = new loadConFCompNode();
 2907     } else {
 2908       m2 = new loadConFNode();
 2909     }
 2910     // inputs for new nodes
 2911     m2->add_req(NULL, n_toc);
 2912 
 2913     // operands for new nodes
 2914     m2->_opnds[0] = op_dst;
 2915     m2->_opnds[1] = op_src;
 2916     m2->_opnds[2] = new iRegPdstOper(); // constanttablebase
 2917 
 2918     // register allocation for new nodes
 2919     ra_->set_pair(m2->_idx, ra_->get_reg_second(this), ra_->get_reg_first(this));
 2920     nodes->push(m2);
 2921   %}
 2922 
  // Enc_class needed as constanttablebase is not supported by postalloc
 2924   // expand.
 2925   enc_class postalloc_expand_load_double_constant(regD dst, immD src, iRegLdst toc) %{
 2926     bool large_constant_pool = true; // TODO: PPC port C->cfg()->_consts_size > 4000;
 2927 
 2928     MachNode *m2;
 2929     if (large_constant_pool) {
 2930       m2 = new loadConDCompNode();
 2931     } else {
 2932       m2 = new loadConDNode();
 2933     }
 2934     // inputs for new nodes
 2935     m2->add_req(NULL, n_toc);
 2936 
 2937     // operands for new nodes
 2938     m2->_opnds[0] = op_dst;
 2939     m2->_opnds[1] = op_src;
 2940     m2->_opnds[2] = new iRegPdstOper(); // constanttablebase
 2941 
 2942     // register allocation for new nodes
 2943     ra_->set_pair(m2->_idx, ra_->get_reg_second(this), ra_->get_reg_first(this));
 2944     nodes->push(m2);
 2945   %}
 2946 
 2947   enc_class enc_stw(iRegIsrc src, memory mem) %{
 2948     C2_MacroAssembler _masm(&cbuf);
 2949     int Idisp = $mem$$disp + frame_slots_bias($mem$$base, ra_);
 2950     __ stw($src$$Register, Idisp, $mem$$base$$Register);
 2951   %}
 2952 
 2953   enc_class enc_std(iRegIsrc src, memoryAlg4 mem) %{
 2954     C2_MacroAssembler _masm(&cbuf);
 2955     int Idisp = $mem$$disp + frame_slots_bias($mem$$base, ra_);
 2956     // Operand 'ds' requires 4-alignment.
 2957     assert((Idisp & 0x3) == 0, "unaligned offset");
 2958     __ std($src$$Register, Idisp, $mem$$base$$Register);
 2959   %}
 2960 
 2961   enc_class enc_stfs(RegF src, memory mem) %{
 2962     C2_MacroAssembler _masm(&cbuf);
 2963     int Idisp = $mem$$disp + frame_slots_bias($mem$$base, ra_);
 2964     __ stfs($src$$FloatRegister, Idisp, $mem$$base$$Register);
 2965   %}
 2966 
 2967   enc_class enc_stfd(RegF src, memory mem) %{
 2968     C2_MacroAssembler _masm(&cbuf);
 2969     int Idisp = $mem$$disp + frame_slots_bias($mem$$base, ra_);
 2970     __ stfd($src$$FloatRegister, Idisp, $mem$$base$$Register);
 2971   %}
 2972 
 2973   enc_class postalloc_expand_encode_oop(iRegNdst dst, iRegPdst src, flagsReg crx) %{
 2974 
 2975     if (VM_Version::has_isel()) {
 2976       // use isel instruction with Power 7
 2977       cmpP_reg_imm16Node *n_compare  = new cmpP_reg_imm16Node();
 2978       encodeP_subNode    *n_sub_base = new encodeP_subNode();
 2979       encodeP_shiftNode  *n_shift    = new encodeP_shiftNode();
 2980       cond_set_0_oopNode *n_cond_set = new cond_set_0_oopNode();
 2981 
 2982       n_compare->add_req(n_region, n_src);
 2983       n_compare->_opnds[0] = op_crx;
 2984       n_compare->_opnds[1] = op_src;
 2985       n_compare->_opnds[2] = new immL16Oper(0);
 2986 
 2987       n_sub_base->add_req(n_region, n_src);
 2988       n_sub_base->_opnds[0] = op_dst;
 2989       n_sub_base->_opnds[1] = op_src;
 2990       n_sub_base->_bottom_type = _bottom_type;
 2991 
 2992       n_shift->add_req(n_region, n_sub_base);
 2993       n_shift->_opnds[0] = op_dst;
 2994       n_shift->_opnds[1] = op_dst;
 2995       n_shift->_bottom_type = _bottom_type;
 2996 
 2997       n_cond_set->add_req(n_region, n_compare, n_shift);
 2998       n_cond_set->_opnds[0] = op_dst;
 2999       n_cond_set->_opnds[1] = op_crx;
 3000       n_cond_set->_opnds[2] = op_dst;
 3001       n_cond_set->_bottom_type = _bottom_type;
 3002 
 3003       ra_->set_pair(n_compare->_idx, ra_->get_reg_second(n_crx), ra_->get_reg_first(n_crx));
 3004       ra_->set_pair(n_sub_base->_idx, ra_->get_reg_second(this), ra_->get_reg_first(this));
 3005       ra_->set_pair(n_shift->_idx, ra_->get_reg_second(this), ra_->get_reg_first(this));
 3006       ra_->set_pair(n_cond_set->_idx, ra_->get_reg_second(this), ra_->get_reg_first(this));
 3007 
 3008       nodes->push(n_compare);
 3009       nodes->push(n_sub_base);
 3010       nodes->push(n_shift);
 3011       nodes->push(n_cond_set);
 3012 
 3013     } else {
 3014       // before Power 7
 3015       moveRegNode        *n_move     = new moveRegNode();
 3016       cmpP_reg_imm16Node *n_compare  = new cmpP_reg_imm16Node();
 3017       encodeP_shiftNode  *n_shift    = new encodeP_shiftNode();
 3018       cond_sub_baseNode  *n_sub_base = new cond_sub_baseNode();
 3019 
 3020       n_move->add_req(n_region, n_src);
 3021       n_move->_opnds[0] = op_dst;
 3022       n_move->_opnds[1] = op_src;
 3023       ra_->set_oop(n_move, true); // Until here, 'n_move' still produces an oop.
 3024 
 3025       n_compare->add_req(n_region, n_src);
 3026       n_compare->add_prec(n_move);
 3027 
 3028       n_compare->_opnds[0] = op_crx;
 3029       n_compare->_opnds[1] = op_src;
 3030       n_compare->_opnds[2] = new immL16Oper(0);
 3031 
 3032       n_sub_base->add_req(n_region, n_compare, n_src);
 3033       n_sub_base->_opnds[0] = op_dst;
 3034       n_sub_base->_opnds[1] = op_crx;
 3035       n_sub_base->_opnds[2] = op_src;
 3036       n_sub_base->_bottom_type = _bottom_type;
 3037 
 3038       n_shift->add_req(n_region, n_sub_base);
 3039       n_shift->_opnds[0] = op_dst;
 3040       n_shift->_opnds[1] = op_dst;
 3041       n_shift->_bottom_type = _bottom_type;
 3042 
 3043       ra_->set_pair(n_shift->_idx, ra_->get_reg_second(this), ra_->get_reg_first(this));
 3044       ra_->set_pair(n_compare->_idx, ra_->get_reg_second(n_crx), ra_->get_reg_first(n_crx));
 3045       ra_->set_pair(n_sub_base->_idx, ra_->get_reg_second(this), ra_->get_reg_first(this));
 3046       ra_->set_pair(n_move->_idx, ra_->get_reg_second(this), ra_->get_reg_first(this));
 3047 
 3048       nodes->push(n_move);
 3049       nodes->push(n_compare);
 3050       nodes->push(n_sub_base);
 3051       nodes->push(n_shift);
 3052     }
 3053 
 3054     assert(!(ra_->is_oop(this)), "sanity"); // This is not supposed to be GC'ed.
 3055   %}
 3056 
 3057   enc_class postalloc_expand_encode_oop_not_null(iRegNdst dst, iRegPdst src) %{
 3058 
 3059     encodeP_subNode *n1 = new encodeP_subNode();
 3060     n1->add_req(n_region, n_src);
 3061     n1->_opnds[0] = op_dst;
 3062     n1->_opnds[1] = op_src;
 3063     n1->_bottom_type = _bottom_type;
 3064 
 3065     encodeP_shiftNode *n2 = new encodeP_shiftNode();
 3066     n2->add_req(n_region, n1);
 3067     n2->_opnds[0] = op_dst;
 3068     n2->_opnds[1] = op_dst;
 3069     n2->_bottom_type = _bottom_type;
 3070     ra_->set_pair(n1->_idx, ra_->get_reg_second(this), ra_->get_reg_first(this));
 3071     ra_->set_pair(n2->_idx, ra_->get_reg_second(this), ra_->get_reg_first(this));
 3072 
 3073     nodes->push(n1);
 3074     nodes->push(n2);
 3075     assert(!(ra_->is_oop(this)), "sanity"); // This is not supposed to be GC'ed.
 3076   %}
 3077 
 3078   enc_class postalloc_expand_decode_oop(iRegPdst dst, iRegNsrc src, flagsReg crx) %{
 3079     decodeN_shiftNode *n_shift    = new decodeN_shiftNode();
 3080     cmpN_reg_imm0Node *n_compare  = new cmpN_reg_imm0Node();
 3081 
 3082     n_compare->add_req(n_region, n_src);
 3083     n_compare->_opnds[0] = op_crx;
 3084     n_compare->_opnds[1] = op_src;
 3085     n_compare->_opnds[2] = new immN_0Oper(TypeNarrowOop::NULL_PTR);
 3086 
 3087     n_shift->add_req(n_region, n_src);
 3088     n_shift->_opnds[0] = op_dst;
 3089     n_shift->_opnds[1] = op_src;
 3090     n_shift->_bottom_type = _bottom_type;
 3091 
 3092     if (VM_Version::has_isel()) {
 3093       // use isel instruction with Power 7
 3094 
 3095       decodeN_addNode *n_add_base = new decodeN_addNode();
 3096       n_add_base->add_req(n_region, n_shift);
 3097       n_add_base->_opnds[0] = op_dst;
 3098       n_add_base->_opnds[1] = op_dst;
 3099       n_add_base->_bottom_type = _bottom_type;
 3100 
 3101       cond_set_0_ptrNode *n_cond_set = new cond_set_0_ptrNode();
 3102       n_cond_set->add_req(n_region, n_compare, n_add_base);
 3103       n_cond_set->_opnds[0] = op_dst;
 3104       n_cond_set->_opnds[1] = op_crx;
 3105       n_cond_set->_opnds[2] = op_dst;
 3106       n_cond_set->_bottom_type = _bottom_type;
 3107 
 3108       assert(ra_->is_oop(this) == true, "A decodeN node must produce an oop!");
 3109       ra_->set_oop(n_cond_set, true);
 3110 
 3111       ra_->set_pair(n_shift->_idx, ra_->get_reg_second(this), ra_->get_reg_first(this));
 3112       ra_->set_pair(n_compare->_idx, ra_->get_reg_second(n_crx), ra_->get_reg_first(n_crx));
 3113       ra_->set_pair(n_add_base->_idx, ra_->get_reg_second(this), ra_->get_reg_first(this));
 3114       ra_->set_pair(n_cond_set->_idx, ra_->get_reg_second(this), ra_->get_reg_first(this));
 3115 
 3116       nodes->push(n_compare);
 3117       nodes->push(n_shift);
 3118       nodes->push(n_add_base);
 3119       nodes->push(n_cond_set);
 3120 
 3121     } else {
 3122       // before Power 7
 3123       cond_add_baseNode *n_add_base = new cond_add_baseNode();
 3124 
 3125       n_add_base->add_req(n_region, n_compare, n_shift);
 3126       n_add_base->_opnds[0] = op_dst;
 3127       n_add_base->_opnds[1] = op_crx;
 3128       n_add_base->_opnds[2] = op_dst;
 3129       n_add_base->_bottom_type = _bottom_type;
 3130 
 3131       assert(ra_->is_oop(this) == true, "A decodeN node must produce an oop!");
 3132       ra_->set_oop(n_add_base, true);
 3133 
 3134       ra_->set_pair(n_shift->_idx, ra_->get_reg_second(this), ra_->get_reg_first(this));
 3135       ra_->set_pair(n_compare->_idx, ra_->get_reg_second(n_crx), ra_->get_reg_first(n_crx));
 3136       ra_->set_pair(n_add_base->_idx, ra_->get_reg_second(this), ra_->get_reg_first(this));
 3137 
 3138       nodes->push(n_compare);
 3139       nodes->push(n_shift);
 3140       nodes->push(n_add_base);
 3141     }
 3142   %}
 3143 
 3144   enc_class postalloc_expand_decode_oop_not_null(iRegPdst dst, iRegNsrc src) %{
 3145     decodeN_shiftNode *n1 = new decodeN_shiftNode();
 3146     n1->add_req(n_region, n_src);
 3147     n1->_opnds[0] = op_dst;
 3148     n1->_opnds[1] = op_src;
 3149     n1->_bottom_type = _bottom_type;
 3150 
 3151     decodeN_addNode *n2 = new decodeN_addNode();
 3152     n2->add_req(n_region, n1);
 3153     n2->_opnds[0] = op_dst;
 3154     n2->_opnds[1] = op_dst;
 3155     n2->_bottom_type = _bottom_type;
 3156     ra_->set_pair(n1->_idx, ra_->get_reg_second(this), ra_->get_reg_first(this));
 3157     ra_->set_pair(n2->_idx, ra_->get_reg_second(this), ra_->get_reg_first(this));
 3158 
 3159     assert(ra_->is_oop(this) == true, "A decodeN node must produce an oop!");
 3160     ra_->set_oop(n2, true);
 3161 
 3162     nodes->push(n1);
 3163     nodes->push(n2);
 3164   %}
 3165 
 3166   enc_class enc_cmove_reg(iRegIdst dst, flagsRegSrc crx, iRegIsrc src, cmpOp cmp) %{
 3167 
 3168     C2_MacroAssembler _masm(&cbuf);
 3169     int cc        = $cmp$$cmpcode;
 3170     int flags_reg = $crx$$reg;
 3171     Label done;
 3172     assert((Assembler::bcondCRbiIs1 & ~Assembler::bcondCRbiIs0) == 8, "check encoding");
 3173     // Branch if not (cmp crx).
 3174     __ bc(cc_to_inverse_boint(cc), cc_to_biint(cc, flags_reg), done);
 3175     __ mr($dst$$Register, $src$$Register);
 3176     __ bind(done);
 3177   %}
 3178 
 3179   enc_class enc_cmove_imm(iRegIdst dst, flagsRegSrc crx, immI16 src, cmpOp cmp) %{
 3180 
 3181     C2_MacroAssembler _masm(&cbuf);
 3182     Label done;
 3183     assert((Assembler::bcondCRbiIs1 & ~Assembler::bcondCRbiIs0) == 8, "check encoding");
 3184     // Branch if not (cmp crx).
 3185     __ bc(cc_to_inverse_boint($cmp$$cmpcode), cc_to_biint($cmp$$cmpcode, $crx$$reg), done);
 3186     __ li($dst$$Register, $src$$constant);
 3187     __ bind(done);
 3188   %}
 3189 
 3190   // This enc_class is needed so that the scheduler gets a proper
 3191   // input mapping for latency computation.
 3192   enc_class enc_andc(iRegIdst dst, iRegIsrc src1, iRegIsrc src2) %{
 3193     C2_MacroAssembler _masm(&cbuf);
 3194     __ andc($dst$$Register, $src1$$Register, $src2$$Register);
 3195   %}
 3196 
 3197   enc_class enc_convI2B_regI__cmove(iRegIdst dst, iRegIsrc src, flagsReg crx, immI16 zero, immI16 notzero) %{
 3198 
 3199     C2_MacroAssembler _masm(&cbuf);
 3200 
 3201     Label done;
 3202     __ cmpwi($crx$$CondRegister, $src$$Register, 0);
 3203     __ li($dst$$Register, $zero$$constant);
 3204     __ beq($crx$$CondRegister, done);
 3205     __ li($dst$$Register, $notzero$$constant);
 3206     __ bind(done);
 3207   %}
 3208 
 3209   enc_class enc_convP2B_regP__cmove(iRegIdst dst, iRegPsrc src, flagsReg crx, immI16 zero, immI16 notzero) %{
 3210 
 3211     C2_MacroAssembler _masm(&cbuf);
 3212 
 3213     Label done;
 3214     __ cmpdi($crx$$CondRegister, $src$$Register, 0);
 3215     __ li($dst$$Register, $zero$$constant);
 3216     __ beq($crx$$CondRegister, done);
 3217     __ li($dst$$Register, $notzero$$constant);
 3218     __ bind(done);
 3219   %}
 3220 
 3221   enc_class enc_cmove_bso_stackSlotL(iRegLdst dst, flagsRegSrc crx, stackSlotL mem ) %{
 3222 
 3223     C2_MacroAssembler _masm(&cbuf);
 3224     int Idisp = $mem$$disp + frame_slots_bias($mem$$base, ra_);
 3225     Label done;
 3226     __ bso($crx$$CondRegister, done);
 3227     __ ld($dst$$Register, Idisp, $mem$$base$$Register);
 3228     __ bind(done);
 3229   %}
 3230 
 3231   enc_class enc_cmove_bso_reg(iRegLdst dst, flagsRegSrc crx, regD src) %{
 3232 
 3233     C2_MacroAssembler _masm(&cbuf);
 3234     Label done;
 3235     __ bso($crx$$CondRegister, done);
 3236     __ mffprd($dst$$Register, $src$$FloatRegister);
 3237     __ bind(done);
 3238   %}
 3239 
 3240   enc_class enc_bc(flagsRegSrc crx, cmpOp cmp, Label lbl) %{
 3241 
 3242     C2_MacroAssembler _masm(&cbuf);
 3243     Label d;   // dummy
 3244     __ bind(d);
 3245     Label* p = ($lbl$$label);
 3246     // `p' is `NULL' when this encoding class is used only to
 3247     // determine the size of the encoded instruction.
 3248     Label& l = (NULL == p)? d : *(p);
 3249     int cc = $cmp$$cmpcode;
 3250     int flags_reg = $crx$$reg;
 3251     assert((Assembler::bcondCRbiIs1 & ~Assembler::bcondCRbiIs0) == 8, "check encoding");
 3252     int bhint = Assembler::bhintNoHint;
 3253 
 3254     if (UseStaticBranchPredictionForUncommonPathsPPC64) {
 3255       if (_prob <= PROB_NEVER) {
 3256         bhint = Assembler::bhintIsNotTaken;
 3257       } else if (_prob >= PROB_ALWAYS) {
 3258         bhint = Assembler::bhintIsTaken;
 3259       }
 3260     }
 3261 
 3262     __ bc(Assembler::add_bhint_to_boint(bhint, cc_to_boint(cc)),
 3263           cc_to_biint(cc, flags_reg),
 3264           l);
 3265   %}
 3266 
 3267   enc_class enc_bc_far(flagsRegSrc crx, cmpOp cmp, Label lbl) %{
 3268     // The scheduler doesn't know about branch shortening, so we set the opcode
 3269     // to ppc64Opcode_bc in order to hide this detail from the scheduler.
 3270 
 3271     C2_MacroAssembler _masm(&cbuf);
 3272     Label d;    // dummy
 3273     __ bind(d);
 3274     Label* p = ($lbl$$label);
 3275     // `p' is `NULL' when this encoding class is used only to
 3276     // determine the size of the encoded instruction.
 3277     Label& l = (NULL == p)? d : *(p);
 3278     int cc = $cmp$$cmpcode;
 3279     int flags_reg = $crx$$reg;
 3280     int bhint = Assembler::bhintNoHint;
 3281 
 3282     if (UseStaticBranchPredictionForUncommonPathsPPC64) {
 3283       if (_prob <= PROB_NEVER) {
 3284         bhint = Assembler::bhintIsNotTaken;
 3285       } else if (_prob >= PROB_ALWAYS) {
 3286         bhint = Assembler::bhintIsTaken;
 3287       }
 3288     }
 3289 
 3290     // Tell the conditional far branch to optimize itself when being relocated.
 3291     __ bc_far(Assembler::add_bhint_to_boint(bhint, cc_to_boint(cc)),
 3292                   cc_to_biint(cc, flags_reg),
 3293                   l,
 3294                   MacroAssembler::bc_far_optimize_on_relocate);
 3295   %}
 3296 
 3297   // Postalloc expand emitter for loading a replicated float constant from
 3298   // the method's TOC.
 3299   // Enc_class needed as constanttablebase is not supported by postalloc
 3300   // expand.
 3301   enc_class postalloc_expand_load_replF_constant(iRegLdst dst, immF src, iRegLdst toc) %{
 3302     // Create new nodes.
 3303 
 3304     // Make an operand with the bit pattern to load as float.
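          // (Hedged note: replicate_immF presumably duplicates the 32-bit float
          // bit pattern into both halves of the jlong, e.g. 1.0f (0x3f800000)
          // becomes 0x3f8000003f800000.)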
 3305     immLOper *op_repl = new immLOper((jlong)replicate_immF(op_src->constantF()));
 3306 
 3307     loadConLNodesTuple loadConLNodes =
 3308       loadConLNodesTuple_create(ra_, n_toc, op_repl,
 3309                                 ra_->get_reg_second(this), ra_->get_reg_first(this));
 3310 
 3311     // Push new nodes.
 3312     if (loadConLNodes._large_hi) nodes->push(loadConLNodes._large_hi);
 3313     if (loadConLNodes._last)     nodes->push(loadConLNodes._last);
 3314 
 3315     assert(nodes->length() >= 1, "must have created at least 1 node");
 3316     assert(loadConLNodes._last->bottom_type()->isa_long(), "must be long");
 3317   %}
 3318 
 3319   enc_class postalloc_expand_load_replF_constant_vsx(vecX dst, immF src, iRegLdst toc, iRegLdst tmp) %{
 3320     // Create new nodes.
 3321 
 3322     // Make an operand with the bit pattern to load as float.
 3323     immLOper *op_repl = new  immLOper((jlong)replicate_immF(op_src->constantF()));
 3324     immI_0Oper *op_zero = new  immI_0Oper(0);
 3325 
 3326     loadConLReplicatedNodesTuple loadConLNodes =
 3327       loadConLReplicatedNodesTuple_create(C, ra_, n_toc, op_repl, op_dst, op_zero,
 3328                                 ra_->get_reg_second(n_tmp), ra_->get_reg_first(n_tmp),
 3329                                 ra_->get_reg_second(this), ra_->get_reg_first(this));
 3330 
 3331     // Push new nodes.
 3332     if (loadConLNodes._large_hi) { nodes->push(loadConLNodes._large_hi); }
 3333     if (loadConLNodes._large_lo) { nodes->push(loadConLNodes._large_lo); }
 3334     if (loadConLNodes._moved)    { nodes->push(loadConLNodes._moved); }
 3335     if (loadConLNodes._last)     { nodes->push(loadConLNodes._last); }
 3336 
 3337     assert(nodes->length() >= 1, "must have created at least 1 node");
 3338   %}
 3339 
 3340   // This enc_class is needed so that the scheduler gets a proper
 3341   // input mapping for latency computation.
 3342   enc_class enc_poll(immI dst, iRegLdst poll) %{
 3343     // Fake operand dst needed for PPC scheduler.
 3344     assert($dst$$constant == 0x0, "dst must be 0x0");
 3345 
 3346     C2_MacroAssembler _masm(&cbuf);
 3347     // Mark the code position where the load from the safepoint
 3348     // polling page was emitted as relocInfo::poll_type.
 3349     __ relocate(relocInfo::poll_type);
 3350     __ load_from_polling_page($poll$$Register);
 3351   %}
 3352 
 3353   // A Java static call or a runtime call.
 3354   //
 3355   // Branch-and-link relative to a trampoline.
 3356   // The trampoline loads the target address and does a long branch to there.
 3357   // In case we call Java, the trampoline branches to an interpreter_stub
 3358   // which loads the inline cache and the real call target from the constant pool.
 3359   //
 3360   // This basically looks like this:
 3361   //
 3362   // >>>> consts      -+  -+
 3363   //                   |   |- offset1
 3364   // [call target1]    | <-+
 3365   // [IC cache]        |- offset2
 3366   // [call target2] <--+
 3367   //
 3368   // <<<< consts
 3369   // >>>> insts
 3370   //
 3371   // bl offset24               -+  -+   // LI field is 24 bits (+/-32 MB reach)
 3372   //                            |   |
 3373   // <<<< insts                 |   |
 3374   // >>>> stubs                 |   |
 3375   //                            |   |- trampoline_stub_Reloc
 3376   // trampoline stub:           | <-+
 3377   //   r2 = toc                 |
 3378   //   r2 = [r2 + offset1]      |       // Load call target1 from const section
 3379   //   mtctr r2                 |
 3380   //   bctr                     |- static_stub_Reloc
 3381   // comp_to_interp_stub:   <---+
 3382   //   r1 = toc
 3383   //   ICreg = [r1 + IC_offset]         // Load IC from const section
 3384   //   r1    = [r1 + offset2]           // Load call target2 from const section
 3385   //   mtctr r1
 3386   //   bctr
 3387   //
 3388   // <<<< stubs
 3389   //
 3390   // The call instruction in the code either
 3391   // - Branches directly to a compiled method if the offset is encodable in the instruction.
 3392   // - Branches to the trampoline stub if the offset to the compiled method is not encodable.
 3393   // - Branches to the compiled_to_interp stub if the target is interpreted.
 3394   //
 3395   // Further there are three relocations from the loads to the constants in
 3396   // the constant section.
 3397   //
 3398   // The use of r1 and r2 in the stubs makes it possible to distinguish them.
 3399   enc_class enc_java_static_call(method meth) %{
 3400 
 3401     C2_MacroAssembler _masm(&cbuf);
 3402     address entry_point = (address)$meth$$method;
 3403 
 3404     if (!_method) {
 3405       // A call to a runtime wrapper, e.g. new, new_typeArray_Java, uncommon_trap.
 3406       emit_call_with_trampoline_stub(_masm, entry_point, relocInfo::runtime_call_type);
 3407     } else {
 3408       // Remember the offset not the address.
 3409       const int start_offset = __ offset();
 3410 
 3411       // The trampoline stub.
 3412       // No entry point given, use the current pc.
 3413       // Make sure the branch fits into the range of the bl instruction.
 3414       if (entry_point == 0) entry_point = __ pc();
 3415 
 3416       // Put the entry point as a constant into the constant pool.
 3417       const address entry_point_toc_addr = __ address_constant(entry_point, RelocationHolder::none);
 3418       if (entry_point_toc_addr == NULL) {
 3419         ciEnv::current()->record_out_of_memory_failure();
 3420         return;
 3421       }
 3422       const int entry_point_toc_offset = __ offset_to_method_toc(entry_point_toc_addr);
 3423 
 3424       // Emit the trampoline stub that is associated with the branch-and-link below.
 3425       CallStubImpl::emit_trampoline_stub(_masm, entry_point_toc_offset, start_offset);
 3426       if (ciEnv::current()->failing()) { return; } // Code cache may be full.
 3427       int method_index = resolved_method_index(cbuf);
 3428       __ relocate(_optimized_virtual ? opt_virtual_call_Relocation::spec(method_index)
 3429                   : static_call_Relocation::spec(method_index));
 3430 
 3431       // The real call.
 3432       // Note: At this point we do not have the address of the trampoline
 3433       // stub, and the entry point might be too far away for bl, so __ pc()
 3434       // serves as dummy and the bl will be patched later.
 3435       cbuf.set_insts_mark();
 3436       __ bl(__ pc());  // Emits a relocation.
 3437 
 3438       // The stub for call to interpreter.
 3439       address stub = CompiledStaticCall::emit_to_interp_stub(cbuf);
 3440       if (stub == NULL) {
 3441         ciEnv::current()->record_failure("CodeCache is full");
 3442         return;
 3443       }
 3444     }
 3445   %}
 3446 
 3447   // Second node of expanded dynamic call - the call.
 3448   enc_class enc_java_dynamic_call_sched(method meth) %{
 3449 
 3450     C2_MacroAssembler _masm(&cbuf);
 3451 
 3452     if (!ra_->C->output()->in_scratch_emit_size()) {
 3453       // Create a call trampoline stub for the given method.
 3454       const address entry_point = !($meth$$method) ? 0 : (address)$meth$$method;
 3455       const address entry_point_const = __ address_constant(entry_point, RelocationHolder::none);
 3456       if (entry_point_const == NULL) {
 3457         ciEnv::current()->record_out_of_memory_failure();
 3458         return;
 3459       }
 3460       const int entry_point_const_toc_offset = __ offset_to_method_toc(entry_point_const);
 3461       CallStubImpl::emit_trampoline_stub(_masm, entry_point_const_toc_offset, __ offset());
 3462       if (ra_->C->env()->failing()) { return; } // Code cache may be full.
 3463 
 3464       // Build relocation at call site with ic position as data.
 3465       assert((_load_ic_hi_node != NULL && _load_ic_node == NULL) ||
 3466              (_load_ic_hi_node == NULL && _load_ic_node != NULL),
 3467              "must have one, but can't have both");
 3468       assert((_load_ic_hi_node != NULL && _load_ic_hi_node->_cbuf_insts_offset != -1) ||
 3469              (_load_ic_node != NULL    && _load_ic_node->_cbuf_insts_offset != -1),
 3470              "must contain instruction offset");
 3471       const int virtual_call_oop_addr_offset = _load_ic_hi_node != NULL
 3472         ? _load_ic_hi_node->_cbuf_insts_offset
 3473         : _load_ic_node->_cbuf_insts_offset;
 3474       const address virtual_call_oop_addr = __ addr_at(virtual_call_oop_addr_offset);
 3475       assert(MacroAssembler::is_load_const_from_method_toc_at(virtual_call_oop_addr),
 3476              "should be load from TOC");
 3477       int method_index = resolved_method_index(cbuf);
 3478       __ relocate(virtual_call_Relocation::spec(virtual_call_oop_addr, method_index));
 3479     }
 3480 
 3481     // At this point I do not have the address of the trampoline stub,
 3482     // and the entry point might be too far away for bl. Pc() serves
 3483     // as dummy and bl will be patched later.
 3484     __ bl((address) __ pc());
 3485   %}
 3486 
 3487   // postalloc expand emitter for virtual calls.
 3488   enc_class postalloc_expand_java_dynamic_call_sched(method meth, iRegLdst toc) %{
 3489 
 3490     // Create the nodes for loading the IC from the TOC.
 3491     loadConLNodesTuple loadConLNodes_IC =
 3492       loadConLNodesTuple_create(ra_, n_toc, new immLOper((jlong)Universe::non_oop_word()),
 3493                                 OptoReg::Name(R19_H_num), OptoReg::Name(R19_num));
 3494 
 3495     // Create the call node.
 3496     CallDynamicJavaDirectSchedNode *call = new CallDynamicJavaDirectSchedNode();
 3497     call->_method_handle_invoke = _method_handle_invoke;
 3498     call->_vtable_index      = _vtable_index;
 3499     call->_method            = _method;
 3500     call->_optimized_virtual = _optimized_virtual;
 3501     call->_tf                = _tf;
 3502     call->_entry_point       = _entry_point;
 3503     call->_cnt               = _cnt;
 3504     call->_guaranteed_safepoint = true;
 3505     call->_oop_map           = _oop_map;
 3506     call->_jvms              = _jvms;
 3507     call->_jvmadj            = _jvmadj;
 3508     call->_in_rms            = _in_rms;
 3509     call->_nesting           = _nesting;
 3510     call->_override_symbolic_info = _override_symbolic_info;
 3511 
 3512     // New call needs all inputs of old call.
 3513     // Req...
 3514     for (uint i = 0; i < req(); ++i) {
 3515       // The expanded node does not need toc any more.
 3516       // Add the inline cache constant here instead. This expresses that
 3517       // the register of the inline cache must be live at the call.
 3518       // Otherwise we would have to adapt the JVMState by -1.
 3519       if (i == mach_constant_base_node_input()) {
 3520         call->add_req(loadConLNodes_IC._last);
 3521       } else {
 3522         call->add_req(in(i));
 3523       }
 3524     }
 3525     // ...as well as prec
 3526     for (uint i = req(); i < len(); ++i) {
 3527       call->add_prec(in(i));
 3528     }
 3529 
 3530     // Remember nodes loading the inline cache into r19.
 3531     call->_load_ic_hi_node = loadConLNodes_IC._large_hi;
 3532     call->_load_ic_node    = loadConLNodes_IC._small;
 3533 
 3534     // Operands for new nodes.
 3535     call->_opnds[0] = _opnds[0];
 3536     call->_opnds[1] = _opnds[1];
 3537 
 3538     // Only the inline cache is associated with a register.
 3539     assert(Matcher::inline_cache_reg() == OptoReg::Name(R19_num), "ic reg should be R19");
 3540 
 3541     // Push new nodes.
 3542     if (loadConLNodes_IC._large_hi) nodes->push(loadConLNodes_IC._large_hi);
 3543     if (loadConLNodes_IC._last)     nodes->push(loadConLNodes_IC._last);
 3544     nodes->push(call);
 3545   %}
 3546 
 3547   // Compound version of call dynamic
 3548   // Toc is only passed so that it can be used in the ins_encode statement.
 3549   // In the code we have to use $constanttablebase.
 3550   enc_class enc_java_dynamic_call(method meth, iRegLdst toc) %{
 3551     C2_MacroAssembler _masm(&cbuf);
 3552     int start_offset = __ offset();
 3553 
 3554     Register Rtoc = (ra_) ? $constanttablebase : R2_TOC;
 3555 
 3556     int vtable_index = this->_vtable_index;
 3557     if (vtable_index < 0) {
 3558       // Must be invalid_vtable_index, not nonvirtual_vtable_index.
 3559       assert(vtable_index == Method::invalid_vtable_index, "correct sentinel value");
 3560       Register ic_reg = as_Register(Matcher::inline_cache_reg_encode());
 3561 
 3562       // Virtual call relocation will point to ic load.
 3563       address virtual_call_meta_addr = __ pc();
 3564       // Load a clear inline cache.
 3565       AddressLiteral empty_ic((address) Universe::non_oop_word());
 3566       bool success = __ load_const_from_method_toc(ic_reg, empty_ic, Rtoc, /*fixed_size*/ true);
 3567       if (!success) {
 3568         ciEnv::current()->record_out_of_memory_failure();
 3569         return;
 3570       }
 3571       // CALL to fixup routine.  Fixup routine uses ScopeDesc info
 3572       // to determine who we intended to call.
 3573       __ relocate(virtual_call_Relocation::spec(virtual_call_meta_addr));
 3574       emit_call_with_trampoline_stub(_masm, (address)$meth$$method, relocInfo::none);
 3575       assert(((MachCallDynamicJavaNode*)this)->ret_addr_offset() == __ offset() - start_offset,
 3576              "Fix constant in ret_addr_offset(), expected %d", __ offset() - start_offset);
 3577     } else {
 3578       assert(!UseInlineCaches, "expect vtable calls only if not using ICs");
 3579       // Go through the vtable. Get the receiver klass; the receiver has
 3580       // already been checked for non-null. If we go through a C2I adapter,
 3581       // the interpreter expects the method in R19_method.
 3582 
 3583       __ load_klass(R11_scratch1, R3);
 3584 
 3585       int entry_offset = in_bytes(Klass::vtable_start_offset()) + vtable_index * vtableEntry::size_in_bytes();
 3586       int v_off = entry_offset + vtableEntry::method_offset_in_bytes();
 3587       __ li(R19_method, v_off);
 3588       __ ldx(R19_method/*method*/, R19_method/*method offset*/, R11_scratch1/*class*/);
 3589       // NOTE: for vtable dispatches, the vtable entry will never be
 3590       // null. However it may very well end up in handle_wrong_method
 3591       // if the method is abstract for the particular class.
 3592       __ ld(R11_scratch1, in_bytes(Method::from_compiled_offset()), R19_method);
 3593       // Call target. Either compiled code or C2I adapter.
 3594       __ mtctr(R11_scratch1);
 3595       __ bctrl();
 3596       assert(((MachCallDynamicJavaNode*)this)->ret_addr_offset() == __ offset() - start_offset,
 3597              "Fix constant in ret_addr_offset(), expected %d", __ offset() - start_offset);
 3598     }
 3599   %}
 3600 
 3601   // a runtime call
 3602   enc_class enc_java_to_runtime_call (method meth) %{
 3603 
 3604     C2_MacroAssembler _masm(&cbuf);
 3605     const address start_pc = __ pc();
 3606 
 3607 #if defined(ABI_ELFv2)
 3608     address entry= !($meth$$method) ? NULL : (address)$meth$$method;
 3609     __ call_c(entry, relocInfo::runtime_call_type);
 3610 #else
 3611     // The function we're going to call.
 3612     FunctionDescriptor fdtemp;
 3613     const FunctionDescriptor* fd = !($meth$$method) ? &fdtemp : (FunctionDescriptor*)$meth$$method;
 3614 
 3615     Register Rtoc = R12_scratch2;
 3616     // Calculate the method's TOC.
 3617     __ calculate_address_from_global_toc(Rtoc, __ method_toc());
 3618     // Put entry, env and toc into the constant pool; this needs up to 3
 3619     // constant pool entries. call_c_using_toc will optimize the call.
 3620     bool success = __ call_c_using_toc(fd, relocInfo::runtime_call_type, Rtoc);
 3621     if (!success) {
 3622       ciEnv::current()->record_out_of_memory_failure();
 3623       return;
 3624     }
 3625 #endif
 3626 
 3627     // Check the ret_addr_offset.
 3628     assert(((MachCallRuntimeNode*)this)->ret_addr_offset() ==  __ last_calls_return_pc() - start_pc,
 3629            "Fix constant in ret_addr_offset()");
 3630   %}
 3631 
 3632   // Move to ctr for leaf call.
 3633   // This enc_class is needed so that the scheduler gets a proper
 3634   // input mapping for latency computation.
 3635   enc_class enc_leaf_call_mtctr(iRegLsrc src) %{
 3636     C2_MacroAssembler _masm(&cbuf);
 3637     __ mtctr($src$$Register);
 3638   %}
 3639 
 3640   // Postalloc expand emitter for runtime leaf calls.
 3641   enc_class postalloc_expand_java_to_runtime_call(method meth, iRegLdst toc) %{
 3642     loadConLNodesTuple loadConLNodes_Entry;
 3643 #if defined(ABI_ELFv2)
 3644     jlong entry_address = (jlong) this->entry_point();
 3645     assert(entry_address, "need address here");
 3646     loadConLNodes_Entry = loadConLNodesTuple_create(ra_, n_toc, new immLOper(entry_address),
 3647                                                     OptoReg::Name(R12_H_num), OptoReg::Name(R12_num));
 3648 #else
 3649     // Get the struct that describes the function we are about to call.
 3650     FunctionDescriptor* fd = (FunctionDescriptor*) this->entry_point();
 3651     assert(fd, "need fd here");
 3652     jlong entry_address = (jlong) fd->entry();
 3653     // new nodes
 3654     loadConLNodesTuple loadConLNodes_Env;
 3655     loadConLNodesTuple loadConLNodes_Toc;
 3656 
 3657     // Create nodes and operands for loading the entry point.
 3658     loadConLNodes_Entry = loadConLNodesTuple_create(ra_, n_toc, new immLOper(entry_address),
 3659                                                     OptoReg::Name(R12_H_num), OptoReg::Name(R12_num));
 3660 
 3661 
 3662     // Create nodes and operands for loading the env pointer.
 3663     if (fd->env() != NULL) {
 3664       loadConLNodes_Env = loadConLNodesTuple_create(ra_, n_toc, new immLOper((jlong) fd->env()),
 3665                                                     OptoReg::Name(R11_H_num), OptoReg::Name(R11_num));
 3666     } else {
 3667       loadConLNodes_Env._large_hi = NULL;
 3668       loadConLNodes_Env._large_lo = NULL;
 3669       loadConLNodes_Env._small    = NULL;
 3670       loadConLNodes_Env._last = new loadConL16Node();
 3671       loadConLNodes_Env._last->_opnds[0] = new iRegLdstOper();
 3672       loadConLNodes_Env._last->_opnds[1] = new immL16Oper(0);
 3673       ra_->set_pair(loadConLNodes_Env._last->_idx, OptoReg::Name(R11_H_num), OptoReg::Name(R11_num));
 3674     }
 3675 
 3676     // Create nodes and operands for loading the TOC pointer.
 3677     loadConLNodes_Toc = loadConLNodesTuple_create(ra_, n_toc, new immLOper((jlong) fd->toc()),
 3678                                                   OptoReg::Name(R2_H_num), OptoReg::Name(R2_num));
 3679 #endif // ABI_ELFv2
 3680     // mtctr node
 3681     MachNode *mtctr = new CallLeafDirect_mtctrNode();
 3682 
 3683     assert(loadConLNodes_Entry._last != NULL, "entry must exist");
 3684     mtctr->add_req(0, loadConLNodes_Entry._last);
 3685 
 3686     mtctr->_opnds[0] = new iRegLdstOper();
 3687     mtctr->_opnds[1] = new iRegLdstOper();
 3688 
 3689     // call node
 3690     MachCallLeafNode *call = new CallLeafDirectNode();
 3691 
 3692     call->_opnds[0] = _opnds[0];
 3693     call->_opnds[1] = new methodOper((intptr_t) entry_address); // May get set later.
 3694 
 3695     // Make the new call node look like the old one.
 3696     call->_name        = _name;
 3697     call->_tf          = _tf;
 3698     call->_entry_point = _entry_point;
 3699     call->_cnt         = _cnt;
 3700     call->_guaranteed_safepoint = false;
 3701     call->_oop_map     = _oop_map;
 3702     guarantee(!_jvms, "You must clone the jvms and adapt the offsets by fix_jvms().");
 3703     call->_jvms        = NULL;
 3704     call->_jvmadj      = _jvmadj;
 3705     call->_in_rms      = _in_rms;
 3706     call->_nesting     = _nesting;
 3707 
 3708     // New call needs all inputs of old call.
 3709     // Req...
 3710     for (uint i = 0; i < req(); ++i) {
 3711       if (i != mach_constant_base_node_input()) {
 3712         call->add_req(in(i));
 3713       }
 3714     }
 3715 
 3716     // These must be required edges, as the registers are live up to
 3717     // the call. Otherwise the constants are handled as kills.
 3718     call->add_req(mtctr);
 3719 #if !defined(ABI_ELFv2)
 3720     call->add_req(loadConLNodes_Env._last);
 3721     call->add_req(loadConLNodes_Toc._last);
 3722 #endif
 3723 
 3724     // ...as well as prec
 3725     for (uint i = req(); i < len(); ++i) {
 3726       call->add_prec(in(i));
 3727     }
 3728 
 3729     // registers
 3730     ra_->set1(mtctr->_idx, OptoReg::Name(SR_CTR_num));
 3731 
 3732     // Insert the new nodes.
 3733     if (loadConLNodes_Entry._large_hi) nodes->push(loadConLNodes_Entry._large_hi);
 3734     if (loadConLNodes_Entry._last)     nodes->push(loadConLNodes_Entry._last);
 3735 #if !defined(ABI_ELFv2)
 3736     if (loadConLNodes_Env._large_hi)   nodes->push(loadConLNodes_Env._large_hi);
 3737     if (loadConLNodes_Env._last)       nodes->push(loadConLNodes_Env._last);
 3738     if (loadConLNodes_Toc._large_hi)   nodes->push(loadConLNodes_Toc._large_hi);
 3739     if (loadConLNodes_Toc._last)       nodes->push(loadConLNodes_Toc._last);
 3740 #endif
 3741     nodes->push(mtctr);
 3742     nodes->push(call);
 3743   %}
 3744 %}
 3745 
 3746 //----------FRAME--------------------------------------------------------------
 3747 // Definition of frame structure and management information.
 3748 
 3749 frame %{
 3750   // These two registers define part of the calling convention between
 3751   // compiled code and the interpreter.
 3752 
 3753   // Inline Cache Register or method for I2C.
 3754   inline_cache_reg(R19); // R19_method
 3755 
 3756   // Optional: name the operand used by cisc-spilling to access
 3757   // [stack_pointer + offset].
 3758   cisc_spilling_operand_name(indOffset);
 3759 
 3760   // Number of stack slots consumed by a Monitor enter.
 3761   sync_stack_slots((frame::jit_monitor_size / VMRegImpl::stack_slot_size));
 3762 
 3763   // Compiled code's Frame Pointer.
 3764   frame_pointer(R1); // R1_SP
 3765 
 3766   // Interpreter stores its frame pointer in a register which is
 3767   // stored to the stack by I2CAdaptors. I2CAdaptors convert from
 3768   // interpreted java to compiled java.
 3769   //
 3770   // R14_state holds pointer to caller's cInterpreter.
 3771   interpreter_frame_pointer(R14); // R14_state
 3772 
 3773   stack_alignment(frame::alignment_in_bytes);
 3774 
 3775   // Number of outgoing stack slots killed above the
 3776   // out_preserve_stack_slots for calls to C. Supports the var-args
 3777   // backing area for register parms.
 3778   //
 3779   varargs_C_out_slots_killed(((frame::abi_reg_args_size - frame::jit_out_preserve_size) / VMRegImpl::stack_slot_size));
 3780 
 3781   // The after-PROLOG location of the return address. Location of
 3782   // return address specifies a type (REG or STACK) and a number
 3783   // representing the register number (i.e. - use a register name) or
 3784   // stack slot.
 3785   //
 3786   // A: Link register is stored in stack slot ...
 3787   // M:  ... but it's in the caller's frame according to PPC-64 ABI.
 3788   // J: Therefore, we make sure that the link register is also in R11_scratch1
 3789   //    at the end of the prolog.
 3790   // B: We use R20, now.
 3791   //return_addr(REG R20);
 3792 
 3793   // G: After reading the comments made by all the luminaries on their
 3794   //    failure to tell the compiler where the return address really is,
 3795   //    I hardly dare to try myself.  However, I'm convinced it's in slot
 3796   //    4, which apparently works and saves us some spills.
 3797   return_addr(STACK 4);
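        // (Slot 4 corresponds to byte offset 4 * VMRegImpl::stack_slot_size = 16,
        // presumably the link register save slot in the caller's ABI frame.)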
 3798 
 3799   // Location of native (C/C++) and interpreter return values. This
 3800   // is specified to be the same as for Java. The copying to and from
 3801   // the return registers is done by the appropriate call and epilog
 3802   // opcodes, which simplifies the register allocator.
 3805   c_return_value %{
 3806     assert((ideal_reg >= Op_RegI && ideal_reg <= Op_RegL) ||
 3807             (ideal_reg == Op_RegN && CompressedOops::base() == NULL && CompressedOops::shift() == 0),
 3808             "only return normal values");
 3809     // enum names from opcodes.hpp:    Op_Node Op_Set Op_RegN       Op_RegI       Op_RegP       Op_RegF       Op_RegD       Op_RegL
 3810     static int typeToRegLo[Op_RegL+1] = { 0,   0,     R3_num,   R3_num,   R3_num,   F1_num,   F1_num,   R3_num };
 3811     static int typeToRegHi[Op_RegL+1] = { 0,   0,     OptoReg::Bad, R3_H_num, R3_H_num, OptoReg::Bad, F1_H_num, R3_H_num };
 3812     return OptoRegPair(typeToRegHi[ideal_reg], typeToRegLo[ideal_reg]);
 3813   %}
 3814 
 3815   // Location of compiled Java return values.  Same as C
 3816   return_value %{
 3817     assert((ideal_reg >= Op_RegI && ideal_reg <= Op_RegL) ||
 3818             (ideal_reg == Op_RegN && CompressedOops::base() == NULL && CompressedOops::shift() == 0),
 3819             "only return normal values");
 3820     // enum names from opcodes.hpp:    Op_Node Op_Set Op_RegN       Op_RegI       Op_RegP       Op_RegF       Op_RegD       Op_RegL
 3821     static int typeToRegLo[Op_RegL+1] = { 0,   0,     R3_num,   R3_num,   R3_num,   F1_num,   F1_num,   R3_num };
 3822     static int typeToRegHi[Op_RegL+1] = { 0,   0,     OptoReg::Bad, R3_H_num, R3_H_num, OptoReg::Bad, F1_H_num, R3_H_num };
 3823     return OptoRegPair(typeToRegHi[ideal_reg], typeToRegLo[ideal_reg]);
 3824   %}
 3825 %}
 3826 
 3827 
 3828 //----------ATTRIBUTES---------------------------------------------------------
 3829 
 3830 //----------Operand Attributes-------------------------------------------------
 3831 op_attrib op_cost(1);          // Required cost attribute.
 3832 
 3833 //----------Instruction Attributes---------------------------------------------
 3834 
 3835 // Cost attribute. required.
 3836 ins_attrib ins_cost(DEFAULT_COST);
 3837 
 3838 // Is this instruction a non-matching short branch variant of some
 3839 // long branch? Not required.
 3840 ins_attrib ins_short_branch(0);
 3841 
 3842 ins_attrib ins_is_TrapBasedCheckNode(true);
 3843 
 3844 // Number of constants.
 3845 // This instruction uses the given number of constants
 3846 // (optional attribute).
 3847 // This is needed to determine in time whether the constant pool will
 3848 // exceed 4000 entries. Before postalloc_expand the overall number of constants
 3849 // is determined. It's also used to compute the constant pool size
 3850 // in Output().
 3851 ins_attrib ins_num_consts(0);
 3852 
 3853 // Required alignment attribute (must be a power of 2) specifies the
 3854 // alignment that some part of the instruction (not necessarily the
 3855 // start) requires. If > 1, a compute_padding() function must be
 3856 // provided for the instruction.
 3857 ins_attrib ins_alignment(1);
 3858 
 3859 // Enforce/prohibit rematerializations.
 3860 // - If an instruction is attributed with 'ins_cannot_rematerialize(true)'
 3861 //   then rematerialization of that instruction is prohibited and the
 3862 //   instruction's value will be spilled if necessary.
 3863 //   This causes MachNode::rematerialize() to return false.
 3864 // - If an instruction is attributed with 'ins_should_rematerialize(true)'
 3865 //   then rematerialization should be enforced and a copy of the instruction
 3866 //   should be inserted if possible; rematerialization is not guaranteed.
 3867 //   Note: this may result in rematerializations in front of every use.
 3868 //   This allows MachNode::rematerialize() to return true.
 3869 // (optional attribute)
 3870 ins_attrib ins_cannot_rematerialize(false);
 3871 ins_attrib ins_should_rematerialize(false);
 3872 
 3873 // Instruction has variable size depending on alignment.
 3874 ins_attrib ins_variable_size_depending_on_alignment(false);
 3875 
 3876 // Instruction is a nop.
 3877 ins_attrib ins_is_nop(false);
 3878 
 3879 // Instruction is mapped to a MachIfFastLock node (instead of MachFastLock).
 3880 ins_attrib ins_use_mach_if_fast_lock_node(false);
 3881 
 3882 // Field for the toc offset of a constant.
 3883 //
 3884 // This is needed if the toc offset is not encodable as an immediate in
 3885 // the PPC load instruction. If so, the upper (hi) bits of the offset are
 3886 // added to the toc, and from this a load with immediate is performed.
 3887 // With postalloc expand, we get two nodes that require the same offset
 3888 // but which don't know about each other. The offset is only known
 3889 // when the constant is added to the constant pool during emitting.
 3890 // It is generated in the 'hi'-node adding the upper bits, and saved
 3891 // in this node.  The 'lo'-node has a link to the 'hi'-node and reads
 3892 // the offset from there when it gets encoded.
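      //
      // A sketch (assuming the usual addis/ld expansion) of the resulting
      // two-instruction sequence:
      //   addis Rtmp, Rtoc, offset_hi   // 'hi'-node: add upper bits to the toc
      //   ld    Rdst, offset_lo(Rtmp)   // 'lo'-node: load with the low 16 bits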
 3893 ins_attrib ins_field_const_toc_offset(0);
 3894 ins_attrib ins_field_const_toc_offset_hi_node(0);
 3895 
 3896 // A field that can hold the instruction's offset in the code buffer.
 3897 // Set in the nodes emitter.
 3898 ins_attrib ins_field_cbuf_insts_offset(-1);
 3899 
 3900 // Fields for referencing a call's load-IC-node.
 3901 // If the toc offset cannot be encoded as an immediate in a load, we
 3902 // use two nodes.
 3903 ins_attrib ins_field_load_ic_hi_node(0);
 3904 ins_attrib ins_field_load_ic_node(0);
 3905 
 3906 //----------OPERANDS-----------------------------------------------------------
 3907 // Operand definitions must precede instruction definitions for correct
 3908 // parsing in the ADLC because operands constitute user defined types
 3909 // which are used in instruction definitions.
 3910 //
 3911 // Formats are generated automatically for constants and base registers.
 3912 
 3913 operand vecX() %{
 3914   constraint(ALLOC_IN_RC(vs_reg));
 3915   match(VecX);
 3916 
 3917   format %{ %}
 3918   interface(REG_INTER);
 3919 %}
 3920 
 3921 //----------Simple Operands----------------------------------------------------
 3922 // Immediate Operands
 3923 
 3924 // Integer Immediate: 32-bit
 3925 operand immI() %{
 3926   match(ConI);
 3927   op_cost(40);
 3928   format %{ %}
 3929   interface(CONST_INTER);
 3930 %}
 3931 
 3932 operand immI8() %{
 3933   predicate(Assembler::is_simm(n->get_int(), 8));
 3934   op_cost(0);
 3935   match(ConI);
 3936   format %{ %}
 3937   interface(CONST_INTER);
 3938 %}
 3939 
 3940 // Integer Immediate: 16-bit
 3941 operand immI16() %{
 3942   predicate(Assembler::is_simm(n->get_int(), 16));
 3943   op_cost(0);
 3944   match(ConI);
 3945   format %{ %}
 3946   interface(CONST_INTER);
 3947 %}
 3948 
 3949 // Integer Immediate: 32-bit, where lowest 16 bits are 0x0000.
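      // Example: 0x12340000 matches; 0x12345678 and 0x00005678 do not.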
 3950 operand immIhi16() %{
 3951   predicate(((n->get_int() & 0xffff0000) != 0) && ((n->get_int() & 0xffff) == 0));
 3952   match(ConI);
 3953   op_cost(0);
 3954   format %{ %}
 3955   interface(CONST_INTER);
 3956 %}
 3957 
 3958 // Integer Immediate: 32-bit immediate for prefixed addi and load/store.
 3959 operand immI32() %{
 3960   predicate(PowerArchitecturePPC64 >= 10);
 3961   op_cost(0);
 3962   match(ConI);
 3963   format %{ %}
 3964   interface(CONST_INTER);
 3965 %}
 3966 
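      // Integer Immediate: the negative of a power of 2 (e.g. -1, -8, min_jint).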
 3967 operand immInegpow2() %{
 3968   predicate(is_power_of_2(-(juint)(n->get_int())));
 3969   match(ConI);
 3970   op_cost(0);
 3971   format %{ %}
 3972   interface(CONST_INTER);
 3973 %}
 3974 
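      // Integer Immediate: a power of 2 minus 1 (e.g. 7, 0xffff, max_jint).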
 3975 operand immIpow2minus1() %{
 3976   predicate(is_power_of_2((juint)(n->get_int()) + 1u));
 3977   match(ConI);
 3978   op_cost(0);
 3979   format %{ %}
 3980   interface(CONST_INTER);
 3981 %}
 3982 
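      // Integer Immediate: a power of 2 (e.g. 8, 0x10000).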
 3983 operand immIpowerOf2() %{
 3984   predicate(is_power_of_2((juint)(n->get_int())));
 3985   match(ConI);
 3986   op_cost(0);
 3987   format %{ %}
 3988   interface(CONST_INTER);
 3989 %}
 3990 
 3991 // Unsigned Integer Immediate: the values 0-31
 3992 operand uimmI5() %{
 3993   predicate(Assembler::is_uimm(n->get_int(), 5));
 3994   match(ConI);
 3995   op_cost(0);
 3996   format %{ %}
 3997   interface(CONST_INTER);
 3998 %}
 3999 
 4000 // Unsigned Integer Immediate: 6-bit
 4001 operand uimmI6() %{
 4002   predicate(Assembler::is_uimm(n->get_int(), 6));
 4003   match(ConI);
 4004   op_cost(0);
 4005   format %{ %}
 4006   interface(CONST_INTER);
 4007 %}
 4008 
 4009 // Unsigned Integer Immediate: 6-bit int, greater than or equal to 32
 4010 operand uimmI6_ge32() %{
 4011   predicate(Assembler::is_uimm(n->get_int(), 6) && n->get_int() >= 32);
 4012   match(ConI);
 4013   op_cost(0);
 4014   format %{ %}
 4015   interface(CONST_INTER);
 4016 %}
 4017 
 4018 // Unsigned Integer Immediate: 15-bit
 4019 operand uimmI15() %{
 4020   predicate(Assembler::is_uimm(n->get_int(), 15));
 4021   match(ConI);
 4022   op_cost(0);
 4023   format %{ %}
 4024   interface(CONST_INTER);
 4025 %}
 4026 
 4027 // Unsigned Integer Immediate: 16-bit
 4028 operand uimmI16() %{
 4029   predicate(Assembler::is_uimm(n->get_int(), 16));
 4030   match(ConI);
 4031   op_cost(0);
 4032   format %{ %}
 4033   interface(CONST_INTER);
 4034 %}
 4035 
 4036 // constant 'int 0'.
 4037 operand immI_0() %{
 4038   predicate(n->get_int() == 0);
 4039   match(ConI);
 4040   op_cost(0);
 4041   format %{ %}
 4042   interface(CONST_INTER);
 4043 %}
 4044 
 4045 // constant 'int 1'.
 4046 operand immI_1() %{
 4047   predicate(n->get_int() == 1);
 4048   match(ConI);
 4049   op_cost(0);
 4050   format %{ %}
 4051   interface(CONST_INTER);
 4052 %}
 4053 
 4054 // constant 'int -1'.
 4055 operand immI_minus1() %{
 4056   predicate(n->get_int() == -1);
 4057   match(ConI);
 4058   op_cost(0);
 4059   format %{ %}
 4060   interface(CONST_INTER);
 4061 %}
 4062 
 4063 // int value 16.
 4064 operand immI_16() %{
 4065   predicate(n->get_int() == 16);
 4066   match(ConI);
 4067   op_cost(0);
 4068   format %{ %}
 4069   interface(CONST_INTER);
 4070 %}
 4071 
 4072 // int value 24.
 4073 operand immI_24() %{
 4074   predicate(n->get_int() == 24);
 4075   match(ConI);
 4076   op_cost(0);
 4077   format %{ %}
 4078   interface(CONST_INTER);
 4079 %}
 4080 
 4081 // Compressed oops constants
 4082 // Pointer Immediate
 4083 operand immN() %{
 4084   match(ConN);
 4085 
 4086   op_cost(10);
 4087   format %{ %}
 4088   interface(CONST_INTER);
 4089 %}
 4090 
 4091 // NULL Pointer Immediate
 4092 operand immN_0() %{
 4093   predicate(n->get_narrowcon() == 0);
 4094   match(ConN);
 4095 
 4096   op_cost(0);
 4097   format %{ %}
 4098   interface(CONST_INTER);
 4099 %}
 4100 
 4101 // Compressed klass constants
 4102 operand immNKlass() %{
 4103   match(ConNKlass);
 4104 
 4105   op_cost(0);
 4106   format %{ %}
 4107   interface(CONST_INTER);
 4108 %}
 4109 
 4110 // This operand can be used to avoid matching an instruct
 4111 // with a chain rule.
 4112 operand immNKlass_NM() %{
 4113   match(ConNKlass);
 4114   predicate(false);
 4115   op_cost(0);
 4116   format %{ %}
 4117   interface(CONST_INTER);
 4118 %}
 4119 
 4120 // Pointer Immediate: 64-bit
 4121 operand immP() %{
 4122   match(ConP);
 4123   op_cost(0);
 4124   format %{ %}
 4125   interface(CONST_INTER);
 4126 %}
 4127 
 4128 // Operand to avoid match of loadConP.
 4129 // This operand can be used to avoid matching an instruct
 4130 // with a chain rule.
 4131 operand immP_NM() %{
 4132   match(ConP);
 4133   predicate(false);
 4134   op_cost(0);
 4135   format %{ %}
 4136   interface(CONST_INTER);
 4137 %}
 4138 
 4139 // constant 'pointer 0'.
 4140 operand immP_0() %{
 4141   predicate(n->get_ptr() == 0);
 4142   match(ConP);
 4143   op_cost(0);
 4144   format %{ %}
 4145   interface(CONST_INTER);
 4146 %}
 4147 
 4148 // pointer 0x0 or 0x1
 4149 operand immP_0or1() %{
 4150   predicate((n->get_ptr() == 0) || (n->get_ptr() == 1));
 4151   match(ConP);
 4152   op_cost(0);
 4153   format %{ %}
 4154   interface(CONST_INTER);
 4155 %}
 4156 
 4157 operand immL() %{
 4158   match(ConL);
 4159   op_cost(40);
 4160   format %{ %}
 4161   interface(CONST_INTER);
 4162 %}
 4163 
 4164 operand immLmax30() %{
 4165   predicate((n->get_long() <= 30));
 4166   match(ConL);
 4167   op_cost(0);
 4168   format %{ %}
 4169   interface(CONST_INTER);
 4170 %}
 4171 
 4172 // Long Immediate: 16-bit
 4173 operand immL16() %{
 4174   predicate(Assembler::is_simm(n->get_long(), 16));
 4175   match(ConL);
 4176   op_cost(0);
 4177   format %{ %}
 4178   interface(CONST_INTER);
 4179 %}
 4180 
 4181 // Long Immediate: 16-bit, 4-aligned
 4182 operand immL16Alg4() %{
 4183   predicate(Assembler::is_simm(n->get_long(), 16) && ((n->get_long() & 0x3) == 0));
 4184   match(ConL);
 4185   op_cost(0);
 4186   format %{ %}
 4187   interface(CONST_INTER);
 4188 %}
 4189 
 4190 // Long Immediate: 32-bit, where lowest 16 bits are 0x0000.
 4191 operand immL32hi16() %{
 4192   predicate(Assembler::is_simm(n->get_long(), 32) && ((n->get_long() & 0xffffL) == 0L));
 4193   match(ConL);
 4194   op_cost(0);
 4195   format %{ %}
 4196   interface(CONST_INTER);
 4197 %}
 4198 
 4199 // Long Immediate: 32-bit
 4200 operand immL32() %{
 4201   predicate(Assembler::is_simm(n->get_long(), 32));
 4202   match(ConL);
 4203   op_cost(0);
 4204   format %{ %}
 4205   interface(CONST_INTER);
 4206 %}
 4207 
 4208 // Long Immediate: 34-bit, immediate field in prefixed addi and load/store.
 4209 operand immL34() %{
 4210   predicate(PowerArchitecturePPC64 >= 10 && Assembler::is_simm(n->get_long(), 34));
 4211   match(ConL);
 4212   op_cost(0);
 4213   format %{ %}
 4214   interface(CONST_INTER);
 4215 %}
 4216 
 4217 // Long Immediate: 64-bit, where only the highest 16 bits are non-zero.
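      // Example: 0x1234000000000000 is accepted; 0x1234000000000001 is not.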
 4218 operand immLhighest16() %{
 4219   predicate((n->get_long() & 0xffff000000000000L) != 0L && (n->get_long() & 0x0000ffffffffffffL) == 0L);
 4220   match(ConL);
 4221   op_cost(0);
 4222   format %{ %}
 4223   interface(CONST_INTER);
 4224 %}
 4225 
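      // Long Immediate: the negative of a power of 2 (e.g. -8L, min_jlong).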
 4226 operand immLnegpow2() %{
 4227   predicate(is_power_of_2(-(julong)(n->get_long())));
 4228   match(ConL);
 4229   op_cost(0);
 4230   format %{ %}
 4231   interface(CONST_INTER);
 4232 %}
 4233 
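      // Long Immediate: a power of 2 minus 1 (e.g. 0xffffL, max_jlong).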
 4234 operand immLpow2minus1() %{
 4235   predicate(is_power_of_2((julong)(n->get_long()) + 1ull));
 4236   match(ConL);
 4237   op_cost(0);
 4238   format %{ %}
 4239   interface(CONST_INTER);
 4240 %}
 4241 
 4242 // constant 'long 0'.
 4243 operand immL_0() %{
 4244   predicate(n->get_long() == 0L);
 4245   match(ConL);
 4246   op_cost(0);
 4247   format %{ %}
 4248   interface(CONST_INTER);
 4249 %}
 4250 
 4251 // constant 'long -1'.
 4252 operand immL_minus1() %{
 4253   predicate(n->get_long() == -1L);
 4254   match(ConL);
 4255   op_cost(0);
 4256   format %{ %}
 4257   interface(CONST_INTER);
 4258 %}
 4259 
 4260 // Long Immediate: low 32-bit mask
 4261 operand immL_32bits() %{
 4262   predicate(n->get_long() == 0xFFFFFFFFL);
 4263   match(ConL);
 4264   op_cost(0);
 4265   format %{ %}
 4266   interface(CONST_INTER);
 4267 %}
 4268 
 4269 // Unsigned Long Immediate: 16-bit
 4270 operand uimmL16() %{
 4271   predicate(Assembler::is_uimm(n->get_long(), 16));
 4272   match(ConL);
 4273   op_cost(0);
 4274   format %{ %}
 4275   interface(CONST_INTER);
 4276 %}
 4277 
 4278 // Float Immediate
 4279 operand immF() %{
 4280   match(ConF);
 4281   op_cost(40);
 4282   format %{ %}
 4283   interface(CONST_INTER);
 4284 %}
 4285 
 4286 // Float Immediate: +0.0f.
 4287 operand immF_0() %{
 4288   predicate(jint_cast(n->getf()) == 0);
 4289   match(ConF);
 4290 
 4291   op_cost(0);
 4292   format %{ %}
 4293   interface(CONST_INTER);
 4294 %}
 4295 
 4296 // Double Immediate
 4297 operand immD() %{
 4298   match(ConD);
 4299   op_cost(40);
 4300   format %{ %}
 4301   interface(CONST_INTER);
 4302 %}
 4303 
 4304 // Double Immediate: +0.0d.
 4305 operand immD_0() %{
 4306   predicate(jlong_cast(n->getd()) == 0);
 4307   match(ConD);
 4308 
 4309   op_cost(0);
 4310   format %{ %}
 4311   interface(CONST_INTER);
 4312 %}
 4313 
 4314 // Integer Register Operands
 4315 // Integer Destination Register
 4316 // See definition of reg_class bits32_reg_rw.
 4317 operand iRegIdst() %{
 4318   constraint(ALLOC_IN_RC(bits32_reg_rw));
 4319   match(RegI);
 4320   match(rscratch1RegI);
 4321   match(rscratch2RegI);
 4322   match(rarg1RegI);
 4323   match(rarg2RegI);
 4324   match(rarg3RegI);
 4325   match(rarg4RegI);
 4326   format %{ %}
 4327   interface(REG_INTER);
 4328 %}
 4329 
 4330 // Integer Source Register
 4331 // See definition of reg_class bits32_reg_ro.
 4332 operand iRegIsrc() %{
 4333   constraint(ALLOC_IN_RC(bits32_reg_ro));
 4334   match(RegI);
 4335   match(rscratch1RegI);
 4336   match(rscratch2RegI);
 4337   match(rarg1RegI);
 4338   match(rarg2RegI);
 4339   match(rarg3RegI);
 4340   match(rarg4RegI);
 4341   format %{ %}
 4342   interface(REG_INTER);
 4343 %}
 4344 
 4345 operand rscratch1RegI() %{
 4346   constraint(ALLOC_IN_RC(rscratch1_bits32_reg));
 4347   match(iRegIdst);
 4348   format %{ %}
 4349   interface(REG_INTER);
 4350 %}
 4351 
 4352 operand rscratch2RegI() %{
 4353   constraint(ALLOC_IN_RC(rscratch2_bits32_reg));
 4354   match(iRegIdst);
 4355   format %{ %}
 4356   interface(REG_INTER);
 4357 %}
 4358 
 4359 operand rarg1RegI() %{
 4360   constraint(ALLOC_IN_RC(rarg1_bits32_reg));
 4361   match(iRegIdst);
 4362   format %{ %}
 4363   interface(REG_INTER);
 4364 %}
 4365 
 4366 operand rarg2RegI() %{
 4367   constraint(ALLOC_IN_RC(rarg2_bits32_reg));
 4368   match(iRegIdst);
 4369   format %{ %}
 4370   interface(REG_INTER);
 4371 %}
 4372 
 4373 operand rarg3RegI() %{
 4374   constraint(ALLOC_IN_RC(rarg3_bits32_reg));
 4375   match(iRegIdst);
 4376   format %{ %}
 4377   interface(REG_INTER);
 4378 %}
 4379 
 4380 operand rarg4RegI() %{
 4381   constraint(ALLOC_IN_RC(rarg4_bits32_reg));
 4382   match(iRegIdst);
 4383   format %{ %}
 4384   interface(REG_INTER);
 4385 %}
 4386 
 4387 operand rarg1RegL() %{
 4388   constraint(ALLOC_IN_RC(rarg1_bits64_reg));
 4389   match(iRegLdst);
 4390   format %{ %}
 4391   interface(REG_INTER);
 4392 %}
 4393 
 4394 operand rarg2RegL() %{
 4395   constraint(ALLOC_IN_RC(rarg2_bits64_reg));
 4396   match(iRegLdst);
 4397   format %{ %}
 4398   interface(REG_INTER);
 4399 %}
 4400 
 4401 operand rarg3RegL() %{
 4402   constraint(ALLOC_IN_RC(rarg3_bits64_reg));
 4403   match(iRegLdst);
 4404   format %{ %}
 4405   interface(REG_INTER);
 4406 %}
 4407 
 4408 operand rarg4RegL() %{
 4409   constraint(ALLOC_IN_RC(rarg4_bits64_reg));
 4410   match(iRegLdst);
 4411   format %{ %}
 4412   interface(REG_INTER);
 4413 %}
 4414 
 4415 // Pointer Destination Register
 4416 // See definition of reg_class bits64_reg_rw.
 4417 operand iRegPdst() %{
 4418   constraint(ALLOC_IN_RC(bits64_reg_rw));
 4419   match(RegP);
 4420   match(rscratch1RegP);
 4421   match(rscratch2RegP);
 4422   match(rarg1RegP);
 4423   match(rarg2RegP);
 4424   match(rarg3RegP);
 4425   match(rarg4RegP);
 4426   format %{ %}
 4427   interface(REG_INTER);
 4428 %}
 4429 
 4430 // Pointer Destination Register
 4431 // Operand not using r11 and r12 (killed in epilog).
 4432 operand iRegPdstNoScratch() %{
 4433   constraint(ALLOC_IN_RC(bits64_reg_leaf_call));
 4434   match(RegP);
 4435   match(rarg1RegP);
 4436   match(rarg2RegP);
 4437   match(rarg3RegP);
 4438   match(rarg4RegP);
 4439   format %{ %}
 4440   interface(REG_INTER);
 4441 %}
 4442 
 4443 // Pointer Source Register
 4444 // See definition of reg_class bits64_reg_ro.
 4445 operand iRegPsrc() %{
 4446   constraint(ALLOC_IN_RC(bits64_reg_ro));
 4447   match(RegP);
 4448   match(iRegPdst);
 4449   match(rscratch1RegP);
 4450   match(rscratch2RegP);
 4451   match(rarg1RegP);
 4452   match(rarg2RegP);
 4453   match(rarg3RegP);
 4454   match(rarg4RegP);
 4455   match(threadRegP);
 4456   format %{ %}
 4457   interface(REG_INTER);
 4458 %}
 4459 
 4460 // Thread operand.
 4461 operand threadRegP() %{
 4462   constraint(ALLOC_IN_RC(thread_bits64_reg));
 4463   match(iRegPdst);
 4464   format %{ "R16" %}
 4465   interface(REG_INTER);
 4466 %}
 4467 
 4468 operand rscratch1RegP() %{
 4469   constraint(ALLOC_IN_RC(rscratch1_bits64_reg));
 4470   match(iRegPdst);
 4471   format %{ "R11" %}
 4472   interface(REG_INTER);
 4473 %}
 4474 
 4475 operand rscratch2RegP() %{
 4476   constraint(ALLOC_IN_RC(rscratch2_bits64_reg));
 4477   match(iRegPdst);
 4478   format %{ %}
 4479   interface(REG_INTER);
 4480 %}
 4481 
 4482 operand rarg1RegP() %{
 4483   constraint(ALLOC_IN_RC(rarg1_bits64_reg));
 4484   match(iRegPdst);
 4485   format %{ %}
 4486   interface(REG_INTER);
 4487 %}
 4488 
 4489 operand rarg2RegP() %{
 4490   constraint(ALLOC_IN_RC(rarg2_bits64_reg));
 4491   match(iRegPdst);
 4492   format %{ %}
 4493   interface(REG_INTER);
 4494 %}
 4495 
 4496 operand rarg3RegP() %{
 4497   constraint(ALLOC_IN_RC(rarg3_bits64_reg));
 4498   match(iRegPdst);
 4499   format %{ %}
 4500   interface(REG_INTER);
 4501 %}
 4502 
 4503 operand rarg4RegP() %{
 4504   constraint(ALLOC_IN_RC(rarg4_bits64_reg));
 4505   match(iRegPdst);
 4506   format %{ %}
 4507   interface(REG_INTER);
 4508 %}
 4509 
 4510 operand iRegNsrc() %{
 4511   constraint(ALLOC_IN_RC(bits32_reg_ro));
 4512   match(RegN);
 4513   match(iRegNdst);
 4514 
 4515   format %{ %}
 4516   interface(REG_INTER);
 4517 %}
 4518 
 4519 operand iRegNdst() %{
 4520   constraint(ALLOC_IN_RC(bits32_reg_rw));
 4521   match(RegN);
 4522 
 4523   format %{ %}
 4524   interface(REG_INTER);
 4525 %}
 4526 
 4527 // Long Destination Register
 4528 // See definition of reg_class bits64_reg_rw.
 4529 operand iRegLdst() %{
 4530   constraint(ALLOC_IN_RC(bits64_reg_rw));
 4531   match(RegL);
 4532   match(rscratch1RegL);
 4533   match(rscratch2RegL);
 4534   format %{ %}
 4535   interface(REG_INTER);
 4536 %}
 4537 
 4538 // Long Source Register
 4539 // See definition of reg_class bits64_reg_ro.
 4540 operand iRegLsrc() %{
 4541   constraint(ALLOC_IN_RC(bits64_reg_ro));
 4542   match(RegL);
 4543   match(iRegLdst);
 4544   match(rscratch1RegL);
 4545   match(rscratch2RegL);
 4546   format %{ %}
 4547   interface(REG_INTER);
 4548 %}
 4549 
 4550 // Special operand for ConvL2I.
 4551 operand iRegL2Isrc(iRegLsrc reg) %{
 4552   constraint(ALLOC_IN_RC(bits64_reg_ro));
 4553   match(ConvL2I reg);
 4554   format %{ "ConvL2I($reg)" %}
 4555   interface(REG_INTER)
 4556 %}
 4557 
 4558 operand rscratch1RegL() %{
 4559   constraint(ALLOC_IN_RC(rscratch1_bits64_reg));
 4560   match(RegL);
 4561   format %{ %}
 4562   interface(REG_INTER);
 4563 %}
 4564 
 4565 operand rscratch2RegL() %{
 4566   constraint(ALLOC_IN_RC(rscratch2_bits64_reg));
 4567   match(RegL);
 4568   format %{ %}
 4569   interface(REG_INTER);
 4570 %}
 4571 
 4572 // Condition Code Flag Registers
 4573 operand flagsReg() %{
 4574   constraint(ALLOC_IN_RC(int_flags));
 4575   match(RegFlags);
 4576   format %{ %}
 4577   interface(REG_INTER);
 4578 %}
 4579 
 4580 operand flagsRegSrc() %{
 4581   constraint(ALLOC_IN_RC(int_flags_ro));
 4582   match(RegFlags);
 4583   match(flagsReg);
 4584   match(flagsRegCR0);
 4585   format %{ %}
 4586   interface(REG_INTER);
 4587 %}
 4588 
 4589 // Condition Code Flag Register CR0
 4590 operand flagsRegCR0() %{
 4591   constraint(ALLOC_IN_RC(int_flags_CR0));
 4592   match(RegFlags);
 4593   format %{ "CR0" %}
 4594   interface(REG_INTER);
 4595 %}
 4596 
 4597 operand flagsRegCR1() %{
 4598   constraint(ALLOC_IN_RC(int_flags_CR1));
 4599   match(RegFlags);
 4600   format %{ "CR1" %}
 4601   interface(REG_INTER);
 4602 %}
 4603 
 4604 operand flagsRegCR6() %{
 4605   constraint(ALLOC_IN_RC(int_flags_CR6));
 4606   match(RegFlags);
 4607   format %{ "CR6" %}
 4608   interface(REG_INTER);
 4609 %}
 4610 
 4611 operand regCTR() %{
 4612   constraint(ALLOC_IN_RC(ctr_reg));
 4613   // RegFlags should work. Introducing a RegSpecial type would cause a
 4614   // lot of changes.
 4615   match(RegFlags);
 4616   format %{"SR_CTR" %}
 4617   interface(REG_INTER);
 4618 %}
 4619 
 4620 operand regD() %{
 4621   constraint(ALLOC_IN_RC(dbl_reg));
 4622   match(RegD);
 4623   format %{ %}
 4624   interface(REG_INTER);
 4625 %}
 4626 
 4627 operand regF() %{
 4628   constraint(ALLOC_IN_RC(flt_reg));
 4629   match(RegF);
 4630   format %{ %}
 4631   interface(REG_INTER);
 4632 %}
 4633 
 4634 // Special Registers
 4635 
 4636 // Method Register
 4637 operand inline_cache_regP(iRegPdst reg) %{
 4638   constraint(ALLOC_IN_RC(r19_bits64_reg)); // inline_cache_reg
 4639   match(reg);
 4640   format %{ %}
 4641   interface(REG_INTER);
 4642 %}
 4643 
 4644 // Operands to remove register moves in unscaled mode.
 4645 // Match read/write registers with an EncodeP node if neither shift nor add are required.
 4646 operand iRegP2N(iRegPsrc reg) %{
 4647   predicate(false /* TODO: PPC port MatchDecodeNodes */ && CompressedOops::shift() == 0);
 4648   constraint(ALLOC_IN_RC(bits64_reg_ro));
 4649   match(EncodeP reg);
 4650   format %{ "$reg" %}
 4651   interface(REG_INTER)
 4652 %}
 4653 
 4654 operand iRegN2P(iRegNsrc reg) %{
 4655   predicate(false /* TODO: PPC port MatchDecodeNodes*/);
 4656   constraint(ALLOC_IN_RC(bits32_reg_ro));
 4657   match(DecodeN reg);
 4658   format %{ "$reg" %}
 4659   interface(REG_INTER)
 4660 %}
 4661 
 4662 operand iRegN2P_klass(iRegNsrc reg) %{
 4663   predicate(CompressedKlassPointers::base() == NULL && CompressedKlassPointers::shift() == 0);
 4664   constraint(ALLOC_IN_RC(bits32_reg_ro));
 4665   match(DecodeNKlass reg);
 4666   format %{ "$reg" %}
 4667   interface(REG_INTER)
 4668 %}
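      // For example, with a zero encoding base and zero shift, a (DecodeNKlass n)
      // feeding a pointer use can be matched directly through iRegN2P_klass, so the
      // narrow register is consumed as-is and no separate decode instruction is
      // emitted. (iRegP2N and iRegN2P are currently disabled by their 'false'
      // predicates, see the TODOs above.)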
 4669 
 4670 //----------Complex Operands---------------------------------------------------
 4671 // Indirect Memory Reference
 4672 operand indirect(iRegPsrc reg) %{
 4673   constraint(ALLOC_IN_RC(bits64_reg_ro));
 4674   match(reg);
 4675   op_cost(100);
 4676   format %{ "[$reg]" %}
 4677   interface(MEMORY_INTER) %{
 4678     base($reg);
 4679     index(0x0);
 4680     scale(0x0);
 4681     disp(0x0);
 4682   %}
 4683 %}
 4684 
 4685 // Indirect with Offset
 4686 operand indOffset16(iRegPsrc reg, immL16 offset) %{
 4687   constraint(ALLOC_IN_RC(bits64_reg_ro));
 4688   match(AddP reg offset);
 4689   op_cost(100);
 4690   format %{ "[$reg + $offset]" %}
 4691   interface(MEMORY_INTER) %{
 4692     base($reg);
 4693     index(0x0);
 4694     scale(0x0);
 4695     disp($offset);
 4696   %}
 4697 %}
 4698 
 4699 // Indirect with 4-aligned Offset
 4700 operand indOffset16Alg4(iRegPsrc reg, immL16Alg4 offset) %{
 4701   constraint(ALLOC_IN_RC(bits64_reg_ro));
 4702   match(AddP reg offset);
 4703   op_cost(100);
 4704   format %{ "[$reg + $offset]" %}
 4705   interface(MEMORY_INTER) %{
 4706     base($reg);
 4707     index(0x0);
 4708     scale(0x0);
 4709     disp($offset);
 4710   %}
 4711 %}
 4712 
 4713 //----------Complex Operands for Compressed OOPs-------------------------------
 4714 // Compressed OOPs with narrow_oop_shift == 0.
 4715 
 4716 // Indirect Memory Reference, compressed OOP
 4717 operand indirectNarrow(iRegNsrc reg) %{
 4718   predicate(false /* TODO: PPC port MatchDecodeNodes*/);
 4719   constraint(ALLOC_IN_RC(bits64_reg_ro));
 4720   match(DecodeN reg);
 4721   op_cost(100);
 4722   format %{ "[$reg]" %}
 4723   interface(MEMORY_INTER) %{
 4724     base($reg);
 4725     index(0x0);
 4726     scale(0x0);
 4727     disp(0x0);
 4728   %}
 4729 %}
 4730 
 4731 operand indirectNarrow_klass(iRegNsrc reg) %{
 4732   predicate(CompressedKlassPointers::base() == NULL && CompressedKlassPointers::shift() == 0);
 4733   constraint(ALLOC_IN_RC(bits64_reg_ro));
 4734   match(DecodeNKlass reg);
 4735   op_cost(100);
 4736   format %{ "[$reg]" %}
 4737   interface(MEMORY_INTER) %{
 4738     base($reg);
 4739     index(0x0);
 4740     scale(0x0);
 4741     disp(0x0);
 4742   %}
 4743 %}
 4744 
 4745 // Indirect with Offset, compressed OOP
 4746 operand indOffset16Narrow(iRegNsrc reg, immL16 offset) %{
 4747   predicate(false /* TODO: PPC port MatchDecodeNodes*/);
 4748   constraint(ALLOC_IN_RC(bits64_reg_ro));
 4749   match(AddP (DecodeN reg) offset);
 4750   op_cost(100);
 4751   format %{ "[$reg + $offset]" %}
 4752   interface(MEMORY_INTER) %{
 4753     base($reg);
 4754     index(0x0);
 4755     scale(0x0);
 4756     disp($offset);
 4757   %}
 4758 %}
 4759 
 4760 operand indOffset16Narrow_klass(iRegNsrc reg, immL16 offset) %{
 4761   predicate(CompressedKlassPointers::base() == NULL && CompressedKlassPointers::shift() == 0);
 4762   constraint(ALLOC_IN_RC(bits64_reg_ro));
 4763   match(AddP (DecodeNKlass reg) offset);
 4764   op_cost(100);
 4765   format %{ "[$reg + $offset]" %}
 4766   interface(MEMORY_INTER) %{
 4767     base($reg);
 4768     index(0x0);
 4769     scale(0x0);
 4770     disp($offset);
 4771   %}
 4772 %}
 4773 
 4774 // Indirect with 4-aligned Offset, compressed OOP
 4775 operand indOffset16NarrowAlg4(iRegNsrc reg, immL16Alg4 offset) %{
 4776   predicate(false /* TODO: PPC port MatchDecodeNodes*/);
 4777   constraint(ALLOC_IN_RC(bits64_reg_ro));
 4778   match(AddP (DecodeN reg) offset);
 4779   op_cost(100);
 4780   format %{ "[$reg + $offset]" %}
 4781   interface(MEMORY_INTER) %{
 4782     base($reg);
 4783     index(0x0);
 4784     scale(0x0);
 4785     disp($offset);
 4786   %}
 4787 %}
 4788 
 4789 operand indOffset16NarrowAlg4_klass(iRegNsrc reg, immL16Alg4 offset) %{
 4790   predicate(CompressedKlassPointers::base() == NULL && CompressedKlassPointers::shift() == 0);
 4791   constraint(ALLOC_IN_RC(bits64_reg_ro));
 4792   match(AddP (DecodeNKlass reg) offset);
 4793   op_cost(100);
 4794   format %{ "[$reg + $offset]" %}
 4795   interface(MEMORY_INTER) %{
 4796     base($reg);
 4797     index(0x0);
 4798     scale(0x0);
 4799     disp($offset);
 4800   %}
 4801 %}
 4802 
 4803 //----------Special Memory Operands--------------------------------------------
 4804 // Stack Slot Operand
 4805 //
 4806 // This operand is used for loading and storing temporary values on
 4807 // the stack where a match requires a value to flow through memory.
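      // For example, an int value spilled through stackSlotI is addressed as
      // [R1_SP + disp], where disp is the stack offset assigned to the sRegI slot.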
 4808 operand stackSlotI(sRegI reg) %{
 4809   constraint(ALLOC_IN_RC(stack_slots));
 4810   op_cost(100);
 4811   //match(RegI);
 4812   format %{ "[sp+$reg]" %}
 4813   interface(MEMORY_INTER) %{
 4814     base(0x1);   // R1_SP
 4815     index(0x0);
 4816     scale(0x0);
 4817     disp($reg);  // Stack Offset
 4818   %}
 4819 %}
 4820 
 4821 operand stackSlotL(sRegL reg) %{
 4822   constraint(ALLOC_IN_RC(stack_slots));
 4823   op_cost(100);
 4824   //match(RegL);
 4825   format %{ "[sp+$reg]" %}
 4826   interface(MEMORY_INTER) %{
 4827     base(0x1);   // R1_SP
 4828     index(0x0);
 4829     scale(0x0);
 4830     disp($reg);  // Stack Offset
 4831   %}
 4832 %}
 4833 
 4834 operand stackSlotP(sRegP reg) %{
 4835   constraint(ALLOC_IN_RC(stack_slots));
 4836   op_cost(100);
 4837   //match(RegP);
 4838   format %{ "[sp+$reg]" %}
 4839   interface(MEMORY_INTER) %{
 4840     base(0x1);   // R1_SP
 4841     index(0x0);
 4842     scale(0x0);
 4843     disp($reg);  // Stack Offset
 4844   %}
 4845 %}
 4846 
 4847 operand stackSlotF(sRegF reg) %{
 4848   constraint(ALLOC_IN_RC(stack_slots));
 4849   op_cost(100);
 4850   //match(RegF);
 4851   format %{ "[sp+$reg]" %}
 4852   interface(MEMORY_INTER) %{
 4853     base(0x1);   // R1_SP
 4854     index(0x0);
 4855     scale(0x0);
 4856     disp($reg);  // Stack Offset
 4857   %}
 4858 %}
 4859 
 4860 operand stackSlotD(sRegD reg) %{
 4861   constraint(ALLOC_IN_RC(stack_slots));
 4862   op_cost(100);
 4863   //match(RegD);
 4864   format %{ "[sp+$reg]" %}
 4865   interface(MEMORY_INTER) %{
 4866     base(0x1);   // R1_SP
 4867     index(0x0);
 4868     scale(0x0);
 4869     disp($reg);  // Stack Offset
 4870   %}
 4871 %}
 4872 
 4873 // Operands for expressing Control Flow
 4874 // NOTE: Label is a predefined operand which should not be redefined in
 4875 //       the AD file. It is generically handled within the ADLC.
 4876 
 4877 //----------Conditional Branch Operands----------------------------------------
 4878 // Comparison Op
 4879 //
 4880 // This is the operation of the comparison, and is limited to the
 4881 // following set of codes: L (<), LE (<=), G (>), GE (>=), E (==), NE
 4882 // (!=).
 4883 //
 4884 // Other attributes of the comparison, such as unsignedness, are specified
 4885 // by the comparison instruction that sets a condition code flags register.
 4886 // That result is represented by a flags operand whose subtype is appropriate
 4887 // to the unsignedness (etc.) of the comparison.
 4888 //
 4889 // Later, the instruction which matches both the Comparison Op (a Bool) and
 4890 // the flags (produced by the Cmp) specifies the coding of the comparison op
 4891 // by matching a specific subtype of Bool operand below.
 4892 
 4893 // When used for floating point comparisons: unordered same as less.
 4894 operand cmpOp() %{
 4895   match(Bool);
 4896   format %{ "" %}
 4897   interface(COND_INTER) %{
 4898                            // BO only encodes bit 4 of bcondCRbiIsX, as bits 1-3 are always '100'.
 4899                            //           BO          &  BI
 4900     equal(0xA);            // 10 10:   bcondCRbiIs1 & Condition::equal
 4901     not_equal(0x2);        // 00 10:   bcondCRbiIs0 & Condition::equal
 4902     less(0x8);             // 10 00:   bcondCRbiIs1 & Condition::less
 4903     greater_equal(0x0);    // 00 00:   bcondCRbiIs0 & Condition::less
 4904     less_equal(0x1);       // 00 01:   bcondCRbiIs0 & Condition::greater
 4905     greater(0x9);          // 10 01:   bcondCRbiIs1 & Condition::greater
 4906     overflow(0xB);         // 10 11:   bcondCRbiIs1 & Condition::summary_overflow
 4907     no_overflow(0x3);      // 00 11:   bcondCRbiIs0 & Condition::summary_overflow
 4908   %}
 4909 %}
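      // Reading the table above: the 0x8 bit selects bcondCRbiIs1 vs. bcondCRbiIs0,
      // and the low two bits name the CR bit (Condition). For example,
      // not_equal(0x2) = '00 10' means "branch if the 'equal' bit of the compared CR
      // field is 0". Branch instructions combine this code with the CR field of the
      // flags operand to form the BO/BI fields of the branch.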
 4910 
 4911 //----------OPERAND CLASSES----------------------------------------------------
 4912 // Operand Classes are groups of operands that are used to simplify
 4913 // instruction definitions by not requiring the AD writer to specify
 4914 // separate instructions for every form of operand when the
 4915 // instruction accepts multiple operand types with the same basic
 4916 // encoding and format. The classic case of this is memory operands.
 4917 // Indirect is not included since its use is limited to Compare & Swap.
 4918 
 4919 opclass memory(indirect, indOffset16 /*, indIndex, tlsReference*/, indirectNarrow, indirectNarrow_klass, indOffset16Narrow, indOffset16Narrow_klass);
 4920 // Memory operand where offsets are 4-aligned. Required for ld/std: these are
      // DS-form instructions whose displacement field has the low two bits hardwired
      // to zero, so the offset must be a multiple of 4.
 4921 opclass memoryAlg4(indirect, indOffset16Alg4, indirectNarrow, indOffset16NarrowAlg4, indOffset16NarrowAlg4_klass);
 4922 opclass indirectMemory(indirect, indirectNarrow);
 4923 
 4924 // Special opclass for I and ConvL2I.
 4925 opclass iRegIsrc_iRegL2Isrc(iRegIsrc, iRegL2Isrc);
 4926 
 4927 // Operand classes to match encode and decode. iRegN_P2N is only used
 4928 // for storeN. I have never seen an encode node elsewhere.
 4929 opclass iRegN_P2N(iRegNsrc, iRegP2N);
 4930 opclass iRegP_N2P(iRegPsrc, iRegN2P, iRegN2P_klass);
 4931 
 4932 //----------PIPELINE-----------------------------------------------------------
 4933 
 4934 pipeline %{
 4935 
 4936 // See J.M.Tendler et al. "Power4 system microarchitecture", IBM
 4937 // J. Res. & Dev., No. 1, Jan. 2002.
 4938 
 4939 //----------ATTRIBUTES---------------------------------------------------------
 4940 attributes %{
 4941 
 4942   // Power4 instructions are of fixed length.
 4943   fixed_size_instructions;
 4944 
 4945   // TODO: if `bundle' means number of instructions fetched
 4946   // per cycle, this is 8. If `bundle' means Power4 `group', that is
 4947   // max instructions issued per cycle, this is 5.
 4948   max_instructions_per_bundle = 8;
 4949 
 4950   // A Power4 instruction is 4 bytes long.
 4951   instruction_unit_size = 4;
 4952 
 4953   // The Power4 processor fetches 64 bytes...
 4954   instruction_fetch_unit_size = 64;
 4955 
 4956   // ...in one line
 4957   instruction_fetch_units = 1;
 4958 
 4959   // Unused, list one so that array generated by adlc is not empty.
 4960   // Aix compiler chokes if _nop_count = 0.
 4961   nops(fxNop);
 4962 %}
 4963 
 4964 //----------RESOURCES----------------------------------------------------------
 4965 // Resources are the functional units available to the machine
 4966 resources(
 4967    PPC_BR,         // branch unit
 4968    PPC_CR,         // condition unit
 4969    PPC_FX1,        // integer arithmetic unit 1
 4970    PPC_FX2,        // integer arithmetic unit 2
 4971    PPC_LDST1,      // load/store unit 1
 4972    PPC_LDST2,      // load/store unit 2
 4973    PPC_FP1,        // float arithmetic unit 1
 4974    PPC_FP2,        // float arithmetic unit 2
 4975    PPC_LDST = PPC_LDST1 | PPC_LDST2,
 4976    PPC_FX = PPC_FX1 | PPC_FX2,
 4977    PPC_FP = PPC_FP1 | PPC_FP2
 4978  );
 4979 
 4980 //----------PIPELINE DESCRIPTION-----------------------------------------------
 4981 // Pipeline Description specifies the stages in the machine's pipeline
 4982 pipe_desc(
 4983    // Power4 longest pipeline path
 4984    PPC_IF,   // instruction fetch
 4985    PPC_IC,
 4986    //PPC_BP, // branch prediction
 4987    PPC_D0,   // decode
 4988    PPC_D1,   // decode
 4989    PPC_D2,   // decode
 4990    PPC_D3,   // decode
 4991    PPC_Xfer1,
 4992    PPC_GD,   // group definition
 4993    PPC_MP,   // map
 4994    PPC_ISS,  // issue
 4995    PPC_RF,   // resource fetch
 4996    PPC_EX1,  // execute (all units)
 4997    PPC_EX2,  // execute (FP, LDST)
 4998    PPC_EX3,  // execute (FP, LDST)
 4999    PPC_EX4,  // execute (FP)
 5000    PPC_EX5,  // execute (FP)
 5001    PPC_EX6,  // execute (FP)
 5002    PPC_WB,   // write back
 5003    PPC_Xfer2,
 5004    PPC_CP
 5005  );
 5006 
 5007 //----------PIPELINE CLASSES---------------------------------------------------
 5008 // Pipeline Classes describe the stages in which input and output are
 5009 // referenced by the hardware pipeline.
 5010 
 5011 // Simple pipeline classes.
 5012 
 5013 // Default pipeline class.
 5014 pipe_class pipe_class_default() %{
 5015   single_instruction;
 5016   fixed_latency(2);
 5017 %}
 5018 
 5019 // Pipeline class for empty instructions.
 5020 pipe_class pipe_class_empty() %{
 5021   single_instruction;
 5022   fixed_latency(0);
 5023 %}
 5024 
 5025 // Pipeline class for compares.
 5026 pipe_class pipe_class_compare() %{
 5027   single_instruction;
 5028   fixed_latency(16);
 5029 %}
 5030 
 5031 // Pipeline class for traps.
 5032 pipe_class pipe_class_trap() %{
 5033   single_instruction;
 5034   fixed_latency(100);
 5035 %}
 5036 
 5037 // Pipeline class for memory operations.
 5038 pipe_class pipe_class_memory() %{
 5039   single_instruction;
 5040   fixed_latency(16);
 5041 %}
 5042 
 5043 // Pipeline class for call.
 5044 pipe_class pipe_class_call() %{
 5045   single_instruction;
 5046   fixed_latency(100);
 5047 %}
 5048 
 5049 // Define the class for the Nop node.
 5050 define %{
 5051    MachNop = pipe_class_default;
 5052 %}
 5053 
 5054 %}
 5055 
 5056 //----------INSTRUCTIONS-------------------------------------------------------
 5057 
 5058 // Naming of instructions:
 5059 //   opA_operB / opA_operB_operC:
 5060 //     Operation 'op' with one or two source operands 'oper'. Result
 5061 //     type is A, source operand types are B and C.
 5062 //     Iff A == B == C, B and C are left out.
 5063 //
 5064 // The instructions are ordered according to the following scheme:
 5065 //  - loads
 5066 //  - load constants
 5067 //  - prefetch
 5068 //  - store
 5069 //  - encode/decode
 5070 //  - membar
 5071 //  - conditional moves
 5072 //  - compare & swap
 5073 //  - arithmetic and logic operations
 5074 //    * int: Add, Sub, Mul, Div, Mod
 5075 //    * int: lShift, arShift, urShift, rot
 5076 //    * float: Add, Sub, Mul, Div
 5077 //    * and, or, xor ...
 5078 //  - register moves: float <-> int, reg <-> stack, repl
 5079 //  - cast (high level type cast: XtoP, castPP, castII, not_null etc.)
 5080 //  - conv (low level type cast requiring bit changes: sign extend etc.)
 5081 //  - compares, range & zero checks.
 5082 //  - branches
 5083 //  - complex operations, intrinsics, min, max, replicate
 5084 //  - lock
 5085 //  - Calls
 5086 //
 5087 // If there are similar instructions with different types they are sorted:
 5088 // int before float
 5089 // small before big
 5090 // signed before unsigned
 5091 // e.g., loadS before loadUS before loadI before loadF.
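      // For example, an instruction adding a 16-bit immediate to an int register is
      // named addI_reg_imm16 under this scheme, and a register-to-register
      // long-to-int conversion is named convL2I_reg.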
 5092 
 5093 
 5094 //----------Load/Store Instructions--------------------------------------------
 5095 
 5096 //----------Load Instructions--------------------------------------------------
 5097 
 5098 // Converts byte to int.
 5099 // As convB2I_reg, but without match rule.  The match rule of convB2I_reg
 5100 // reuses the 'amount' operand, but adlc expects that operand specification
 5101 // and operands in match rule are equivalent.
 5102 instruct convB2I_reg_2(iRegIdst dst, iRegIsrc src) %{
 5103   effect(DEF dst, USE src);
 5104   format %{ "EXTSB   $dst, $src \t// byte->int" %}
 5105   size(4);
 5106   ins_encode %{
 5107     __ extsb($dst$$Register, $src$$Register);
 5108   %}
 5109   ins_pipe(pipe_class_default);
 5110 %}
 5111 
 5112 instruct loadUB_indirect(iRegIdst dst, indirectMemory mem) %{
 5113   // match-rule, false predicate
 5114   match(Set dst (LoadB mem));
 5115   predicate(false);
 5116 
 5117   format %{ "LBZ     $dst, $mem" %}
 5118   size(4);
 5119   ins_encode( enc_lbz(dst, mem) );
 5120   ins_pipe(pipe_class_memory);
 5121 %}
 5122 
 5123 instruct loadUB_indirect_ac(iRegIdst dst, indirectMemory mem) %{
 5124   // match-rule, false predicate
 5125   match(Set dst (LoadB mem));
 5126   predicate(false);
 5127 
 5128   format %{ "LBZ     $dst, $mem\n\t"
 5129             "TWI     $dst\n\t"
 5130             "ISYNC" %}
 5131   size(12);
 5132   ins_encode( enc_lbz_ac(dst, mem) );
 5133   ins_pipe(pipe_class_memory);
 5134 %}
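      // Note on the '_ac' (acquire) load variants like the one above: the TWI on the
      // loaded value establishes a dependency on the load result, and the following
      // ISYNC discards instructions fetched past it. Together they keep later memory
      // accesses from being performed before the load completes, giving the load
      // acquire semantics without a full sync instruction.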
 5135 
 5136 // Load Byte (8bit signed). LoadB = LoadUB + ConvUB2B.
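      // (PPC has no sign-extending byte load, so the signed load is a zero-extending
      // LBZ followed by an EXTSB sign extension.)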
 5137 instruct loadB_indirect_Ex(iRegIdst dst, indirectMemory mem) %{
 5138   match(Set dst (LoadB mem));
 5139   predicate(n->as_Load()->is_unordered() || followed_by_acquire(n));
 5140   ins_cost(MEMORY_REF_COST + DEFAULT_COST);
 5141   expand %{
 5142     iRegIdst tmp;
 5143     loadUB_indirect(tmp, mem);
 5144     convB2I_reg_2(dst, tmp);
 5145   %}
 5146 %}
 5147 
 5148 instruct loadB_indirect_ac_Ex(iRegIdst dst, indirectMemory mem) %{
 5149   match(Set dst (LoadB mem));
 5150   ins_cost(3*MEMORY_REF_COST + DEFAULT_COST);
 5151   expand %{
 5152     iRegIdst tmp;
 5153     loadUB_indirect_ac(tmp, mem);
 5154     convB2I_reg_2(dst, tmp);
 5155   %}
 5156 %}
 5157 
 5158 instruct loadUB_indOffset16(iRegIdst dst, indOffset16 mem) %{
 5159   // match-rule, false predicate
 5160   match(Set dst (LoadB mem));
 5161   predicate(false);
 5162 
 5163   format %{ "LBZ     $dst, $mem" %}
 5164   size(4);
 5165   ins_encode( enc_lbz(dst, mem) );
 5166   ins_pipe(pipe_class_memory);
 5167 %}
 5168 
 5169 instruct loadUB_indOffset16_ac(iRegIdst dst, indOffset16 mem) %{
 5170   // match-rule, false predicate
 5171   match(Set dst (LoadB mem));
 5172   predicate(false);
 5173 
 5174   format %{ "LBZ     $dst, $mem\n\t"
 5175             "TWI     $dst\n\t"
 5176             "ISYNC" %}
 5177   size(12);
 5178   ins_encode( enc_lbz_ac(dst, mem) );
 5179   ins_pipe(pipe_class_memory);
 5180 %}
 5181 
 5182 // Load Byte (8bit signed). LoadB = LoadUB + ConvUB2B.
 5183 instruct loadB_indOffset16_Ex(iRegIdst dst, indOffset16 mem) %{
 5184   match(Set dst (LoadB mem));
 5185   predicate(n->as_Load()->is_unordered() || followed_by_acquire(n));
 5186   ins_cost(MEMORY_REF_COST + DEFAULT_COST);
 5187 
 5188   expand %{
 5189     iRegIdst tmp;
 5190     loadUB_indOffset16(tmp, mem);
 5191     convB2I_reg_2(dst, tmp);
 5192   %}
 5193 %}
 5194 
 5195 instruct loadB_indOffset16_ac_Ex(iRegIdst dst, indOffset16 mem) %{
 5196   match(Set dst (LoadB mem));
 5197   ins_cost(3*MEMORY_REF_COST + DEFAULT_COST);
 5198 
 5199   expand %{
 5200     iRegIdst tmp;
 5201     loadUB_indOffset16_ac(tmp, mem);
 5202     convB2I_reg_2(dst, tmp);
 5203   %}
 5204 %}
 5205 
 5206 // Load Unsigned Byte (8bit UNsigned) into an int reg.
 5207 instruct loadUB(iRegIdst dst, memory mem) %{
 5208   predicate(n->as_Load()->is_unordered() || followed_by_acquire(n));
 5209   match(Set dst (LoadUB mem));
 5210   ins_cost(MEMORY_REF_COST);
 5211 
 5212   format %{ "LBZ     $dst, $mem \t// byte, zero-extend to int" %}
 5213   size(4);
 5214   ins_encode( enc_lbz(dst, mem) );
 5215   ins_pipe(pipe_class_memory);
 5216 %}
 5217 
 5218 // Load  Unsigned Byte (8bit UNsigned) acquire.
 5219 instruct loadUB_ac(iRegIdst dst, memory mem) %{
 5220   match(Set dst (LoadUB mem));
 5221   ins_cost(3*MEMORY_REF_COST);
 5222 
 5223   format %{ "LBZ     $dst, $mem \t// byte, zero-extend to int, acquire\n\t"
 5224             "TWI     $dst\n\t"
 5225             "ISYNC" %}
 5226   size(12);
 5227   ins_encode( enc_lbz_ac(dst, mem) );
 5228   ins_pipe(pipe_class_memory);
 5229 %}
 5230 
 5231 // Load Unsigned Byte (8bit UNsigned) into a Long Register.
 5232 instruct loadUB2L(iRegLdst dst, memory mem) %{
 5233   match(Set dst (ConvI2L (LoadUB mem)));
 5234   predicate(_kids[0]->_leaf->as_Load()->is_unordered() || followed_by_acquire(_kids[0]->_leaf));
 5235   ins_cost(MEMORY_REF_COST);
 5236 
 5237   format %{ "LBZ     $dst, $mem \t// byte, zero-extend to long" %}
 5238   size(4);
 5239   ins_encode( enc_lbz(dst, mem) );
 5240   ins_pipe(pipe_class_memory);
 5241 %}
 5242 
 5243 instruct loadUB2L_ac(iRegLdst dst, memory mem) %{
 5244   match(Set dst (ConvI2L (LoadUB mem)));
 5245   ins_cost(3*MEMORY_REF_COST);
 5246 
 5247   format %{ "LBZ     $dst, $mem \t// byte, zero-extend to long, acquire\n\t"
 5248             "TWI     $dst\n\t"
 5249             "ISYNC" %}
 5250   size(12);
 5251   ins_encode( enc_lbz_ac(dst, mem) );
 5252   ins_pipe(pipe_class_memory);
 5253 %}
 5254 
 5255 // Load Short (16bit signed)
 5256 instruct loadS(iRegIdst dst, memory mem) %{
 5257   match(Set dst (LoadS mem));
 5258   predicate(n->as_Load()->is_unordered() || followed_by_acquire(n));
 5259   ins_cost(MEMORY_REF_COST);
 5260 
 5261   format %{ "LHA     $dst, $mem" %}
 5262   size(4);
 5263   ins_encode %{
 5264     int Idisp = $mem$$disp + frame_slots_bias($mem$$base, ra_);
 5265     __ lha($dst$$Register, Idisp, $mem$$base$$Register);
 5266   %}
 5267   ins_pipe(pipe_class_memory);
 5268 %}
 5269 
 5270 // Load Short (16bit signed) acquire.
 5271 instruct loadS_ac(iRegIdst dst, memory mem) %{
 5272   match(Set dst (LoadS mem));
 5273   ins_cost(3*MEMORY_REF_COST);
 5274 
 5275   format %{ "LHA     $dst, $mem\t acquire\n\t"
 5276             "TWI     $dst\n\t"
 5277             "ISYNC" %}
 5278   size(12);
 5279   ins_encode %{
 5280     int Idisp = $mem$$disp + frame_slots_bias($mem$$base, ra_);
 5281     __ lha($dst$$Register, Idisp, $mem$$base$$Register);
 5282     __ twi_0($dst$$Register);
 5283     __ isync();
 5284   %}
 5285   ins_pipe(pipe_class_memory);
 5286 %}
 5287 
 5288 // Load Char (16bit unsigned)
 5289 instruct loadUS(iRegIdst dst, memory mem) %{
 5290   match(Set dst (LoadUS mem));
 5291   predicate(n->as_Load()->is_unordered() || followed_by_acquire(n));
 5292   ins_cost(MEMORY_REF_COST);
 5293 
 5294   format %{ "LHZ     $dst, $mem" %}
 5295   size(4);
 5296   ins_encode( enc_lhz(dst, mem) );
 5297   ins_pipe(pipe_class_memory);
 5298 %}
 5299 
 5300 // Load Char (16bit unsigned) acquire.
 5301 instruct loadUS_ac(iRegIdst dst, memory mem) %{
 5302   match(Set dst (LoadUS mem));
 5303   ins_cost(3*MEMORY_REF_COST);
 5304 
 5305   format %{ "LHZ     $dst, $mem \t// acquire\n\t"
 5306             "TWI     $dst\n\t"
 5307             "ISYNC" %}
 5308   size(12);
 5309   ins_encode( enc_lhz_ac(dst, mem) );
 5310   ins_pipe(pipe_class_memory);
 5311 %}
 5312 
 5313 // Load Unsigned Short/Char (16bit UNsigned) into a Long Register.
 5314 instruct loadUS2L(iRegLdst dst, memory mem) %{
 5315   match(Set dst (ConvI2L (LoadUS mem)));
 5316   predicate(_kids[0]->_leaf->as_Load()->is_unordered() || followed_by_acquire(_kids[0]->_leaf));
 5317   ins_cost(MEMORY_REF_COST);
 5318 
 5319   format %{ "LHZ     $dst, $mem \t// short, zero-extend to long" %}
 5320   size(4);
 5321   ins_encode( enc_lhz(dst, mem) );
 5322   ins_pipe(pipe_class_memory);
 5323 %}
 5324 
 5325 // Load Unsigned Short/Char (16bit UNsigned) into a Long Register acquire.
 5326 instruct loadUS2L_ac(iRegLdst dst, memory mem) %{
 5327   match(Set dst (ConvI2L (LoadUS mem)));
 5328   ins_cost(3*MEMORY_REF_COST);
 5329 
 5330   format %{ "LHZ     $dst, $mem \t// short, zero-extend to long, acquire\n\t"
 5331             "TWI     $dst\n\t"
 5332             "ISYNC" %}
 5333   size(12);
 5334   ins_encode( enc_lhz_ac(dst, mem) );
 5335   ins_pipe(pipe_class_memory);
 5336 %}
 5337 
 5338 // Load Integer.
 5339 instruct loadI(iRegIdst dst, memory mem) %{
 5340   match(Set dst (LoadI mem));
 5341   predicate(n->as_Load()->is_unordered() || followed_by_acquire(n));
 5342   ins_cost(MEMORY_REF_COST);
 5343 
 5344   format %{ "LWZ     $dst, $mem" %}
 5345   size(4);
 5346   ins_encode( enc_lwz(dst, mem) );
 5347   ins_pipe(pipe_class_memory);
 5348 %}
 5349 
 5350 // Load Integer acquire.
 5351 instruct loadI_ac(iRegIdst dst, memory mem) %{
 5352   match(Set dst (LoadI mem));
 5353   ins_cost(3*MEMORY_REF_COST);
 5354 
 5355   format %{ "LWZ     $dst, $mem \t// load acquire\n\t"
 5356             "TWI     $dst\n\t"
 5357             "ISYNC" %}
 5358   size(12);
 5359   ins_encode( enc_lwz_ac(dst, mem) );
 5360   ins_pipe(pipe_class_memory);
 5361 %}
 5362 
 5363 // Match loading integer and casting it to unsigned int in
 5364 // long register.
 5365 // LoadI + ConvI2L + AndL 0xffffffff.
 5366 instruct loadUI2L(iRegLdst dst, memory mem, immL_32bits mask) %{
 5367   match(Set dst (AndL (ConvI2L (LoadI mem)) mask));
 5368   predicate(_kids[0]->_kids[0]->_leaf->as_Load()->is_unordered());
 5369   ins_cost(MEMORY_REF_COST);
 5370 
 5371   format %{ "LWZ     $dst, $mem \t// zero-extend to long" %}
 5372   size(4);
 5373   ins_encode( enc_lwz(dst, mem) );
 5374   ins_pipe(pipe_class_memory);
 5375 %}
 5376 
 5377 // Match loading integer and casting it to long.
 5378 instruct loadI2L(iRegLdst dst, memoryAlg4 mem) %{
 5379   match(Set dst (ConvI2L (LoadI mem)));
 5380   predicate(_kids[0]->_leaf->as_Load()->is_unordered());
 5381   ins_cost(MEMORY_REF_COST);
 5382 
 5383   format %{ "LWA     $dst, $mem \t// loadI2L" %}
 5384   size(4);
 5385   ins_encode %{
 5386     int Idisp = $mem$$disp + frame_slots_bias($mem$$base, ra_);
 5387     __ lwa($dst$$Register, Idisp, $mem$$base$$Register);
 5388   %}
 5389   ins_pipe(pipe_class_memory);
 5390 %}
 5391 
 5392 // Match loading integer and casting it to long - acquire.
 5393 instruct loadI2L_ac(iRegLdst dst, memoryAlg4 mem) %{
 5394   match(Set dst (ConvI2L (LoadI mem)));
 5395   ins_cost(3*MEMORY_REF_COST);
 5396 
 5397   format %{ "LWA     $dst, $mem \t// loadI2L acquire"
 5398             "TWI     $dst\n\t"
 5399             "ISYNC" %}
 5400   size(12);
 5401   ins_encode %{
 5402     int Idisp = $mem$$disp + frame_slots_bias($mem$$base, ra_);
 5403     __ lwa($dst$$Register, Idisp, $mem$$base$$Register);
 5404     __ twi_0($dst$$Register);
 5405     __ isync();
 5406   %}
 5407   ins_pipe(pipe_class_memory);
 5408 %}
 5409 
 5410 // Load Long - aligned
 5411 instruct loadL(iRegLdst dst, memoryAlg4 mem) %{
 5412   match(Set dst (LoadL mem));
 5413   predicate(n->as_Load()->is_unordered() || followed_by_acquire(n));
 5414   ins_cost(MEMORY_REF_COST);
 5415 
 5416   format %{ "LD      $dst, $mem \t// long" %}
 5417   size(4);
 5418   ins_encode( enc_ld(dst, mem) );
 5419   ins_pipe(pipe_class_memory);
 5420 %}
 5421 
 5422 // Load Long - aligned acquire.
 5423 instruct loadL_ac(iRegLdst dst, memoryAlg4 mem) %{
 5424   match(Set dst (LoadL mem));
 5425   ins_cost(3*MEMORY_REF_COST);
 5426 
 5427   format %{ "LD      $dst, $mem \t// long acquire\n\t"
 5428             "TWI     $dst\n\t"
 5429             "ISYNC" %}
 5430   size(12);
 5431   ins_encode( enc_ld_ac(dst, mem) );
 5432   ins_pipe(pipe_class_memory);
 5433 %}
 5434 
 5435 // Load Long - UNaligned
 5436 instruct loadL_unaligned(iRegLdst dst, memoryAlg4 mem) %{
 5437   match(Set dst (LoadL_unaligned mem));
 5438   // predicate(...) // Unaligned_ac is not needed (and wouldn't make sense).
 5439   ins_cost(MEMORY_REF_COST);
 5440 
 5441   format %{ "LD      $dst, $mem \t// unaligned long" %}
 5442   size(4);
 5443   ins_encode( enc_ld(dst, mem) );
 5444   ins_pipe(pipe_class_memory);
 5445 %}
 5446 
 5447 // Load nodes for superwords
 5448 
 5449 // Load Aligned Packed Byte
 5450 instruct loadV8(iRegLdst dst, memoryAlg4 mem) %{
 5451   predicate(n->as_LoadVector()->memory_size() == 8);
 5452   match(Set dst (LoadVector mem));
 5453   ins_cost(MEMORY_REF_COST);
 5454 
 5455   format %{ "LD      $dst, $mem \t// load 8-byte Vector" %}
 5456   size(4);
 5457   ins_encode( enc_ld(dst, mem) );
 5458   ins_pipe(pipe_class_memory);
 5459 %}
 5460 
 5461 // Load Aligned Packed Byte
 5462 instruct loadV16(vecX dst, indirect mem) %{
 5463   predicate(n->as_LoadVector()->memory_size() == 16);
 5464   match(Set dst (LoadVector mem));
 5465   ins_cost(MEMORY_REF_COST);
 5466 
 5467   format %{ "LXVD2X      $dst, $mem \t// load 16-byte Vector" %}
 5468   size(4);
 5469   ins_encode %{
 5470     __ lxvd2x($dst$$VectorSRegister, $mem$$Register);
 5471   %}
 5472   ins_pipe(pipe_class_default);
 5473 %}
 5474 
 5475 // Load Range, range = array length (=jint)
 5476 instruct loadRange(iRegIdst dst, memory mem) %{
 5477   match(Set dst (LoadRange mem));
 5478   ins_cost(MEMORY_REF_COST);
 5479 
 5480   format %{ "LWZ     $dst, $mem \t// range" %}
 5481   size(4);
 5482   ins_encode( enc_lwz(dst, mem) );
 5483   ins_pipe(pipe_class_memory);
 5484 %}
 5485 
 5486 // Load Compressed Pointer
 5487 instruct loadN(iRegNdst dst, memory mem) %{
 5488   match(Set dst (LoadN mem));
 5489   predicate(n->as_Load()->is_unordered() || followed_by_acquire(n));
 5490   ins_cost(MEMORY_REF_COST);
 5491 
 5492   format %{ "LWZ     $dst, $mem \t// load compressed ptr" %}
 5493   size(4);
 5494   ins_encode( enc_lwz(dst, mem) );
 5495   ins_pipe(pipe_class_memory);
 5496 %}
 5497 
 5498 // Load Compressed Pointer acquire.
 5499 instruct loadN_ac(iRegNdst dst, memory mem) %{
 5500   match(Set dst (LoadN mem));
 5501   ins_cost(3*MEMORY_REF_COST);
 5502 
 5503   format %{ "LWZ     $dst, $mem \t// load acquire compressed ptr\n\t"
 5504             "TWI     $dst\n\t"
 5505             "ISYNC" %}
 5506   size(12);
 5507   ins_encode( enc_lwz_ac(dst, mem) );
 5508   ins_pipe(pipe_class_memory);
 5509 %}
 5510 
 5511 // Load Compressed Pointer and decode it if narrow_oop_shift == 0.
 5512 instruct loadN2P_unscaled(iRegPdst dst, memory mem) %{
 5513   match(Set dst (DecodeN (LoadN mem)));
 5514   predicate(_kids[0]->_leaf->as_Load()->is_unordered() && CompressedOops::shift() == 0);
 5515   ins_cost(MEMORY_REF_COST);
 5516 
 5517   format %{ "LWZ     $dst, $mem \t// DecodeN (unscaled)" %}
 5518   size(4);
 5519   ins_encode( enc_lwz(dst, mem) );
 5520   ins_pipe(pipe_class_memory);
 5521 %}
 5522 
 5523 instruct loadN2P_klass_unscaled(iRegPdst dst, memory mem) %{
 5524   match(Set dst (DecodeNKlass (LoadNKlass mem)));
 5525   predicate(CompressedKlassPointers::base() == NULL && CompressedKlassPointers::shift() == 0 &&
 5526             _kids[0]->_leaf->as_Load()->is_unordered());
 5527   ins_cost(MEMORY_REF_COST);
 5528 
 5529   format %{ "LWZ     $dst, $mem \t// DecodeN (unscaled)" %}
 5530   size(4);
 5531   ins_encode( enc_lwz(dst, mem) );
 5532   ins_pipe(pipe_class_memory);
 5533 %}
 5534 
 5535 // Load Pointer
 5536 instruct loadP(iRegPdst dst, memoryAlg4 mem) %{
 5537   match(Set dst (LoadP mem));
 5538   predicate(n->as_Load()->is_unordered() || followed_by_acquire(n));
 5539   ins_cost(MEMORY_REF_COST);
 5540 
 5541   format %{ "LD      $dst, $mem \t// ptr" %}
 5542   size(4);
 5543   ins_encode( enc_ld(dst, mem) );
 5544   ins_pipe(pipe_class_memory);
 5545 %}
 5546 
 5547 // Load Pointer acquire.
 5548 instruct loadP_ac(iRegPdst dst, memoryAlg4 mem) %{
 5549   match(Set dst (LoadP mem));
 5550   ins_cost(3*MEMORY_REF_COST);
 5551 
 5552   format %{ "LD      $dst, $mem \t// ptr acquire\n\t"
 5553             "TWI     $dst\n\t"
 5554             "ISYNC" %}
 5555   size(12);
 5556   ins_encode( enc_ld_ac(dst, mem) );
 5557   ins_pipe(pipe_class_memory);
 5558 %}
 5559 
 5560 // LoadP + CastP2L
 5561 instruct loadP2X(iRegLdst dst, memoryAlg4 mem) %{
 5562   match(Set dst (CastP2X (LoadP mem)));
 5563   predicate(_kids[0]->_leaf->as_Load()->is_unordered());
 5564   ins_cost(MEMORY_REF_COST);
 5565 
 5566   format %{ "LD      $dst, $mem \t// ptr + p2x" %}
 5567   size(4);
 5568   ins_encode( enc_ld(dst, mem) );
 5569   ins_pipe(pipe_class_memory);
 5570 %}
 5571 
 5572 // Load compressed klass pointer.
 5573 instruct loadNKlass(iRegNdst dst, memory mem) %{
 5574   match(Set dst (LoadNKlass mem));
 5575   ins_cost(MEMORY_REF_COST);
 5576 
 5577   format %{ "LWZ     $dst, $mem \t// compressed klass ptr" %}
 5578   size(4);
 5579   ins_encode( enc_lwz(dst, mem) );
 5580   ins_pipe(pipe_class_memory);
 5581 %}
 5582 
 5583 // Load Klass Pointer
 5584 instruct loadKlass(iRegPdst dst, memoryAlg4 mem) %{
 5585   match(Set dst (LoadKlass mem));
 5586   ins_cost(MEMORY_REF_COST);
 5587 
 5588   format %{ "LD      $dst, $mem \t// klass ptr" %}
 5589   size(4);
 5590   ins_encode( enc_ld(dst, mem) );
 5591   ins_pipe(pipe_class_memory);
 5592 %}
 5593 
 5594 // Load Float
 5595 instruct loadF(regF dst, memory mem) %{
 5596   match(Set dst (LoadF mem));
 5597   predicate(n->as_Load()->is_unordered() || followed_by_acquire(n));
 5598   ins_cost(MEMORY_REF_COST);
 5599 
 5600   format %{ "LFS     $dst, $mem" %}
 5601   size(4);
 5602   ins_encode %{
 5603     int Idisp = $mem$$disp + frame_slots_bias($mem$$base, ra_);
 5604     __ lfs($dst$$FloatRegister, Idisp, $mem$$base$$Register);
 5605   %}
 5606   ins_pipe(pipe_class_memory);
 5607 %}
 5608 
 5609 // Load Float acquire.
 5610 instruct loadF_ac(regF dst, memory mem, flagsRegCR0 cr0) %{
 5611   match(Set dst (LoadF mem));
 5612   effect(TEMP cr0);
 5613   ins_cost(3*MEMORY_REF_COST);
 5614 
 5615   format %{ "LFS     $dst, $mem \t// acquire\n\t"
 5616             "FCMPU   cr0, $dst, $dst\n\t"
 5617             "BNE     cr0, next\n"
 5618             "next:\n\t"
 5619             "ISYNC" %}
 5620   size(16);
 5621   ins_encode %{
 5622     int Idisp = $mem$$disp + frame_slots_bias($mem$$base, ra_);
 5623     Label next;
 5624     __ lfs($dst$$FloatRegister, Idisp, $mem$$base$$Register);
 5625     __ fcmpu(CCR0, $dst$$FloatRegister, $dst$$FloatRegister);
 5626     __ bne(CCR0, next);
 5627     __ bind(next);
 5628     __ isync();
 5629   %}
 5630   ins_pipe(pipe_class_memory);
 5631 %}
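      // For float/double acquire loads the dependency cannot be built with TWI (which
      // works on GPRs), so the loaded value is compared with itself (FCMPU) and a
      // conditional branch to the immediately following label supplies the dependency
      // before the ISYNC.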
 5632 
 5633 // Load Double - aligned
 5634 instruct loadD(regD dst, memory mem) %{
 5635   match(Set dst (LoadD mem));
 5636   predicate(n->as_Load()->is_unordered() || followed_by_acquire(n));
 5637   ins_cost(MEMORY_REF_COST);
 5638 
 5639   format %{ "LFD     $dst, $mem" %}
 5640   size(4);
 5641   ins_encode( enc_lfd(dst, mem) );
 5642   ins_pipe(pipe_class_memory);
 5643 %}
 5644 
 5645 // Load Double - aligned acquire.
 5646 instruct loadD_ac(regD dst, memory mem, flagsRegCR0 cr0) %{
 5647   match(Set dst (LoadD mem));
 5648   effect(TEMP cr0);
 5649   ins_cost(3*MEMORY_REF_COST);
 5650 
 5651   format %{ "LFD     $dst, $mem \t// acquire\n\t"
 5652             "FCMPU   cr0, $dst, $dst\n\t"
 5653             "BNE     cr0, next\n"
 5654             "next:\n\t"
 5655             "ISYNC" %}
 5656   size(16);
 5657   ins_encode %{
 5658     int Idisp = $mem$$disp + frame_slots_bias($mem$$base, ra_);
 5659     Label next;
 5660     __ lfd($dst$$FloatRegister, Idisp, $mem$$base$$Register);
 5661     __ fcmpu(CCR0, $dst$$FloatRegister, $dst$$FloatRegister);
 5662     __ bne(CCR0, next);
 5663     __ bind(next);
 5664     __ isync();
 5665   %}
 5666   ins_pipe(pipe_class_memory);
 5667 %}
 5668 
 5669 // Load Double - UNaligned
 5670 instruct loadD_unaligned(regD dst, memory mem) %{
 5671   match(Set dst (LoadD_unaligned mem));
 5672   // predicate(...) // Unaligned_ac is not needed (and wouldn't make sense).
 5673   ins_cost(MEMORY_REF_COST);
 5674 
 5675   format %{ "LFD     $dst, $mem" %}
 5676   size(4);
 5677   ins_encode( enc_lfd(dst, mem) );
 5678   ins_pipe(pipe_class_memory);
 5679 %}
 5680 
 5681 //----------Constants--------------------------------------------------------
 5682 
 5683 // Load MachConstantTableBase: add hi offset to global toc.
 5684 // TODO: Handle hidden register r29 in bundler!
 5685 instruct loadToc_hi(iRegLdst dst) %{
 5686   effect(DEF dst);
 5687   ins_cost(DEFAULT_COST);
 5688 
 5689   format %{ "ADDIS   $dst, R29, DISP.hi \t// load TOC hi" %}
 5690   size(4);
 5691   ins_encode %{
 5692     __ calculate_address_from_global_toc_hi16only($dst$$Register, __ method_toc());
 5693   %}
 5694   ins_pipe(pipe_class_default);
 5695 %}
 5696 
 5697 // Load MachConstantTableBase: add lo offset to global toc.
 5698 instruct loadToc_lo(iRegLdst dst, iRegLdst src) %{
 5699   effect(DEF dst, USE src);
 5700   ins_cost(DEFAULT_COST);
 5701 
 5702   format %{ "ADDI    $dst, $src, DISP.lo \t// load TOC lo" %}
 5703   size(4);
 5704   ins_encode %{
 5705     __ calculate_address_from_global_toc_lo16only($dst$$Register, __ method_toc());
 5706   %}
 5707   ins_pipe(pipe_class_default);
 5708 %}
 5709 
 5710 // Load 16-bit integer constant 0xssss????
 5711 instruct loadConI16(iRegIdst dst, immI16 src) %{
 5712   match(Set dst src);
 5713 
 5714   format %{ "LI      $dst, $src" %}
 5715   size(4);
 5716   ins_encode %{
 5717     __ li($dst$$Register, (int)((short)($src$$constant & 0xFFFF)));
 5718   %}
 5719   ins_pipe(pipe_class_default);
 5720 %}
 5721 
 5722 // Load integer constant 0x????0000
 5723 instruct loadConIhi16(iRegIdst dst, immIhi16 src) %{
 5724   match(Set dst src);
 5725   ins_cost(DEFAULT_COST);
 5726 
 5727   format %{ "LIS     $dst, $src.hi" %}
 5728   size(4);
 5729   ins_encode %{
 5730     // Lis shifts the 16-bit src 16 bits to the left and sign-extends the result.
 5731     __ lis($dst$$Register, (int)((short)(($src$$constant & 0xFFFF0000) >> 16)));
 5732   %}
 5733   ins_pipe(pipe_class_default);
 5734 %}
 5735 
 5736 // Part 2 of loading a 32-bit constant: hi16 is in src1 (properly shifted
 5737 // and sign extended); this adds the low 16 bits.
 5738 instruct loadConI32_lo16(iRegIdst dst, iRegIsrc src1, immI16 src2) %{
 5739   // no match-rule, false predicate
 5740   effect(DEF dst, USE src1, USE src2);
 5741   predicate(false);
 5742 
 5743   format %{ "ORI     $dst, $src1.hi, $src2.lo" %}
 5744   size(4);
 5745   ins_encode %{
 5746     __ ori($dst$$Register, $src1$$Register, ($src2$$constant) & 0xFFFF);
 5747   %}
 5748   ins_pipe(pipe_class_default);
 5749 %}
 5750 
 5751 instruct loadConI32(iRegIdst dst, immI32 src) %{
 5752   match(Set dst src);
 5753   // This macro is valid only in Power 10 and up, but adding the following predicate here
 5754   // caused a build error, so we comment it out for now.
 5755   // predicate(PowerArchitecturePPC64 >= 10);
 5756   ins_cost(DEFAULT_COST+1);
 5757 
 5758   format %{ "PLI     $dst, $src" %}
 5759   size(8);
 5760   ins_encode %{
 5761     assert( ((intptr_t)(__ pc()) & 0x3c) != 0x3c, "Bad alignment for prefixed instruction at " INTPTR_FORMAT, (intptr_t)(__ pc()));
 5762     __ pli($dst$$Register, $src$$constant);
 5763   %}
 5764   ins_pipe(pipe_class_default);
 5765   ins_alignment(2);
 5766 %}
 5767 
 5768 instruct loadConI_Ex(iRegIdst dst, immI src) %{
 5769   match(Set dst src);
 5770   ins_cost(DEFAULT_COST*2);
 5771 
 5772   expand %{
 5773     // Would like to use $src$$constant.
 5774     immI16 srcLo %{ _opnds[1]->constant() %}
 5775     // srcHi can be 0000 if srcLo sign-extends to a negative number.
 5776     immIhi16 srcHi %{ _opnds[1]->constant() %}
 5777     iRegIdst tmpI;
 5778     loadConIhi16(tmpI, srcHi);
 5779     loadConI32_lo16(dst, tmpI, srcLo);
 5780   %}
 5781 %}
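      // Example: loading the int constant 0x12345678 this way emits
      //   LIS  tmp, 0x1234      // tmp = 0x12340000
      //   ORI  dst, tmp, 0x5678 // dst = 0x12345678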
 5782 
 5783 // No constant pool entries required.
 5784 instruct loadConL16(iRegLdst dst, immL16 src) %{
 5785   match(Set dst src);
 5786 
 5787   format %{ "LI      $dst, $src \t// long" %}
 5788   size(4);
 5789   ins_encode %{
 5790     __ li($dst$$Register, (int)((short) ($src$$constant & 0xFFFF)));
 5791   %}
 5792   ins_pipe(pipe_class_default);
 5793 %}
 5794 
 5795 // Load long constant 0xssssssss????0000
 5796 instruct loadConL32hi16(iRegLdst dst, immL32hi16 src) %{
 5797   match(Set dst src);
 5798   ins_cost(DEFAULT_COST);
 5799 
 5800   format %{ "LIS     $dst, $src.hi \t// long" %}
 5801   size(4);
 5802   ins_encode %{
 5803     __ lis($dst$$Register, (int)((short)(($src$$constant & 0xFFFF0000) >> 16)));
 5804   %}
 5805   ins_pipe(pipe_class_default);
 5806 %}
 5807 
 5808 // To load a 32 bit constant: merge lower 16 bits into already loaded
 5809 // high 16 bits.
 5810 instruct loadConL32_lo16(iRegLdst dst, iRegLsrc src1, immL16 src2) %{
 5811   // no match-rule, false predicate
 5812   effect(DEF dst, USE src1, USE src2);
 5813   predicate(false);
 5814 
 5815   format %{ "ORI     $dst, $src1, $src2.lo" %}
 5816   size(4);
 5817   ins_encode %{
 5818     __ ori($dst$$Register, $src1$$Register, ($src2$$constant) & 0xFFFF);
 5819   %}
 5820   ins_pipe(pipe_class_default);
 5821 %}
 5822 
 5823 // Load 32-bit long constant
 5824 instruct loadConL32_Ex(iRegLdst dst, immL32 src) %{
 5825   match(Set dst src);
 5826   ins_cost(DEFAULT_COST*2);
 5827 
 5828   expand %{
 5829     // Would like to use $src$$constant.
 5830     immL16     srcLo %{ _opnds[1]->constant() /*& 0x0000FFFFL */%}
 5831     // srcHi can be 0000 if srcLo sign-extends to a negative number.
 5832     immL32hi16 srcHi %{ _opnds[1]->constant() /*& 0xFFFF0000L */%}
 5833     iRegLdst tmpL;
 5834     loadConL32hi16(tmpL, srcHi);
 5835     loadConL32_lo16(dst, tmpL, srcLo);
 5836   %}
 5837 %}
 5838 
 5839 // Load 34-bit long constant using prefixed addi. No constant pool entries required.
 5840 instruct loadConL34(iRegLdst dst, immL34 src) %{
 5841   match(Set dst src);
 5842   // This macro is valid only in Power 10 and up, but adding the following predicate here
 5843   // caused a build error, so we comment it out for now.
 5844   // predicate(PowerArchitecturePPC64 >= 10);
 5845   ins_cost(DEFAULT_COST+1);
 5846 
 5847   format %{ "PLI     $dst, $src \t// long" %}
 5848   size(8);
 5849   ins_encode %{
 5850     assert( ((intptr_t)(__ pc()) & 0x3c) != 0x3c, "Bad alignment for prefixed instruction at " INTPTR_FORMAT, (intptr_t)(__ pc()));
 5851     __ pli($dst$$Register, $src$$constant);
 5852   %}
 5853   ins_pipe(pipe_class_default);
 5854   ins_alignment(2);
 5855 %}
 5856 
 5857 // Load long constant 0x????000000000000.
 5858 instruct loadConLhighest16_Ex(iRegLdst dst, immLhighest16 src) %{
 5859   match(Set dst src);
 5860   ins_cost(DEFAULT_COST);
 5861 
 5862   expand %{
 5863     immL32hi16 srcHi %{ _opnds[1]->constant() >> 32 /*& 0xFFFF0000L */%}
 5864     immI shift32 %{ 32 %}
 5865     iRegLdst tmpL;
 5866     loadConL32hi16(tmpL, srcHi);
 5867     lshiftL_regL_immI(dst, tmpL, shift32);
 5868   %}
 5869 %}
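      // Example: loading 0xABCD000000000000 this way first emits LIS tmp, 0xABCD
      // (tmp = 0xFFFFFFFFABCD0000, sign-extended) and then shifts tmp left by 32,
      // dropping the sign-extension bits.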
 5870 
 5871 // Expand node for constant pool load: small offset.
 5872 instruct loadConL(iRegLdst dst, immL src, iRegLdst toc) %{
 5873   effect(DEF dst, USE src, USE toc);
 5874   ins_cost(MEMORY_REF_COST);
 5875 
 5876   ins_num_consts(1);
 5877   // Needed so that CallDynamicJavaDirect can compute the address of this
 5878   // instruction for relocation.
 5879   ins_field_cbuf_insts_offset(int);
 5880 
 5881   format %{ "LD      $dst, offset, $toc \t// load long $src from TOC" %}
 5882   size(4);
 5883   ins_encode( enc_load_long_constL(dst, src, toc) );
 5884   ins_pipe(pipe_class_memory);
 5885 %}
 5886 
 5887 // Expand node for constant pool load: large offset.
 5888 instruct loadConL_hi(iRegLdst dst, immL src, iRegLdst toc) %{
 5889   effect(DEF dst, USE src, USE toc);
 5890   predicate(false);
 5891 
 5892   ins_num_consts(1);
 5893   ins_field_const_toc_offset(int);
 5894   // Needed so that CallDynamicJavaDirect can compute the address of this
 5895   // instruction for relocation.
 5896   ins_field_cbuf_insts_offset(int);
 5897 
 5898   format %{ "ADDIS   $dst, $toc, offset \t// load long $src from TOC (hi)" %}
 5899   size(4);
 5900   ins_encode( enc_load_long_constL_hi(dst, toc, src) );
 5901   ins_pipe(pipe_class_default);
 5902 %}
 5903 
 5904 // Expand node for constant pool load: large offset.
 5905 // No constant pool entries required.
 5906 instruct loadConL_lo(iRegLdst dst, immL src, iRegLdst base) %{
 5907   effect(DEF dst, USE src, USE base);
 5908   predicate(false);
 5909 
 5910   ins_field_const_toc_offset_hi_node(loadConL_hiNode*);
 5911 
 5912   format %{ "LD      $dst, offset, $base \t// load long $src from TOC (lo)" %}
 5913   size(4);
 5914   ins_encode %{
 5915     int offset = ra_->C->output()->in_scratch_emit_size() ? 0 : _const_toc_offset_hi_node->_const_toc_offset;
 5916     __ ld($dst$$Register, MacroAssembler::largeoffset_si16_si16_lo(offset), $base$$Register);
 5917   %}
 5918   ins_pipe(pipe_class_memory);
 5919 %}
 5920 
 5921 // Load long constant from the constant table. Postalloc expands to a single LD,
 5922 // or to the hi/lo pair above if the TOC offset does not fit into 16 bits.
 5923 // Adlc adds the toc node MachConstantTableBase.
 5924 instruct loadConL_Ex(iRegLdst dst, immL src) %{
 5925   match(Set dst src);
 5926   ins_cost(MEMORY_REF_COST);
 5927 
 5928   format %{ "LD      $dst, offset, $constanttablebase\t// load long $src from table, postalloc expanded" %}
 5929   // We cannot inline the enc_class for the expand as that does not support constanttablebase.
 5930   postalloc_expand( postalloc_expand_load_long_constant(dst, src, constanttablebase) );
 5931 %}
 5932 
 5933 // Load NULL as compressed oop.
 5934 instruct loadConN0(iRegNdst dst, immN_0 src) %{
 5935   match(Set dst src);
 5936   ins_cost(DEFAULT_COST);
 5937 
 5938   format %{ "LI      $dst, $src \t// compressed ptr" %}
 5939   size(4);
 5940   ins_encode %{
 5941     __ li($dst$$Register, 0);
 5942   %}
 5943   ins_pipe(pipe_class_default);
 5944 %}
 5945 
 5946 // Load hi part of compressed oop constant.
 5947 instruct loadConN_hi(iRegNdst dst, immN src) %{
 5948   effect(DEF dst, USE src);
 5949   ins_cost(DEFAULT_COST);
 5950 
 5951   format %{ "LIS     $dst, $src \t// narrow oop hi" %}
 5952   size(4);
 5953   ins_encode %{
 5954     __ lis($dst$$Register, (int)(short)(($src$$constant >> 16) & 0xffff));
 5955   %}
 5956   ins_pipe(pipe_class_default);
 5957 %}
 5958 
 5959 // Add lo part of compressed oop constant to already loaded hi part.
 5960 instruct loadConN_lo(iRegNdst dst, iRegNsrc src1, immN src2) %{
 5961   effect(DEF dst, USE src1, USE src2);
 5962   ins_cost(DEFAULT_COST);
 5963 
 5964   format %{ "ORI     $dst, $src1, $src2 \t// narrow oop lo" %}
 5965   size(4);
 5966   ins_encode %{
 5967     assert(__ oop_recorder() != NULL, "this assembler needs an OopRecorder");
 5968     int oop_index = __ oop_recorder()->find_index((jobject)$src2$$constant);
 5969     RelocationHolder rspec = oop_Relocation::spec(oop_index);
 5970     __ relocate(rspec, 1);
 5971     __ ori($dst$$Register, $src1$$Register, $src2$$constant & 0xffff);
 5972   %}
 5973   ins_pipe(pipe_class_default);
 5974 %}
 5975 
 5976 instruct rldicl(iRegLdst dst, iRegLsrc src, immI16 shift, immI16 mask_begin) %{
 5977   effect(DEF dst, USE src, USE shift, USE mask_begin);
 5978 
 5979   size(4);
 5980   ins_encode %{
 5981     __ rldicl($dst$$Register, $src$$Register, $shift$$constant, $mask_begin$$constant);
 5982   %}
 5983   ins_pipe(pipe_class_default);
 5984 %}
 5985 
 5986 // Needed to postalloc expand loadConN: ConN is loaded as ConI
 5987 // leaving the upper 32 bits with sign-extension bits.
 5988 // This clears these bits: dst = src & 0xFFFFFFFF.
 5989 // TODO: Eventually call this maskN_regN_FFFFFFFF.
 5990 instruct clearMs32b(iRegNdst dst, iRegNsrc src) %{
 5991   effect(DEF dst, USE src);
 5992   predicate(false);
 5993 
 5994   format %{ "MASK    $dst, $src, 0xFFFFFFFF" %} // mask
 5995   size(4);
 5996   ins_encode %{
 5997     __ clrldi($dst$$Register, $src$$Register, 0x20);
 5998   %}
 5999   ins_pipe(pipe_class_default);
 6000 %}
 6001 
 6002 // Optimize DecodeN for disjoint base.
 6003 // Load base of compressed oops into a register
 6004 instruct loadBase(iRegLdst dst) %{
 6005   effect(DEF dst);
 6006 
 6007   format %{ "LoadConst $dst, heapbase" %}
 6008   ins_encode %{
 6009     __ load_const_optimized($dst$$Register, CompressedOops::base(), R0);
 6010   %}
 6011   ins_pipe(pipe_class_default);
 6012 %}
 6013 
 6014 // Loading ConN must be postalloc expanded so that edges between
 6015 // the nodes are safe. They may not interfere with a safepoint.
 6016 // GL TODO: This needs three instructions: better put this into the constant pool.
 6017 instruct loadConN_Ex(iRegNdst dst, immN src) %{
 6018   match(Set dst src);
 6019   ins_cost(DEFAULT_COST*2);
 6020 
 6021   format %{ "LoadN   $dst, $src \t// postalloc expanded" %} // mask
 6022   postalloc_expand %{
 6023     MachNode *m1 = new loadConN_hiNode();
 6024     MachNode *m2 = new loadConN_loNode();
 6025     MachNode *m3 = new clearMs32bNode();
 6026     m1->add_req(NULL);
 6027     m2->add_req(NULL, m1);
 6028     m3->add_req(NULL, m2);
 6029     m1->_opnds[0] = op_dst;
 6030     m1->_opnds[1] = op_src;
 6031     m2->_opnds[0] = op_dst;
 6032     m2->_opnds[1] = op_dst;
 6033     m2->_opnds[2] = op_src;
 6034     m3->_opnds[0] = op_dst;
 6035     m3->_opnds[1] = op_dst;
 6036     ra_->set_pair(m1->_idx, ra_->get_reg_second(this), ra_->get_reg_first(this));
 6037     ra_->set_pair(m2->_idx, ra_->get_reg_second(this), ra_->get_reg_first(this));
 6038     ra_->set_pair(m3->_idx, ra_->get_reg_second(this), ra_->get_reg_first(this));
 6039     nodes->push(m1);
 6040     nodes->push(m2);
 6041     nodes->push(m3);
 6042   %}
 6043 %}
 6044 
 6045 // We have seen a safepoint between the hi and lo parts, and this node was handled
 6046 // as an oop. Therefore this needs a match rule so that build_oop_map knows this is
 6047 // not a narrow oop.
 6048 instruct loadConNKlass_hi(iRegNdst dst, immNKlass_NM src) %{
 6049   match(Set dst src);
 6050   effect(DEF dst, USE src);
 6051   ins_cost(DEFAULT_COST);
 6052 
 6053   format %{ "LIS     $dst, $src \t// narrow klass hi" %}
 6054   size(4);
 6055   ins_encode %{
 6056     intptr_t Csrc = CompressedKlassPointers::encode((Klass *)$src$$constant);
 6057     __ lis($dst$$Register, (int)(short)((Csrc >> 16) & 0xffff));
 6058   %}
 6059   ins_pipe(pipe_class_default);
 6060 %}
 6061 
// Like loadConNKlass_hi, this must be recognized as a narrow klass, not an oop!
 6063 instruct loadConNKlass_mask(iRegNdst dst, immNKlass_NM src1, iRegNsrc src2) %{
 6064   match(Set dst src1);
 6065   effect(TEMP src2);
 6066   ins_cost(DEFAULT_COST);
 6067 
 6068   format %{ "MASK    $dst, $src2, 0xFFFFFFFF" %} // mask
 6069   size(4);
 6070   ins_encode %{
 6071     __ clrldi($dst$$Register, $src2$$Register, 0x20);
 6072   %}
 6073   ins_pipe(pipe_class_default);
 6074 %}
 6075 
 6076 // This needs a match rule so that build_oop_map knows this is
 6077 // not a narrow oop.
 6078 instruct loadConNKlass_lo(iRegNdst dst, immNKlass_NM src1, iRegNsrc src2) %{
 6079   match(Set dst src1);
 6080   effect(TEMP src2);
 6081   ins_cost(DEFAULT_COST);
 6082 
 6083   format %{ "ORI     $dst, $src1, $src2 \t// narrow klass lo" %}
 6084   size(4);
 6085   ins_encode %{
 6086     intptr_t Csrc = CompressedKlassPointers::encode((Klass *)$src1$$constant);
 6087     assert(__ oop_recorder() != NULL, "this assembler needs an OopRecorder");
 6088     int klass_index = __ oop_recorder()->find_index((Klass *)$src1$$constant);
 6089     RelocationHolder rspec = metadata_Relocation::spec(klass_index);
 6090 
 6091     __ relocate(rspec, 1);
 6092     __ ori($dst$$Register, $src2$$Register, Csrc & 0xffff);
 6093   %}
 6094   ins_pipe(pipe_class_default);
 6095 %}
 6096 
 6097 // Loading ConNKlass must be postalloc expanded so that edges between
 6098 // the nodes are safe. They may not interfere with a safepoint.
 6099 instruct loadConNKlass_Ex(iRegNdst dst, immNKlass src) %{
 6100   match(Set dst src);
 6101   ins_cost(DEFAULT_COST*2);
 6102 
 6103   format %{ "LoadN   $dst, $src \t// postalloc expanded" %} // mask
 6104   postalloc_expand %{
 6105     // Load high bits into register. Sign extended.
 6106     MachNode *m1 = new loadConNKlass_hiNode();
 6107     m1->add_req(NULL);
 6108     m1->_opnds[0] = op_dst;
 6109     m1->_opnds[1] = op_src;
 6110     ra_->set_pair(m1->_idx, ra_->get_reg_second(this), ra_->get_reg_first(this));
 6111     nodes->push(m1);
 6112 
 6113     MachNode *m2 = m1;
 6114     if (!Assembler::is_uimm((jlong)CompressedKlassPointers::encode((Klass *)op_src->constant()), 31)) {
 6115       // Value might be 1-extended. Mask out these bits.
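      // Illustrative arithmetic: for Csrc = 0x89ABCDEF, LIS yields
      // 0xFFFFFFFF89AB0000 (hi16 sign-extended), the mask node clears the
      // upper word to 0x0000000089AB0000, and the following ORI fills in the
      // low half-word, giving 0x0000000089ABCDEF. If Csrc fits into 31
      // unsigned bits, bit 31 is clear, LIS does not sign-extend into the
      // upper word, and the mask node is not needed.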
 6116       m2 = new loadConNKlass_maskNode();
 6117       m2->add_req(NULL, m1);
 6118       m2->_opnds[0] = op_dst;
 6119       m2->_opnds[1] = op_src;
 6120       m2->_opnds[2] = op_dst;
 6121       ra_->set_pair(m2->_idx, ra_->get_reg_second(this), ra_->get_reg_first(this));
 6122       nodes->push(m2);
 6123     }
 6124 
 6125     MachNode *m3 = new loadConNKlass_loNode();
 6126     m3->add_req(NULL, m2);
 6127     m3->_opnds[0] = op_dst;
 6128     m3->_opnds[1] = op_src;
 6129     m3->_opnds[2] = op_dst;
 6130     ra_->set_pair(m3->_idx, ra_->get_reg_second(this), ra_->get_reg_first(this));
 6131     nodes->push(m3);
 6132   %}
 6133 %}
 6134 
 6135 // 0x1 is used in object initialization (initial object header).
 6136 // No constant pool entries required.
 6137 instruct loadConP0or1(iRegPdst dst, immP_0or1 src) %{
 6138   match(Set dst src);
 6139 
 6140   format %{ "LI      $dst, $src \t// ptr" %}
 6141   size(4);
 6142   ins_encode %{
 6143     __ li($dst$$Register, (int)((short)($src$$constant & 0xFFFF)));
 6144   %}
 6145   ins_pipe(pipe_class_default);
 6146 %}
 6147 
 6148 // Expand node for constant pool load: small offset.
 6149 // The match rule is needed to generate the correct bottom_type(),
 6150 // however this node should never match. The use of predicate is not
 6151 // possible since ADLC forbids predicates for chain rules. The higher
 6152 // costs do not prevent matching in this case. For that reason the
 6153 // operand immP_NM with predicate(false) is used.
 6154 instruct loadConP(iRegPdst dst, immP_NM src, iRegLdst toc) %{
 6155   match(Set dst src);
 6156   effect(TEMP toc);
 6157 
 6158   ins_num_consts(1);
 6159 
 6160   format %{ "LD      $dst, offset, $toc \t// load ptr $src from TOC" %}
 6161   size(4);
 6162   ins_encode( enc_load_long_constP(dst, src, toc) );
 6163   ins_pipe(pipe_class_memory);
 6164 %}
 6165 
 6166 // Expand node for constant pool load: large offset.
 6167 instruct loadConP_hi(iRegPdst dst, immP_NM src, iRegLdst toc) %{
 6168   effect(DEF dst, USE src, USE toc);
 6169   predicate(false);
 6170 
 6171   ins_num_consts(1);
 6172   ins_field_const_toc_offset(int);
 6173 
 6174   format %{ "ADDIS   $dst, $toc, offset \t// load ptr $src from TOC (hi)" %}
 6175   size(4);
 6176   ins_encode( enc_load_long_constP_hi(dst, src, toc) );
 6177   ins_pipe(pipe_class_default);
 6178 %}
 6179 
 6180 // Expand node for constant pool load: large offset.
 6181 instruct loadConP_lo(iRegPdst dst, immP_NM src, iRegLdst base) %{
 6182   match(Set dst src);
 6183   effect(TEMP base);
 6184 
 6185   ins_field_const_toc_offset_hi_node(loadConP_hiNode*);
 6186 
 6187   format %{ "LD      $dst, offset, $base \t// load ptr $src from TOC (lo)" %}
 6188   size(4);
 6189   ins_encode %{
 6190     int offset = ra_->C->output()->in_scratch_emit_size() ? 0 : _const_toc_offset_hi_node->_const_toc_offset;
 6191     __ ld($dst$$Register, MacroAssembler::largeoffset_si16_si16_lo(offset), $base$$Register);
 6192   %}
 6193   ins_pipe(pipe_class_memory);
 6194 %}
 6195 
 6196 // Load pointer constant from constant table. Expand in case an
 6197 // offset > 16 bit is needed.
 6198 // Adlc adds toc node MachConstantTableBase.
 6199 instruct loadConP_Ex(iRegPdst dst, immP src) %{
 6200   match(Set dst src);
 6201   ins_cost(MEMORY_REF_COST);
 6202 
 6203   // This rule does not use "expand" because then
 6204   // the result type is not known to be an Oop.  An ADLC
 6205   // enhancement will be needed to make that work - not worth it!
 6206 
 6207   // If this instruction rematerializes, it prolongs the live range
 6208   // of the toc node, causing illegal graphs.
 6209   // assert(edge_from_to(_reg_node[reg_lo],def)) fails in verify_good_schedule().
 6210   ins_cannot_rematerialize(true);
 6211 
 6212   format %{ "LD    $dst, offset, $constanttablebase \t//  load ptr $src from table, postalloc expanded" %}
 6213   postalloc_expand( postalloc_expand_load_ptr_constant(dst, src, constanttablebase) );
 6214 %}
 6215 
 6216 // Expand node for constant pool load: small offset.
 6217 instruct loadConF(regF dst, immF src, iRegLdst toc) %{
 6218   effect(DEF dst, USE src, USE toc);
 6219   ins_cost(MEMORY_REF_COST);
 6220 
 6221   ins_num_consts(1);
 6222 
 6223   format %{ "LFS     $dst, offset, $toc \t// load float $src from TOC" %}
 6224   size(4);
 6225   ins_encode %{
 6226     address float_address = __ float_constant($src$$constant);
 6227     if (float_address == NULL) {
 6228       ciEnv::current()->record_out_of_memory_failure();
 6229       return;
 6230     }
 6231     __ lfs($dst$$FloatRegister, __ offset_to_method_toc(float_address), $toc$$Register);
 6232   %}
 6233   ins_pipe(pipe_class_memory);
 6234 %}
 6235 
 6236 // Expand node for constant pool load: large offset.
 6237 instruct loadConFComp(regF dst, immF src, iRegLdst toc) %{
 6238   effect(DEF dst, USE src, USE toc);
 6239   ins_cost(MEMORY_REF_COST);
 6240 
 6241   ins_num_consts(1);
 6242 
 6243   format %{ "ADDIS   $toc, $toc, offset_hi\n\t"
 6244             "LFS     $dst, offset_lo, $toc \t// load float $src from TOC (hi/lo)\n\t"
 6245             "ADDIS   $toc, $toc, -offset_hi"%}
 6246   size(12);
 6247   ins_encode %{
 6248     FloatRegister Rdst    = $dst$$FloatRegister;
 6249     Register Rtoc         = $toc$$Register;
 6250     address float_address = __ float_constant($src$$constant);
 6251     if (float_address == NULL) {
 6252       ciEnv::current()->record_out_of_memory_failure();
 6253       return;
 6254     }
 6255     int offset            = __ offset_to_method_toc(float_address);
 6256     int hi = (offset + (1<<15))>>16;
 6257     int lo = offset - hi * (1<<16);
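    // This decomposition keeps lo within a signed 16-bit displacement and
    // guarantees hi * 0x10000 + lo == offset. Illustrative values: offset =
    // 0x1C000 gives hi = 2, lo = -0x4000; ADDIS adds 0x20000 to the TOC and
    // the -0x4000 displacement of the LFS lands back on offset 0x1C000.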
 6258 
 6259     __ addis(Rtoc, Rtoc, hi);
 6260     __ lfs(Rdst, lo, Rtoc);
 6261     __ addis(Rtoc, Rtoc, -hi);
 6262   %}
 6263   ins_pipe(pipe_class_memory);
 6264 %}
 6265 
 6266 // Adlc adds toc node MachConstantTableBase.
 6267 instruct loadConF_Ex(regF dst, immF src) %{
 6268   match(Set dst src);
 6269   ins_cost(MEMORY_REF_COST);
 6270 
 6271   // See loadConP.
 6272   ins_cannot_rematerialize(true);
 6273 
 6274   format %{ "LFS     $dst, offset, $constanttablebase \t// load $src from table, postalloc expanded" %}
 6275   postalloc_expand( postalloc_expand_load_float_constant(dst, src, constanttablebase) );
 6276 %}
 6277 
 6278 // Expand node for constant pool load: small offset.
 6279 instruct loadConD(regD dst, immD src, iRegLdst toc) %{
 6280   effect(DEF dst, USE src, USE toc);
 6281   ins_cost(MEMORY_REF_COST);
 6282 
 6283   ins_num_consts(1);
 6284 
 6285   format %{ "LFD     $dst, offset, $toc \t// load double $src from TOC" %}
 6286   size(4);
 6287   ins_encode %{
 6288     address float_address = __ double_constant($src$$constant);
 6289     if (float_address == NULL) {
 6290       ciEnv::current()->record_out_of_memory_failure();
 6291       return;
 6292     }
 6293     int offset =  __ offset_to_method_toc(float_address);
 6294     __ lfd($dst$$FloatRegister, offset, $toc$$Register);
 6295   %}
 6296   ins_pipe(pipe_class_memory);
 6297 %}
 6298 
 6299 // Expand node for constant pool load: large offset.
 6300 instruct loadConDComp(regD dst, immD src, iRegLdst toc) %{
 6301   effect(DEF dst, USE src, USE toc);
 6302   ins_cost(MEMORY_REF_COST);
 6303 
 6304   ins_num_consts(1);
 6305 
 6306   format %{ "ADDIS   $toc, $toc, offset_hi\n\t"
 6307             "LFD     $dst, offset_lo, $toc \t// load double $src from TOC (hi/lo)\n\t"
 6308             "ADDIS   $toc, $toc, -offset_hi" %}
 6309   size(12);
 6310   ins_encode %{
 6311     FloatRegister Rdst    = $dst$$FloatRegister;
 6312     Register      Rtoc    = $toc$$Register;
 6313     address float_address = __ double_constant($src$$constant);
 6314     if (float_address == NULL) {
 6315       ciEnv::current()->record_out_of_memory_failure();
 6316       return;
 6317     }
 6318     int offset = __ offset_to_method_toc(float_address);
 6319     int hi = (offset + (1<<15))>>16;
 6320     int lo = offset - hi * (1<<16);
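    // Same si16/si16 decomposition as in loadConFComp above: lo stays within
    // a signed 16-bit displacement and hi * 0x10000 + lo == offset.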
 6321 
 6322     __ addis(Rtoc, Rtoc, hi);
 6323     __ lfd(Rdst, lo, Rtoc);
 6324     __ addis(Rtoc, Rtoc, -hi);
 6325   %}
 6326   ins_pipe(pipe_class_memory);
 6327 %}
 6328 
 6329 // Adlc adds toc node MachConstantTableBase.
 6330 instruct loadConD_Ex(regD dst, immD src) %{
 6331   match(Set dst src);
 6332   ins_cost(MEMORY_REF_COST);
 6333 
 6334   // See loadConP.
 6335   ins_cannot_rematerialize(true);
 6336 
 6337   format %{ "ConD    $dst, offset, $constanttablebase \t// load $src from table, postalloc expanded" %}
 6338   postalloc_expand( postalloc_expand_load_double_constant(dst, src, constanttablebase) );
 6339 %}
 6340 
 6341 // Prefetch instructions.
 6342 // Must be safe to execute with invalid address (cannot fault).
 6343 
 6344 // Special prefetch versions which use the dcbz instruction.
 6345 instruct prefetch_alloc_zero(indirectMemory mem, iRegLsrc src) %{
 6346   match(PrefetchAllocation (AddP mem src));
 6347   predicate(AllocatePrefetchStyle == 3);
 6348   ins_cost(MEMORY_REF_COST);
 6349 
 6350   format %{ "PREFETCH $mem, 2, $src \t// Prefetch write-many with zero" %}
 6351   size(4);
 6352   ins_encode %{
 6353     __ dcbz($src$$Register, $mem$$base$$Register);
 6354   %}
 6355   ins_pipe(pipe_class_memory);
 6356 %}
 6357 
 6358 instruct prefetch_alloc_zero_no_offset(indirectMemory mem) %{
 6359   match(PrefetchAllocation mem);
 6360   predicate(AllocatePrefetchStyle == 3);
 6361   ins_cost(MEMORY_REF_COST);
 6362 
 6363   format %{ "PREFETCH $mem, 2 \t// Prefetch write-many with zero" %}
 6364   size(4);
 6365   ins_encode %{
 6366     __ dcbz($mem$$base$$Register);
 6367   %}
 6368   ins_pipe(pipe_class_memory);
 6369 %}
 6370 
 6371 instruct prefetch_alloc(indirectMemory mem, iRegLsrc src) %{
 6372   match(PrefetchAllocation (AddP mem src));
 6373   predicate(AllocatePrefetchStyle != 3);
 6374   ins_cost(MEMORY_REF_COST);
 6375 
 6376   format %{ "PREFETCH $mem, 2, $src \t// Prefetch write-many" %}
 6377   size(4);
 6378   ins_encode %{
 6379     __ dcbtst($src$$Register, $mem$$base$$Register);
 6380   %}
 6381   ins_pipe(pipe_class_memory);
 6382 %}
 6383 
 6384 instruct prefetch_alloc_no_offset(indirectMemory mem) %{
 6385   match(PrefetchAllocation mem);
 6386   predicate(AllocatePrefetchStyle != 3);
 6387   ins_cost(MEMORY_REF_COST);
 6388 
 6389   format %{ "PREFETCH $mem, 2 \t// Prefetch write-many" %}
 6390   size(4);
 6391   ins_encode %{
 6392     __ dcbtst($mem$$base$$Register);
 6393   %}
 6394   ins_pipe(pipe_class_memory);
 6395 %}
 6396 
 6397 //----------Store Instructions-------------------------------------------------
 6398 
 6399 // Store Byte
 6400 instruct storeB(memory mem, iRegIsrc src) %{
 6401   match(Set mem (StoreB mem src));
 6402   ins_cost(MEMORY_REF_COST);
 6403 
 6404   format %{ "STB     $src, $mem \t// byte" %}
 6405   size(4);
 6406   ins_encode %{
 6407     int Idisp = $mem$$disp + frame_slots_bias($mem$$base, ra_);
 6408     __ stb($src$$Register, Idisp, $mem$$base$$Register);
 6409   %}
 6410   ins_pipe(pipe_class_memory);
 6411 %}
 6412 
 6413 // Store Char/Short
 6414 instruct storeC(memory mem, iRegIsrc src) %{
 6415   match(Set mem (StoreC mem src));
 6416   ins_cost(MEMORY_REF_COST);
 6417 
 6418   format %{ "STH     $src, $mem \t// short" %}
 6419   size(4);
 6420   ins_encode %{
 6421     int Idisp = $mem$$disp + frame_slots_bias($mem$$base, ra_);
 6422     __ sth($src$$Register, Idisp, $mem$$base$$Register);
 6423   %}
 6424   ins_pipe(pipe_class_memory);
 6425 %}
 6426 
 6427 // Store Integer
 6428 instruct storeI(memory mem, iRegIsrc src) %{
 6429   match(Set mem (StoreI mem src));
 6430   ins_cost(MEMORY_REF_COST);
 6431 
 6432   format %{ "STW     $src, $mem" %}
 6433   size(4);
 6434   ins_encode( enc_stw(src, mem) );
 6435   ins_pipe(pipe_class_memory);
 6436 %}
 6437 
 6438 // ConvL2I + StoreI.
 6439 instruct storeI_convL2I(memory mem, iRegLsrc src) %{
 6440   match(Set mem (StoreI mem (ConvL2I src)));
 6441   ins_cost(MEMORY_REF_COST);
 6442 
 6443   format %{ "STW     l2i($src), $mem" %}
 6444   size(4);
 6445   ins_encode( enc_stw(src, mem) );
 6446   ins_pipe(pipe_class_memory);
 6447 %}
 6448 
 6449 // Store Long
 6450 instruct storeL(memoryAlg4 mem, iRegLsrc src) %{
 6451   match(Set mem (StoreL mem src));
 6452   ins_cost(MEMORY_REF_COST);
 6453 
 6454   format %{ "STD     $src, $mem \t// long" %}
 6455   size(4);
 6456   ins_encode( enc_std(src, mem) );
 6457   ins_pipe(pipe_class_memory);
 6458 %}
 6459 
 6460 // Store super word nodes.
 6461 
 6462 // Store Aligned Packed Byte long register to memory
 6463 instruct storeA8B(memoryAlg4 mem, iRegLsrc src) %{
 6464   predicate(n->as_StoreVector()->memory_size() == 8);
 6465   match(Set mem (StoreVector mem src));
 6466   ins_cost(MEMORY_REF_COST);
 6467 
 6468   format %{ "STD     $mem, $src \t// packed8B" %}
 6469   size(4);
 6470   ins_encode( enc_std(src, mem) );
 6471   ins_pipe(pipe_class_memory);
 6472 %}
 6473 
// Store 16-byte packed vector register to memory
 6475 instruct storeV16(indirect mem, vecX src) %{
 6476   predicate(n->as_StoreVector()->memory_size() == 16);
 6477   match(Set mem (StoreVector mem src));
 6478   ins_cost(MEMORY_REF_COST);
 6479 
 6480   format %{ "STXVD2X     $mem, $src \t// store 16-byte Vector" %}
 6481   size(4);
 6482   ins_encode %{
 6483     __ stxvd2x($src$$VectorSRegister, $mem$$Register);
 6484   %}
 6485   ins_pipe(pipe_class_default);
 6486 %}
 6487 
 6488 // Reinterpret: only one vector size used: either L or X
 6489 instruct reinterpretL(iRegLdst dst) %{
 6490   match(Set dst (VectorReinterpret dst));
 6491   ins_cost(0);
 6492   format %{ "reinterpret $dst" %}
 6493   ins_encode( /*empty*/ );
 6494   ins_pipe(pipe_class_empty);
 6495 %}
 6496 
 6497 instruct reinterpretX(vecX dst) %{
 6498   match(Set dst (VectorReinterpret dst));
 6499   ins_cost(0);
 6500   format %{ "reinterpret $dst" %}
 6501   ins_encode( /*empty*/ );
 6502   ins_pipe(pipe_class_empty);
 6503 %}
 6504 
 6505 // Store Compressed Oop
 6506 instruct storeN(memory dst, iRegN_P2N src) %{
 6507   match(Set dst (StoreN dst src));
 6508   ins_cost(MEMORY_REF_COST);
 6509 
 6510   format %{ "STW     $src, $dst \t// compressed oop" %}
 6511   size(4);
 6512   ins_encode( enc_stw(src, dst) );
 6513   ins_pipe(pipe_class_memory);
 6514 %}
 6515 
// Store Compressed Klass
 6517 instruct storeNKlass(memory dst, iRegN_P2N src) %{
 6518   match(Set dst (StoreNKlass dst src));
 6519   ins_cost(MEMORY_REF_COST);
 6520 
 6521   format %{ "STW     $src, $dst \t// compressed klass" %}
 6522   size(4);
 6523   ins_encode( enc_stw(src, dst) );
 6524   ins_pipe(pipe_class_memory);
 6525 %}
 6526 
 6527 // Store Pointer
 6528 instruct storeP(memoryAlg4 dst, iRegPsrc src) %{
 6529   match(Set dst (StoreP dst src));
 6530   ins_cost(MEMORY_REF_COST);
 6531 
 6532   format %{ "STD     $src, $dst \t// ptr" %}
 6533   size(4);
 6534   ins_encode( enc_std(src, dst) );
 6535   ins_pipe(pipe_class_memory);
 6536 %}
 6537 
 6538 // Store Float
 6539 instruct storeF(memory mem, regF src) %{
 6540   match(Set mem (StoreF mem src));
 6541   ins_cost(MEMORY_REF_COST);
 6542 
 6543   format %{ "STFS    $src, $mem" %}
 6544   size(4);
 6545   ins_encode( enc_stfs(src, mem) );
 6546   ins_pipe(pipe_class_memory);
 6547 %}
 6548 
 6549 // Store Double
 6550 instruct storeD(memory mem, regD src) %{
 6551   match(Set mem (StoreD mem src));
 6552   ins_cost(MEMORY_REF_COST);
 6553 
 6554   format %{ "STFD    $src, $mem" %}
 6555   size(4);
 6556   ins_encode( enc_stfd(src, mem) );
 6557   ins_pipe(pipe_class_memory);
 6558 %}
 6559 
 6560 //----------Store Instructions With Zeros--------------------------------------
 6561 
 6562 instruct storeCM(memory mem, immI_0 zero) %{
 6563   match(Set mem (StoreCM mem zero));
 6564   ins_cost(MEMORY_REF_COST);
 6565 
 6566   format %{ "STB     #0, $mem \t// CMS card-mark byte store" %}
 6567   size(8);
 6568   ins_encode %{
 6569     __ li(R0, 0);
    // No release barrier: Oops are allowed to become visible after marking.
 6571     guarantee($mem$$base$$Register != R1_SP, "use frame_slots_bias");
 6572     __ stb(R0, $mem$$disp, $mem$$base$$Register);
 6573   %}
 6574   ins_pipe(pipe_class_memory);
 6575 %}
 6576 
 6577 // Convert oop pointer into compressed form.
 6578 
 6579 // Nodes for postalloc expand.
 6580 
 6581 // Shift node for expand.
 6582 instruct encodeP_shift(iRegNdst dst, iRegNsrc src) %{
 6583   // The match rule is needed to make it a 'MachTypeNode'!
 6584   match(Set dst (EncodeP src));
 6585   predicate(false);
 6586 
 6587   format %{ "SRDI    $dst, $src, 3 \t// encode" %}
 6588   size(4);
 6589   ins_encode %{
 6590     __ srdi($dst$$Register, $src$$Register, CompressedOops::shift() & 0x3f);
 6591   %}
 6592   ins_pipe(pipe_class_default);
 6593 %}
 6594 
// Subtract node for expand.
 6596 instruct encodeP_sub(iRegPdst dst, iRegPdst src) %{
 6597   // The match rule is needed to make it a 'MachTypeNode'!
 6598   match(Set dst (EncodeP src));
 6599   predicate(false);
 6600 
 6601   format %{ "SUB     $dst, $src, oop_base \t// encode" %}
 6602   ins_encode %{
 6603     __ sub_const_optimized($dst$$Register, $src$$Register, CompressedOops::base(), R0);
 6604   %}
 6605   ins_pipe(pipe_class_default);
 6606 %}
 6607 
 6608 // Conditional sub base.
 6609 instruct cond_sub_base(iRegNdst dst, flagsRegSrc crx, iRegPsrc src1) %{
 6610   // The match rule is needed to make it a 'MachTypeNode'!
 6611   match(Set dst (EncodeP (Binary crx src1)));
 6612   predicate(false);
 6613 
 6614   format %{ "BEQ     $crx, done\n\t"
 6615             "SUB     $dst, $src1, heapbase \t// encode: subtract base if != NULL\n"
 6616             "done:" %}
 6617   ins_encode %{
 6618     Label done;
 6619     __ beq($crx$$CondRegister, done);
 6620     __ sub_const_optimized($dst$$Register, $src1$$Register, CompressedOops::base(), R0);
 6621     __ bind(done);
 6622   %}
 6623   ins_pipe(pipe_class_default);
 6624 %}
 6625 
// Power7 can use the isel instruction.
 6627 instruct cond_set_0_oop(iRegNdst dst, flagsRegSrc crx, iRegPsrc src1) %{
 6628   // The match rule is needed to make it a 'MachTypeNode'!
 6629   match(Set dst (EncodeP (Binary crx src1)));
 6630   predicate(false);
 6631 
 6632   format %{ "CMOVE   $dst, $crx eq, 0, $src1 \t// encode: preserve 0" %}
 6633   size(4);
 6634   ins_encode %{
 6635     // This is a Power7 instruction for which no machine description exists.
 6636     __ isel_0($dst$$Register, $crx$$CondRegister, Assembler::equal, $src1$$Register);
 6637   %}
 6638   ins_pipe(pipe_class_default);
 6639 %}
 6640 
 6641 // Disjoint narrow oop base.
 6642 instruct encodeP_Disjoint(iRegNdst dst, iRegPsrc src) %{
 6643   match(Set dst (EncodeP src));
 6644   predicate(CompressedOops::base_disjoint());
 6645 
 6646   format %{ "EXTRDI  $dst, $src, #32, #3 \t// encode with disjoint base" %}
 6647   size(4);
 6648   ins_encode %{
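    // rldicl(dst, src, 64-shift, 32) rotates right by shift and keeps only the
    // low 32 bits, i.e. dst = (src >> shift) & 0xFFFFFFFF. With a disjoint
    // base, the base bits sit at or above bit 32+shift and therefore end up in
    // the cleared upper half.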
 6649     __ rldicl($dst$$Register, $src$$Register, 64-CompressedOops::shift(), 32);
 6650   %}
 6651   ins_pipe(pipe_class_default);
 6652 %}
 6653 
 6654 // shift != 0, base != 0
 6655 instruct encodeP_Ex(iRegNdst dst, flagsReg crx, iRegPsrc src) %{
 6656   match(Set dst (EncodeP src));
 6657   effect(TEMP crx);
 6658   predicate(n->bottom_type()->make_ptr()->ptr() != TypePtr::NotNull &&
 6659             CompressedOops::shift() != 0 &&
 6660             CompressedOops::base_overlaps());
 6661 
 6662   format %{ "EncodeP $dst, $crx, $src \t// postalloc expanded" %}
 6663   postalloc_expand( postalloc_expand_encode_oop(dst, src, crx));
 6664 %}
 6665 
 6666 // shift != 0, base != 0
 6667 instruct encodeP_not_null_Ex(iRegNdst dst, iRegPsrc src) %{
 6668   match(Set dst (EncodeP src));
 6669   predicate(n->bottom_type()->make_ptr()->ptr() == TypePtr::NotNull &&
 6670             CompressedOops::shift() != 0 &&
 6671             CompressedOops::base_overlaps());
 6672 
 6673   format %{ "EncodeP $dst, $src\t// $src != Null, postalloc expanded" %}
 6674   postalloc_expand( postalloc_expand_encode_oop_not_null(dst, src) );
 6675 %}
 6676 
 6677 // shift != 0, base == 0
 6678 // TODO: This is the same as encodeP_shift. Merge!
 6679 instruct encodeP_not_null_base_null(iRegNdst dst, iRegPsrc src) %{
 6680   match(Set dst (EncodeP src));
 6681   predicate(CompressedOops::shift() != 0 &&
            CompressedOops::base() == 0);
 6683 
 6684   format %{ "SRDI    $dst, $src, #3 \t// encodeP, $src != NULL" %}
 6685   size(4);
 6686   ins_encode %{
 6687     __ srdi($dst$$Register, $src$$Register, CompressedOops::shift() & 0x3f);
 6688   %}
 6689   ins_pipe(pipe_class_default);
 6690 %}
 6691 
 6692 // Compressed OOPs with narrow_oop_shift == 0.
 6693 // shift == 0, base == 0
 6694 instruct encodeP_narrow_oop_shift_0(iRegNdst dst, iRegPsrc src) %{
 6695   match(Set dst (EncodeP src));
 6696   predicate(CompressedOops::shift() == 0);
 6697 
 6698   format %{ "MR      $dst, $src \t// Ptr->Narrow" %}
 6699   // variable size, 0 or 4.
 6700   ins_encode %{
 6701     __ mr_if_needed($dst$$Register, $src$$Register);
 6702   %}
 6703   ins_pipe(pipe_class_default);
 6704 %}
 6705 
 6706 // Decode nodes.
 6707 
 6708 // Shift node for expand.
 6709 instruct decodeN_shift(iRegPdst dst, iRegPsrc src) %{
 6710   // The match rule is needed to make it a 'MachTypeNode'!
 6711   match(Set dst (DecodeN src));
 6712   predicate(false);
 6713 
 6714   format %{ "SLDI    $dst, $src, #3 \t// DecodeN" %}
 6715   size(4);
 6716   ins_encode %{
 6717     __ sldi($dst$$Register, $src$$Register, CompressedOops::shift());
 6718   %}
 6719   ins_pipe(pipe_class_default);
 6720 %}
 6721 
 6722 // Add node for expand.
 6723 instruct decodeN_add(iRegPdst dst, iRegPdst src) %{
 6724   // The match rule is needed to make it a 'MachTypeNode'!
 6725   match(Set dst (DecodeN src));
 6726   predicate(false);
 6727 
 6728   format %{ "ADD     $dst, $src, heapbase \t// DecodeN, add oop base" %}
 6729   ins_encode %{
 6730     __ add_const_optimized($dst$$Register, $src$$Register, CompressedOops::base(), R0);
 6731   %}
 6732   ins_pipe(pipe_class_default);
 6733 %}
 6734 
// Conditional add base for expand.
 6736 instruct cond_add_base(iRegPdst dst, flagsRegSrc crx, iRegPsrc src) %{
 6737   // The match rule is needed to make it a 'MachTypeNode'!
 6738   // NOTICE that the rule is nonsense - we just have to make sure that:
 6739   //  - _matrule->_rChild->_opType == "DecodeN" (see InstructForm::captures_bottom_type() in formssel.cpp)
 6740   //  - we have to match 'crx' to avoid an "illegal USE of non-input: flagsReg crx" error in ADLC.
 6741   match(Set dst (DecodeN (Binary crx src)));
 6742   predicate(false);
 6743 
 6744   format %{ "BEQ     $crx, done\n\t"
 6745             "ADD     $dst, $src, heapbase \t// DecodeN: add oop base if $src != NULL\n"
 6746             "done:" %}
 6747   ins_encode %{
 6748     Label done;
 6749     __ beq($crx$$CondRegister, done);
 6750     __ add_const_optimized($dst$$Register, $src$$Register, CompressedOops::base(), R0);
 6751     __ bind(done);
 6752   %}
 6753   ins_pipe(pipe_class_default);
 6754 %}
 6755 
 6756 instruct cond_set_0_ptr(iRegPdst dst, flagsRegSrc crx, iRegPsrc src1) %{
 6757   // The match rule is needed to make it a 'MachTypeNode'!
 6758   // NOTICE that the rule is nonsense - we just have to make sure that:
 6759   //  - _matrule->_rChild->_opType == "DecodeN" (see InstructForm::captures_bottom_type() in formssel.cpp)
 6760   //  - we have to match 'crx' to avoid an "illegal USE of non-input: flagsReg crx" error in ADLC.
 6761   match(Set dst (DecodeN (Binary crx src1)));
 6762   predicate(false);
 6763 
 6764   format %{ "CMOVE   $dst, $crx eq, 0, $src1 \t// decode: preserve 0" %}
 6765   size(4);
 6766   ins_encode %{
 6767     // This is a Power7 instruction for which no machine description exists.
 6768     __ isel_0($dst$$Register, $crx$$CondRegister, Assembler::equal, $src1$$Register);
 6769   %}
 6770   ins_pipe(pipe_class_default);
 6771 %}
 6772 
 6773 //  shift != 0, base != 0
 6774 instruct decodeN_Ex(iRegPdst dst, iRegNsrc src, flagsReg crx) %{
 6775   match(Set dst (DecodeN src));
 6776   predicate((n->bottom_type()->is_oopptr()->ptr() != TypePtr::NotNull &&
 6777              n->bottom_type()->is_oopptr()->ptr() != TypePtr::Constant) &&
 6778             CompressedOops::shift() != 0 &&
 6779             CompressedOops::base() != 0);
 6780   ins_cost(4 * DEFAULT_COST); // Should be more expensive than decodeN_Disjoint_isel_Ex.
 6781   effect(TEMP crx);
 6782 
 6783   format %{ "DecodeN $dst, $src \t// Kills $crx, postalloc expanded" %}
 6784   postalloc_expand( postalloc_expand_decode_oop(dst, src, crx) );
 6785 %}
 6786 
 6787 // shift != 0, base == 0
 6788 instruct decodeN_nullBase(iRegPdst dst, iRegNsrc src) %{
 6789   match(Set dst (DecodeN src));
 6790   predicate(CompressedOops::shift() != 0 &&
 6791             CompressedOops::base() == 0);
 6792 
 6793   format %{ "SLDI    $dst, $src, #3 \t// DecodeN (zerobased)" %}
 6794   size(4);
 6795   ins_encode %{
 6796     __ sldi($dst$$Register, $src$$Register, CompressedOops::shift());
 6797   %}
 6798   ins_pipe(pipe_class_default);
 6799 %}
 6800 
 6801 // Optimize DecodeN for disjoint base.
 6802 // Shift narrow oop and or it into register that already contains the heap base.
// Base == dst must hold, and is ensured by construction in postalloc_expand.
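// RLDIMI rotates the narrow oop left by shift and inserts it into value bits
// [shift, 31+shift] of dst; a disjoint heap base has no bits in that range,
// so the masked insert is equivalent to dst = base | (narrow_oop << shift).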
 6804 instruct decodeN_mergeDisjoint(iRegPdst dst, iRegNsrc src, iRegLsrc base) %{
 6805   match(Set dst (DecodeN src));
 6806   effect(TEMP base);
 6807   predicate(false);
 6808 
 6809   format %{ "RLDIMI  $dst, $src, shift, 32-shift \t// DecodeN (disjoint base)" %}
 6810   size(4);
 6811   ins_encode %{
 6812     __ rldimi($dst$$Register, $src$$Register, CompressedOops::shift(), 32-CompressedOops::shift());
 6813   %}
 6814   ins_pipe(pipe_class_default);
 6815 %}
 6816 
 6817 // Optimize DecodeN for disjoint base.
 6818 // This node requires only one cycle on the critical path.
// We must postalloc_expand as we cannot express use_def effects where
// the used register is L and the def'ed register is P.
 6821 instruct decodeN_Disjoint_notNull_Ex(iRegPdst dst, iRegNsrc src) %{
 6822   match(Set dst (DecodeN src));
 6823   effect(TEMP_DEF dst);
 6824   predicate((n->bottom_type()->is_oopptr()->ptr() == TypePtr::NotNull ||
 6825              n->bottom_type()->is_oopptr()->ptr() == TypePtr::Constant) &&
 6826             CompressedOops::base_disjoint());
 6827   ins_cost(DEFAULT_COST);
 6828 
 6829   format %{ "MOV     $dst, heapbase \t\n"
 6830             "RLDIMI  $dst, $src, shift, 32-shift \t// decode with disjoint base" %}
 6831   postalloc_expand %{
 6832     loadBaseNode *n1 = new loadBaseNode();
 6833     n1->add_req(NULL);
 6834     n1->_opnds[0] = op_dst;
 6835 
 6836     decodeN_mergeDisjointNode *n2 = new decodeN_mergeDisjointNode();
 6837     n2->add_req(n_region, n_src, n1);
 6838     n2->_opnds[0] = op_dst;
 6839     n2->_opnds[1] = op_src;
 6840     n2->_opnds[2] = op_dst;
 6841     n2->_bottom_type = _bottom_type;
 6842 
 6843     ra_->set_pair(n1->_idx, ra_->get_reg_second(this), ra_->get_reg_first(this));
 6844     ra_->set_pair(n2->_idx, ra_->get_reg_second(this), ra_->get_reg_first(this));
 6845 
 6846     nodes->push(n1);
 6847     nodes->push(n2);
 6848   %}
 6849 %}
 6850 
 6851 instruct decodeN_Disjoint_isel_Ex(iRegPdst dst, iRegNsrc src, flagsReg crx) %{
 6852   match(Set dst (DecodeN src));
 6853   effect(TEMP_DEF dst, TEMP crx);
 6854   predicate((n->bottom_type()->is_oopptr()->ptr() != TypePtr::NotNull &&
 6855              n->bottom_type()->is_oopptr()->ptr() != TypePtr::Constant) &&
 6856             CompressedOops::base_disjoint() && VM_Version::has_isel());
 6857   ins_cost(3 * DEFAULT_COST);
 6858 
 6859   format %{ "DecodeN  $dst, $src \t// decode with disjoint base using isel" %}
 6860   postalloc_expand %{
 6861     loadBaseNode *n1 = new loadBaseNode();
 6862     n1->add_req(NULL);
 6863     n1->_opnds[0] = op_dst;
 6864 
 6865     cmpN_reg_imm0Node *n_compare  = new cmpN_reg_imm0Node();
 6866     n_compare->add_req(n_region, n_src);
 6867     n_compare->_opnds[0] = op_crx;
 6868     n_compare->_opnds[1] = op_src;
 6869     n_compare->_opnds[2] = new immN_0Oper(TypeNarrowOop::NULL_PTR);
 6870 
 6871     decodeN_mergeDisjointNode *n2 = new decodeN_mergeDisjointNode();
 6872     n2->add_req(n_region, n_src, n1);
 6873     n2->_opnds[0] = op_dst;
 6874     n2->_opnds[1] = op_src;
 6875     n2->_opnds[2] = op_dst;
 6876     n2->_bottom_type = _bottom_type;
 6877 
 6878     cond_set_0_ptrNode *n_cond_set = new cond_set_0_ptrNode();
 6879     n_cond_set->add_req(n_region, n_compare, n2);
 6880     n_cond_set->_opnds[0] = op_dst;
 6881     n_cond_set->_opnds[1] = op_crx;
 6882     n_cond_set->_opnds[2] = op_dst;
 6883     n_cond_set->_bottom_type = _bottom_type;
 6884 
 6885     assert(ra_->is_oop(this) == true, "A decodeN node must produce an oop!");
 6886     ra_->set_oop(n_cond_set, true);
 6887 
 6888     ra_->set_pair(n1->_idx, ra_->get_reg_second(this), ra_->get_reg_first(this));
 6889     ra_->set_pair(n_compare->_idx, ra_->get_reg_second(n_crx), ra_->get_reg_first(n_crx));
 6890     ra_->set_pair(n2->_idx, ra_->get_reg_second(this), ra_->get_reg_first(this));
 6891     ra_->set_pair(n_cond_set->_idx, ra_->get_reg_second(this), ra_->get_reg_first(this));
 6892 
 6893     nodes->push(n1);
 6894     nodes->push(n_compare);
 6895     nodes->push(n2);
 6896     nodes->push(n_cond_set);
 6897   %}
 6898 %}
 6899 
 6900 // src != 0, shift != 0, base != 0
 6901 instruct decodeN_notNull_addBase_Ex(iRegPdst dst, iRegNsrc src) %{
 6902   match(Set dst (DecodeN src));
 6903   predicate((n->bottom_type()->is_oopptr()->ptr() == TypePtr::NotNull ||
 6904              n->bottom_type()->is_oopptr()->ptr() == TypePtr::Constant) &&
 6905             CompressedOops::shift() != 0 &&
 6906             CompressedOops::base() != 0);
 6907   ins_cost(2 * DEFAULT_COST);
 6908 
 6909   format %{ "DecodeN $dst, $src \t// $src != NULL, postalloc expanded" %}
 6910   postalloc_expand( postalloc_expand_decode_oop_not_null(dst, src));
 6911 %}
 6912 
 6913 // Compressed OOPs with narrow_oop_shift == 0.
 6914 instruct decodeN_unscaled(iRegPdst dst, iRegNsrc src) %{
 6915   match(Set dst (DecodeN src));
 6916   predicate(CompressedOops::shift() == 0);
 6917   ins_cost(DEFAULT_COST);
 6918 
 6919   format %{ "MR      $dst, $src \t// DecodeN (unscaled)" %}
 6920   // variable size, 0 or 4.
 6921   ins_encode %{
 6922     __ mr_if_needed($dst$$Register, $src$$Register);
 6923   %}
 6924   ins_pipe(pipe_class_default);
 6925 %}
 6926 
 6927 // Convert compressed oop into int for vectors alignment masking.
 6928 instruct decodeN2I_unscaled(iRegIdst dst, iRegNsrc src) %{
 6929   match(Set dst (ConvL2I (CastP2X (DecodeN src))));
 6930   predicate(CompressedOops::shift() == 0);
 6931   ins_cost(DEFAULT_COST);
 6932 
 6933   format %{ "MR      $dst, $src \t// (int)DecodeN (unscaled)" %}
 6934   // variable size, 0 or 4.
 6935   ins_encode %{
 6936     __ mr_if_needed($dst$$Register, $src$$Register);
 6937   %}
 6938   ins_pipe(pipe_class_default);
 6939 %}
 6940 
 6941 // Convert klass pointer into compressed form.
 6942 
 6943 // Nodes for postalloc expand.
 6944 
 6945 // Shift node for expand.
 6946 instruct encodePKlass_shift(iRegNdst dst, iRegNsrc src) %{
 6947   // The match rule is needed to make it a 'MachTypeNode'!
 6948   match(Set dst (EncodePKlass src));
 6949   predicate(false);
 6950 
 6951   format %{ "SRDI    $dst, $src, 3 \t// encode" %}
 6952   size(4);
 6953   ins_encode %{
 6954     __ srdi($dst$$Register, $src$$Register, CompressedKlassPointers::shift());
 6955   %}
 6956   ins_pipe(pipe_class_default);
 6957 %}
 6958 
// Subtract node for expand.
 6960 instruct encodePKlass_sub_base(iRegPdst dst, iRegLsrc base, iRegPdst src) %{
 6961   // The match rule is needed to make it a 'MachTypeNode'!
 6962   match(Set dst (EncodePKlass (Binary base src)));
 6963   predicate(false);
 6964 
 6965   format %{ "SUB     $dst, $base, $src \t// encode" %}
 6966   size(4);
 6967   ins_encode %{
 6968     __ subf($dst$$Register, $base$$Register, $src$$Register);
 6969   %}
 6970   ins_pipe(pipe_class_default);
 6971 %}
 6972 
// Disjoint narrow klass base.
 6974 instruct encodePKlass_Disjoint(iRegNdst dst, iRegPsrc src) %{
 6975   match(Set dst (EncodePKlass src));
 6976   predicate(false /* TODO: PPC port CompressedKlassPointers::base_disjoint()*/);
 6977 
 6978   format %{ "EXTRDI  $dst, $src, #32, #3 \t// encode with disjoint base" %}
 6979   size(4);
 6980   ins_encode %{
 6981     __ rldicl($dst$$Register, $src$$Register, 64-CompressedKlassPointers::shift(), 32);
 6982   %}
 6983   ins_pipe(pipe_class_default);
 6984 %}
 6985 
 6986 // shift != 0, base != 0
 6987 instruct encodePKlass_not_null_Ex(iRegNdst dst, iRegLsrc base, iRegPsrc src) %{
 6988   match(Set dst (EncodePKlass (Binary base src)));
 6989   predicate(false);
 6990 
 6991   format %{ "EncodePKlass $dst, $src\t// $src != Null, postalloc expanded" %}
 6992   postalloc_expand %{
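    // Two-node sequence: SUBF computes $src - $base, then SRDI shifts right,
    // i.e. narrow klass = (klass - base) >> shift.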
 6993     encodePKlass_sub_baseNode *n1 = new encodePKlass_sub_baseNode();
 6994     n1->add_req(n_region, n_base, n_src);
 6995     n1->_opnds[0] = op_dst;
 6996     n1->_opnds[1] = op_base;
 6997     n1->_opnds[2] = op_src;
 6998     n1->_bottom_type = _bottom_type;
 6999 
 7000     encodePKlass_shiftNode *n2 = new encodePKlass_shiftNode();
 7001     n2->add_req(n_region, n1);
 7002     n2->_opnds[0] = op_dst;
 7003     n2->_opnds[1] = op_dst;
 7004     n2->_bottom_type = _bottom_type;
 7005     ra_->set_pair(n1->_idx, ra_->get_reg_second(this), ra_->get_reg_first(this));
 7006     ra_->set_pair(n2->_idx, ra_->get_reg_second(this), ra_->get_reg_first(this));
 7007 
 7008     nodes->push(n1);
 7009     nodes->push(n2);
 7010   %}
 7011 %}
 7012 
 7013 // shift != 0, base != 0
 7014 instruct encodePKlass_not_null_ExEx(iRegNdst dst, iRegPsrc src) %{
 7015   match(Set dst (EncodePKlass src));
 7016   //predicate(CompressedKlassPointers::shift() != 0 &&
 7017   //          true /* TODO: PPC port CompressedKlassPointers::base_overlaps()*/);
 7018 
 7019   //format %{ "EncodePKlass $dst, $src\t// $src != Null, postalloc expanded" %}
 7020   ins_cost(DEFAULT_COST*2);  // Don't count constant.
 7021   expand %{
 7022     immL baseImm %{ (jlong)(intptr_t)CompressedKlassPointers::base() %}
 7023     iRegLdst base;
 7024     loadConL_Ex(base, baseImm);
 7025     encodePKlass_not_null_Ex(dst, base, src);
 7026   %}
 7027 %}
 7028 
 7029 // Decode nodes.
 7030 
 7031 // Shift node for expand.
 7032 instruct decodeNKlass_shift(iRegPdst dst, iRegPsrc src) %{
 7033   // The match rule is needed to make it a 'MachTypeNode'!
 7034   match(Set dst (DecodeNKlass src));
 7035   predicate(false);
 7036 
 7037   format %{ "SLDI    $dst, $src, #3 \t// DecodeNKlass" %}
 7038   size(4);
 7039   ins_encode %{
 7040     __ sldi($dst$$Register, $src$$Register, CompressedKlassPointers::shift());
 7041   %}
 7042   ins_pipe(pipe_class_default);
 7043 %}
 7044 
 7045 // Add node for expand.
 7046 
 7047 instruct decodeNKlass_add_base(iRegPdst dst, iRegLsrc base, iRegPdst src) %{
 7048   // The match rule is needed to make it a 'MachTypeNode'!
 7049   match(Set dst (DecodeNKlass (Binary base src)));
 7050   predicate(false);
 7051 
 7052   format %{ "ADD     $dst, $base, $src \t// DecodeNKlass, add klass base" %}
 7053   size(4);
 7054   ins_encode %{
 7055     __ add($dst$$Register, $base$$Register, $src$$Register);
 7056   %}
 7057   ins_pipe(pipe_class_default);
 7058 %}
 7059 
 7060 // src != 0, shift != 0, base != 0
 7061 instruct decodeNKlass_notNull_addBase_Ex(iRegPdst dst, iRegLsrc base, iRegNsrc src) %{
 7062   match(Set dst (DecodeNKlass (Binary base src)));
 7063   //effect(kill src); // We need a register for the immediate result after shifting.
 7064   predicate(false);
 7065 
 7066   format %{ "DecodeNKlass $dst =  $base + ($src << 3) \t// $src != NULL, postalloc expanded" %}
 7067   postalloc_expand %{
 7068     decodeNKlass_add_baseNode *n1 = new decodeNKlass_add_baseNode();
 7069     n1->add_req(n_region, n_base, n_src);
 7070     n1->_opnds[0] = op_dst;
 7071     n1->_opnds[1] = op_base;
 7072     n1->_opnds[2] = op_src;
 7073     n1->_bottom_type = _bottom_type;
 7074 
 7075     decodeNKlass_shiftNode *n2 = new decodeNKlass_shiftNode();
 7076     n2->add_req(n_region, n1);
 7077     n2->_opnds[0] = op_dst;
 7078     n2->_opnds[1] = op_dst;
 7079     n2->_bottom_type = _bottom_type;
 7080 
 7081     ra_->set_pair(n1->_idx, ra_->get_reg_second(this), ra_->get_reg_first(this));
 7082     ra_->set_pair(n2->_idx, ra_->get_reg_second(this), ra_->get_reg_first(this));
 7083 
 7084     nodes->push(n1);
 7085     nodes->push(n2);
 7086   %}
 7087 %}
 7088 
 7089 // src != 0, shift != 0, base != 0
 7090 instruct decodeNKlass_notNull_addBase_ExEx(iRegPdst dst, iRegNsrc src) %{
 7091   match(Set dst (DecodeNKlass src));
 7092   // predicate(CompressedKlassPointers::shift() != 0 &&
 7093   //           CompressedKlassPointers::base() != 0);
 7094 
 7095   //format %{ "DecodeNKlass $dst, $src \t// $src != NULL, expanded" %}
 7096 
 7097   ins_cost(DEFAULT_COST*2);  // Don't count constant.
 7098   expand %{
    // We add first, then shift. This way we get along with one register less,
    // but we have to load the base pre-shifted.
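    // The reordering relies on the klass base being aligned to (1 << shift):
    //   base + (narrow << shift) == ((base >> shift) + narrow) << shift
    // E.g. base = 0x800000000, shift = 3, narrow = 0x10: both sides evaluate
    // to 0x800000080.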
 7101     immL baseImm %{ (jlong)((intptr_t)CompressedKlassPointers::base() >> CompressedKlassPointers::shift()) %}
 7102     iRegLdst base;
 7103     loadConL_Ex(base, baseImm);
 7104     decodeNKlass_notNull_addBase_Ex(dst, base, src);
 7105   %}
 7106 %}
 7107 
 7108 //----------MemBar Instructions-----------------------------------------------
 7109 // Memory barrier flavors
 7110 
 7111 instruct membar_acquire() %{
 7112   match(LoadFence);
 7113   ins_cost(4*MEMORY_REF_COST);
 7114 
 7115   format %{ "MEMBAR-acquire" %}
 7116   size(4);
 7117   ins_encode %{
 7118     __ acquire();
 7119   %}
 7120   ins_pipe(pipe_class_default);
 7121 %}
 7122 
 7123 instruct unnecessary_membar_acquire() %{
 7124   match(MemBarAcquire);
 7125   ins_cost(0);
 7126 
 7127   format %{ " -- \t// redundant MEMBAR-acquire - empty" %}
 7128   size(0);
 7129   ins_encode( /*empty*/ );
 7130   ins_pipe(pipe_class_default);
 7131 %}
 7132 
 7133 instruct membar_acquire_lock() %{
 7134   match(MemBarAcquireLock);
 7135   ins_cost(0);
 7136 
 7137   format %{ " -- \t// redundant MEMBAR-acquire - empty (acquire as part of CAS in prior FastLock)" %}
 7138   size(0);
 7139   ins_encode( /*empty*/ );
 7140   ins_pipe(pipe_class_default);
 7141 %}
 7142 
 7143 instruct membar_release() %{
 7144   match(MemBarRelease);
 7145   match(StoreFence);
 7146   ins_cost(4*MEMORY_REF_COST);
 7147 
 7148   format %{ "MEMBAR-release" %}
 7149   size(4);
 7150   ins_encode %{
 7151     __ release();
 7152   %}
 7153   ins_pipe(pipe_class_default);
 7154 %}
 7155 
 7156 instruct membar_storestore() %{
 7157   match(MemBarStoreStore);
 7158   ins_cost(4*MEMORY_REF_COST);
 7159 
 7160   format %{ "MEMBAR-store-store" %}
 7161   size(4);
 7162   ins_encode %{
 7163     __ membar(Assembler::StoreStore);
 7164   %}
 7165   ins_pipe(pipe_class_default);
 7166 %}
 7167 
 7168 instruct membar_release_lock() %{
 7169   match(MemBarReleaseLock);
 7170   ins_cost(0);
 7171 
 7172   format %{ " -- \t// redundant MEMBAR-release - empty (release in FastUnlock)" %}
 7173   size(0);
 7174   ins_encode( /*empty*/ );
 7175   ins_pipe(pipe_class_default);
 7176 %}
 7177 
 7178 instruct membar_volatile() %{
 7179   match(MemBarVolatile);
 7180   ins_cost(4*MEMORY_REF_COST);
 7181 
 7182   format %{ "MEMBAR-volatile" %}
 7183   size(4);
 7184   ins_encode %{
 7185     __ fence();
 7186   %}
 7187   ins_pipe(pipe_class_default);
 7188 %}
 7189 
 7190 // This optimization is wrong on PPC. The following pattern is not supported:
 7191 //  MemBarVolatile
 7192 //   ^        ^
 7193 //   |        |
 7194 //  CtrlProj MemProj
 7195 //   ^        ^
 7196 //   |        |
 7197 //   |       Load
 7198 //   |
 7199 //  MemBarVolatile
 7200 //
 7201 //  The first MemBarVolatile could get optimized out! According to
//  Vladimir, this pattern cannot occur on Oracle platforms.
 7203 //  However, it does occur on PPC64 (because of membars in
 7204 //  inline_unsafe_load_store).
 7205 //
 7206 // Add this node again if we found a good solution for inline_unsafe_load_store().
 7207 // Don't forget to look at the implementation of post_store_load_barrier again,
 7208 // we did other fixes in that method.
 7209 //instruct unnecessary_membar_volatile() %{
 7210 //  match(MemBarVolatile);
 7211 //  predicate(Matcher::post_store_load_barrier(n));
 7212 //  ins_cost(0);
 7213 //
 7214 //  format %{ " -- \t// redundant MEMBAR-volatile - empty" %}
 7215 //  size(0);
 7216 //  ins_encode( /*empty*/ );
 7217 //  ins_pipe(pipe_class_default);
 7218 //%}
 7219 
 7220 instruct membar_CPUOrder() %{
 7221   match(MemBarCPUOrder);
 7222   ins_cost(0);
 7223 
 7224   format %{ " -- \t// MEMBAR-CPUOrder - empty: PPC64 processors are self-consistent." %}
 7225   size(0);
 7226   ins_encode( /*empty*/ );
 7227   ins_pipe(pipe_class_default);
 7228 %}
 7229 
 7230 //----------Conditional Move---------------------------------------------------
 7231 
 7232 // Cmove using isel.
 7233 instruct cmovI_reg_isel(cmpOp cmp, flagsRegSrc crx, iRegIdst dst, iRegIsrc src) %{
 7234   match(Set dst (CMoveI (Binary cmp crx) (Binary dst src)));
 7235   predicate(VM_Version::has_isel());
 7236   ins_cost(DEFAULT_COST);
 7237 
 7238   format %{ "CMOVE   $cmp, $crx, $dst, $src\n\t" %}
 7239   size(4);
 7240   ins_encode %{
 7241     // This is a Power7 instruction for which no machine description
    // exists. Anyway, the scheduler should be off on Power7.
 7243     int cc        = $cmp$$cmpcode;
 7244     __ isel($dst$$Register, $crx$$CondRegister,
 7245             (Assembler::Condition)(cc & 3), /*invert*/((~cc) & 8), $src$$Register);
 7246   %}
 7247   ins_pipe(pipe_class_default);
 7248 %}
 7249 
 7250 instruct cmovI_reg(cmpOp cmp, flagsRegSrc crx, iRegIdst dst, iRegIsrc src) %{
 7251   match(Set dst (CMoveI (Binary cmp crx) (Binary dst src)));
 7252   predicate(!VM_Version::has_isel());
 7253   ins_cost(DEFAULT_COST+BRANCH_COST);
 7254 
 7255   ins_variable_size_depending_on_alignment(true);
 7256 
 7257   format %{ "CMOVE   $cmp, $crx, $dst, $src\n\t" %}
 7258   // Worst case is branch + move + stop, no stop without scheduler
 7259   size(8);
 7260   ins_encode( enc_cmove_reg(dst, crx, src, cmp) );
 7261   ins_pipe(pipe_class_default);
 7262 %}
 7263 
 7264 instruct cmovI_imm(cmpOp cmp, flagsRegSrc crx, iRegIdst dst, immI16 src) %{
 7265   match(Set dst (CMoveI (Binary cmp crx) (Binary dst src)));
 7266   ins_cost(DEFAULT_COST+BRANCH_COST);
 7267 
 7268   ins_variable_size_depending_on_alignment(true);
 7269 
 7270   format %{ "CMOVE   $cmp, $crx, $dst, $src\n\t" %}
 7271   // Worst case is branch + move + stop, no stop without scheduler
 7272   size(8);
 7273   ins_encode( enc_cmove_imm(dst, crx, src, cmp) );
 7274   ins_pipe(pipe_class_default);
 7275 %}
 7276 
 7277 // Cmove using isel.
 7278 instruct cmovL_reg_isel(cmpOp cmp, flagsRegSrc crx, iRegLdst dst, iRegLsrc src) %{
 7279   match(Set dst (CMoveL (Binary cmp crx) (Binary dst src)));
 7280   predicate(VM_Version::has_isel());
 7281   ins_cost(DEFAULT_COST);
 7282 
 7283   format %{ "CMOVE   $cmp, $crx, $dst, $src\n\t" %}
 7284   size(4);
 7285   ins_encode %{
 7286     // This is a Power7 instruction for which no machine description
    // exists. Anyway, the scheduler should be off on Power7.
 7288     int cc        = $cmp$$cmpcode;
 7289     __ isel($dst$$Register, $crx$$CondRegister,
 7290             (Assembler::Condition)(cc & 3), /*invert*/((~cc) & 8), $src$$Register);
 7291   %}
 7292   ins_pipe(pipe_class_default);
 7293 %}
 7294 
 7295 instruct cmovL_reg(cmpOp cmp, flagsRegSrc crx, iRegLdst dst, iRegLsrc src) %{
 7296   match(Set dst (CMoveL (Binary cmp crx) (Binary dst src)));
 7297   predicate(!VM_Version::has_isel());
 7298   ins_cost(DEFAULT_COST+BRANCH_COST);
 7299 
 7300   ins_variable_size_depending_on_alignment(true);
 7301 
 7302   format %{ "CMOVE   $cmp, $crx, $dst, $src\n\t" %}
 7303   // Worst case is branch + move + stop, no stop without scheduler.
 7304   size(8);
 7305   ins_encode( enc_cmove_reg(dst, crx, src, cmp) );
 7306   ins_pipe(pipe_class_default);
 7307 %}
 7308 
 7309 instruct cmovL_imm(cmpOp cmp, flagsRegSrc crx, iRegLdst dst, immL16 src) %{
 7310   match(Set dst (CMoveL (Binary cmp crx) (Binary dst src)));
 7311   ins_cost(DEFAULT_COST+BRANCH_COST);
 7312 
 7313   ins_variable_size_depending_on_alignment(true);
 7314 
 7315   format %{ "CMOVE   $cmp, $crx, $dst, $src\n\t" %}
 7316   // Worst case is branch + move + stop, no stop without scheduler.
 7317   size(8);
 7318   ins_encode( enc_cmove_imm(dst, crx, src, cmp) );
 7319   ins_pipe(pipe_class_default);
 7320 %}
 7321 
 7322 // Cmove using isel.
 7323 instruct cmovN_reg_isel(cmpOp cmp, flagsRegSrc crx, iRegNdst dst, iRegNsrc src) %{
 7324   match(Set dst (CMoveN (Binary cmp crx) (Binary dst src)));
 7325   predicate(VM_Version::has_isel());
 7326   ins_cost(DEFAULT_COST);
 7327 
 7328   format %{ "CMOVE   $cmp, $crx, $dst, $src\n\t" %}
 7329   size(4);
 7330   ins_encode %{
 7331     // This is a Power7 instruction for which no machine description
    // exists. Anyway, the scheduler should be off on Power7.
 7333     int cc        = $cmp$$cmpcode;
 7334     __ isel($dst$$Register, $crx$$CondRegister,
 7335             (Assembler::Condition)(cc & 3), /*invert*/((~cc) & 8), $src$$Register);
 7336   %}
 7337   ins_pipe(pipe_class_default);
 7338 %}
 7339 
 7340 // Conditional move for RegN. Only cmov(reg, reg).
 7341 instruct cmovN_reg(cmpOp cmp, flagsRegSrc crx, iRegNdst dst, iRegNsrc src) %{
 7342   match(Set dst (CMoveN (Binary cmp crx) (Binary dst src)));
 7343   predicate(!VM_Version::has_isel());
 7344   ins_cost(DEFAULT_COST+BRANCH_COST);
 7345 
 7346   ins_variable_size_depending_on_alignment(true);
 7347 
 7348   format %{ "CMOVE   $cmp, $crx, $dst, $src\n\t" %}
 7349   // Worst case is branch + move + stop, no stop without scheduler.
 7350   size(8);
 7351   ins_encode( enc_cmove_reg(dst, crx, src, cmp) );
 7352   ins_pipe(pipe_class_default);
 7353 %}
 7354 
 7355 instruct cmovN_imm(cmpOp cmp, flagsRegSrc crx, iRegNdst dst, immN_0 src) %{
 7356   match(Set dst (CMoveN (Binary cmp crx) (Binary dst src)));
 7357   ins_cost(DEFAULT_COST+BRANCH_COST);
 7358 
 7359   ins_variable_size_depending_on_alignment(true);
 7360 
 7361   format %{ "CMOVE   $cmp, $crx, $dst, $src\n\t" %}
 7362   // Worst case is branch + move + stop, no stop without scheduler.
 7363   size(8);
 7364   ins_encode( enc_cmove_imm(dst, crx, src, cmp) );
 7365   ins_pipe(pipe_class_default);
 7366 %}
 7367 
 7368 // Cmove using isel.
 7369 instruct cmovP_reg_isel(cmpOp cmp, flagsRegSrc crx, iRegPdst dst, iRegPsrc src) %{
 7370   match(Set dst (CMoveP (Binary cmp crx) (Binary dst src)));
 7371   predicate(VM_Version::has_isel());
 7372   ins_cost(DEFAULT_COST);
 7373 
 7374   format %{ "CMOVE   $cmp, $crx, $dst, $src\n\t" %}
 7375   size(4);
 7376   ins_encode %{
 7377     // This is a Power7 instruction for which no machine description
    // exists. Anyway, the scheduler should be off on Power7.
 7379     int cc        = $cmp$$cmpcode;
 7380     __ isel($dst$$Register, $crx$$CondRegister,
 7381             (Assembler::Condition)(cc & 3), /*invert*/((~cc) & 8), $src$$Register);
 7382   %}
 7383   ins_pipe(pipe_class_default);
 7384 %}
 7385 
 7386 instruct cmovP_reg(cmpOp cmp, flagsRegSrc crx, iRegPdst dst, iRegP_N2P src) %{
 7387   match(Set dst (CMoveP (Binary cmp crx) (Binary dst src)));
 7388   predicate(!VM_Version::has_isel());
 7389   ins_cost(DEFAULT_COST+BRANCH_COST);
 7390 
 7391   ins_variable_size_depending_on_alignment(true);
 7392 
 7393   format %{ "CMOVE   $cmp, $crx, $dst, $src\n\t" %}
 7394   // Worst case is branch + move + stop, no stop without scheduler.
 7395   size(8);
 7396   ins_encode( enc_cmove_reg(dst, crx, src, cmp) );
 7397   ins_pipe(pipe_class_default);
 7398 %}
 7399 
 7400 instruct cmovP_imm(cmpOp cmp, flagsRegSrc crx, iRegPdst dst, immP_0 src) %{
 7401   match(Set dst (CMoveP (Binary cmp crx) (Binary dst src)));
 7402   ins_cost(DEFAULT_COST+BRANCH_COST);
 7403 
 7404   ins_variable_size_depending_on_alignment(true);
 7405 
 7406   format %{ "CMOVE   $cmp, $crx, $dst, $src\n\t" %}
 7407   // Worst case is branch + move + stop, no stop without scheduler.
 7408   size(8);
 7409   ins_encode( enc_cmove_imm(dst, crx, src, cmp) );
 7410   ins_pipe(pipe_class_default);
 7411 %}
 7412 
 7413 instruct cmovF_reg(cmpOp cmp, flagsRegSrc crx, regF dst, regF src) %{
 7414   match(Set dst (CMoveF (Binary cmp crx) (Binary dst src)));
 7415   ins_cost(DEFAULT_COST+BRANCH_COST);
 7416 
 7417   ins_variable_size_depending_on_alignment(true);
 7418 
 7419   format %{ "CMOVEF  $cmp, $crx, $dst, $src\n\t" %}
 7420   // Worst case is branch + move + stop, no stop without scheduler.
 7421   size(8);
 7422   ins_encode %{
 7423     Label done;
 7424     assert((Assembler::bcondCRbiIs1 & ~Assembler::bcondCRbiIs0) == 8, "check encoding");
 7425     // Branch if not (cmp crx).
 7426     __ bc(cc_to_inverse_boint($cmp$$cmpcode), cc_to_biint($cmp$$cmpcode, $crx$$reg), done);
 7427     __ fmr($dst$$FloatRegister, $src$$FloatRegister);
 7428     __ bind(done);
 7429   %}
 7430   ins_pipe(pipe_class_default);
 7431 %}
 7432 
 7433 instruct cmovD_reg(cmpOp cmp, flagsRegSrc crx, regD dst, regD src) %{
 7434   match(Set dst (CMoveD (Binary cmp crx) (Binary dst src)));
 7435   ins_cost(DEFAULT_COST+BRANCH_COST);
 7436 
 7437   ins_variable_size_depending_on_alignment(true);
 7438 
 7439   format %{ "CMOVEF  $cmp, $crx, $dst, $src\n\t" %}
 7440   // Worst case is branch + move + stop, no stop without scheduler.
 7441   size(8);
 7442   ins_encode %{
 7443     Label done;
 7444     assert((Assembler::bcondCRbiIs1 & ~Assembler::bcondCRbiIs0) == 8, "check encoding");
 7445     // Branch if not (cmp crx).
 7446     __ bc(cc_to_inverse_boint($cmp$$cmpcode), cc_to_biint($cmp$$cmpcode, $crx$$reg), done);
 7447     __ fmr($dst$$FloatRegister, $src$$FloatRegister);
 7448     __ bind(done);
 7449   %}
 7450   ins_pipe(pipe_class_default);
 7451 %}
 7452 
 7453 //----------Conditional_store--------------------------------------------------
 7454 // Conditional-store of the updated heap-top.
 7455 // Used during allocation of the shared heap.
 7456 // Sets flags (EQ) on success. Implemented with a CASA on Sparc.
 7457 
 7458 // As compareAndSwapL, but return flag register instead of boolean value in
 7459 // int register.
 7460 // Used by sun/misc/AtomicLongCSImpl.java.
 7461 // Mem_ptr must be a memory operand, else this node does not get
 7462 // Flag_needs_anti_dependence_check set by adlc. If this is not set this node
 7463 // can be rematerialized which leads to errors.
 7464 instruct storeLConditional_regP_regL_regL(flagsReg crx, indirect mem_ptr, iRegLsrc oldVal, iRegLsrc newVal, flagsRegCR0 cr0) %{
 7465   match(Set crx (StoreLConditional mem_ptr (Binary oldVal newVal)));
 7466   effect(TEMP cr0);
 7467   format %{ "CMPXCHGD if ($crx = ($oldVal == *$mem_ptr)) *mem_ptr = $newVal; as bool" %}
 7468   ins_encode %{
 7469     __ cmpxchgd($crx$$CondRegister, R0, $oldVal$$Register, $newVal$$Register, $mem_ptr$$Register,
 7470                 MacroAssembler::MemBarAcq, MacroAssembler::cmpxchgx_hint_atomic_update(),
 7471                 noreg, NULL, true);
 7472   %}
 7473   ins_pipe(pipe_class_default);
 7474 %}
 7475 
 7476 // As compareAndSwapP, but return flag register instead of boolean value in
 7477 // int register.
 7478 // This instruction is matched if UseTLAB is off.
 7479 // Mem_ptr must be a memory operand, else this node does not get
 7480 // Flag_needs_anti_dependence_check set by adlc. If this is not set this node
 7481 // can be rematerialized which leads to errors.
 7482 instruct storePConditional_regP_regP_regP(flagsRegCR0 cr0, indirect mem_ptr, iRegPsrc oldVal, iRegPsrc newVal) %{
 7483   match(Set cr0 (StorePConditional mem_ptr (Binary oldVal newVal)));
 7484   ins_cost(2*MEMORY_REF_COST);
 7485 
 7486   format %{ "STDCX_  if ($cr0 = ($oldVal == *$mem_ptr)) *mem_ptr = $newVal; as bool" %}
 7487   ins_encode %{
 7488     __ stdcx_($newVal$$Register, $mem_ptr$$Register);
 7489   %}
 7490   ins_pipe(pipe_class_memory);
 7491 %}
 7492 
 7493 // Implement LoadPLocked. Must be ordered against changes of the memory location
 7494 // by storePConditional.
 7495 // Don't know whether this is ever used.
 7496 instruct loadPLocked(iRegPdst dst, memory mem) %{
 7497   match(Set dst (LoadPLocked mem));
 7498   ins_cost(2*MEMORY_REF_COST);
 7499 
 7500   format %{ "LDARX   $dst, $mem \t// loadPLocked\n\t" %}
 7501   size(4);
 7502   ins_encode %{
 7503     __ ldarx($dst$$Register, $mem$$Register, MacroAssembler::cmpxchgx_hint_atomic_update());
 7504   %}
 7505   ins_pipe(pipe_class_memory);
 7506 %}
 7507 
 7508 //----------Compare-And-Swap---------------------------------------------------
 7509 
 7510 // CompareAndSwap{P,I,L} have more than one output, therefore "CmpI
 7511 // (CompareAndSwap ...)" or "If (CmpI (CompareAndSwap ..))"  cannot be
 7512 // matched.
 7513 
 7514 // Strong versions:
 7515 
 7516 instruct compareAndSwapB_regP_regI_regI(iRegIdst res, iRegPdst mem_ptr, iRegIsrc src1, iRegIsrc src2, flagsRegCR0 cr0) %{
 7517   match(Set res (CompareAndSwapB mem_ptr (Binary src1 src2)));
 7518   predicate(VM_Version::has_lqarx());
 7519   effect(TEMP_DEF res, TEMP cr0); // TEMP_DEF to avoid jump
 7520   format %{ "CMPXCHGB $res, $mem_ptr, $src1, $src2; as bool" %}
 7521   ins_encode %{
 7522     // CmpxchgX sets CCR0 to cmpX(src1, src2) and Rres to 'true'/'false'.
 7523     __ cmpxchgb(CCR0, R0, $src1$$Register, $src2$$Register, $mem_ptr$$Register, noreg, noreg,
 7524                 MacroAssembler::MemBarNone, MacroAssembler::cmpxchgx_hint_atomic_update(),
 7525                 $res$$Register, true);
 7526     if (support_IRIW_for_not_multiple_copy_atomic_cpu) {
 7527       __ isync();
 7528     } else {
 7529       __ sync();
 7530     }
 7531   %}
 7532   ins_pipe(pipe_class_default);
 7533 %}
 7534 
 7535 instruct compareAndSwapB4_regP_regI_regI(iRegIdst res, rarg3RegP mem_ptr, iRegIsrc src1, rarg4RegI src2, iRegIdst tmp1, iRegIdst tmp2, flagsRegCR0 cr0) %{
 7536   match(Set res (CompareAndSwapB mem_ptr (Binary src1 src2)));
 7537   predicate(!VM_Version::has_lqarx());
 7538   effect(TEMP_DEF res, USE_KILL src2, USE_KILL mem_ptr, TEMP tmp1, TEMP tmp2, TEMP cr0); // TEMP_DEF to avoid jump
 7539   format %{ "CMPXCHGB $res, $mem_ptr, $src1, $src2; as bool" %}
 7540   ins_encode %{
 7541     // CmpxchgX sets CCR0 to cmpX(src1, src2) and Rres to 'true'/'false'.
 7542     __ cmpxchgb(CCR0, R0, $src1$$Register, $src2$$Register, $mem_ptr$$Register, $tmp1$$Register, $tmp2$$Register,
 7543                 MacroAssembler::MemBarNone, MacroAssembler::cmpxchgx_hint_atomic_update(),
 7544                 $res$$Register, true);
 7545     if (support_IRIW_for_not_multiple_copy_atomic_cpu) {
 7546       __ isync();
 7547     } else {
 7548       __ sync();
 7549     }
 7550   %}
 7551   ins_pipe(pipe_class_default);
 7552 %}
 7553 
 7554 instruct compareAndSwapS_regP_regI_regI(iRegIdst res, iRegPdst mem_ptr, iRegIsrc src1, iRegIsrc src2, flagsRegCR0 cr0) %{
 7555   match(Set res (CompareAndSwapS mem_ptr (Binary src1 src2)));
 7556   predicate(VM_Version::has_lqarx());
 7557   effect(TEMP_DEF res, TEMP cr0); // TEMP_DEF to avoid jump
 7558   format %{ "CMPXCHGH $res, $mem_ptr, $src1, $src2; as bool" %}
 7559   ins_encode %{
 7560     // CmpxchgX sets CCR0 to cmpX(src1, src2) and Rres to 'true'/'false'.
 7561     __ cmpxchgh(CCR0, R0, $src1$$Register, $src2$$Register, $mem_ptr$$Register, noreg, noreg,
 7562                 MacroAssembler::MemBarNone, MacroAssembler::cmpxchgx_hint_atomic_update(),
 7563                 $res$$Register, true);
 7564     if (support_IRIW_for_not_multiple_copy_atomic_cpu) {
 7565       __ isync();
 7566     } else {
 7567       __ sync();
 7568     }
 7569   %}
 7570   ins_pipe(pipe_class_default);
 7571 %}
 7572 
 7573 instruct compareAndSwapS4_regP_regI_regI(iRegIdst res, rarg3RegP mem_ptr, iRegIsrc src1, rarg4RegI src2, iRegIdst tmp1, iRegIdst tmp2, flagsRegCR0 cr0) %{
 7574   match(Set res (CompareAndSwapS mem_ptr (Binary src1 src2)));
 7575   predicate(!VM_Version::has_lqarx());
 7576   effect(TEMP_DEF res, USE_KILL src2, USE_KILL mem_ptr, TEMP tmp1, TEMP tmp2, TEMP cr0); // TEMP_DEF to avoid jump
 7577   format %{ "CMPXCHGH $res, $mem_ptr, $src1, $src2; as bool" %}
 7578   ins_encode %{
 7579     // CmpxchgX sets CCR0 to cmpX(src1, src2) and Rres to 'true'/'false'.
 7580     __ cmpxchgh(CCR0, R0, $src1$$Register, $src2$$Register, $mem_ptr$$Register, $tmp1$$Register, $tmp2$$Register,
 7581                 MacroAssembler::MemBarNone, MacroAssembler::cmpxchgx_hint_atomic_update(),
 7582                 $res$$Register, true);
 7583     if (support_IRIW_for_not_multiple_copy_atomic_cpu) {
 7584       __ isync();
 7585     } else {
 7586       __ sync();
 7587     }
 7588   %}
 7589   ins_pipe(pipe_class_default);
 7590 %}
 7591 
 7592 instruct compareAndSwapI_regP_regI_regI(iRegIdst res, iRegPdst mem_ptr, iRegIsrc src1, iRegIsrc src2, flagsRegCR0 cr0) %{
 7593   match(Set res (CompareAndSwapI mem_ptr (Binary src1 src2)));
 7594   effect(TEMP_DEF res, TEMP cr0); // TEMP_DEF to avoid jump
 7595   format %{ "CMPXCHGW $res, $mem_ptr, $src1, $src2; as bool" %}
 7596   ins_encode %{
 7597     // CmpxchgX sets CCR0 to cmpX(src1, src2) and Rres to 'true'/'false'.
 7598     __ cmpxchgw(CCR0, R0, $src1$$Register, $src2$$Register, $mem_ptr$$Register,
 7599                 MacroAssembler::MemBarNone, MacroAssembler::cmpxchgx_hint_atomic_update(),
 7600                 $res$$Register, true);
 7601     if (support_IRIW_for_not_multiple_copy_atomic_cpu) {
 7602       __ isync();
 7603     } else {
 7604       __ sync();
 7605     }
 7606   %}
 7607   ins_pipe(pipe_class_default);
 7608 %}
 7609 
 7610 instruct compareAndSwapN_regP_regN_regN(iRegIdst res, iRegPdst mem_ptr, iRegNsrc src1, iRegNsrc src2, flagsRegCR0 cr0) %{
 7611   match(Set res (CompareAndSwapN mem_ptr (Binary src1 src2)));
 7612   effect(TEMP_DEF res, TEMP cr0); // TEMP_DEF to avoid jump
 7613   format %{ "CMPXCHGW $res, $mem_ptr, $src1, $src2; as bool" %}
 7614   ins_encode %{
 7615     // CmpxchgX sets CCR0 to cmpX(src1, src2) and Rres to 'true'/'false'.
 7616     __ cmpxchgw(CCR0, R0, $src1$$Register, $src2$$Register, $mem_ptr$$Register,
 7617                 MacroAssembler::MemBarNone, MacroAssembler::cmpxchgx_hint_atomic_update(),
 7618                 $res$$Register, true);
 7619     if (support_IRIW_for_not_multiple_copy_atomic_cpu) {
 7620       __ isync();
 7621     } else {
 7622       __ sync();
 7623     }
 7624   %}
 7625   ins_pipe(pipe_class_default);
 7626 %}
 7627 
 7628 instruct compareAndSwapL_regP_regL_regL(iRegIdst res, iRegPdst mem_ptr, iRegLsrc src1, iRegLsrc src2, flagsRegCR0 cr0) %{
 7629   match(Set res (CompareAndSwapL mem_ptr (Binary src1 src2)));
 7630   effect(TEMP_DEF res, TEMP cr0); // TEMP_DEF to avoid jump
 7631   format %{ "CMPXCHGD $res, $mem_ptr, $src1, $src2; as bool" %}
 7632   ins_encode %{
 7633     // CmpxchgX sets CCR0 to cmpX(src1, src2) and Rres to 'true'/'false'.
 7634     __ cmpxchgd(CCR0, R0, $src1$$Register, $src2$$Register, $mem_ptr$$Register,
 7635                 MacroAssembler::MemBarNone, MacroAssembler::cmpxchgx_hint_atomic_update(),
 7636                 $res$$Register, NULL, true);
 7637     if (support_IRIW_for_not_multiple_copy_atomic_cpu) {
 7638       __ isync();
 7639     } else {
 7640       __ sync();
 7641     }
 7642   %}
 7643   ins_pipe(pipe_class_default);
 7644 %}
 7645 
 7646 instruct compareAndSwapP_regP_regP_regP(iRegIdst res, iRegPdst mem_ptr, iRegPsrc src1, iRegPsrc src2, flagsRegCR0 cr0) %{
 7647   match(Set res (CompareAndSwapP mem_ptr (Binary src1 src2)));
 7648   effect(TEMP_DEF res, TEMP cr0); // TEMP_DEF to avoid jump
 7649   format %{ "CMPXCHGD $res, $mem_ptr, $src1, $src2; as bool; ptr" %}
 7650   ins_encode %{
 7651     // CmpxchgX sets CCR0 to cmpX(src1, src2) and Rres to 'true'/'false'.
 7652     __ cmpxchgd(CCR0, R0, $src1$$Register, $src2$$Register, $mem_ptr$$Register,
 7653                 MacroAssembler::MemBarNone, MacroAssembler::cmpxchgx_hint_atomic_update(),
 7654                 $res$$Register, NULL, true);
 7655     if (support_IRIW_for_not_multiple_copy_atomic_cpu) {
 7656       __ isync();
 7657     } else {
 7658       __ sync();
 7659     }
 7660   %}
 7661   ins_pipe(pipe_class_default);
 7662 %}
 7663 
 7664 // Weak versions:
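// A weak CAS may fail spuriously (e.g. when the reservation is lost), so the
// caller must be prepared to retry. The relaxed variants below therefore emit
// no trailing barrier; the acquire variants only need ordering on the
// successful path (see the comments in their encodings).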
 7665 
 7666 instruct weakCompareAndSwapB_regP_regI_regI(iRegIdst res, iRegPdst mem_ptr, iRegIsrc src1, iRegIsrc src2, flagsRegCR0 cr0) %{
 7667   match(Set res (WeakCompareAndSwapB mem_ptr (Binary src1 src2)));
 7668   predicate(((CompareAndSwapNode*)n)->order() != MemNode::acquire && ((CompareAndSwapNode*)n)->order() != MemNode::seqcst && VM_Version::has_lqarx());
 7669   effect(TEMP_DEF res, TEMP cr0); // TEMP_DEF to avoid jump
 7670   format %{ "weak CMPXCHGB $res, $mem_ptr, $src1, $src2; as bool" %}
 7671   ins_encode %{
 7672     // CmpxchgX sets CCR0 to cmpX(src1, src2) and Rres to 'true'/'false'.
 7673     __ cmpxchgb(CCR0, R0, $src1$$Register, $src2$$Register, $mem_ptr$$Register, noreg, noreg,
 7674                 MacroAssembler::MemBarNone,
 7675                 MacroAssembler::cmpxchgx_hint_atomic_update(), $res$$Register, true, /*weak*/ true);
 7676   %}
 7677   ins_pipe(pipe_class_default);
 7678 %}
 7679 
 7680 instruct weakCompareAndSwapB4_regP_regI_regI(iRegIdst res, rarg3RegP mem_ptr, iRegIsrc src1, rarg4RegI src2, iRegIdst tmp1, iRegIdst tmp2, flagsRegCR0 cr0) %{
 7681   match(Set res (WeakCompareAndSwapB mem_ptr (Binary src1 src2)));
 7682   predicate(((CompareAndSwapNode*)n)->order() != MemNode::acquire && ((CompareAndSwapNode*)n)->order() != MemNode::seqcst && !VM_Version::has_lqarx());
 7683   effect(TEMP_DEF res, USE_KILL src2, USE_KILL mem_ptr, TEMP tmp1, TEMP tmp2, TEMP cr0); // TEMP_DEF to avoid jump
 7684   format %{ "weak CMPXCHGB $res, $mem_ptr, $src1, $src2; as bool" %}
 7685   ins_encode %{
 7686     // CmpxchgX sets CCR0 to cmpX(src1, src2) and Rres to 'true'/'false'.
 7687     __ cmpxchgb(CCR0, R0, $src1$$Register, $src2$$Register, $mem_ptr$$Register, $tmp1$$Register, $tmp2$$Register,
 7688                 MacroAssembler::MemBarNone,
 7689                 MacroAssembler::cmpxchgx_hint_atomic_update(), $res$$Register, true, /*weak*/ true);
 7690   %}
 7691   ins_pipe(pipe_class_default);
 7692 %}
 7693 
 7694 instruct weakCompareAndSwapB_acq_regP_regI_regI(iRegIdst res, iRegPdst mem_ptr, iRegIsrc src1, iRegIsrc src2, flagsRegCR0 cr0) %{
 7695   match(Set res (WeakCompareAndSwapB mem_ptr (Binary src1 src2)));
 7696   predicate((((CompareAndSwapNode*)n)->order() == MemNode::acquire || ((CompareAndSwapNode*)n)->order() == MemNode::seqcst) && VM_Version::has_lqarx());
 7697   effect(TEMP_DEF res, TEMP cr0); // TEMP_DEF to avoid jump
 7698   format %{ "weak CMPXCHGB acq $res, $mem_ptr, $src1, $src2; as bool" %}
 7699   ins_encode %{
 7700     // CmpxchgX sets CCR0 to cmpX(src1, src2) and Rres to 'true'/'false'.
 7701     __ cmpxchgb(CCR0, R0, $src1$$Register, $src2$$Register, $mem_ptr$$Register, noreg, noreg,
 7702                 support_IRIW_for_not_multiple_copy_atomic_cpu ? MacroAssembler::MemBarAcq : MacroAssembler::MemBarFenceAfter,
 7703                 MacroAssembler::cmpxchgx_hint_atomic_update(), $res$$Register, true, /*weak*/ true);
 7704   %}
 7705   ins_pipe(pipe_class_default);
 7706 %}
 7707 
 7708 instruct weakCompareAndSwapB4_acq_regP_regI_regI(iRegIdst res, rarg3RegP mem_ptr, iRegIsrc src1, rarg4RegI src2, iRegIdst tmp1, iRegIdst tmp2, flagsRegCR0 cr0) %{
 7709   match(Set res (WeakCompareAndSwapB mem_ptr (Binary src1 src2)));
 7710   predicate((((CompareAndSwapNode*)n)->order() == MemNode::acquire || ((CompareAndSwapNode*)n)->order() == MemNode::seqcst) && !VM_Version::has_lqarx());
 7711   effect(TEMP_DEF res, USE_KILL src2, USE_KILL mem_ptr, TEMP tmp1, TEMP tmp2, TEMP cr0); // TEMP_DEF to avoid jump
 7712   format %{ "weak CMPXCHGB acq $res, $mem_ptr, $src1, $src2; as bool" %}
 7713   ins_encode %{
 7714     // CmpxchgX sets CCR0 to cmpX(src1, src2) and Rres to 'true'/'false'.
 7715     __ cmpxchgb(CCR0, R0, $src1$$Register, $src2$$Register, $mem_ptr$$Register, $tmp1$$Register, $tmp2$$Register,
 7716                 support_IRIW_for_not_multiple_copy_atomic_cpu ? MacroAssembler::MemBarAcq : MacroAssembler::MemBarFenceAfter,
 7717                 MacroAssembler::cmpxchgx_hint_atomic_update(), $res$$Register, true, /*weak*/ true);
 7718   %}
 7719   ins_pipe(pipe_class_default);
 7720 %}
 7721 
 7722 instruct weakCompareAndSwapS_regP_regI_regI(iRegIdst res, iRegPdst mem_ptr, iRegIsrc src1, iRegIsrc src2, flagsRegCR0 cr0) %{
 7723   match(Set res (WeakCompareAndSwapS mem_ptr (Binary src1 src2)));
 7724   predicate(((CompareAndSwapNode*)n)->order() != MemNode::acquire && ((CompareAndSwapNode*)n)->order() != MemNode::seqcst && VM_Version::has_lqarx());
 7725   effect(TEMP_DEF res, TEMP cr0); // TEMP_DEF to avoid jump
 7726   format %{ "weak CMPXCHGH $res, $mem_ptr, $src1, $src2; as bool" %}
 7727   ins_encode %{
 7728     // CmpxchgX sets CCR0 to cmpX(src1, src2) and Rres to 'true'/'false'.
 7729     __ cmpxchgh(CCR0, R0, $src1$$Register, $src2$$Register, $mem_ptr$$Register, noreg, noreg,
 7730                 MacroAssembler::MemBarNone,
 7731                 MacroAssembler::cmpxchgx_hint_atomic_update(), $res$$Register, true, /*weak*/ true);
 7732   %}
 7733   ins_pipe(pipe_class_default);
 7734 %}
 7735 
 7736 instruct weakCompareAndSwapS4_regP_regI_regI(iRegIdst res, rarg3RegP mem_ptr, iRegIsrc src1, rarg4RegI src2, iRegIdst tmp1, iRegIdst tmp2, flagsRegCR0 cr0) %{
 7737   match(Set res (WeakCompareAndSwapS mem_ptr (Binary src1 src2)));
 7738   predicate(((CompareAndSwapNode*)n)->order() != MemNode::acquire && ((CompareAndSwapNode*)n)->order() != MemNode::seqcst && !VM_Version::has_lqarx());
 7739   effect(TEMP_DEF res, USE_KILL src2, USE_KILL mem_ptr, TEMP tmp1, TEMP tmp2, TEMP cr0); // TEMP_DEF to avoid jump
 7740   format %{ "weak CMPXCHGH $res, $mem_ptr, $src1, $src2; as bool" %}
 7741   ins_encode %{
 7742     // CmpxchgX sets CCR0 to cmpX(src1, src2) and Rres to 'true'/'false'.
 7743     __ cmpxchgh(CCR0, R0, $src1$$Register, $src2$$Register, $mem_ptr$$Register, $tmp1$$Register, $tmp2$$Register,
 7744                 MacroAssembler::MemBarNone,
 7745                 MacroAssembler::cmpxchgx_hint_atomic_update(), $res$$Register, true, /*weak*/ true);
 7746   %}
 7747   ins_pipe(pipe_class_default);
 7748 %}
 7749 
 7750 instruct weakCompareAndSwapS_acq_regP_regI_regI(iRegIdst res, iRegPdst mem_ptr, iRegIsrc src1, iRegIsrc src2, flagsRegCR0 cr0) %{
 7751   match(Set res (WeakCompareAndSwapS mem_ptr (Binary src1 src2)));
 7752   predicate((((CompareAndSwapNode*)n)->order() == MemNode::acquire || ((CompareAndSwapNode*)n)->order() == MemNode::seqcst) && VM_Version::has_lqarx());
 7753   effect(TEMP_DEF res, TEMP cr0); // TEMP_DEF to avoid jump
 7754   format %{ "weak CMPXCHGH acq $res, $mem_ptr, $src1, $src2; as bool" %}
 7755   ins_encode %{
 7756     // CmpxchgX sets CCR0 to cmpX(src1, src2) and Rres to 'true'/'false'.
 7757     __ cmpxchgh(CCR0, R0, $src1$$Register, $src2$$Register, $mem_ptr$$Register, noreg, noreg,
 7758                 support_IRIW_for_not_multiple_copy_atomic_cpu ? MacroAssembler::MemBarAcq : MacroAssembler::MemBarFenceAfter,
 7759                 MacroAssembler::cmpxchgx_hint_atomic_update(), $res$$Register, true, /*weak*/ true);
 7760   %}
 7761   ins_pipe(pipe_class_default);
 7762 %}
 7763 
 7764 instruct weakCompareAndSwapS4_acq_regP_regI_regI(iRegIdst res, rarg3RegP mem_ptr, iRegIsrc src1, rarg4RegI src2, iRegIdst tmp1, iRegIdst tmp2, flagsRegCR0 cr0) %{
 7765   match(Set res (WeakCompareAndSwapS mem_ptr (Binary src1 src2)));
 7766   predicate((((CompareAndSwapNode*)n)->order() == MemNode::acquire || ((CompareAndSwapNode*)n)->order() == MemNode::seqcst) && !VM_Version::has_lqarx());
 7767   effect(TEMP_DEF res, USE_KILL src2, USE_KILL mem_ptr, TEMP tmp1, TEMP tmp2, TEMP cr0); // TEMP_DEF to avoid jump
 7768   format %{ "weak CMPXCHGH acq $res, $mem_ptr, $src1, $src2; as bool" %}
 7769   ins_encode %{
 7770     // CmpxchgX sets CCR0 to cmpX(src1, src2) and Rres to 'true'/'false'.
 7771     __ cmpxchgh(CCR0, R0, $src1$$Register, $src2$$Register, $mem_ptr$$Register, $tmp1$$Register, $tmp2$$Register,
 7772                 support_IRIW_for_not_multiple_copy_atomic_cpu ? MacroAssembler::MemBarAcq : MacroAssembler::MemBarFenceAfter,
 7773                 MacroAssembler::cmpxchgx_hint_atomic_update(), $res$$Register, true, /*weak*/ true);
 7774   %}
 7775   ins_pipe(pipe_class_default);
 7776 %}
 7777 
 7778 instruct weakCompareAndSwapI_regP_regI_regI(iRegIdst res, iRegPdst mem_ptr, iRegIsrc src1, iRegIsrc src2, flagsRegCR0 cr0) %{
 7779   match(Set res (WeakCompareAndSwapI mem_ptr (Binary src1 src2)));
 7780   predicate(((CompareAndSwapNode*)n)->order() != MemNode::acquire && ((CompareAndSwapNode*)n)->order() != MemNode::seqcst);
 7781   effect(TEMP_DEF res, TEMP cr0); // TEMP_DEF to avoid jump
 7782   format %{ "weak CMPXCHGW $res, $mem_ptr, $src1, $src2; as bool" %}
 7783   ins_encode %{
 7784     // CmpxchgX sets CCR0 to cmpX(src1, src2) and Rres to 'true'/'false'.
 7785     __ cmpxchgw(CCR0, R0, $src1$$Register, $src2$$Register, $mem_ptr$$Register,
 7786                 MacroAssembler::MemBarNone,
 7787                 MacroAssembler::cmpxchgx_hint_atomic_update(), $res$$Register, true, /*weak*/ true);
 7788   %}
 7789   ins_pipe(pipe_class_default);
 7790 %}
 7791 
 7792 instruct weakCompareAndSwapI_acq_regP_regI_regI(iRegIdst res, iRegPdst mem_ptr, iRegIsrc src1, iRegIsrc src2, flagsRegCR0 cr0) %{
 7793   match(Set res (WeakCompareAndSwapI mem_ptr (Binary src1 src2)));
 7794   predicate(((CompareAndSwapNode*)n)->order() == MemNode::acquire || ((CompareAndSwapNode*)n)->order() == MemNode::seqcst);
 7795   effect(TEMP_DEF res, TEMP cr0); // TEMP_DEF to avoid jump
 7796   format %{ "weak CMPXCHGW acq $res, $mem_ptr, $src1, $src2; as bool" %}
 7797   ins_encode %{
 7798     // CmpxchgX sets CCR0 to cmpX(src1, src2) and Rres to 'true'/'false'.
 7799     // Acquire only needed in successful case. Weak node is allowed to report unsuccessful in additional rare cases and
 7800     // value is never passed to caller.
 7801     __ cmpxchgw(CCR0, R0, $src1$$Register, $src2$$Register, $mem_ptr$$Register,
 7802                 support_IRIW_for_not_multiple_copy_atomic_cpu ? MacroAssembler::MemBarAcq : MacroAssembler::MemBarFenceAfter,
 7803                 MacroAssembler::cmpxchgx_hint_atomic_update(), $res$$Register, true, /*weak*/ true);
 7804   %}
 7805   ins_pipe(pipe_class_default);
 7806 %}
 7807 
 7808 instruct weakCompareAndSwapN_regP_regN_regN(iRegIdst res, iRegPdst mem_ptr, iRegNsrc src1, iRegNsrc src2, flagsRegCR0 cr0) %{
 7809   match(Set res (WeakCompareAndSwapN mem_ptr (Binary src1 src2)));
 7810   predicate(((CompareAndSwapNode*)n)->order() != MemNode::acquire && ((CompareAndSwapNode*)n)->order() != MemNode::seqcst);
 7811   effect(TEMP_DEF res, TEMP cr0); // TEMP_DEF to avoid jump
 7812   format %{ "weak CMPXCHGW $res, $mem_ptr, $src1, $src2; as bool" %}
 7813   ins_encode %{
 7814     // CmpxchgX sets CCR0 to cmpX(src1, src2) and Rres to 'true'/'false'.
 7815     __ cmpxchgw(CCR0, R0, $src1$$Register, $src2$$Register, $mem_ptr$$Register,
 7816                 MacroAssembler::MemBarNone,
 7817                 MacroAssembler::cmpxchgx_hint_atomic_update(), $res$$Register, true, /*weak*/ true);
 7818   %}
 7819   ins_pipe(pipe_class_default);
 7820 %}
 7821 
 7822 instruct weakCompareAndSwapN_acq_regP_regN_regN(iRegIdst res, iRegPdst mem_ptr, iRegNsrc src1, iRegNsrc src2, flagsRegCR0 cr0) %{
 7823   match(Set res (WeakCompareAndSwapN mem_ptr (Binary src1 src2)));
 7824   predicate(((CompareAndSwapNode*)n)->order() == MemNode::acquire || ((CompareAndSwapNode*)n)->order() == MemNode::seqcst);
 7825   effect(TEMP_DEF res, TEMP cr0); // TEMP_DEF to avoid jump
 7826   format %{ "weak CMPXCHGW acq $res, $mem_ptr, $src1, $src2; as bool" %}
 7827   ins_encode %{
 7828     // CmpxchgX sets CCR0 to cmpX(src1, src2) and Rres to 'true'/'false'.
 7829     // Acquire only needed in successful case. Weak node is allowed to report unsuccessful in additional rare cases and
 7830     // value is never passed to caller.
 7831     __ cmpxchgw(CCR0, R0, $src1$$Register, $src2$$Register, $mem_ptr$$Register,
 7832                 support_IRIW_for_not_multiple_copy_atomic_cpu ? MacroAssembler::MemBarAcq : MacroAssembler::MemBarFenceAfter,
 7833                 MacroAssembler::cmpxchgx_hint_atomic_update(), $res$$Register, true, /*weak*/ true);
 7834   %}
 7835   ins_pipe(pipe_class_default);
 7836 %}
 7837 
 7838 instruct weakCompareAndSwapL_regP_regL_regL(iRegIdst res, iRegPdst mem_ptr, iRegLsrc src1, iRegLsrc src2, flagsRegCR0 cr0) %{
 7839   match(Set res (WeakCompareAndSwapL mem_ptr (Binary src1 src2)));
 7840   predicate(((CompareAndSwapNode*)n)->order() != MemNode::acquire && ((CompareAndSwapNode*)n)->order() != MemNode::seqcst);
 7841   effect(TEMP_DEF res, TEMP cr0); // TEMP_DEF to avoid jump
 7842   format %{ "weak CMPXCHGD $res, $mem_ptr, $src1, $src2; as bool" %}
 7843   ins_encode %{
 7844     // CmpxchgX sets CCR0 to cmpX(src1, src2) and Rres to 'true'/'false'.
 7846     __ cmpxchgd(CCR0, R0, $src1$$Register, $src2$$Register, $mem_ptr$$Register,
 7847                 MacroAssembler::MemBarNone,
 7848                 MacroAssembler::cmpxchgx_hint_atomic_update(), $res$$Register, NULL, true, /*weak*/ true);
 7849   %}
 7850   ins_pipe(pipe_class_default);
 7851 %}
 7852 
 7853 instruct weakCompareAndSwapL_acq_regP_regL_regL(iRegIdst res, iRegPdst mem_ptr, iRegLsrc src1, iRegLsrc src2, flagsRegCR0 cr0) %{
 7854   match(Set res (WeakCompareAndSwapL mem_ptr (Binary src1 src2)));
 7855   predicate(((CompareAndSwapNode*)n)->order() == MemNode::acquire || ((CompareAndSwapNode*)n)->order() == MemNode::seqcst);
 7856   effect(TEMP_DEF res, TEMP cr0); // TEMP_DEF to avoid jump
 7857   format %{ "weak CMPXCHGD acq $res, $mem_ptr, $src1, $src2; as bool" %}
 7858   ins_encode %{
 7859     // CmpxchgX sets CCR0 to cmpX(src1, src2) and Rres to 'true'/'false'.
 7860     // Acquire only needed in successful case. Weak node is allowed to report unsuccessful in additional rare cases and
 7861     // value is never passed to caller.
 7862     __ cmpxchgd(CCR0, R0, $src1$$Register, $src2$$Register, $mem_ptr$$Register,
 7863                 support_IRIW_for_not_multiple_copy_atomic_cpu ? MacroAssembler::MemBarAcq : MacroAssembler::MemBarFenceAfter,
 7864                 MacroAssembler::cmpxchgx_hint_atomic_update(), $res$$Register, NULL, true, /*weak*/ true);
 7865   %}
 7866   ins_pipe(pipe_class_default);
 7867 %}
 7868 
 7869 instruct weakCompareAndSwapP_regP_regP_regP(iRegIdst res, iRegPdst mem_ptr, iRegPsrc src1, iRegPsrc src2, flagsRegCR0 cr0) %{
 7870   match(Set res (WeakCompareAndSwapP mem_ptr (Binary src1 src2)));
 7871   predicate(((CompareAndSwapNode*)n)->order() != MemNode::acquire && ((CompareAndSwapNode*)n)->order() != MemNode::seqcst);
 7872   effect(TEMP_DEF res, TEMP cr0); // TEMP_DEF to avoid jump
 7873   format %{ "weak CMPXCHGD $res, $mem_ptr, $src1, $src2; as bool; ptr" %}
 7874   ins_encode %{
 7875     // CmpxchgX sets CCR0 to cmpX(src1, src2) and Rres to 'true'/'false'.
 7876     __ cmpxchgd(CCR0, R0, $src1$$Register, $src2$$Register, $mem_ptr$$Register,
 7877                 MacroAssembler::MemBarNone,
 7878                 MacroAssembler::cmpxchgx_hint_atomic_update(), $res$$Register, NULL, true, /*weak*/ true);
 7879   %}
 7880   ins_pipe(pipe_class_default);
 7881 %}
 7882 
 7883 instruct weakCompareAndSwapP_acq_regP_regP_regP(iRegIdst res, iRegPdst mem_ptr, iRegPsrc src1, iRegPsrc src2, flagsRegCR0 cr0) %{
 7884   match(Set res (WeakCompareAndSwapP mem_ptr (Binary src1 src2)));
 7885   predicate(((CompareAndSwapNode*)n)->order() == MemNode::acquire || ((CompareAndSwapNode*)n)->order() == MemNode::seqcst);
 7886   effect(TEMP_DEF res, TEMP cr0); // TEMP_DEF to avoid jump
 7887   format %{ "weak CMPXCHGD acq $res, $mem_ptr, $src1, $src2; as bool; ptr" %}
 7888   ins_encode %{
 7889     // CmpxchgX sets CCR0 to cmpX(src1, src2) and Rres to 'true'/'false'.
 7890     // Acquire only needed in successful case. Weak node is allowed to report unsuccessful in additional rare cases and
 7891     // value is never passed to caller.
 7892     __ cmpxchgd(CCR0, R0, $src1$$Register, $src2$$Register, $mem_ptr$$Register,
 7893                 support_IRIW_for_not_multiple_copy_atomic_cpu ? MacroAssembler::MemBarAcq : MacroAssembler::MemBarFenceAfter,
 7894                 MacroAssembler::cmpxchgx_hint_atomic_update(), $res$$Register, NULL, true, /*weak*/ true);
 7895   %}
 7896   ins_pipe(pipe_class_default);
 7897 %}
 7898 
 7899 // CompareAndExchange
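// Unlike CompareAndSwap, CompareAndExchange returns the value found at mem_ptr
// in $res instead of a boolean. Hence $res is passed to cmpxchgX as the
// "current value" register and noreg as the boolean result register below.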
 7900 
 7901 instruct compareAndExchangeB_regP_regI_regI(iRegIdst res, iRegPdst mem_ptr, iRegIsrc src1, iRegIsrc src2, flagsRegCR0 cr0) %{
 7902   match(Set res (CompareAndExchangeB mem_ptr (Binary src1 src2)));
 7903   predicate(((CompareAndSwapNode*)n)->order() != MemNode::acquire && ((CompareAndSwapNode*)n)->order() != MemNode::seqcst && VM_Version::has_lqarx());
 7904   effect(TEMP_DEF res, TEMP cr0);
 7905   format %{ "CMPXCHGB $res, $mem_ptr, $src1, $src2; as int" %}
 7906   ins_encode %{
 7907     // CmpxchgX sets CCR0 to cmpX(src1, src2) and Rres to 'true'/'false'.
 7908     __ cmpxchgb(CCR0, $res$$Register, $src1$$Register, $src2$$Register, $mem_ptr$$Register, noreg, noreg,
 7909                 MacroAssembler::MemBarNone, MacroAssembler::cmpxchgx_hint_atomic_update(),
 7910                 noreg, true);
 7911   %}
 7912   ins_pipe(pipe_class_default);
 7913 %}
 7914 
 7915 instruct compareAndExchangeB4_regP_regI_regI(iRegIdst res, rarg3RegP mem_ptr, iRegIsrc src1, rarg4RegI src2, iRegIdst tmp1, flagsRegCR0 cr0) %{
 7916   match(Set res (CompareAndExchangeB mem_ptr (Binary src1 src2)));
 7917   predicate(((CompareAndSwapNode*)n)->order() != MemNode::acquire && ((CompareAndSwapNode*)n)->order() != MemNode::seqcst && !VM_Version::has_lqarx());
 7918   effect(TEMP_DEF res, USE_KILL src2, USE_KILL mem_ptr, TEMP tmp1, TEMP cr0);
 7919   format %{ "CMPXCHGB $res, $mem_ptr, $src1, $src2; as int" %}
 7920   ins_encode %{
 7921     // CmpxchgX sets CCR0 to cmpX(src1, src2) and Rres to 'true'/'false'.
 7922     __ cmpxchgb(CCR0, $res$$Register, $src1$$Register, $src2$$Register, $mem_ptr$$Register, $tmp1$$Register, R0,
 7923                 MacroAssembler::MemBarNone, MacroAssembler::cmpxchgx_hint_atomic_update(),
 7924                 noreg, true);
 7925   %}
 7926   ins_pipe(pipe_class_default);
 7927 %}
 7928 
 7929 instruct compareAndExchangeB_acq_regP_regI_regI(iRegIdst res, iRegPdst mem_ptr, iRegIsrc src1, iRegIsrc src2, flagsRegCR0 cr0) %{
 7930   match(Set res (CompareAndExchangeB mem_ptr (Binary src1 src2)));
 7931   predicate((((CompareAndSwapNode*)n)->order() == MemNode::acquire || ((CompareAndSwapNode*)n)->order() == MemNode::seqcst) && VM_Version::has_lqarx());
 7932   effect(TEMP_DEF res, TEMP cr0);
 7933   format %{ "CMPXCHGB acq $res, $mem_ptr, $src1, $src2; as int" %}
 7934   ins_encode %{
 7935     // CmpxchgX sets CCR0 to cmpX(src1, src2) and Rres to 'true'/'false'.
 7936     __ cmpxchgb(CCR0, $res$$Register, $src1$$Register, $src2$$Register, $mem_ptr$$Register, noreg, noreg,
 7937                 MacroAssembler::MemBarNone, MacroAssembler::cmpxchgx_hint_atomic_update(),
 7938                 noreg, true);
 7939     if (support_IRIW_for_not_multiple_copy_atomic_cpu) {
 7940       __ isync();
 7941     } else {
 7942       // isync would be sufficient in case of CompareAndExchangeAcquire, but we currently don't optimize for that.
 7943       __ sync();
 7944     }
 7945   %}
 7946   ins_pipe(pipe_class_default);
 7947 %}
 7948 
 7949 instruct compareAndExchangeB4_acq_regP_regI_regI(iRegIdst res, rarg3RegP mem_ptr, iRegIsrc src1, rarg4RegI src2, iRegIdst tmp1, flagsRegCR0 cr0) %{
 7950   match(Set res (CompareAndExchangeB mem_ptr (Binary src1 src2)));
 7951   predicate((((CompareAndSwapNode*)n)->order() == MemNode::acquire || ((CompareAndSwapNode*)n)->order() == MemNode::seqcst) && !VM_Version::has_lqarx());
 7952   effect(TEMP_DEF res, USE_KILL src2, USE_KILL mem_ptr, TEMP tmp1, TEMP cr0);
 7953   format %{ "CMPXCHGB acq $res, $mem_ptr, $src1, $src2; as int" %}
 7954   ins_encode %{
 7955     // CmpxchgX sets CCR0 to cmpX(src1, src2) and Rres to 'true'/'false'.
 7956     __ cmpxchgb(CCR0, $res$$Register, $src1$$Register, $src2$$Register, $mem_ptr$$Register, $tmp1$$Register, R0,
 7957                 MacroAssembler::MemBarNone, MacroAssembler::cmpxchgx_hint_atomic_update(),
 7958                 noreg, true);
 7959     if (support_IRIW_for_not_multiple_copy_atomic_cpu) {
 7960       __ isync();
 7961     } else {
 7962       // isync would be sufficient in case of CompareAndExchangeAcquire, but we currently don't optimize for that.
 7963       __ sync();
 7964     }
 7965   %}
 7966   ins_pipe(pipe_class_default);
 7967 %}
 7968 
 7969 instruct compareAndExchangeS_regP_regI_regI(iRegIdst res, iRegPdst mem_ptr, iRegIsrc src1, iRegIsrc src2, flagsRegCR0 cr0) %{
 7970   match(Set res (CompareAndExchangeS mem_ptr (Binary src1 src2)));
 7971   predicate(((CompareAndSwapNode*)n)->order() != MemNode::acquire && ((CompareAndSwapNode*)n)->order() != MemNode::seqcst && VM_Version::has_lqarx());
 7972   effect(TEMP_DEF res, TEMP cr0);
 7973   format %{ "CMPXCHGH $res, $mem_ptr, $src1, $src2; as int" %}
 7974   ins_encode %{
 7975     // CmpxchgX sets CCR0 to cmpX(src1, src2) and Rres to 'true'/'false'.
 7976     __ cmpxchgh(CCR0, $res$$Register, $src1$$Register, $src2$$Register, $mem_ptr$$Register, noreg, noreg,
 7977                 MacroAssembler::MemBarNone, MacroAssembler::cmpxchgx_hint_atomic_update(),
 7978                 noreg, true);
 7979   %}
 7980   ins_pipe(pipe_class_default);
 7981 %}
 7982 
 7983 instruct compareAndExchangeS4_regP_regI_regI(iRegIdst res, rarg3RegP mem_ptr, iRegIsrc src1, rarg4RegI src2, iRegIdst tmp1, flagsRegCR0 cr0) %{
 7984   match(Set res (CompareAndExchangeS mem_ptr (Binary src1 src2)));
 7985   predicate(((CompareAndSwapNode*)n)->order() != MemNode::acquire && ((CompareAndSwapNode*)n)->order() != MemNode::seqcst && !VM_Version::has_lqarx());
 7986   effect(TEMP_DEF res, USE_KILL src2, USE_KILL mem_ptr, TEMP tmp1, TEMP cr0);
 7987   format %{ "CMPXCHGH $res, $mem_ptr, $src1, $src2; as int" %}
 7988   ins_encode %{
 7989     // CmpxchgX sets CCR0 to cmpX(src1, src2) and Rres to 'true'/'false'.
 7990     __ cmpxchgh(CCR0, $res$$Register, $src1$$Register, $src2$$Register, $mem_ptr$$Register, $tmp1$$Register, R0,
 7991                 MacroAssembler::MemBarNone, MacroAssembler::cmpxchgx_hint_atomic_update(),
 7992                 noreg, true);
 7993   %}
 7994   ins_pipe(pipe_class_default);
 7995 %}
 7996 
 7997 instruct compareAndExchangeS_acq_regP_regI_regI(iRegIdst res, iRegPdst mem_ptr, iRegIsrc src1, iRegIsrc src2, flagsRegCR0 cr0) %{
 7998   match(Set res (CompareAndExchangeS mem_ptr (Binary src1 src2)));
 7999   predicate((((CompareAndSwapNode*)n)->order() == MemNode::acquire || ((CompareAndSwapNode*)n)->order() == MemNode::seqcst) && VM_Version::has_lqarx());
 8000   effect(TEMP_DEF res, TEMP cr0);
 8001   format %{ "CMPXCHGH acq $res, $mem_ptr, $src1, $src2; as int" %}
 8002   ins_encode %{
 8003     // CmpxchgX sets CCR0 to cmpX(src1, src2) and Rres to 'true'/'false'.
 8004     __ cmpxchgh(CCR0, $res$$Register, $src1$$Register, $src2$$Register, $mem_ptr$$Register, noreg, noreg,
 8005                 MacroAssembler::MemBarNone, MacroAssembler::cmpxchgx_hint_atomic_update(),
 8006                 noreg, true);
 8007     if (support_IRIW_for_not_multiple_copy_atomic_cpu) {
 8008       __ isync();
 8009     } else {
 8010       // isync would be sufficient in case of CompareAndExchangeAcquire, but we currently don't optimize for that.
 8011       __ sync();
 8012     }
 8013   %}
 8014   ins_pipe(pipe_class_default);
 8015 %}
 8016 
 8017 instruct compareAndExchangeS4_acq_regP_regI_regI(iRegIdst res, rarg3RegP mem_ptr, iRegIsrc src1, rarg4RegI src2, iRegIdst tmp1, flagsRegCR0 cr0) %{
 8018   match(Set res (CompareAndExchangeS mem_ptr (Binary src1 src2)));
 8019   predicate((((CompareAndSwapNode*)n)->order() == MemNode::acquire || ((CompareAndSwapNode*)n)->order() == MemNode::seqcst) && !VM_Version::has_lqarx());
 8020   effect(TEMP_DEF res, USE_KILL src2, USE_KILL mem_ptr, TEMP tmp1, TEMP cr0);
 8021   format %{ "CMPXCHGH acq $res, $mem_ptr, $src1, $src2; as int" %}
 8022   ins_encode %{
 8023     // CmpxchgX sets CCR0 to cmpX(src1, src2) and Rres to 'true'/'false'.
 8024     __ cmpxchgh(CCR0, $res$$Register, $src1$$Register, $src2$$Register, $mem_ptr$$Register, $tmp1$$Register, R0,
 8025                 MacroAssembler::MemBarNone, MacroAssembler::cmpxchgx_hint_atomic_update(),
 8026                 noreg, true);
 8027     if (support_IRIW_for_not_multiple_copy_atomic_cpu) {
 8028       __ isync();
 8029     } else {
 8030       // isync would be sufficient in case of CompareAndExchangeAcquire, but we currently don't optimize for that.
 8031       __ sync();
 8032     }
 8033   %}
 8034   ins_pipe(pipe_class_default);
 8035 %}
 8036 
 8037 instruct compareAndExchangeI_regP_regI_regI(iRegIdst res, iRegPdst mem_ptr, iRegIsrc src1, iRegIsrc src2, flagsRegCR0 cr0) %{
 8038   match(Set res (CompareAndExchangeI mem_ptr (Binary src1 src2)));
 8039   predicate(((CompareAndSwapNode*)n)->order() != MemNode::acquire && ((CompareAndSwapNode*)n)->order() != MemNode::seqcst);
 8040   effect(TEMP_DEF res, TEMP cr0);
 8041   format %{ "CMPXCHGW $res, $mem_ptr, $src1, $src2; as int" %}
 8042   ins_encode %{
 8043     // CmpxchgX sets CCR0 to cmpX(src1, src2) and Rres to 'true'/'false'.
 8044     __ cmpxchgw(CCR0, $res$$Register, $src1$$Register, $src2$$Register, $mem_ptr$$Register,
 8045                 MacroAssembler::MemBarNone, MacroAssembler::cmpxchgx_hint_atomic_update(),
 8046                 noreg, true);
 8047   %}
 8048   ins_pipe(pipe_class_default);
 8049 %}
 8050 
 8051 instruct compareAndExchangeI_acq_regP_regI_regI(iRegIdst res, iRegPdst mem_ptr, iRegIsrc src1, iRegIsrc src2, flagsRegCR0 cr0) %{
 8052   match(Set res (CompareAndExchangeI mem_ptr (Binary src1 src2)));
 8053   predicate(((CompareAndSwapNode*)n)->order() == MemNode::acquire || ((CompareAndSwapNode*)n)->order() == MemNode::seqcst);
 8054   effect(TEMP_DEF res, TEMP cr0);
 8055   format %{ "CMPXCHGW acq $res, $mem_ptr, $src1, $src2; as int" %}
 8056   ins_encode %{
 8057     // CmpxchgX sets CCR0 to cmpX(src1, src2) and Rres to 'true'/'false'.
 8058     __ cmpxchgw(CCR0, $res$$Register, $src1$$Register, $src2$$Register, $mem_ptr$$Register,
 8059                 MacroAssembler::MemBarNone, MacroAssembler::cmpxchgx_hint_atomic_update(),
 8060                 noreg, true);
 8061     if (support_IRIW_for_not_multiple_copy_atomic_cpu) {
 8062       __ isync();
 8063     } else {
 8064       // isync would be sufficient in case of CompareAndExchangeAcquire, but we currently don't optimize for that.
 8065       __ sync();
 8066     }
 8067   %}
 8068   ins_pipe(pipe_class_default);
 8069 %}
 8070 
 8071 instruct compareAndExchangeN_regP_regN_regN(iRegNdst res, iRegPdst mem_ptr, iRegNsrc src1, iRegNsrc src2, flagsRegCR0 cr0) %{
 8072   match(Set res (CompareAndExchangeN mem_ptr (Binary src1 src2)));
 8073   predicate(((CompareAndSwapNode*)n)->order() != MemNode::acquire && ((CompareAndSwapNode*)n)->order() != MemNode::seqcst);
 8074   effect(TEMP_DEF res, TEMP cr0);
 8075   format %{ "CMPXCHGW $res, $mem_ptr, $src1, $src2; as narrow oop" %}
 8076   ins_encode %{
 8077     // CmpxchgX sets CCR0 to cmpX(src1, src2) and Rres to 'true'/'false'.
 8078     __ cmpxchgw(CCR0, $res$$Register, $src1$$Register, $src2$$Register, $mem_ptr$$Register,
 8079                 MacroAssembler::MemBarNone, MacroAssembler::cmpxchgx_hint_atomic_update(),
 8080                 noreg, true);
 8081   %}
 8082   ins_pipe(pipe_class_default);
 8083 %}
 8084 
 8085 instruct compareAndExchangeN_acq_regP_regN_regN(iRegNdst res, iRegPdst mem_ptr, iRegNsrc src1, iRegNsrc src2, flagsRegCR0 cr0) %{
 8086   match(Set res (CompareAndExchangeN mem_ptr (Binary src1 src2)));
 8087   predicate(((CompareAndSwapNode*)n)->order() == MemNode::acquire || ((CompareAndSwapNode*)n)->order() == MemNode::seqcst);
 8088   effect(TEMP_DEF res, TEMP cr0);
 8089   format %{ "CMPXCHGW acq $res, $mem_ptr, $src1, $src2; as narrow oop" %}
 8090   ins_encode %{
 8091     // CmpxchgX sets CCR0 to cmpX(src1, src2) and Rres to 'true'/'false'.
 8092     __ cmpxchgw(CCR0, $res$$Register, $src1$$Register, $src2$$Register, $mem_ptr$$Register,
 8093                 MacroAssembler::MemBarNone, MacroAssembler::cmpxchgx_hint_atomic_update(),
 8094                 noreg, true);
 8095     if (support_IRIW_for_not_multiple_copy_atomic_cpu) {
 8096       __ isync();
 8097     } else {
 8098       // isync would be sufficient in case of CompareAndExchangeAcquire, but we currently don't optimize for that.
 8099       __ sync();
 8100     }
 8101   %}
 8102   ins_pipe(pipe_class_default);
 8103 %}
 8104 
 8105 instruct compareAndExchangeL_regP_regL_regL(iRegLdst res, iRegPdst mem_ptr, iRegLsrc src1, iRegLsrc src2, flagsRegCR0 cr0) %{
 8106   match(Set res (CompareAndExchangeL mem_ptr (Binary src1 src2)));
 8107   predicate(((CompareAndSwapNode*)n)->order() != MemNode::acquire && ((CompareAndSwapNode*)n)->order() != MemNode::seqcst);
 8108   effect(TEMP_DEF res, TEMP cr0);
 8109   format %{ "CMPXCHGD $res, $mem_ptr, $src1, $src2; as long" %}
 8110   ins_encode %{
 8111     // CmpxchgX sets CCR0 to cmpX(src1, src2) and Rres to 'true'/'false'.
 8112     __ cmpxchgd(CCR0, $res$$Register, $src1$$Register, $src2$$Register, $mem_ptr$$Register,
 8113                 MacroAssembler::MemBarNone, MacroAssembler::cmpxchgx_hint_atomic_update(),
 8114                 noreg, NULL, true);
 8115   %}
 8116   ins_pipe(pipe_class_default);
 8117 %}
 8118 
 8119 instruct compareAndExchangeL_acq_regP_regL_regL(iRegLdst res, iRegPdst mem_ptr, iRegLsrc src1, iRegLsrc src2, flagsRegCR0 cr0) %{
 8120   match(Set res (CompareAndExchangeL mem_ptr (Binary src1 src2)));
 8121   predicate(((CompareAndSwapNode*)n)->order() == MemNode::acquire || ((CompareAndSwapNode*)n)->order() == MemNode::seqcst);
 8122   effect(TEMP_DEF res, TEMP cr0);
 8123   format %{ "CMPXCHGD acq $res, $mem_ptr, $src1, $src2; as long" %}
 8124   ins_encode %{
 8125     // CmpxchgX sets CCR0 to cmpX(src1, src2) and Rres to 'true'/'false'.
 8126     __ cmpxchgd(CCR0, $res$$Register, $src1$$Register, $src2$$Register, $mem_ptr$$Register,
 8127                 MacroAssembler::MemBarNone, MacroAssembler::cmpxchgx_hint_atomic_update(),
 8128                 noreg, NULL, true);
 8129     if (support_IRIW_for_not_multiple_copy_atomic_cpu) {
 8130       __ isync();
 8131     } else {
 8132       // isync would be sufficient in case of CompareAndExchangeAcquire, but we currently don't optimize for that.
 8133       __ sync();
 8134     }
 8135   %}
 8136   ins_pipe(pipe_class_default);
 8137 %}
 8138 
 8139 instruct compareAndExchangeP_regP_regP_regP(iRegPdst res, iRegPdst mem_ptr, iRegPsrc src1, iRegPsrc src2, flagsRegCR0 cr0) %{
 8140   match(Set res (CompareAndExchangeP mem_ptr (Binary src1 src2)));
 8141   predicate(((CompareAndSwapNode*)n)->order() != MemNode::acquire && ((CompareAndSwapNode*)n)->order() != MemNode::seqcst);
 8142   effect(TEMP_DEF res, TEMP cr0);
 8143   format %{ "CMPXCHGD $res, $mem_ptr, $src1, $src2; as ptr; ptr" %}
 8144   ins_encode %{
 8145     // CmpxchgX sets CCR0 to cmpX(src1, src2) and Rres to 'true'/'false'.
 8146     __ cmpxchgd(CCR0, $res$$Register, $src1$$Register, $src2$$Register, $mem_ptr$$Register,
 8147                 MacroAssembler::MemBarNone, MacroAssembler::cmpxchgx_hint_atomic_update(),
 8148                 noreg, NULL, true);
 8149   %}
 8150   ins_pipe(pipe_class_default);
 8151 %}
 8152 
 8153 instruct compareAndExchangeP_acq_regP_regP_regP(iRegPdst res, iRegPdst mem_ptr, iRegPsrc src1, iRegPsrc src2, flagsRegCR0 cr0) %{
 8154   match(Set res (CompareAndExchangeP mem_ptr (Binary src1 src2)));
 8155   predicate(((CompareAndSwapNode*)n)->order() == MemNode::acquire || ((CompareAndSwapNode*)n)->order() == MemNode::seqcst);
 8156   effect(TEMP_DEF res, TEMP cr0);
 8157   format %{ "CMPXCHGD acq $res, $mem_ptr, $src1, $src2; as ptr; ptr" %}
 8158   ins_encode %{
 8159     // CmpxchgX sets CCR0 to cmpX(src1, src2) and Rres to 'true'/'false'.
 8160     __ cmpxchgd(CCR0, $res$$Register, $src1$$Register, $src2$$Register, $mem_ptr$$Register,
 8161                 MacroAssembler::MemBarNone, MacroAssembler::cmpxchgx_hint_atomic_update(),
 8162                 noreg, NULL, true);
 8163     if (support_IRIW_for_not_multiple_copy_atomic_cpu) {
 8164       __ isync();
 8165     } else {
 8166       // isync would be sufficient in case of CompareAndExchangeAcquire, but we currently don't optimize for that.
 8167       __ sync();
 8168     }
 8169   %}
 8170   ins_pipe(pipe_class_default);
 8171 %}
 8172 
 8173 // Special RMW
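// GetAndAdd / GetAndSet return the previous value at mem_ptr in $res. The
// getandaddX / getandsetX helpers use load-reserve/store-conditional loops,
// followed here by the same trailing isync/sync policy as the strong CAS
// instructions above.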
 8174 
 8175 instruct getAndAddB(iRegIdst res, iRegPdst mem_ptr, iRegIsrc src, flagsRegCR0 cr0) %{
 8176   match(Set res (GetAndAddB mem_ptr src));
 8177   predicate(VM_Version::has_lqarx());
 8178   effect(TEMP_DEF res, TEMP cr0);
 8179   format %{ "GetAndAddB $res, $mem_ptr, $src" %}
 8180   ins_encode %{
 8181     __ getandaddb($res$$Register, $src$$Register, $mem_ptr$$Register,
 8182                   R0, noreg, noreg, MacroAssembler::cmpxchgx_hint_atomic_update());
 8183     if (support_IRIW_for_not_multiple_copy_atomic_cpu) {
 8184       __ isync();
 8185     } else {
 8186       __ sync();
 8187     }
 8188   %}
 8189   ins_pipe(pipe_class_default);
 8190 %}
 8191 
 8192 instruct getAndAddB4(iRegIdst res, rarg3RegP mem_ptr, iRegIsrc src, iRegIsrc tmp1, iRegIsrc tmp2, flagsRegCR0 cr0) %{
 8193   match(Set res (GetAndAddB mem_ptr src));
 8194   predicate(!VM_Version::has_lqarx());
 8195   effect(TEMP_DEF res, USE_KILL mem_ptr, TEMP tmp1, TEMP tmp2, TEMP cr0);
 8196   format %{ "GetAndAddB $res, $mem_ptr, $src" %}
 8197   ins_encode %{
 8198     __ getandaddb($res$$Register, $src$$Register, $mem_ptr$$Register,
 8199                   R0, $tmp1$$Register, $tmp2$$Register, MacroAssembler::cmpxchgx_hint_atomic_update());
 8200     if (support_IRIW_for_not_multiple_copy_atomic_cpu) {
 8201       __ isync();
 8202     } else {
 8203       __ sync();
 8204     }
 8205   %}
 8206   ins_pipe(pipe_class_default);
 8207 %}
 8208 
 8209 instruct getAndAddS(iRegIdst res, iRegPdst mem_ptr, iRegIsrc src, flagsRegCR0 cr0) %{
 8210   match(Set res (GetAndAddS mem_ptr src));
 8211   predicate(VM_Version::has_lqarx());
 8212   effect(TEMP_DEF res, TEMP cr0);
 8213   format %{ "GetAndAddS $res, $mem_ptr, $src" %}
 8214   ins_encode %{
 8215     __ getandaddh($res$$Register, $src$$Register, $mem_ptr$$Register,
 8216                   R0, noreg, noreg, MacroAssembler::cmpxchgx_hint_atomic_update());
 8217     if (support_IRIW_for_not_multiple_copy_atomic_cpu) {
 8218       __ isync();
 8219     } else {
 8220       __ sync();
 8221     }
 8222   %}
 8223   ins_pipe(pipe_class_default);
 8224 %}
 8225 
 8226 instruct getAndAddS4(iRegIdst res, rarg3RegP mem_ptr, iRegIsrc src, iRegIsrc tmp1, iRegIsrc tmp2, flagsRegCR0 cr0) %{
 8227   match(Set res (GetAndAddS mem_ptr src));
 8228   predicate(!VM_Version::has_lqarx());
 8229   effect(TEMP_DEF res, USE_KILL mem_ptr, TEMP tmp1, TEMP tmp2, TEMP cr0);
 8230   format %{ "GetAndAddS $res, $mem_ptr, $src" %}
 8231   ins_encode %{
 8232     __ getandaddh($res$$Register, $src$$Register, $mem_ptr$$Register,
 8233                   R0, $tmp1$$Register, $tmp2$$Register, MacroAssembler::cmpxchgx_hint_atomic_update());
 8234     if (support_IRIW_for_not_multiple_copy_atomic_cpu) {
 8235       __ isync();
 8236     } else {
 8237       __ sync();
 8238     }
 8239   %}
 8240   ins_pipe(pipe_class_default);
 8241 %}
 8242 
 8243 instruct getAndAddI(iRegIdst res, iRegPdst mem_ptr, iRegIsrc src, flagsRegCR0 cr0) %{
 8244   match(Set res (GetAndAddI mem_ptr src));
 8245   effect(TEMP_DEF res, TEMP cr0);
 8246   format %{ "GetAndAddI $res, $mem_ptr, $src" %}
 8247   ins_encode %{
 8248     __ getandaddw($res$$Register, $src$$Register, $mem_ptr$$Register,
 8249                   R0, MacroAssembler::cmpxchgx_hint_atomic_update());
 8250     if (support_IRIW_for_not_multiple_copy_atomic_cpu) {
 8251       __ isync();
 8252     } else {
 8253       __ sync();
 8254     }
 8255   %}
 8256   ins_pipe(pipe_class_default);
 8257 %}
 8258 
 8259 instruct getAndAddL(iRegLdst res, iRegPdst mem_ptr, iRegLsrc src, flagsRegCR0 cr0) %{
 8260   match(Set res (GetAndAddL mem_ptr src));
 8261   effect(TEMP_DEF res, TEMP cr0);
 8262   format %{ "GetAndAddL $res, $mem_ptr, $src" %}
 8263   ins_encode %{
 8264     __ getandaddd($res$$Register, $src$$Register, $mem_ptr$$Register,
 8265                   R0, MacroAssembler::cmpxchgx_hint_atomic_update());
 8266     if (support_IRIW_for_not_multiple_copy_atomic_cpu) {
 8267       __ isync();
 8268     } else {
 8269       __ sync();
 8270     }
 8271   %}
 8272   ins_pipe(pipe_class_default);
 8273 %}
 8274 
 8275 instruct getAndSetB(iRegIdst res, iRegPdst mem_ptr, iRegIsrc src, flagsRegCR0 cr0) %{
 8276   match(Set res (GetAndSetB mem_ptr src));
 8277   predicate(VM_Version::has_lqarx());
 8278   effect(TEMP_DEF res, TEMP cr0);
 8279   format %{ "GetAndSetB $res, $mem_ptr, $src" %}
 8280   ins_encode %{
 8281     __ getandsetb($res$$Register, $src$$Register, $mem_ptr$$Register,
 8282                   noreg, noreg, noreg, MacroAssembler::cmpxchgx_hint_atomic_update());
 8283     if (support_IRIW_for_not_multiple_copy_atomic_cpu) {
 8284       __ isync();
 8285     } else {
 8286       __ sync();
 8287     }
 8288   %}
 8289   ins_pipe(pipe_class_default);
 8290 %}
 8291 
 8292 instruct getAndSetB4(iRegIdst res, rarg3RegP mem_ptr, iRegIsrc src, iRegIsrc tmp1, iRegIsrc tmp2, flagsRegCR0 cr0) %{
 8293   match(Set res (GetAndSetB mem_ptr src));
 8294   predicate(!VM_Version::has_lqarx());
 8295   effect(TEMP_DEF res, USE_KILL mem_ptr, TEMP tmp1, TEMP tmp2, TEMP cr0);
 8296   format %{ "GetAndSetB $res, $mem_ptr, $src" %}
 8297   ins_encode %{
 8298     __ getandsetb($res$$Register, $src$$Register, $mem_ptr$$Register,
 8299                   R0, $tmp1$$Register, $tmp2$$Register, MacroAssembler::cmpxchgx_hint_atomic_update());
 8300     if (support_IRIW_for_not_multiple_copy_atomic_cpu) {
 8301       __ isync();
 8302     } else {
 8303       __ sync();
 8304     }
 8305   %}
 8306   ins_pipe(pipe_class_default);
 8307 %}
 8308 
 8309 instruct getAndSetS(iRegIdst res, iRegPdst mem_ptr, iRegIsrc src, flagsRegCR0 cr0) %{
 8310   match(Set res (GetAndSetS mem_ptr src));
 8311   predicate(VM_Version::has_lqarx());
 8312   effect(TEMP_DEF res, TEMP cr0);
 8313   format %{ "GetAndSetS $res, $mem_ptr, $src" %}
 8314   ins_encode %{
 8315     __ getandseth($res$$Register, $src$$Register, $mem_ptr$$Register,
 8316                   noreg, noreg, noreg, MacroAssembler::cmpxchgx_hint_atomic_update());
 8317     if (support_IRIW_for_not_multiple_copy_atomic_cpu) {
 8318       __ isync();
 8319     } else {
 8320       __ sync();
 8321     }
 8322   %}
 8323   ins_pipe(pipe_class_default);
 8324 %}
 8325 
 8326 instruct getAndSetS4(iRegIdst res, rarg3RegP mem_ptr, iRegIsrc src, iRegIsrc tmp1, iRegIsrc tmp2, flagsRegCR0 cr0) %{
 8327   match(Set res (GetAndSetS mem_ptr src));
 8328   predicate(!VM_Version::has_lqarx());
 8329   effect(TEMP_DEF res, USE_KILL mem_ptr, TEMP tmp1, TEMP tmp2, TEMP cr0);
 8330   format %{ "GetAndSetS $res, $mem_ptr, $src" %}
 8331   ins_encode %{
 8332     __ getandseth($res$$Register, $src$$Register, $mem_ptr$$Register,
 8333                   R0, $tmp1$$Register, $tmp2$$Register, MacroAssembler::cmpxchgx_hint_atomic_update());
 8334     if (support_IRIW_for_not_multiple_copy_atomic_cpu) {
 8335       __ isync();
 8336     } else {
 8337       __ sync();
 8338     }
 8339   %}
 8340   ins_pipe(pipe_class_default);
 8341 %}
 8342 
 8343 instruct getAndSetI(iRegIdst res, iRegPdst mem_ptr, iRegIsrc src, flagsRegCR0 cr0) %{
 8344   match(Set res (GetAndSetI mem_ptr src));
 8345   effect(TEMP_DEF res, TEMP cr0);
 8346   format %{ "GetAndSetI $res, $mem_ptr, $src" %}
 8347   ins_encode %{
 8348     __ getandsetw($res$$Register, $src$$Register, $mem_ptr$$Register,
 8349                   MacroAssembler::cmpxchgx_hint_atomic_update());
 8350     if (support_IRIW_for_not_multiple_copy_atomic_cpu) {
 8351       __ isync();
 8352     } else {
 8353       __ sync();
 8354     }
 8355   %}
 8356   ins_pipe(pipe_class_default);
 8357 %}
 8358 
 8359 instruct getAndSetL(iRegLdst res, iRegPdst mem_ptr, iRegLsrc src, flagsRegCR0 cr0) %{
 8360   match(Set res (GetAndSetL mem_ptr src));
 8361   effect(TEMP_DEF res, TEMP cr0);
 8362   format %{ "GetAndSetL $res, $mem_ptr, $src" %}
 8363   ins_encode %{
 8364     __ getandsetd($res$$Register, $src$$Register, $mem_ptr$$Register,
 8365                   MacroAssembler::cmpxchgx_hint_atomic_update());
 8366     if (support_IRIW_for_not_multiple_copy_atomic_cpu) {
 8367       __ isync();
 8368     } else {
 8369       __ sync();
 8370     }
 8371   %}
 8372   ins_pipe(pipe_class_default);
 8373 %}
 8374 
 8375 instruct getAndSetP(iRegPdst res, iRegPdst mem_ptr, iRegPsrc src, flagsRegCR0 cr0) %{
 8376   match(Set res (GetAndSetP mem_ptr src));
 8377   effect(TEMP_DEF res, TEMP cr0);
 8378   format %{ "GetAndSetP $res, $mem_ptr, $src" %}
 8379   ins_encode %{
 8380     __ getandsetd($res$$Register, $src$$Register, $mem_ptr$$Register,
 8381                   MacroAssembler::cmpxchgx_hint_atomic_update());
 8382     if (support_IRIW_for_not_multiple_copy_atomic_cpu) {
 8383       __ isync();
 8384     } else {
 8385       __ sync();
 8386     }
 8387   %}
 8388   ins_pipe(pipe_class_default);
 8389 %}
 8390 
 8391 instruct getAndSetN(iRegNdst res, iRegPdst mem_ptr, iRegNsrc src, flagsRegCR0 cr0) %{
 8392   match(Set res (GetAndSetN mem_ptr src));
 8393   effect(TEMP_DEF res, TEMP cr0);
 8394   format %{ "GetAndSetN $res, $mem_ptr, $src" %}
 8395   ins_encode %{
 8396     __ getandsetw($res$$Register, $src$$Register, $mem_ptr$$Register,
 8397                   MacroAssembler::cmpxchgx_hint_atomic_update());
 8398     if (support_IRIW_for_not_multiple_copy_atomic_cpu) {
 8399       __ isync();
 8400     } else {
 8401       __ sync();
 8402     }
 8403   %}
 8404   ins_pipe(pipe_class_default);
 8405 %}
 8406 
 8407 //----------Arithmetic Instructions--------------------------------------------
 8408 // Addition Instructions
 8409 
 8410 // Register Addition
 8411 instruct addI_reg_reg(iRegIdst dst, iRegIsrc_iRegL2Isrc src1, iRegIsrc_iRegL2Isrc src2) %{
 8412   match(Set dst (AddI src1 src2));
 8413   format %{ "ADD     $dst, $src1, $src2" %}
 8414   size(4);
 8415   ins_encode %{
 8416     __ add($dst$$Register, $src1$$Register, $src2$$Register);
 8417   %}
 8418   ins_pipe(pipe_class_default);
 8419 %}
 8420 
// Expand does not work with the above instruct. (??)
 8422 instruct addI_reg_reg_2(iRegIdst dst, iRegIsrc src1, iRegIsrc src2) %{
 8423   // no match-rule
 8424   effect(DEF dst, USE src1, USE src2);
 8425   format %{ "ADD     $dst, $src1, $src2" %}
 8426   size(4);
 8427   ins_encode %{
 8428     __ add($dst$$Register, $src1$$Register, $src2$$Register);
 8429   %}
 8430   ins_pipe(pipe_class_default);
 8431 %}
 8432 
 8433 instruct tree_addI_addI_addI_reg_reg_Ex(iRegIdst dst, iRegIsrc src1, iRegIsrc src2, iRegIsrc src3, iRegIsrc src4) %{
 8434   match(Set dst (AddI (AddI (AddI src1 src2) src3) src4));
 8435   ins_cost(DEFAULT_COST*3);
 8436 
 8437   expand %{
 8438     // FIXME: we should do this in the ideal world.
 8439     iRegIdst tmp1;
 8440     iRegIdst tmp2;
 8441     addI_reg_reg(tmp1, src1, src2);
 8442     addI_reg_reg_2(tmp2, src3, src4); // Adlc complains about addI_reg_reg.
 8443     addI_reg_reg(dst, tmp1, tmp2);
 8444   %}
 8445 %}
 8446 
 8447 // Immediate Addition
 8448 instruct addI_reg_imm16(iRegIdst dst, iRegIsrc src1, immI16 src2) %{
 8449   match(Set dst (AddI src1 src2));
 8450   format %{ "ADDI    $dst, $src1, $src2" %}
 8451   size(4);
 8452   ins_encode %{
 8453     __ addi($dst$$Register, $src1$$Register, $src2$$constant);
 8454   %}
 8455   ins_pipe(pipe_class_default);
 8456 %}
 8457 
 8458 // Immediate Addition with 16-bit shifted operand
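// immIhi16 constants have the low 16 bits clear; the encoding passes the
// constant shifted right by 16, so e.g. adding 0x12340000 (hypothetical value)
// emits ADDIS $dst, $src1, 0x1234.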
 8459 instruct addI_reg_immhi16(iRegIdst dst, iRegIsrc src1, immIhi16 src2) %{
 8460   match(Set dst (AddI src1 src2));
 8461   format %{ "ADDIS   $dst, $src1, $src2" %}
 8462   size(4);
 8463   ins_encode %{
 8464     __ addis($dst$$Register, $src1$$Register, ($src2$$constant)>>16);
 8465   %}
 8466   ins_pipe(pipe_class_default);
 8467 %}
 8468 
 8469 // Immediate Addition using prefixed addi
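// PADDI is an 8-byte prefixed instruction (POWER10, ISA 3.1) and must not cross
// a 64-byte instruction fetch boundary. The assert below checks this, and
// ins_alignment(2) (8-byte alignment) is intended to guarantee it.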
 8470 instruct addI_reg_imm32(iRegIdst dst, iRegIsrc src1, immI32 src2) %{
 8471   match(Set dst (AddI src1 src2));
 8472   predicate(PowerArchitecturePPC64 >= 10);
 8473   ins_cost(DEFAULT_COST+1);
 8474   format %{ "PADDI   $dst, $src1, $src2" %}
 8475   size(8);
 8476   ins_encode %{
 8477     assert( ((intptr_t)(__ pc()) & 0x3c) != 0x3c, "Bad alignment for prefixed instruction at " INTPTR_FORMAT, (intptr_t)(__ pc()));
 8478     __ paddi($dst$$Register, $src1$$Register, $src2$$constant);
 8479   %}
 8480   ins_pipe(pipe_class_default);
 8481   ins_alignment(2);
 8482 %}
 8483 
 8484 // Long Addition
 8485 instruct addL_reg_reg(iRegLdst dst, iRegLsrc src1, iRegLsrc src2) %{
 8486   match(Set dst (AddL src1 src2));
 8487   format %{ "ADD     $dst, $src1, $src2 \t// long" %}
 8488   size(4);
 8489   ins_encode %{
 8490     __ add($dst$$Register, $src1$$Register, $src2$$Register);
 8491   %}
 8492   ins_pipe(pipe_class_default);
 8493 %}
 8494 
// Expand does not work with the above instruct. (??)
 8496 instruct addL_reg_reg_2(iRegLdst dst, iRegLsrc src1, iRegLsrc src2) %{
 8497   // no match-rule
 8498   effect(DEF dst, USE src1, USE src2);
 8499   format %{ "ADD     $dst, $src1, $src2 \t// long" %}
 8500   size(4);
 8501   ins_encode %{
 8502     __ add($dst$$Register, $src1$$Register, $src2$$Register);
 8503   %}
 8504   ins_pipe(pipe_class_default);
 8505 %}
 8506 
 8507 instruct tree_addL_addL_addL_reg_reg_Ex(iRegLdst dst, iRegLsrc src1, iRegLsrc src2, iRegLsrc src3, iRegLsrc src4) %{
 8508   match(Set dst (AddL (AddL (AddL src1 src2) src3) src4));
 8509   ins_cost(DEFAULT_COST*3);
 8510 
 8511   expand %{
 8512     // FIXME: we should do this in the ideal world.
 8513     iRegLdst tmp1;
 8514     iRegLdst tmp2;
 8515     addL_reg_reg(tmp1, src1, src2);
    addL_reg_reg_2(tmp2, src3, src4); // Adlc complains about addL_reg_reg.
 8517     addL_reg_reg(dst, tmp1, tmp2);
 8518   %}
 8519 %}
 8520 
 8521 // AddL + ConvL2I.
 8522 instruct addI_regL_regL(iRegIdst dst, iRegLsrc src1, iRegLsrc src2) %{
 8523   match(Set dst (ConvL2I (AddL src1 src2)));
 8524 
 8525   format %{ "ADD     $dst, $src1, $src2 \t// long + l2i" %}
 8526   size(4);
 8527   ins_encode %{
 8528     __ add($dst$$Register, $src1$$Register, $src2$$Register);
 8529   %}
 8530   ins_pipe(pipe_class_default);
 8531 %}
 8532 
 8533 // No constant pool entries required.
 8534 instruct addL_reg_imm16(iRegLdst dst, iRegLsrc src1, immL16 src2) %{
 8535   match(Set dst (AddL src1 src2));
 8536 
 8537   format %{ "ADDI    $dst, $src1, $src2" %}
 8538   size(4);
 8539   ins_encode %{
 8540     __ addi($dst$$Register, $src1$$Register, $src2$$constant);
 8541   %}
 8542   ins_pipe(pipe_class_default);
 8543 %}
 8544 
 8545 // Long Immediate Addition with 16-bit shifted operand.
 8546 // No constant pool entries required.
 8547 instruct addL_reg_immhi16(iRegLdst dst, iRegLsrc src1, immL32hi16 src2) %{
 8548   match(Set dst (AddL src1 src2));
 8549 
 8550   format %{ "ADDIS   $dst, $src1, $src2" %}
 8551   size(4);
 8552   ins_encode %{
 8553     __ addis($dst$$Register, $src1$$Register, ($src2$$constant)>>16);
 8554   %}
 8555   ins_pipe(pipe_class_default);
 8556 %}
 8557 
 8558 // Long Immediate Addition using prefixed addi
 8559 // No constant pool entries required.
 8560 instruct addL_reg_imm34(iRegLdst dst, iRegLsrc src1, immL34 src2) %{
 8561   match(Set dst (AddL src1 src2));
 8562   predicate(PowerArchitecturePPC64 >= 10);
 8563   ins_cost(DEFAULT_COST+1);
 8564 
 8565   format %{ "PADDI   $dst, $src1, $src2" %}
 8566   size(8);
 8567   ins_encode %{
 8568     assert( ((intptr_t)(__ pc()) & 0x3c) != 0x3c, "Bad alignment for prefixed instruction at " INTPTR_FORMAT, (intptr_t)(__ pc()));
 8569     __ paddi($dst$$Register, $src1$$Register, $src2$$constant);
 8570   %}
 8571   ins_pipe(pipe_class_default);
 8572   ins_alignment(2);
 8573 %}
 8574 
 8575 // Pointer Register Addition
 8576 instruct addP_reg_reg(iRegPdst dst, iRegP_N2P src1, iRegLsrc src2) %{
 8577   match(Set dst (AddP src1 src2));
 8578   format %{ "ADD     $dst, $src1, $src2" %}
 8579   size(4);
 8580   ins_encode %{
 8581     __ add($dst$$Register, $src1$$Register, $src2$$Register);
 8582   %}
 8583   ins_pipe(pipe_class_default);
 8584 %}
 8585 
 8586 // Pointer Immediate Addition
 8587 // No constant pool entries required.
 8588 instruct addP_reg_imm16(iRegPdst dst, iRegP_N2P src1, immL16 src2) %{
 8589   match(Set dst (AddP src1 src2));
 8590 
 8591   format %{ "ADDI    $dst, $src1, $src2" %}
 8592   size(4);
 8593   ins_encode %{
 8594     __ addi($dst$$Register, $src1$$Register, $src2$$constant);
 8595   %}
 8596   ins_pipe(pipe_class_default);
 8597 %}
 8598 
 8599 // Pointer Immediate Addition with 16-bit shifted operand.
 8600 // No constant pool entries required.
 8601 instruct addP_reg_immhi16(iRegPdst dst, iRegP_N2P src1, immL32hi16 src2) %{
 8602   match(Set dst (AddP src1 src2));
 8603 
 8604   format %{ "ADDIS   $dst, $src1, $src2" %}
 8605   size(4);
 8606   ins_encode %{
 8607     __ addis($dst$$Register, $src1$$Register, ($src2$$constant)>>16);
 8608   %}
 8609   ins_pipe(pipe_class_default);
 8610 %}
 8611 
 8612 // Pointer Immediate Addition using prefixed addi
 8613 // No constant pool entries required.
 8614 instruct addP_reg_imm34(iRegPdst dst, iRegP_N2P src1, immL34 src2) %{
 8615   match(Set dst (AddP src1 src2));
 8616   predicate(PowerArchitecturePPC64 >= 10);
 8617   ins_cost(DEFAULT_COST+1);
 8618 
 8619   format %{ "PADDI    $dst, $src1, $src2" %}
 8620   size(8);
 8621   ins_encode %{
 8622     assert( ((intptr_t)(__ pc()) & 0x3c) != 0x3c, "Bad alignment for prefixed instruction at " INTPTR_FORMAT, (intptr_t)(__ pc()));
 8623     __ paddi($dst$$Register, $src1$$Register, $src2$$constant);
 8624   %}
 8625   ins_pipe(pipe_class_default);
 8626   ins_alignment(2);
 8627 %}
 8628 
 8629 //---------------------
 8630 // Subtraction Instructions
 8631 
 8632 // Register Subtraction
 8633 instruct subI_reg_reg(iRegIdst dst, iRegIsrc src1, iRegIsrc src2) %{
 8634   match(Set dst (SubI src1 src2));
 8635   format %{ "SUBF    $dst, $src2, $src1" %}
 8636   size(4);
 8637   ins_encode %{
 8638     __ subf($dst$$Register, $src2$$Register, $src1$$Register);
 8639   %}
 8640   ins_pipe(pipe_class_default);
 8641 %}
 8642 
// Immediate Subtraction
// The compiler converts "x - c0" into "x + -c0" (see SubLNode::Ideal), so only the
// constant-minus-register case needs a rule here. Don't try to use addi with
// -$src2$$constant since negating the constant can overflow when $src2$$constant == minI16.
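// Example: for $src2$$constant == minI16 (-32768) the negated value 32768 does not fit
// into addi's signed 16-bit immediate field.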
 8646 
 8647 // SubI from constant (using subfic).
 8648 instruct subI_imm16_reg(iRegIdst dst, immI16 src1, iRegIsrc src2) %{
 8649   match(Set dst (SubI src1 src2));
 8650   format %{ "SUBI    $dst, $src1, $src2" %}
 8651 
 8652   size(4);
 8653   ins_encode %{
 8654     __ subfic($dst$$Register, $src2$$Register, $src1$$constant);
 8655   %}
 8656   ins_pipe(pipe_class_default);
 8657 %}
 8658 
 8659 // Turn the sign-bit of an integer into a 32-bit mask, 0x0...0 for
 8660 // positive integers and 0xF...F for negative ones.
 8661 instruct signmask32I_regI(iRegIdst dst, iRegIsrc src) %{
 8662   // no match-rule, false predicate
 8663   effect(DEF dst, USE src);
 8664   predicate(false);
 8665 
 8666   format %{ "SRAWI   $dst, $src, #31" %}
 8667   size(4);
 8668   ins_encode %{
 8669     __ srawi($dst$$Register, $src$$Register, 0x1f);
 8670   %}
 8671   ins_pipe(pipe_class_default);
 8672 %}
 8673 
 8674 instruct absI_reg_Ex(iRegIdst dst, iRegIsrc src) %{
 8675   match(Set dst (AbsI src));
 8676   ins_cost(DEFAULT_COST*3);
 8677 
 8678   expand %{
 8679     iRegIdst tmp1;
 8680     iRegIdst tmp2;
 8681     signmask32I_regI(tmp1, src);
 8682     xorI_reg_reg(tmp2, tmp1, src);
 8683     subI_reg_reg(dst, tmp2, tmp1);
 8684   %}
 8685 %}
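// The expansion above computes abs(x) as (x ^ m) - m with m = x >> 31 (arithmetic):
// e.g. x = -5 gives m = -1, x ^ m = 4, 4 - (-1) = 5; for x >= 0, m = 0 and x is unchanged.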
 8686 
 8687 instruct negI_regI(iRegIdst dst, immI_0 zero, iRegIsrc src2) %{
 8688   match(Set dst (SubI zero src2));
 8689   format %{ "NEG     $dst, $src2" %}
 8690   size(4);
 8691   ins_encode %{
 8692     __ neg($dst$$Register, $src2$$Register);
 8693   %}
 8694   ins_pipe(pipe_class_default);
 8695 %}
 8696 
 8697 // Long subtraction
 8698 instruct subL_reg_reg(iRegLdst dst, iRegLsrc src1, iRegLsrc src2) %{
 8699   match(Set dst (SubL src1 src2));
 8700   format %{ "SUBF    $dst, $src2, $src1 \t// long" %}
 8701   size(4);
 8702   ins_encode %{
 8703     __ subf($dst$$Register, $src2$$Register, $src1$$Register);
 8704   %}
 8705   ins_pipe(pipe_class_default);
 8706 %}
 8707 
 8708 // SubL + convL2I.
 8709 instruct subI_regL_regL(iRegIdst dst, iRegLsrc src1, iRegLsrc src2) %{
 8710   match(Set dst (ConvL2I (SubL src1 src2)));
 8711 
 8712   format %{ "SUBF    $dst, $src2, $src1 \t// long + l2i" %}
 8713   size(4);
 8714   ins_encode %{
 8715     __ subf($dst$$Register, $src2$$Register, $src1$$Register);
 8716   %}
 8717   ins_pipe(pipe_class_default);
 8718 %}
 8719 
 8720 // Turn the sign-bit of a long into a 64-bit mask, 0x0...0 for
 8721 // positive longs and 0xF...F for negative ones.
 8722 instruct signmask64I_regL(iRegIdst dst, iRegLsrc src) %{
 8723   // no match-rule, false predicate
 8724   effect(DEF dst, USE src);
 8725   predicate(false);
 8726 
 8727   format %{ "SRADI   $dst, $src, #63" %}
 8728   size(4);
 8729   ins_encode %{
 8730     __ sradi($dst$$Register, $src$$Register, 0x3f);
 8731   %}
 8732   ins_pipe(pipe_class_default);
 8733 %}
 8734 
 8735 // Turn the sign-bit of a long into a 64-bit mask, 0x0...0 for
 8736 // positive longs and 0xF...F for negative ones.
 8737 instruct signmask64L_regL(iRegLdst dst, iRegLsrc src) %{
 8738   // no match-rule, false predicate
 8739   effect(DEF dst, USE src);
 8740   predicate(false);
 8741 
 8742   format %{ "SRADI   $dst, $src, #63" %}
 8743   size(4);
 8744   ins_encode %{
 8745     __ sradi($dst$$Register, $src$$Register, 0x3f);
 8746   %}
 8747   ins_pipe(pipe_class_default);
 8748 %}
 8749 
 8750 instruct absL_reg_Ex(iRegLdst dst, iRegLsrc src) %{
 8751   match(Set dst (AbsL src));
 8752   ins_cost(DEFAULT_COST*3);
 8753 
 8754   expand %{
 8755     iRegLdst tmp1;
 8756     iRegLdst tmp2;
 8757     signmask64L_regL(tmp1, src);
 8758     xorL_reg_reg(tmp2, tmp1, src);
 8759     subL_reg_reg(dst, tmp2, tmp1);
 8760   %}
 8761 %}
 8762 
 8763 // Long negation
 8764 instruct negL_reg_reg(iRegLdst dst, immL_0 zero, iRegLsrc src2) %{
 8765   match(Set dst (SubL zero src2));
 8766   format %{ "NEG     $dst, $src2 \t// long" %}
 8767   size(4);
 8768   ins_encode %{
 8769     __ neg($dst$$Register, $src2$$Register);
 8770   %}
 8771   ins_pipe(pipe_class_default);
 8772 %}
 8773 
 8774 // NegL + ConvL2I.
 8775 instruct negI_con0_regL(iRegIdst dst, immL_0 zero, iRegLsrc src2) %{
 8776   match(Set dst (ConvL2I (SubL zero src2)));
 8777 
 8778   format %{ "NEG     $dst, $src2 \t// long + l2i" %}
 8779   size(4);
 8780   ins_encode %{
 8781     __ neg($dst$$Register, $src2$$Register);
 8782   %}
 8783   ins_pipe(pipe_class_default);
 8784 %}
 8785 
 8786 // Multiplication Instructions
 8787 // Integer Multiplication
 8788 
 8789 // Register Multiplication
 8790 instruct mulI_reg_reg(iRegIdst dst, iRegIsrc src1, iRegIsrc src2) %{
 8791   match(Set dst (MulI src1 src2));
 8792   ins_cost(DEFAULT_COST);
 8793 
 8794   format %{ "MULLW   $dst, $src1, $src2" %}
 8795   size(4);
 8796   ins_encode %{
 8797     __ mullw($dst$$Register, $src1$$Register, $src2$$Register);
 8798   %}
 8799   ins_pipe(pipe_class_default);
 8800 %}
 8801 
 8802 // Immediate Multiplication
 8803 instruct mulI_reg_imm16(iRegIdst dst, iRegIsrc src1, immI16 src2) %{
 8804   match(Set dst (MulI src1 src2));
 8805   ins_cost(DEFAULT_COST);
 8806 
 8807   format %{ "MULLI   $dst, $src1, $src2" %}
 8808   size(4);
 8809   ins_encode %{
 8810     __ mulli($dst$$Register, $src1$$Register, $src2$$constant);
 8811   %}
 8812   ins_pipe(pipe_class_default);
 8813 %}
 8814 
 8815 instruct mulL_reg_reg(iRegLdst dst, iRegLsrc src1, iRegLsrc src2) %{
 8816   match(Set dst (MulL src1 src2));
 8817   ins_cost(DEFAULT_COST);
 8818 
  format %{ "MULLD   $dst, $src1, $src2 \t// long" %}
 8820   size(4);
 8821   ins_encode %{
 8822     __ mulld($dst$$Register, $src1$$Register, $src2$$Register);
 8823   %}
 8824   ins_pipe(pipe_class_default);
 8825 %}
 8826 
 8827 // Multiply high for optimized long division by constant.
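// MulHiL yields the upper 64 bits of the 128-bit signed product (mulhd); C2 generates it
// when strength-reducing a long division by a constant into a multiply by a precomputed
// "magic" reciprocal.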
 8828 instruct mulHighL_reg_reg(iRegLdst dst, iRegLsrc src1, iRegLsrc src2) %{
 8829   match(Set dst (MulHiL src1 src2));
 8830   ins_cost(DEFAULT_COST);
 8831 
  format %{ "MULHD   $dst, $src1, $src2 \t// long" %}
 8833   size(4);
 8834   ins_encode %{
 8835     __ mulhd($dst$$Register, $src1$$Register, $src2$$Register);
 8836   %}
 8837   ins_pipe(pipe_class_default);
 8838 %}
 8839 
// Long Immediate Multiplication
 8841 instruct mulL_reg_imm16(iRegLdst dst, iRegLsrc src1, immL16 src2) %{
 8842   match(Set dst (MulL src1 src2));
 8843   ins_cost(DEFAULT_COST);
 8844 
 8845   format %{ "MULLI   $dst, $src1, $src2" %}
 8846   size(4);
 8847   ins_encode %{
 8848     __ mulli($dst$$Register, $src1$$Register, $src2$$constant);
 8849   %}
 8850   ins_pipe(pipe_class_default);
 8851 %}
 8852 
 8853 // Integer Division with Immediate -1: Negate.
 8854 instruct divI_reg_immIvalueMinus1(iRegIdst dst, iRegIsrc src1, immI_minus1 src2) %{
 8855   match(Set dst (DivI src1 src2));
 8856   ins_cost(DEFAULT_COST);
 8857 
 8858   format %{ "NEG     $dst, $src1 \t// /-1" %}
 8859   size(4);
 8860   ins_encode %{
 8861     __ neg($dst$$Register, $src1$$Register);
 8862   %}
 8863   ins_pipe(pipe_class_default);
 8864 %}
 8865 
 8866 // Integer Division with constant, but not -1.
 8867 // We should be able to improve this by checking the type of src2.
 8868 // It might well be that src2 is known to be positive.
 8869 instruct divI_reg_regnotMinus1(iRegIdst dst, iRegIsrc src1, iRegIsrc src2) %{
 8870   match(Set dst (DivI src1 src2));
 8871   predicate(n->in(2)->find_int_con(-1) != -1); // src2 is a constant, but not -1
 8872   ins_cost(2*DEFAULT_COST);
 8873 
 8874   format %{ "DIVW    $dst, $src1, $src2 \t// /not-1" %}
 8875   size(4);
 8876   ins_encode %{
 8877     __ divw($dst$$Register, $src1$$Register, $src2$$Register);
 8878   %}
 8879   ins_pipe(pipe_class_default);
 8880 %}
 8881 
 8882 instruct cmovI_bne_negI_reg(iRegIdst dst, flagsRegSrc crx, iRegIsrc src1) %{
 8883   effect(USE_DEF dst, USE src1, USE crx);
 8884   predicate(false);
 8885 
 8886   ins_variable_size_depending_on_alignment(true);
 8887 
 8888   format %{ "CMOVE   $dst, neg($src1), $crx" %}
 8889   // Worst case is branch + move + stop, no stop without scheduler.
 8890   size(8);
 8891   ins_encode %{
 8892     Label done;
 8893     __ bne($crx$$CondRegister, done);
 8894     __ neg($dst$$Register, $src1$$Register);
 8895     __ bind(done);
 8896   %}
 8897   ins_pipe(pipe_class_default);
 8898 %}
 8899 
 8900 // Integer Division with Registers not containing constants.
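// Since src2 is not a compile-time constant, the divisor must be checked at run time:
// Java requires min_jint / -1 == min_jint, while divw's result is undefined on overflow,
// so the quotient is conditionally replaced by neg(src1) when src2 == -1.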
 8901 instruct divI_reg_reg_Ex(iRegIdst dst, iRegIsrc src1, iRegIsrc src2) %{
 8902   match(Set dst (DivI src1 src2));
 8903   ins_cost(10*DEFAULT_COST);
 8904 
 8905   expand %{
 8906     immI16 imm %{ (int)-1 %}
 8907     flagsReg tmp1;
 8908     cmpI_reg_imm16(tmp1, src2, imm);          // check src2 == -1
 8909     divI_reg_regnotMinus1(dst, src1, src2);   // dst = src1 / src2
 8910     cmovI_bne_negI_reg(dst, tmp1, src1);      // cmove dst = neg(src1) if src2 == -1
 8911   %}
 8912 %}
 8913 
 8914 // Long Division with Immediate -1: Negate.
 8915 instruct divL_reg_immLvalueMinus1(iRegLdst dst, iRegLsrc src1, immL_minus1 src2) %{
 8916   match(Set dst (DivL src1 src2));
 8917   ins_cost(DEFAULT_COST);
 8918 
 8919   format %{ "NEG     $dst, $src1 \t// /-1, long" %}
 8920   size(4);
 8921   ins_encode %{
 8922     __ neg($dst$$Register, $src1$$Register);
 8923   %}
 8924   ins_pipe(pipe_class_default);
 8925 %}
 8926 
 8927 // Long Division with constant, but not -1.
 8928 instruct divL_reg_regnotMinus1(iRegLdst dst, iRegLsrc src1, iRegLsrc src2) %{
 8929   match(Set dst (DivL src1 src2));
 8930   predicate(n->in(2)->find_long_con(-1L) != -1L); // Src2 is a constant, but not -1.
 8931   ins_cost(2*DEFAULT_COST);
 8932 
 8933   format %{ "DIVD    $dst, $src1, $src2 \t// /not-1, long" %}
 8934   size(4);
 8935   ins_encode %{
 8936     __ divd($dst$$Register, $src1$$Register, $src2$$Register);
 8937   %}
 8938   ins_pipe(pipe_class_default);
 8939 %}
 8940 
 8941 instruct cmovL_bne_negL_reg(iRegLdst dst, flagsRegSrc crx, iRegLsrc src1) %{
 8942   effect(USE_DEF dst, USE src1, USE crx);
 8943   predicate(false);
 8944 
 8945   ins_variable_size_depending_on_alignment(true);
 8946 
 8947   format %{ "CMOVE   $dst, neg($src1), $crx" %}
 8948   // Worst case is branch + move + stop, no stop without scheduler.
 8949   size(8);
 8950   ins_encode %{
 8951     Label done;
 8952     __ bne($crx$$CondRegister, done);
 8953     __ neg($dst$$Register, $src1$$Register);
 8954     __ bind(done);
 8955   %}
 8956   ins_pipe(pipe_class_default);
 8957 %}
 8958 
 8959 // Long Division with Registers not containing constants.
 8960 instruct divL_reg_reg_Ex(iRegLdst dst, iRegLsrc src1, iRegLsrc src2) %{
 8961   match(Set dst (DivL src1 src2));
 8962   ins_cost(10*DEFAULT_COST);
 8963 
 8964   expand %{
 8965     immL16 imm %{ (int)-1 %}
 8966     flagsReg tmp1;
 8967     cmpL_reg_imm16(tmp1, src2, imm);          // check src2 == -1
 8968     divL_reg_regnotMinus1(dst, src1, src2);   // dst = src1 / src2
 8969     cmovL_bne_negL_reg(dst, tmp1, src1);      // cmove dst = neg(src1) if src2 == -1
 8970   %}
 8971 %}
 8972 
 8973 // Integer Remainder with registers.
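// Computed as dst = src1 - (src1 / src2) * src2, which matches Java's remainder
// semantics (the sign of the result follows the dividend); the division reuses the
// same src2 == -1 guard as above.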
 8974 instruct modI_reg_reg_Ex(iRegIdst dst, iRegIsrc src1, iRegIsrc src2) %{
 8975   match(Set dst (ModI src1 src2));
 8976   ins_cost(10*DEFAULT_COST);
 8977 
 8978   expand %{
 8979     immI16 imm %{ (int)-1 %}
 8980     flagsReg tmp1;
 8981     iRegIdst tmp2;
 8982     iRegIdst tmp3;
 8983     cmpI_reg_imm16(tmp1, src2, imm);           // check src2 == -1
 8984     divI_reg_regnotMinus1(tmp2, src1, src2);   // tmp2 = src1 / src2
 8985     cmovI_bne_negI_reg(tmp2, tmp1, src1);      // cmove tmp2 = neg(src1) if src2 == -1
 8986     mulI_reg_reg(tmp3, src2, tmp2);            // tmp3 = src2 * tmp2
 8987     subI_reg_reg(dst, src1, tmp3);             // dst = src1 - tmp3
 8988   %}
 8989 %}
 8990 
 8991 // Long Remainder with registers
 8992 instruct modL_reg_reg_Ex(iRegLdst dst, iRegLsrc src1, iRegLsrc src2) %{
 8993   match(Set dst (ModL src1 src2));
 8994   ins_cost(10*DEFAULT_COST);
 8995 
 8996   expand %{
 8997     immL16 imm %{ (int)-1 %}
 8998     flagsReg tmp1;
 8999     iRegLdst tmp2;
 9000     iRegLdst tmp3;
 9001     cmpL_reg_imm16(tmp1, src2, imm);             // check src2 == -1
 9002     divL_reg_regnotMinus1(tmp2, src1, src2);     // tmp2 = src1 / src2
 9003     cmovL_bne_negL_reg(tmp2, tmp1, src1);        // cmove tmp2 = neg(src1) if src2 == -1
 9004     mulL_reg_reg(tmp3, src2, tmp2);              // tmp3 = src2 * tmp2
 9005     subL_reg_reg(dst, src1, tmp3);               // dst = src1 - tmp3
 9006   %}
 9007 %}
 9008 
 9009 // Integer Shift Instructions
 9010 
 9011 // Register Shift Left
 9012 
 9013 // Clear all but the lowest #mask bits.
 9014 // Used to normalize shift amounts in registers.
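// E.g. mask == 0x3b clears the upper 59 bits and keeps the low 5 (Java int shifts use
// the shift count modulo 32); mask == 0x3a keeps the low 6 bits for long shifts.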
 9015 instruct maskI_reg_imm(iRegIdst dst, iRegIsrc src, uimmI6 mask) %{
 9016   // no match-rule, false predicate
 9017   effect(DEF dst, USE src, USE mask);
 9018   predicate(false);
 9019 
 9020   format %{ "MASK    $dst, $src, $mask \t// clear $mask upper bits" %}
 9021   size(4);
 9022   ins_encode %{
 9023     __ clrldi($dst$$Register, $src$$Register, $mask$$constant);
 9024   %}
 9025   ins_pipe(pipe_class_default);
 9026 %}
 9027 
 9028 instruct lShiftI_reg_reg(iRegIdst dst, iRegIsrc src1, iRegIsrc src2) %{
 9029   // no match-rule, false predicate
 9030   effect(DEF dst, USE src1, USE src2);
 9031   predicate(false);
 9032 
 9033   format %{ "SLW     $dst, $src1, $src2" %}
 9034   size(4);
 9035   ins_encode %{
 9036     __ slw($dst$$Register, $src1$$Register, $src2$$Register);
 9037   %}
 9038   ins_pipe(pipe_class_default);
 9039 %}
 9040 
 9041 instruct lShiftI_reg_reg_Ex(iRegIdst dst, iRegIsrc src1, iRegIsrc src2) %{
 9042   match(Set dst (LShiftI src1 src2));
 9043   ins_cost(DEFAULT_COST*2);
 9044   expand %{
 9045     uimmI6 mask %{ 0x3b /* clear 59 bits, keep 5 */ %}
 9046     iRegIdst tmpI;
 9047     maskI_reg_imm(tmpI, src2, mask);
 9048     lShiftI_reg_reg(dst, src1, tmpI);
 9049   %}
 9050 %}
 9051 
 9052 // Register Shift Left Immediate
 9053 instruct lShiftI_reg_imm(iRegIdst dst, iRegIsrc src1, immI src2) %{
 9054   match(Set dst (LShiftI src1 src2));
 9055 
 9056   format %{ "SLWI    $dst, $src1, ($src2 & 0x1f)" %}
 9057   size(4);
 9058   ins_encode %{
 9059     __ slwi($dst$$Register, $src1$$Register, ($src2$$constant) & 0x1f);
 9060   %}
 9061   ins_pipe(pipe_class_default);
 9062 %}
 9063 
 9064 // AndI with negpow2-constant + LShiftI
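// E.g. src2 == -8 and src3 == 2: maskbits == 3 + 2 == 5, so a single rlwinm rotates
// left by 2 and clears the low 5 result bits, which equals (x & -8) << 2 in 32 bits.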
 9065 instruct lShiftI_andI_immInegpow2_imm5(iRegIdst dst, iRegIsrc src1, immInegpow2 src2, uimmI5 src3) %{
 9066   match(Set dst (LShiftI (AndI src1 src2) src3));
 9067   predicate(UseRotateAndMaskInstructionsPPC64);
 9068 
 9069   format %{ "RLWINM  $dst, lShiftI(AndI($src1, $src2), $src3)" %}
 9070   size(4);
 9071   ins_encode %{
 9072     long src3      = $src3$$constant;
 9073     long maskbits  = src3 + log2i_exact(-(juint)$src2$$constant);
 9074     if (maskbits >= 32) {
 9075       __ li($dst$$Register, 0); // addi
 9076     } else {
 9077       __ rlwinm($dst$$Register, $src1$$Register, src3 & 0x1f, 0, (31-maskbits) & 0x1f);
 9078     }
 9079   %}
 9080   ins_pipe(pipe_class_default);
 9081 %}
 9082 
 9083 // RShiftI + AndI with negpow2-constant + LShiftI
 9084 instruct lShiftI_andI_immInegpow2_rShiftI_imm5(iRegIdst dst, iRegIsrc src1, immInegpow2 src2, uimmI5 src3) %{
 9085   match(Set dst (LShiftI (AndI (RShiftI src1 src3) src2) src3));
 9086   predicate(UseRotateAndMaskInstructionsPPC64);
 9087 
 9088   format %{ "RLWINM  $dst, lShiftI(AndI(RShiftI($src1, $src3), $src2), $src3)" %}
 9089   size(4);
 9090   ins_encode %{
 9091     long src3      = $src3$$constant;
 9092     long maskbits  = src3 + log2i_exact(-(juint)$src2$$constant);
 9093     if (maskbits >= 32) {
 9094       __ li($dst$$Register, 0); // addi
 9095     } else {
 9096       __ rlwinm($dst$$Register, $src1$$Register, 0, 0, (31-maskbits) & 0x1f);
 9097     }
 9098   %}
 9099   ins_pipe(pipe_class_default);
 9100 %}
 9101 
 9102 instruct lShiftL_regL_regI(iRegLdst dst, iRegLsrc src1, iRegIsrc src2) %{
 9103   // no match-rule, false predicate
 9104   effect(DEF dst, USE src1, USE src2);
 9105   predicate(false);
 9106 
 9107   format %{ "SLD     $dst, $src1, $src2" %}
 9108   size(4);
 9109   ins_encode %{
 9110     __ sld($dst$$Register, $src1$$Register, $src2$$Register);
 9111   %}
 9112   ins_pipe(pipe_class_default);
 9113 %}
 9114 
 9115 // Register Shift Left
 9116 instruct lShiftL_regL_regI_Ex(iRegLdst dst, iRegLsrc src1, iRegIsrc src2) %{
 9117   match(Set dst (LShiftL src1 src2));
 9118   ins_cost(DEFAULT_COST*2);
 9119   expand %{
 9120     uimmI6 mask %{ 0x3a /* clear 58 bits, keep 6 */ %}
 9121     iRegIdst tmpI;
 9122     maskI_reg_imm(tmpI, src2, mask);
 9123     lShiftL_regL_regI(dst, src1, tmpI);
 9124   %}
 9125 %}
 9126 
 9127 // Register Shift Left Immediate
 9128 instruct lshiftL_regL_immI(iRegLdst dst, iRegLsrc src1, immI src2) %{
 9129   match(Set dst (LShiftL src1 src2));
 9130   format %{ "SLDI    $dst, $src1, ($src2 & 0x3f)" %}
 9131   size(4);
 9132   ins_encode %{
 9133     __ sldi($dst$$Register, $src1$$Register, ($src2$$constant) & 0x3f);
 9134   %}
 9135   ins_pipe(pipe_class_default);
 9136 %}
 9137 
// If we shift by 32 bits or more, we need not convert I2L.
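// Such a shift moves the upper half of the source register (where the sign extension
// would live) entirely out of the result, so only the int's low 32 bits matter.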
 9139 instruct lShiftL_regI_immGE32(iRegLdst dst, iRegIsrc src1, uimmI6_ge32 src2) %{
 9140   match(Set dst (LShiftL (ConvI2L src1) src2));
 9141   ins_cost(DEFAULT_COST);
 9142 
 9143   size(4);
 9144   format %{ "SLDI    $dst, i2l($src1), $src2" %}
 9145   ins_encode %{
 9146     __ sldi($dst$$Register, $src1$$Register, ($src2$$constant) & 0x3f);
 9147   %}
 9148   ins_pipe(pipe_class_default);
 9149 %}
 9150 
// Shift a positive int to the left.
 9152 // Clrlsldi clears the upper 32 bits and shifts.
 9153 instruct scaledPositiveI2L_lShiftL_convI2L_reg_imm6(iRegLdst dst, iRegIsrc src1, uimmI6 src2) %{
 9154   match(Set dst (LShiftL (ConvI2L src1) src2));
 9155   predicate(((ConvI2LNode*)(_kids[0]->_leaf))->type()->is_long()->is_positive_int());
 9156 
 9157   format %{ "SLDI    $dst, i2l(positive_int($src1)), $src2" %}
 9158   size(4);
 9159   ins_encode %{
 9160     __ clrlsldi($dst$$Register, $src1$$Register, 0x20, $src2$$constant);
 9161   %}
 9162   ins_pipe(pipe_class_default);
 9163 %}
 9164 
 9165 instruct arShiftI_reg_reg(iRegIdst dst, iRegIsrc src1, iRegIsrc src2) %{
 9166   // no match-rule, false predicate
 9167   effect(DEF dst, USE src1, USE src2);
 9168   predicate(false);
 9169 
 9170   format %{ "SRAW    $dst, $src1, $src2" %}
 9171   size(4);
 9172   ins_encode %{
 9173     __ sraw($dst$$Register, $src1$$Register, $src2$$Register);
 9174   %}
 9175   ins_pipe(pipe_class_default);
 9176 %}
 9177 
 9178 // Register Arithmetic Shift Right
 9179 instruct arShiftI_reg_reg_Ex(iRegIdst dst, iRegIsrc src1, iRegIsrc src2) %{
 9180   match(Set dst (RShiftI src1 src2));
 9181   ins_cost(DEFAULT_COST*2);
 9182   expand %{
 9183     uimmI6 mask %{ 0x3b /* clear 59 bits, keep 5 */ %}
 9184     iRegIdst tmpI;
 9185     maskI_reg_imm(tmpI, src2, mask);
 9186     arShiftI_reg_reg(dst, src1, tmpI);
 9187   %}
 9188 %}
 9189 
 9190 // Register Arithmetic Shift Right Immediate
 9191 instruct arShiftI_reg_imm(iRegIdst dst, iRegIsrc src1, immI src2) %{
 9192   match(Set dst (RShiftI src1 src2));
 9193 
 9194   format %{ "SRAWI   $dst, $src1, ($src2 & 0x1f)" %}
 9195   size(4);
 9196   ins_encode %{
 9197     __ srawi($dst$$Register, $src1$$Register, ($src2$$constant) & 0x1f);
 9198   %}
 9199   ins_pipe(pipe_class_default);
 9200 %}
 9201 
 9202 instruct arShiftL_regL_regI(iRegLdst dst, iRegLsrc src1, iRegIsrc src2) %{
 9203   // no match-rule, false predicate
 9204   effect(DEF dst, USE src1, USE src2);
 9205   predicate(false);
 9206 
 9207   format %{ "SRAD    $dst, $src1, $src2" %}
 9208   size(4);
 9209   ins_encode %{
 9210     __ srad($dst$$Register, $src1$$Register, $src2$$Register);
 9211   %}
 9212   ins_pipe(pipe_class_default);
 9213 %}
 9214 
 9215 // Register Shift Right Arithmetic Long
 9216 instruct arShiftL_regL_regI_Ex(iRegLdst dst, iRegLsrc src1, iRegIsrc src2) %{
 9217   match(Set dst (RShiftL src1 src2));
 9218   ins_cost(DEFAULT_COST*2);
 9219 
 9220   expand %{
 9221     uimmI6 mask %{ 0x3a /* clear 58 bits, keep 6 */ %}
 9222     iRegIdst tmpI;
 9223     maskI_reg_imm(tmpI, src2, mask);
 9224     arShiftL_regL_regI(dst, src1, tmpI);
 9225   %}
 9226 %}
 9227 
 9228 // Register Shift Right Immediate
 9229 instruct arShiftL_regL_immI(iRegLdst dst, iRegLsrc src1, immI src2) %{
 9230   match(Set dst (RShiftL src1 src2));
 9231 
 9232   format %{ "SRADI   $dst, $src1, ($src2 & 0x3f)" %}
 9233   size(4);
 9234   ins_encode %{
 9235     __ sradi($dst$$Register, $src1$$Register, ($src2$$constant) & 0x3f);
 9236   %}
 9237   ins_pipe(pipe_class_default);
 9238 %}
 9239 
 9240 // RShiftL + ConvL2I
 9241 instruct convL2I_arShiftL_regL_immI(iRegIdst dst, iRegLsrc src1, immI src2) %{
 9242   match(Set dst (ConvL2I (RShiftL src1 src2)));
 9243 
 9244   format %{ "SRADI   $dst, $src1, ($src2 & 0x3f) \t// long + l2i" %}
 9245   size(4);
 9246   ins_encode %{
 9247     __ sradi($dst$$Register, $src1$$Register, ($src2$$constant) & 0x3f);
 9248   %}
 9249   ins_pipe(pipe_class_default);
 9250 %}
 9251 
 9252 instruct urShiftI_reg_reg(iRegIdst dst, iRegIsrc src1, iRegIsrc src2) %{
 9253   // no match-rule, false predicate
 9254   effect(DEF dst, USE src1, USE src2);
 9255   predicate(false);
 9256 
 9257   format %{ "SRW     $dst, $src1, $src2" %}
 9258   size(4);
 9259   ins_encode %{
 9260     __ srw($dst$$Register, $src1$$Register, $src2$$Register);
 9261   %}
 9262   ins_pipe(pipe_class_default);
 9263 %}
 9264 
 9265 // Register Shift Right
 9266 instruct urShiftI_reg_reg_Ex(iRegIdst dst, iRegIsrc src1, iRegIsrc src2) %{
 9267   match(Set dst (URShiftI src1 src2));
 9268   ins_cost(DEFAULT_COST*2);
 9269 
 9270   expand %{
 9271     uimmI6 mask %{ 0x3b /* clear 59 bits, keep 5 */ %}
 9272     iRegIdst tmpI;
 9273     maskI_reg_imm(tmpI, src2, mask);
 9274     urShiftI_reg_reg(dst, src1, tmpI);
 9275   %}
 9276 %}
 9277 
 9278 // Register Shift Right Immediate
 9279 instruct urShiftI_reg_imm(iRegIdst dst, iRegIsrc src1, immI src2) %{
 9280   match(Set dst (URShiftI src1 src2));
 9281 
 9282   format %{ "SRWI    $dst, $src1, ($src2 & 0x1f)" %}
 9283   size(4);
 9284   ins_encode %{
 9285     __ srwi($dst$$Register, $src1$$Register, ($src2$$constant) & 0x1f);
 9286   %}
 9287   ins_pipe(pipe_class_default);
 9288 %}
 9289 
 9290 instruct urShiftL_regL_regI(iRegLdst dst, iRegLsrc src1, iRegIsrc src2) %{
 9291   // no match-rule, false predicate
 9292   effect(DEF dst, USE src1, USE src2);
 9293   predicate(false);
 9294 
 9295   format %{ "SRD     $dst, $src1, $src2" %}
 9296   size(4);
 9297   ins_encode %{
 9298     __ srd($dst$$Register, $src1$$Register, $src2$$Register);
 9299   %}
 9300   ins_pipe(pipe_class_default);
 9301 %}
 9302 
 9303 // Register Shift Right
 9304 instruct urShiftL_regL_regI_Ex(iRegLdst dst, iRegLsrc src1, iRegIsrc src2) %{
 9305   match(Set dst (URShiftL src1 src2));
 9306   ins_cost(DEFAULT_COST*2);
 9307 
 9308   expand %{
 9309     uimmI6 mask %{ 0x3a /* clear 58 bits, keep 6 */ %}
 9310     iRegIdst tmpI;
 9311     maskI_reg_imm(tmpI, src2, mask);
 9312     urShiftL_regL_regI(dst, src1, tmpI);
 9313   %}
 9314 %}
 9315 
 9316 // Register Shift Right Immediate
 9317 instruct urShiftL_regL_immI(iRegLdst dst, iRegLsrc src1, immI src2) %{
 9318   match(Set dst (URShiftL src1 src2));
 9319 
 9320   format %{ "SRDI    $dst, $src1, ($src2 & 0x3f)" %}
 9321   size(4);
 9322   ins_encode %{
 9323     __ srdi($dst$$Register, $src1$$Register, ($src2$$constant) & 0x3f);
 9324   %}
 9325   ins_pipe(pipe_class_default);
 9326 %}
 9327 
 9328 // URShiftL + ConvL2I.
 9329 instruct convL2I_urShiftL_regL_immI(iRegIdst dst, iRegLsrc src1, immI src2) %{
 9330   match(Set dst (ConvL2I (URShiftL src1 src2)));
 9331 
 9332   format %{ "SRDI    $dst, $src1, ($src2 & 0x3f) \t// long + l2i" %}
 9333   size(4);
 9334   ins_encode %{
 9335     __ srdi($dst$$Register, $src1$$Register, ($src2$$constant) & 0x3f);
 9336   %}
 9337   ins_pipe(pipe_class_default);
 9338 %}
 9339 
 9340 // Register Shift Right Immediate with a CastP2X
 9341 instruct shrP_convP2X_reg_imm6(iRegLdst dst, iRegP_N2P src1, uimmI6 src2) %{
 9342   match(Set dst (URShiftL (CastP2X src1) src2));
 9343 
 9344   format %{ "SRDI    $dst, $src1, $src2 \t// Cast ptr $src1 to long and shift" %}
 9345   size(4);
 9346   ins_encode %{
 9347     __ srdi($dst$$Register, $src1$$Register, ($src2$$constant) & 0x3f);
 9348   %}
 9349   ins_pipe(pipe_class_default);
 9350 %}
 9351 
 9352 // Bitfield Extract: URShiftI + AndI
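// E.g. src2 == 8 and src3 == 0xFF extract bits 8..15 of src1 (right-justified) with a
// single extrdi; if the mask asks for more bits than remain above the shift, the length
// is clipped so that no rotated-in bits are kept.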
 9353 instruct andI_urShiftI_regI_immI_immIpow2minus1(iRegIdst dst, iRegIsrc src1, immI src2, immIpow2minus1 src3) %{
 9354   match(Set dst (AndI (URShiftI src1 src2) src3));
 9355 
 9356   format %{ "EXTRDI  $dst, $src1, shift=$src2, mask=$src3 \t// int bitfield extract" %}
 9357   size(4);
 9358   ins_encode %{
 9359     int rshift = ($src2$$constant) & 0x1f;
 9360     int length = log2i_exact((juint)$src3$$constant + 1u);
 9361     if (rshift + length > 32) {
 9362       // if necessary, adjust mask to omit rotated bits.
 9363       length = 32 - rshift;
 9364     }
 9365     __ extrdi($dst$$Register, $src1$$Register, length, 64 - (rshift + length));
 9366   %}
 9367   ins_pipe(pipe_class_default);
 9368 %}
 9369 
 9370 // Bitfield Extract: URShiftL + AndL
 9371 instruct andL_urShiftL_regL_immI_immLpow2minus1(iRegLdst dst, iRegLsrc src1, immI src2, immLpow2minus1 src3) %{
 9372   match(Set dst (AndL (URShiftL src1 src2) src3));
 9373 
 9374   format %{ "EXTRDI  $dst, $src1, shift=$src2, mask=$src3 \t// long bitfield extract" %}
 9375   size(4);
 9376   ins_encode %{
 9377     int rshift  = ($src2$$constant) & 0x3f;
 9378     int length = log2i_exact((julong)$src3$$constant + 1ull);
 9379     if (rshift + length > 64) {
 9380       // if necessary, adjust mask to omit rotated bits.
 9381       length = 64 - rshift;
 9382     }
 9383     __ extrdi($dst$$Register, $src1$$Register, length, 64 - (rshift + length));
 9384   %}
 9385   ins_pipe(pipe_class_default);
 9386 %}
 9387 
 9388 instruct sxtI_reg(iRegIdst dst, iRegIsrc src) %{
 9389   match(Set dst (ConvL2I (ConvI2L src)));
 9390 
 9391   format %{ "EXTSW   $dst, $src \t// int->int" %}
 9392   size(4);
 9393   ins_encode %{
 9394     __ extsw($dst$$Register, $src$$Register);
 9395   %}
 9396   ins_pipe(pipe_class_default);
 9397 %}
 9398 
 9399 //----------Rotate Instructions------------------------------------------------
 9400 
 9401 // Rotate Left by 8-bit immediate
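// E.g. (x << 13) | (x >>> 19) matches because (13 + 19) & 0x1f == 0 and is emitted as a
// single rotlwi by 13.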
 9402 instruct rotlI_reg_immi8(iRegIdst dst, iRegIsrc src, immI8 lshift, immI8 rshift) %{
 9403   match(Set dst (OrI (LShiftI src lshift) (URShiftI src rshift)));
 9404   predicate(0 == ((n->in(1)->in(2)->get_int() + n->in(2)->in(2)->get_int()) & 0x1f));
 9405 
 9406   format %{ "ROTLWI  $dst, $src, $lshift" %}
 9407   size(4);
 9408   ins_encode %{
 9409     __ rotlwi($dst$$Register, $src$$Register, $lshift$$constant);
 9410   %}
 9411   ins_pipe(pipe_class_default);
 9412 %}
 9413 
 9414 // Rotate Right by 8-bit immediate
 9415 instruct rotrI_reg_immi8(iRegIdst dst, iRegIsrc src, immI8 rshift, immI8 lshift) %{
 9416   match(Set dst (OrI (URShiftI src rshift) (LShiftI src lshift)));
 9417   predicate(0 == ((n->in(1)->in(2)->get_int() + n->in(2)->in(2)->get_int()) & 0x1f));
 9418 
  format %{ "ROTRWI  $dst, $src, $rshift" %}
 9420   size(4);
 9421   ins_encode %{
 9422     __ rotrwi($dst$$Register, $src$$Register, $rshift$$constant);
 9423   %}
 9424   ins_pipe(pipe_class_default);
 9425 %}
 9426 
 9427 //----------Floating Point Arithmetic Instructions-----------------------------
 9428 
 9429 // Add float single precision
 9430 instruct addF_reg_reg(regF dst, regF src1, regF src2) %{
 9431   match(Set dst (AddF src1 src2));
 9432 
 9433   format %{ "FADDS   $dst, $src1, $src2" %}
 9434   size(4);
 9435   ins_encode %{
 9436     __ fadds($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister);
 9437   %}
 9438   ins_pipe(pipe_class_default);
 9439 %}
 9440 
 9441 // Add float double precision
 9442 instruct addD_reg_reg(regD dst, regD src1, regD src2) %{
 9443   match(Set dst (AddD src1 src2));
 9444 
 9445   format %{ "FADD    $dst, $src1, $src2" %}
 9446   size(4);
 9447   ins_encode %{
 9448     __ fadd($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister);
 9449   %}
 9450   ins_pipe(pipe_class_default);
 9451 %}
 9452 
 9453 // Sub float single precision
 9454 instruct subF_reg_reg(regF dst, regF src1, regF src2) %{
 9455   match(Set dst (SubF src1 src2));
 9456 
 9457   format %{ "FSUBS   $dst, $src1, $src2" %}
 9458   size(4);
 9459   ins_encode %{
 9460     __ fsubs($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister);
 9461   %}
 9462   ins_pipe(pipe_class_default);
 9463 %}
 9464 
 9465 // Sub float double precision
 9466 instruct subD_reg_reg(regD dst, regD src1, regD src2) %{
 9467   match(Set dst (SubD src1 src2));
 9468   format %{ "FSUB    $dst, $src1, $src2" %}
 9469   size(4);
 9470   ins_encode %{
 9471     __ fsub($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister);
 9472   %}
 9473   ins_pipe(pipe_class_default);
 9474 %}
 9475 
 9476 // Mul float single precision
 9477 instruct mulF_reg_reg(regF dst, regF src1, regF src2) %{
 9478   match(Set dst (MulF src1 src2));
 9479   format %{ "FMULS   $dst, $src1, $src2" %}
 9480   size(4);
 9481   ins_encode %{
 9482     __ fmuls($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister);
 9483   %}
 9484   ins_pipe(pipe_class_default);
 9485 %}
 9486 
 9487 // Mul float double precision
 9488 instruct mulD_reg_reg(regD dst, regD src1, regD src2) %{
 9489   match(Set dst (MulD src1 src2));
 9490   format %{ "FMUL    $dst, $src1, $src2" %}
 9491   size(4);
 9492   ins_encode %{
 9493     __ fmul($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister);
 9494   %}
 9495   ins_pipe(pipe_class_default);
 9496 %}
 9497 
 9498 // Div float single precision
 9499 instruct divF_reg_reg(regF dst, regF src1, regF src2) %{
 9500   match(Set dst (DivF src1 src2));
 9501   format %{ "FDIVS   $dst, $src1, $src2" %}
 9502   size(4);
 9503   ins_encode %{
 9504     __ fdivs($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister);
 9505   %}
 9506   ins_pipe(pipe_class_default);
 9507 %}
 9508 
 9509 // Div float double precision
 9510 instruct divD_reg_reg(regD dst, regD src1, regD src2) %{
 9511   match(Set dst (DivD src1 src2));
 9512   format %{ "FDIV    $dst, $src1, $src2" %}
 9513   size(4);
 9514   ins_encode %{
 9515     __ fdiv($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister);
 9516   %}
 9517   ins_pipe(pipe_class_default);
 9518 %}
 9519 
 9520 // Absolute float single precision
 9521 instruct absF_reg(regF dst, regF src) %{
 9522   match(Set dst (AbsF src));
 9523   format %{ "FABS    $dst, $src \t// float" %}
 9524   size(4);
 9525   ins_encode %{
 9526     __ fabs($dst$$FloatRegister, $src$$FloatRegister);
 9527   %}
 9528   ins_pipe(pipe_class_default);
 9529 %}
 9530 
 9531 // Absolute float double precision
 9532 instruct absD_reg(regD dst, regD src) %{
 9533   match(Set dst (AbsD src));
 9534   format %{ "FABS    $dst, $src \t// double" %}
 9535   size(4);
 9536   ins_encode %{
 9537     __ fabs($dst$$FloatRegister, $src$$FloatRegister);
 9538   %}
 9539   ins_pipe(pipe_class_default);
 9540 %}
 9541 
 9542 instruct negF_reg(regF dst, regF src) %{
 9543   match(Set dst (NegF src));
 9544   format %{ "FNEG    $dst, $src \t// float" %}
 9545   size(4);
 9546   ins_encode %{
 9547     __ fneg($dst$$FloatRegister, $src$$FloatRegister);
 9548   %}
 9549   ins_pipe(pipe_class_default);
 9550 %}
 9551 
 9552 instruct negD_reg(regD dst, regD src) %{
 9553   match(Set dst (NegD src));
 9554   format %{ "FNEG    $dst, $src \t// double" %}
 9555   size(4);
 9556   ins_encode %{
 9557     __ fneg($dst$$FloatRegister, $src$$FloatRegister);
 9558   %}
 9559   ins_pipe(pipe_class_default);
 9560 %}
 9561 
 9562 // AbsF + NegF.
 9563 instruct negF_absF_reg(regF dst, regF src) %{
 9564   match(Set dst (NegF (AbsF src)));
 9565   format %{ "FNABS   $dst, $src \t// float" %}
 9566   size(4);
 9567   ins_encode %{
 9568     __ fnabs($dst$$FloatRegister, $src$$FloatRegister);
 9569   %}
 9570   ins_pipe(pipe_class_default);
 9571 %}
 9572 
 9573 // AbsD + NegD.
 9574 instruct negD_absD_reg(regD dst, regD src) %{
 9575   match(Set dst (NegD (AbsD src)));
 9576   format %{ "FNABS   $dst, $src \t// double" %}
 9577   size(4);
 9578   ins_encode %{
 9579     __ fnabs($dst$$FloatRegister, $src$$FloatRegister);
 9580   %}
 9581   ins_pipe(pipe_class_default);
 9582 %}
 9583 
 9584 // VM_Version::has_fsqrt() decides if this node will be used.
 9585 // Sqrt float double precision
 9586 instruct sqrtD_reg(regD dst, regD src) %{
 9587   match(Set dst (SqrtD src));
 9588   format %{ "FSQRT   $dst, $src" %}
 9589   size(4);
 9590   ins_encode %{
 9591     __ fsqrt($dst$$FloatRegister, $src$$FloatRegister);
 9592   %}
 9593   ins_pipe(pipe_class_default);
 9594 %}
 9595 
 9596 // Single-precision sqrt.
 9597 instruct sqrtF_reg(regF dst, regF src) %{
 9598   match(Set dst (SqrtF src));
 9599   predicate(VM_Version::has_fsqrts());
 9600   ins_cost(DEFAULT_COST);
 9601 
 9602   format %{ "FSQRTS  $dst, $src" %}
 9603   size(4);
 9604   ins_encode %{
 9605     __ fsqrts($dst$$FloatRegister, $src$$FloatRegister);
 9606   %}
 9607   ins_pipe(pipe_class_default);
 9608 %}
 9609 
 9610 instruct roundDouble_nop(regD dst) %{
 9611   match(Set dst (RoundDouble dst));
 9612   ins_cost(0);
 9613 
 9614   format %{ " -- \t// RoundDouble not needed - empty" %}
 9615   size(0);
 9616   // PPC results are already "rounded" (i.e., normal-format IEEE).
 9617   ins_encode( /*empty*/ );
 9618   ins_pipe(pipe_class_default);
 9619 %}
 9620 
 9621 instruct roundFloat_nop(regF dst) %{
 9622   match(Set dst (RoundFloat dst));
 9623   ins_cost(0);
 9624 
 9625   format %{ " -- \t// RoundFloat not needed - empty" %}
 9626   size(0);
 9627   // PPC results are already "rounded" (i.e., normal-format IEEE).
 9628   ins_encode( /*empty*/ );
 9629   ins_pipe(pipe_class_default);
 9630 %}
 9631 
 9632 
 9633 // Multiply-Accumulate
 9634 // src1 * src2 + src3
 9635 instruct maddF_reg_reg(regF dst, regF src1, regF src2, regF src3) %{
 9636   match(Set dst (FmaF src3 (Binary src1 src2)));
 9637 
 9638   format %{ "FMADDS  $dst, $src1, $src2, $src3" %}
 9639   size(4);
 9640   ins_encode %{
 9641     __ fmadds($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister, $src3$$FloatRegister);
 9642   %}
 9643   ins_pipe(pipe_class_default);
 9644 %}
 9645 
 9646 // src1 * src2 + src3
 9647 instruct maddD_reg_reg(regD dst, regD src1, regD src2, regD src3) %{
 9648   match(Set dst (FmaD src3 (Binary src1 src2)));
 9649 
 9650   format %{ "FMADD   $dst, $src1, $src2, $src3" %}
 9651   size(4);
 9652   ins_encode %{
 9653     __ fmadd($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister, $src3$$FloatRegister);
 9654   %}
 9655   ins_pipe(pipe_class_default);
 9656 %}
 9657 
 9658 // -src1 * src2 + src3 = -(src1*src2-src3)
 9659 instruct mnsubF_reg_reg(regF dst, regF src1, regF src2, regF src3) %{
 9660   match(Set dst (FmaF src3 (Binary (NegF src1) src2)));
 9661   match(Set dst (FmaF src3 (Binary src1 (NegF src2))));
 9662 
 9663   format %{ "FNMSUBS $dst, $src1, $src2, $src3" %}
 9664   size(4);
 9665   ins_encode %{
 9666     __ fnmsubs($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister, $src3$$FloatRegister);
 9667   %}
 9668   ins_pipe(pipe_class_default);
 9669 %}
 9670 
 9671 // -src1 * src2 + src3 = -(src1*src2-src3)
 9672 instruct mnsubD_reg_reg(regD dst, regD src1, regD src2, regD src3) %{
 9673   match(Set dst (FmaD src3 (Binary (NegD src1) src2)));
 9674   match(Set dst (FmaD src3 (Binary src1 (NegD src2))));
 9675 
 9676   format %{ "FNMSUB  $dst, $src1, $src2, $src3" %}
 9677   size(4);
 9678   ins_encode %{
 9679     __ fnmsub($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister, $src3$$FloatRegister);
 9680   %}
 9681   ins_pipe(pipe_class_default);
 9682 %}
 9683 
 9684 // -src1 * src2 - src3 = -(src1*src2+src3)
 9685 instruct mnaddF_reg_reg(regF dst, regF src1, regF src2, regF src3) %{
 9686   match(Set dst (FmaF (NegF src3) (Binary (NegF src1) src2)));
 9687   match(Set dst (FmaF (NegF src3) (Binary src1 (NegF src2))));
 9688 
 9689   format %{ "FNMADDS $dst, $src1, $src2, $src3" %}
 9690   size(4);
 9691   ins_encode %{
 9692     __ fnmadds($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister, $src3$$FloatRegister);
 9693   %}
 9694   ins_pipe(pipe_class_default);
 9695 %}
 9696 
 9697 // -src1 * src2 - src3 = -(src1*src2+src3)
 9698 instruct mnaddD_reg_reg(regD dst, regD src1, regD src2, regD src3) %{
 9699   match(Set dst (FmaD (NegD src3) (Binary (NegD src1) src2)));
 9700   match(Set dst (FmaD (NegD src3) (Binary src1 (NegD src2))));
 9701 
 9702   format %{ "FNMADD  $dst, $src1, $src2, $src3" %}
 9703   size(4);
 9704   ins_encode %{
 9705     __ fnmadd($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister, $src3$$FloatRegister);
 9706   %}
 9707   ins_pipe(pipe_class_default);
 9708 %}
 9709 
 9710 // src1 * src2 - src3
 9711 instruct msubF_reg_reg(regF dst, regF src1, regF src2, regF src3) %{
 9712   match(Set dst (FmaF (NegF src3) (Binary src1 src2)));
 9713 
 9714   format %{ "FMSUBS  $dst, $src1, $src2, $src3" %}
 9715   size(4);
 9716   ins_encode %{
 9717     __ fmsubs($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister, $src3$$FloatRegister);
 9718   %}
 9719   ins_pipe(pipe_class_default);
 9720 %}
 9721 
 9722 // src1 * src2 - src3
 9723 instruct msubD_reg_reg(regD dst, regD src1, regD src2, regD src3) %{
 9724   match(Set dst (FmaD (NegD src3) (Binary src1 src2)));
 9725 
 9726   format %{ "FMSUB   $dst, $src1, $src2, $src3" %}
 9727   size(4);
 9728   ins_encode %{
 9729     __ fmsub($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister, $src3$$FloatRegister);
 9730   %}
 9731   ins_pipe(pipe_class_default);
 9732 %}
 9733 
 9734 
 9735 //----------Logical Instructions-----------------------------------------------
 9736 
 9737 // And Instructions
 9738 
 9739 // Register And
 9740 instruct andI_reg_reg(iRegIdst dst, iRegIsrc src1, iRegIsrc src2) %{
 9741   match(Set dst (AndI src1 src2));
 9742   format %{ "AND     $dst, $src1, $src2" %}
 9743   size(4);
 9744   ins_encode %{
 9745     __ andr($dst$$Register, $src1$$Register, $src2$$Register);
 9746   %}
 9747   ins_pipe(pipe_class_default);
 9748 %}
 9749 
// Immediate And with a 16-bit immediate shifted left by 16 bits (andis)
 9751 instruct andI_reg_immIhi16(iRegIdst dst, iRegIsrc src1, immIhi16  src2, flagsRegCR0 cr0) %{
 9752   match(Set dst (AndI src1 src2));
 9753   effect(KILL cr0);
 9754   format %{ "ANDIS   $dst, $src1, $src2.hi" %}
 9755   size(4);
 9756   ins_encode %{
 9757     __ andis_($dst$$Register, $src1$$Register, (int)((unsigned short)(($src2$$constant & 0xFFFF0000) >> 16)));
 9758   %}
 9759   ins_pipe(pipe_class_default);
 9760 %}
 9761 
 9762 // Immediate And
 9763 instruct andI_reg_uimm16(iRegIdst dst, iRegIsrc src1, uimmI16 src2, flagsRegCR0 cr0) %{
 9764   match(Set dst (AndI src1 src2));
 9765   effect(KILL cr0);
 9766 
 9767   format %{ "ANDI    $dst, $src1, $src2" %}
 9768   size(4);
 9769   ins_encode %{
 9770     // FIXME: avoid andi_ ?
 9771     __ andi_($dst$$Register, $src1$$Register, $src2$$constant);
 9772   %}
 9773   ins_pipe(pipe_class_default);
 9774 %}
 9775 
 9776 // Immediate And where the immediate is a negative power of 2.
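// E.g. src2 == -8 becomes a single clrrdi by 3 (x & ~7); similarly, the pow2minus1
// variant below turns src2 == 0xFF into clrldi by 56 (keep the low 8 bits).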
 9777 instruct andI_reg_immInegpow2(iRegIdst dst, iRegIsrc src1, immInegpow2 src2) %{
 9778   match(Set dst (AndI src1 src2));
 9779   format %{ "ANDWI   $dst, $src1, $src2" %}
 9780   size(4);
 9781   ins_encode %{
 9782     __ clrrdi($dst$$Register, $src1$$Register, log2i_exact(-(juint)$src2$$constant));
 9783   %}
 9784   ins_pipe(pipe_class_default);
 9785 %}
 9786 
 9787 instruct andI_reg_immIpow2minus1(iRegIdst dst, iRegIsrc src1, immIpow2minus1 src2) %{
 9788   match(Set dst (AndI src1 src2));
 9789   format %{ "ANDWI   $dst, $src1, $src2" %}
 9790   size(4);
 9791   ins_encode %{
 9792     __ clrldi($dst$$Register, $src1$$Register, 64 - log2i_exact((juint)$src2$$constant + 1u));
 9793   %}
 9794   ins_pipe(pipe_class_default);
 9795 %}
 9796 
 9797 instruct andI_reg_immIpowerOf2(iRegIdst dst, iRegIsrc src1, immIpowerOf2 src2) %{
 9798   match(Set dst (AndI src1 src2));
 9799   predicate(UseRotateAndMaskInstructionsPPC64);
 9800   format %{ "ANDWI   $dst, $src1, $src2" %}
 9801   size(4);
 9802   ins_encode %{
 9803     int bitpos = 31 - log2i_exact((juint)$src2$$constant);
 9804     __ rlwinm($dst$$Register, $src1$$Register, 0, bitpos, bitpos);
 9805   %}
 9806   ins_pipe(pipe_class_default);
 9807 %}
 9808 
 9809 // Register And Long
 9810 instruct andL_reg_reg(iRegLdst dst, iRegLsrc src1, iRegLsrc src2) %{
 9811   match(Set dst (AndL src1 src2));
 9812   ins_cost(DEFAULT_COST);
 9813 
 9814   format %{ "AND     $dst, $src1, $src2 \t// long" %}
 9815   size(4);
 9816   ins_encode %{
 9817     __ andr($dst$$Register, $src1$$Register, $src2$$Register);
 9818   %}
 9819   ins_pipe(pipe_class_default);
 9820 %}
 9821 
 9822 // Immediate And long
 9823 instruct andL_reg_uimm16(iRegLdst dst, iRegLsrc src1, uimmL16 src2, flagsRegCR0 cr0) %{
 9824   match(Set dst (AndL src1 src2));
 9825   effect(KILL cr0);
 9826 
 9827   format %{ "ANDI    $dst, $src1, $src2 \t// long" %}
 9828   size(4);
 9829   ins_encode %{
 9830     // FIXME: avoid andi_ ?
 9831     __ andi_($dst$$Register, $src1$$Register, $src2$$constant);
 9832   %}
 9833   ins_pipe(pipe_class_default);
 9834 %}
 9835 
 9836 // Immediate And Long where the immediate is a negative power of 2.
 9837 instruct andL_reg_immLnegpow2(iRegLdst dst, iRegLsrc src1, immLnegpow2 src2) %{
 9838   match(Set dst (AndL src1 src2));
 9839   format %{ "ANDDI   $dst, $src1, $src2" %}
 9840   size(4);
 9841   ins_encode %{
 9842     __ clrrdi($dst$$Register, $src1$$Register, log2i_exact(-(julong)$src2$$constant));
 9843   %}
 9844   ins_pipe(pipe_class_default);
 9845 %}
 9846 
 9847 instruct andL_reg_immLpow2minus1(iRegLdst dst, iRegLsrc src1, immLpow2minus1 src2) %{
 9848   match(Set dst (AndL src1 src2));
 9849   format %{ "ANDDI   $dst, $src1, $src2" %}
 9850   size(4);
 9851   ins_encode %{
 9852     __ clrldi($dst$$Register, $src1$$Register, 64 - log2i_exact((julong)$src2$$constant + 1ull));
 9853   %}
 9854   ins_pipe(pipe_class_default);
 9855 %}
 9856 
 9857 // AndL + ConvL2I.
 9858 instruct convL2I_andL_reg_immLpow2minus1(iRegIdst dst, iRegLsrc src1, immLpow2minus1 src2) %{
 9859   match(Set dst (ConvL2I (AndL src1 src2)));
 9860   ins_cost(DEFAULT_COST);
 9861 
 9862   format %{ "ANDDI   $dst, $src1, $src2 \t// long + l2i" %}
 9863   size(4);
 9864   ins_encode %{
 9865     __ clrldi($dst$$Register, $src1$$Register, 64 - log2i_exact((julong)$src2$$constant + 1ull));
 9866   %}
 9867   ins_pipe(pipe_class_default);
 9868 %}
 9869 
 9870 // Or Instructions
 9871 
 9872 // Register Or
 9873 instruct orI_reg_reg(iRegIdst dst, iRegIsrc src1, iRegIsrc src2) %{
 9874   match(Set dst (OrI src1 src2));
 9875   format %{ "OR      $dst, $src1, $src2" %}
 9876   size(4);
 9877   ins_encode %{
 9878     __ or_unchecked($dst$$Register, $src1$$Register, $src2$$Register);
 9879   %}
 9880   ins_pipe(pipe_class_default);
 9881 %}
 9882 
// Expand does not work with the above instruct (adlc complains), so provide a
// duplicate without a match rule.
 9884 instruct orI_reg_reg_2(iRegIdst dst, iRegIsrc src1, iRegIsrc src2) %{
 9885   // no match-rule
 9886   effect(DEF dst, USE src1, USE src2);
 9887   format %{ "OR      $dst, $src1, $src2" %}
 9888   size(4);
 9889   ins_encode %{
 9890     __ or_unchecked($dst$$Register, $src1$$Register, $src2$$Register);
 9891   %}
 9892   ins_pipe(pipe_class_default);
 9893 %}
 9894 
 9895 instruct tree_orI_orI_orI_reg_reg_Ex(iRegIdst dst, iRegIsrc src1, iRegIsrc src2, iRegIsrc src3, iRegIsrc src4) %{
 9896   match(Set dst (OrI (OrI (OrI src1 src2) src3) src4));
 9897   ins_cost(DEFAULT_COST*3);
 9898 
 9899   expand %{
 9900     // FIXME: we should do this in the ideal world.
 9901     iRegIdst tmp1;
 9902     iRegIdst tmp2;
 9903     orI_reg_reg(tmp1, src1, src2);
 9904     orI_reg_reg_2(tmp2, src3, src4); // Adlc complains about orI_reg_reg.
 9905     orI_reg_reg(dst, tmp1, tmp2);
 9906   %}
 9907 %}
 9908 
 9909 // Immediate Or
 9910 instruct orI_reg_uimm16(iRegIdst dst, iRegIsrc src1, uimmI16 src2) %{
 9911   match(Set dst (OrI src1 src2));
 9912   format %{ "ORI     $dst, $src1, $src2" %}
 9913   size(4);
 9914   ins_encode %{
 9915     __ ori($dst$$Register, $src1$$Register, ($src2$$constant) & 0xFFFF);
 9916   %}
 9917   ins_pipe(pipe_class_default);
 9918 %}
 9919 
 9920 // Register Or Long
 9921 instruct orL_reg_reg(iRegLdst dst, iRegLsrc src1, iRegLsrc src2) %{
 9922   match(Set dst (OrL src1 src2));
 9923   ins_cost(DEFAULT_COST);
 9924 
 9925   size(4);
 9926   format %{ "OR      $dst, $src1, $src2 \t// long" %}
 9927   ins_encode %{
 9928     __ or_unchecked($dst$$Register, $src1$$Register, $src2$$Register);
 9929   %}
 9930   ins_pipe(pipe_class_default);
 9931 %}
 9932 
 9933 // OrL + ConvL2I.
 9934 instruct orI_regL_regL(iRegIdst dst, iRegLsrc src1, iRegLsrc src2) %{
 9935   match(Set dst (ConvL2I (OrL src1 src2)));
 9936   ins_cost(DEFAULT_COST);
 9937 
 9938   format %{ "OR      $dst, $src1, $src2 \t// long + l2i" %}
 9939   size(4);
 9940   ins_encode %{
 9941     __ or_unchecked($dst$$Register, $src1$$Register, $src2$$Register);
 9942   %}
 9943   ins_pipe(pipe_class_default);
 9944 %}
 9945 
 9946 // Immediate Or long
 9947 instruct orL_reg_uimm16(iRegLdst dst, iRegLsrc src1, uimmL16 con) %{
 9948   match(Set dst (OrL src1 con));
 9949   ins_cost(DEFAULT_COST);
 9950 
 9951   format %{ "ORI     $dst, $src1, $con \t// long" %}
 9952   size(4);
 9953   ins_encode %{
 9954     __ ori($dst$$Register, $src1$$Register, ($con$$constant) & 0xFFFF);
 9955   %}
 9956   ins_pipe(pipe_class_default);
 9957 %}
 9958 
 9959 // Xor Instructions
 9960 
 9961 // Register Xor
 9962 instruct xorI_reg_reg(iRegIdst dst, iRegIsrc src1, iRegIsrc src2) %{
 9963   match(Set dst (XorI src1 src2));
 9964   format %{ "XOR     $dst, $src1, $src2" %}
 9965   size(4);
 9966   ins_encode %{
 9967     __ xorr($dst$$Register, $src1$$Register, $src2$$Register);
 9968   %}
 9969   ins_pipe(pipe_class_default);
 9970 %}
 9971 
// Expand does not work with the above instruct (adlc complains), so provide a
// duplicate without a match rule.
 9973 instruct xorI_reg_reg_2(iRegIdst dst, iRegIsrc src1, iRegIsrc src2) %{
 9974   // no match-rule
 9975   effect(DEF dst, USE src1, USE src2);
 9976   format %{ "XOR     $dst, $src1, $src2" %}
 9977   size(4);
 9978   ins_encode %{
 9979     __ xorr($dst$$Register, $src1$$Register, $src2$$Register);
 9980   %}
 9981   ins_pipe(pipe_class_default);
 9982 %}
 9983 
 9984 instruct tree_xorI_xorI_xorI_reg_reg_Ex(iRegIdst dst, iRegIsrc src1, iRegIsrc src2, iRegIsrc src3, iRegIsrc src4) %{
 9985   match(Set dst (XorI (XorI (XorI src1 src2) src3) src4));
 9986   ins_cost(DEFAULT_COST*3);
 9987 
 9988   expand %{
 9989     // FIXME: we should do this in the ideal world.
 9990     iRegIdst tmp1;
 9991     iRegIdst tmp2;
 9992     xorI_reg_reg(tmp1, src1, src2);
 9993     xorI_reg_reg_2(tmp2, src3, src4); // Adlc complains about xorI_reg_reg.
 9994     xorI_reg_reg(dst, tmp1, tmp2);
 9995   %}
 9996 %}
 9997 
 9998 // Immediate Xor
 9999 instruct xorI_reg_uimm16(iRegIdst dst, iRegIsrc src1, uimmI16 src2) %{
10000   match(Set dst (XorI src1 src2));
10001   format %{ "XORI    $dst, $src1, $src2" %}
10002   size(4);
10003   ins_encode %{
10004     __ xori($dst$$Register, $src1$$Register, $src2$$constant);
10005   %}
10006   ins_pipe(pipe_class_default);
10007 %}
10008 
10009 // Register Xor Long
10010 instruct xorL_reg_reg(iRegLdst dst, iRegLsrc src1, iRegLsrc src2) %{
10011   match(Set dst (XorL src1 src2));
10012   ins_cost(DEFAULT_COST);
10013 
10014   format %{ "XOR     $dst, $src1, $src2 \t// long" %}
10015   size(4);
10016   ins_encode %{
10017     __ xorr($dst$$Register, $src1$$Register, $src2$$Register);
10018   %}
10019   ins_pipe(pipe_class_default);
10020 %}
10021 
10022 // XorL + ConvL2I.
10023 instruct xorI_regL_regL(iRegIdst dst, iRegLsrc src1, iRegLsrc src2) %{
10024   match(Set dst (ConvL2I (XorL src1 src2)));
10025   ins_cost(DEFAULT_COST);
10026 
10027   format %{ "XOR     $dst, $src1, $src2 \t// long + l2i" %}
10028   size(4);
10029   ins_encode %{
10030     __ xorr($dst$$Register, $src1$$Register, $src2$$Register);
10031   %}
10032   ins_pipe(pipe_class_default);
10033 %}
10034 
10035 // Immediate Xor Long
10036 instruct xorL_reg_uimm16(iRegLdst dst, iRegLsrc src1, uimmL16 src2) %{
10037   match(Set dst (XorL src1 src2));
10038   ins_cost(DEFAULT_COST);
10039 
10040   format %{ "XORI    $dst, $src1, $src2 \t// long" %}
10041   size(4);
10042   ins_encode %{
10043     __ xori($dst$$Register, $src1$$Register, $src2$$constant);
10044   %}
10045   ins_pipe(pipe_class_default);
10046 %}
10047 
10048 instruct notI_reg(iRegIdst dst, iRegIsrc src1, immI_minus1 src2) %{
10049   match(Set dst (XorI src1 src2));
10050   ins_cost(DEFAULT_COST);
10051 
10052   format %{ "NOT     $dst, $src1 ($src2)" %}
10053   size(4);
10054   ins_encode %{
10055     __ nor($dst$$Register, $src1$$Register, $src1$$Register);
10056   %}
10057   ins_pipe(pipe_class_default);
10058 %}
10059 
10060 instruct notL_reg(iRegLdst dst, iRegLsrc src1, immL_minus1 src2) %{
10061   match(Set dst (XorL src1 src2));
10062   ins_cost(DEFAULT_COST);
10063 
10064   format %{ "NOT     $dst, $src1 ($src2) \t// long" %}
10065   size(4);
10066   ins_encode %{
10067     __ nor($dst$$Register, $src1$$Register, $src1$$Register);
10068   %}
10069   ins_pipe(pipe_class_default);
10070 %}
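// In both rules xor with -1 is the bitwise complement, emitted as nor(src1, src1) == ~src1.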
10071 
10072 // And-complement
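// Matches (src1 ^ -1) & src3, i.e. dst = src3 & ~src1, encoded as a single
// and-with-complement (see enc_andc).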
10073 instruct andcI_reg_reg(iRegIdst dst, iRegIsrc src1, immI_minus1 src2, iRegIsrc src3) %{
10074   match(Set dst (AndI (XorI src1 src2) src3));
10075   ins_cost(DEFAULT_COST);
10076 
10077   format %{ "ANDW    $dst, xori($src1, $src2), $src3" %}
10078   size(4);
10079   ins_encode( enc_andc(dst, src3, src1) );
10080   ins_pipe(pipe_class_default);
10081 %}
10082 
10083 // And-complement
10084 instruct andcL_reg_reg(iRegLdst dst, iRegLsrc src1, iRegLsrc src2) %{
10085   // no match-rule, false predicate
10086   effect(DEF dst, USE src1, USE src2);
10087   predicate(false);
10088 
10089   format %{ "ANDC    $dst, $src1, $src2" %}
10090   size(4);
10091   ins_encode %{
10092     __ andc($dst$$Register, $src1$$Register, $src2$$Register);
10093   %}
10094   ins_pipe(pipe_class_default);
10095 %}
10096 
10097 //----------Moves between int/long and float/double----------------------------
10098 //
10099 // The following rules move values from int/long registers/stack-locations
10100 // to float/double registers/stack-locations and vice versa, without doing any
10101 // conversions. These rules are used to implement the bit-conversion methods
10102 // of java.lang.Float etc., e.g.
10103 //   int   floatToIntBits(float value)
10104 //   float intBitsToFloat(int bits)
10105 //
10106 // Notes on the implementation on ppc64:
10107 // For Power7 and earlier, the rules are limited to those which move between a
10108 // register and a stack-location, because we always have to go through memory
10109 // when moving between a float register and an integer register.
10110 // This restriction is removed in Power8 with the introduction of the mtfprd
10111 // and mffprd instructions.
10112 
10113 instruct moveL2D_reg(regD dst, iRegLsrc src) %{
10114   match(Set dst (MoveL2D src));
10115   predicate(VM_Version::has_mtfprd());
10116 
10117   format %{ "MTFPRD  $dst, $src" %}
10118   size(4);
10119   ins_encode %{
10120     __ mtfprd($dst$$FloatRegister, $src$$Register);
10121   %}
10122   ins_pipe(pipe_class_default);
10123 %}
10124 
10125 instruct moveI2D_reg(regD dst, iRegIsrc src) %{
10126   // no match-rule, false predicate
10127   effect(DEF dst, USE src);
10128   predicate(false);
10129 
10130   format %{ "MTFPRWA $dst, $src" %}
10131   size(4);
10132   ins_encode %{
10133     __ mtfprwa($dst$$FloatRegister, $src$$Register);
10134   %}
10135   ins_pipe(pipe_class_default);
10136 %}
10137 
10138 //---------- Chain stack slots between similar types --------
10139 
10140 // These are needed so that the rules below can match.
10141 
10142 // Load integer from stack slot
10143 instruct stkI_to_regI(iRegIdst dst, stackSlotI src) %{
10144   match(Set dst src);
10145   ins_cost(MEMORY_REF_COST);
10146 
10147   format %{ "LWZ     $dst, $src" %}
10148   size(4);
10149   ins_encode( enc_lwz(dst, src) );
10150   ins_pipe(pipe_class_memory);
10151 %}
10152 
10153 // Store integer to stack slot
10154 instruct regI_to_stkI(stackSlotI dst, iRegIsrc src) %{
10155   match(Set dst src);
10156   ins_cost(MEMORY_REF_COST);
10157 
10158   format %{ "STW     $src, $dst \t// stk" %}
10159   size(4);
10160   ins_encode( enc_stw(src, dst) ); // rs=rt
10161   ins_pipe(pipe_class_memory);
10162 %}
10163 
10164 // Load long from stack slot
10165 instruct stkL_to_regL(iRegLdst dst, stackSlotL src) %{
10166   match(Set dst src);
10167   ins_cost(MEMORY_REF_COST);
10168 
10169   format %{ "LD      $dst, $src \t// long" %}
10170   size(4);
10171   ins_encode( enc_ld(dst, src) );
10172   ins_pipe(pipe_class_memory);
10173 %}
10174 
10175 // Store long to stack slot
10176 instruct regL_to_stkL(stackSlotL dst, iRegLsrc src) %{
10177   match(Set dst src);
10178   ins_cost(MEMORY_REF_COST);
10179 
10180   format %{ "STD     $src, $dst \t// long" %}
10181   size(4);
10182   ins_encode( enc_std(src, dst) ); // rs=rt
10183   ins_pipe(pipe_class_memory);
10184 %}
10185 
10186 //----------Moves between int and float
10187 
10188 // Move float value from float stack-location to integer register.
10189 instruct moveF2I_stack_reg(iRegIdst dst, stackSlotF src) %{
10190   match(Set dst (MoveF2I src));
10191   ins_cost(MEMORY_REF_COST);
10192 
10193   format %{ "LWZ     $dst, $src \t// MoveF2I" %}
10194   size(4);
10195   ins_encode( enc_lwz(dst, src) );
10196   ins_pipe(pipe_class_memory);
10197 %}
10198 
10199 // Move float value from float register to integer stack-location.
10200 instruct moveF2I_reg_stack(stackSlotI dst, regF src) %{
10201   match(Set dst (MoveF2I src));
10202   ins_cost(MEMORY_REF_COST);
10203 
10204   format %{ "STFS    $src, $dst \t// MoveF2I" %}
10205   size(4);
10206   ins_encode( enc_stfs(src, dst) );
10207   ins_pipe(pipe_class_memory);
10208 %}
10209 
10210 // Move integer value from integer stack-location to float register.
10211 instruct moveI2F_stack_reg(regF dst, stackSlotI src) %{
10212   match(Set dst (MoveI2F src));
10213   ins_cost(MEMORY_REF_COST);
10214 
10215   format %{ "LFS     $dst, $src \t// MoveI2F" %}
10216   size(4);
10217   ins_encode %{
10218     int Idisp = $src$$disp + frame_slots_bias($src$$base, ra_);
10219     __ lfs($dst$$FloatRegister, Idisp, $src$$base$$Register);
10220   %}
10221   ins_pipe(pipe_class_memory);
10222 %}
10223 
10224 // Move integer value from integer register to float stack-location.
10225 instruct moveI2F_reg_stack(stackSlotF dst, iRegIsrc src) %{
10226   match(Set dst (MoveI2F src));
10227   ins_cost(MEMORY_REF_COST);
10228 
10229   format %{ "STW     $src, $dst \t// MoveI2F" %}
10230   size(4);
10231   ins_encode( enc_stw(src, dst) );
10232   ins_pipe(pipe_class_memory);
10233 %}
10234 
10235 //----------Moves between long and float
10236 
10237 instruct moveF2L_reg_stack(stackSlotL dst, regF src) %{
10238   // no match-rule, false predicate
10239   effect(DEF dst, USE src);
10240   predicate(false);
10241 
10242   format %{ "storeD  $src, $dst \t// STACK" %}
10243   size(4);
10244   ins_encode( enc_stfd(src, dst) );
10245   ins_pipe(pipe_class_default);
10246 %}
10247 
10248 //----------Moves between long and double
10249 
10250 // Move double value from double stack-location to long register.
10251 instruct moveD2L_stack_reg(iRegLdst dst, stackSlotD src) %{
10252   match(Set dst (MoveD2L src));
10253   ins_cost(MEMORY_REF_COST);
10254   size(4);
10255   format %{ "LD      $dst, $src \t// MoveD2L" %}
10256   ins_encode( enc_ld(dst, src) );
10257   ins_pipe(pipe_class_memory);
10258 %}
10259 
10260 // Move double value from double register to long stack-location.
10261 instruct moveD2L_reg_stack(stackSlotL dst, regD src) %{
10262   match(Set dst (MoveD2L src));
10263   effect(DEF dst, USE src);
10264   ins_cost(MEMORY_REF_COST);
10265 
10266   format %{ "STFD    $src, $dst \t// MoveD2L" %}
10267   size(4);
10268   ins_encode( enc_stfd(src, dst) );
10269   ins_pipe(pipe_class_memory);
10270 %}
10271 
10272 // Move long value from long stack-location to double register.
10273 instruct moveL2D_stack_reg(regD dst, stackSlotL src) %{
10274   match(Set dst (MoveL2D src));
10275   ins_cost(MEMORY_REF_COST);
10276 
10277   format %{ "LFD     $dst, $src \t// MoveL2D" %}
10278   size(4);
10279   ins_encode( enc_lfd(dst, src) );
10280   ins_pipe(pipe_class_memory);
10281 %}
10282 
10283 // Move long value from long register to double stack-location.
10284 instruct moveL2D_reg_stack(stackSlotD dst, iRegLsrc src) %{
10285   match(Set dst (MoveL2D src));
10286   ins_cost(MEMORY_REF_COST);
10287 
10288   format %{ "STD     $src, $dst \t// MoveL2D" %}
10289   size(4);
10290   ins_encode( enc_std(src, dst) );
10291   ins_pipe(pipe_class_memory);
10292 %}
10293 
10294 //----------Register Move Instructions-----------------------------------------
10295 
10296 // Replicate for Superword
10297 
10298 instruct moveReg(iRegLdst dst, iRegIsrc src) %{
10299   predicate(false);
10300   effect(DEF dst, USE src);
10301 
10302   format %{ "MR      $dst, $src \t// replicate " %}
10303   // variable size, 0 or 4.
10304   ins_encode %{
10305     __ mr_if_needed($dst$$Register, $src$$Register);
10306   %}
10307   ins_pipe(pipe_class_default);
10308 %}
10309 
10310 //----------Cast instructions (Java-level type cast)---------------------------
10311 
10312 // Cast Long to Pointer for unsafe natives.
10313 instruct castX2P(iRegPdst dst, iRegLsrc src) %{
10314   match(Set dst (CastX2P src));
10315 
10316   format %{ "MR      $dst, $src \t// Long->Ptr" %}
10317   // variable size, 0 or 4.
10318   ins_encode %{
10319     __ mr_if_needed($dst$$Register, $src$$Register);
10320   %}
  ins_pipe(pipe_class_default);
10322 %}
10323 
10324 // Cast Pointer to Long for unsafe natives.
10325 instruct castP2X(iRegLdst dst, iRegP_N2P src) %{
10326   match(Set dst (CastP2X src));
10327 
10328   format %{ "MR      $dst, $src \t// Ptr->Long" %}
10329   // variable size, 0 or 4.
10330   ins_encode %{
10331     __ mr_if_needed($dst$$Register, $src$$Register);
10332   %}
10333   ins_pipe(pipe_class_default);
10334 %}
10335 
10336 instruct castPP(iRegPdst dst) %{
10337   match(Set dst (CastPP dst));
10338   format %{ " -- \t// castPP of $dst" %}
10339   size(0);
10340   ins_encode( /*empty*/ );
10341   ins_pipe(pipe_class_default);
10342 %}
10343 
10344 instruct castII(iRegIdst dst) %{
10345   match(Set dst (CastII dst));
10346   format %{ " -- \t// castII of $dst" %}
10347   size(0);
10348   ins_encode( /*empty*/ );
10349   ins_pipe(pipe_class_default);
10350 %}
10351 
10352 instruct castLL(iRegLdst dst) %{
10353   match(Set dst (CastLL dst));
10354   format %{ " -- \t// castLL of $dst" %}
10355   size(0);
10356   ins_encode( /*empty*/ );
10357   ins_pipe(pipe_class_default);
10358 %}
10359 
10360 instruct castFF(regF dst) %{
10361   match(Set dst (CastFF dst));
10362   format %{ " -- \t// castFF of $dst" %}
10363   size(0);
10364   ins_encode( /*empty*/ );
10365   ins_pipe(pipe_class_default);
10366 %}
10367 
10368 instruct castDD(regD dst) %{
10369   match(Set dst (CastDD dst));
10370   format %{ " -- \t// castDD of $dst" %}
10371   size(0);
10372   ins_encode( /*empty*/ );
10373   ins_pipe(pipe_class_default);
10374 %}
10375 
10376 instruct castVV8(iRegLdst dst) %{
10377   match(Set dst (CastVV dst));
10378   format %{ " -- \t// castVV of $dst" %}
10379   size(0);
10380   ins_encode( /*empty*/ );
10381   ins_pipe(pipe_class_default);
10382 %}
10383 
10384 instruct castVV16(vecX dst) %{
10385   match(Set dst (CastVV dst));
10386   format %{ " -- \t// castVV of $dst" %}
10387   size(0);
10388   ins_encode( /*empty*/ );
10389   ins_pipe(pipe_class_default);
10390 %}
10391 
10392 instruct checkCastPP(iRegPdst dst) %{
10393   match(Set dst (CheckCastPP dst));
10394   format %{ " -- \t// checkcastPP of $dst" %}
10395   size(0);
10396   ins_encode( /*empty*/ );
10397   ins_pipe(pipe_class_default);
10398 %}
10399 
10400 //----------Convert instructions-----------------------------------------------
10401 
10402 // Convert to boolean.
10403 
10404 // int_to_bool(src) : { 1   if src != 0
10405 //                    { 0   else
10406 //
10407 // strategy:
10408 // 1) Count leading zeros of 32 bit-value src,
10409 //    this returns 32 (0b10.0000) iff src == 0 and <32 otherwise.
10410 // 2) Shift 5 bits to the right, result is 0b1 iff src == 0, 0b0 otherwise.
10411 // 3) Xori the result to get 0b1 if src != 0 and 0b0 if src == 0.
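//
// Worked example (illustrative): src == 6 -> cntlzw = 29, 29 >> 5 = 0, 0 ^ 1 = 1;
//                                src == 0 -> cntlzw = 32, 32 >> 5 = 1, 1 ^ 1 = 0.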
10412 
10413 // convI2Bool
10414 instruct convI2Bool_reg__cntlz_Ex(iRegIdst dst, iRegIsrc src) %{
10415   match(Set dst (Conv2B src));
10416   predicate(UseCountLeadingZerosInstructionsPPC64);
10417   ins_cost(DEFAULT_COST);
10418 
10419   expand %{
10420     immI shiftAmount %{ 0x5 %}
10421     uimmI16 mask %{ 0x1 %}
10422     iRegIdst tmp1;
10423     iRegIdst tmp2;
10424     countLeadingZerosI(tmp1, src);
10425     urShiftI_reg_imm(tmp2, tmp1, shiftAmount);
10426     xorI_reg_uimm16(dst, tmp2, mask);
10427   %}
10428 %}
10429 
10430 instruct convI2Bool_reg__cmove(iRegIdst dst, iRegIsrc src, flagsReg crx) %{
10431   match(Set dst (Conv2B src));
10432   effect(TEMP crx);
10433   predicate(!UseCountLeadingZerosInstructionsPPC64);
10434   ins_cost(DEFAULT_COST);
10435 
10436   format %{ "CMPWI   $crx, $src, #0 \t// convI2B"
10437             "LI      $dst, #0\n\t"
10438             "BEQ     $crx, done\n\t"
10439             "LI      $dst, #1\n"
10440             "done:" %}
10441   size(16);
10442   ins_encode( enc_convI2B_regI__cmove(dst, src, crx, 0x0, 0x1) );
10443   ins_pipe(pipe_class_compare);
10444 %}
10445 
10446 // ConvI2B + XorI
10447 instruct xorI_convI2Bool_reg_immIvalue1__cntlz_Ex(iRegIdst dst, iRegIsrc src, immI_1 mask) %{
10448   match(Set dst (XorI (Conv2B src) mask));
10449   predicate(UseCountLeadingZerosInstructionsPPC64);
10450   ins_cost(DEFAULT_COST);
10451 
10452   expand %{
10453     immI shiftAmount %{ 0x5 %}
10454     iRegIdst tmp1;
10455     countLeadingZerosI(tmp1, src);
10456     urShiftI_reg_imm(dst, tmp1, shiftAmount);
10457   %}
10458 %}
10459 
10460 instruct xorI_convI2Bool_reg_immIvalue1__cmove(iRegIdst dst, iRegIsrc src, flagsReg crx, immI_1 mask) %{
10461   match(Set dst (XorI (Conv2B src) mask));
10462   effect(TEMP crx);
10463   predicate(!UseCountLeadingZerosInstructionsPPC64);
10464   ins_cost(DEFAULT_COST);
10465 
10466   format %{ "CMPWI   $crx, $src, #0 \t// Xor(convI2B($src), $mask)"
10467             "LI      $dst, #1\n\t"
10468             "BEQ     $crx, done\n\t"
10469             "LI      $dst, #0\n"
10470             "done:" %}
10471   size(16);
10472   ins_encode( enc_convI2B_regI__cmove(dst, src, crx, 0x1, 0x0) );
10473   ins_pipe(pipe_class_compare);
10474 %}
10475 
10476 // AndI 0b0..010..0 + ConvI2B
10477 instruct convI2Bool_andI_reg_immIpowerOf2(iRegIdst dst, iRegIsrc src, immIpowerOf2 mask) %{
10478   match(Set dst (Conv2B (AndI src mask)));
10479   predicate(UseRotateAndMaskInstructionsPPC64);
10480   ins_cost(DEFAULT_COST);
10481 
10482   format %{ "RLWINM  $dst, $src, $mask \t// convI2B(AndI($src, $mask))" %}
10483   size(4);
10484   ins_encode %{
10485     __ rlwinm($dst$$Register, $src$$Register, 32 - log2i_exact((juint)($mask$$constant)), 31, 31);
10486   %}
10487   ins_pipe(pipe_class_default);
10488 %}
10489 
10490 // Convert pointer to boolean.
10491 //
10492 // ptr_to_bool(src) : { 1   if src != 0
10493 //                    { 0   else
10494 //
10495 // strategy:
10496 // 1) Count leading zeros of 64 bit-value src,
10497 //    this returns 64 (0b100.0000) iff src == 0 and <64 otherwise.
10498 // 2) Shift 6 bits to the right, result is 0b1 iff src == 0, 0b0 otherwise.
10499 // 3) Xori the result to get 0b1 if src != 0 and 0b0 if src == 0.
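//
// Worked example (illustrative): src == 0 -> cntlzd = 64, 64 >> 6 = 1, 1 ^ 1 = 0;
//                                src != 0 -> cntlzd < 64, >> 6 = 0,  0 ^ 1 = 1.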
10500 
10501 // ConvP2B
10502 instruct convP2Bool_reg__cntlz_Ex(iRegIdst dst, iRegP_N2P src) %{
10503   match(Set dst (Conv2B src));
10504   predicate(UseCountLeadingZerosInstructionsPPC64);
10505   ins_cost(DEFAULT_COST);
10506 
10507   expand %{
10508     immI shiftAmount %{ 0x6 %}
10509     uimmI16 mask %{ 0x1 %}
10510     iRegIdst tmp1;
10511     iRegIdst tmp2;
10512     countLeadingZerosP(tmp1, src);
10513     urShiftI_reg_imm(tmp2, tmp1, shiftAmount);
10514     xorI_reg_uimm16(dst, tmp2, mask);
10515   %}
10516 %}
10517 
10518 instruct convP2Bool_reg__cmove(iRegIdst dst, iRegP_N2P src, flagsReg crx) %{
10519   match(Set dst (Conv2B src));
10520   effect(TEMP crx);
10521   predicate(!UseCountLeadingZerosInstructionsPPC64);
10522   ins_cost(DEFAULT_COST);
10523 
10524   format %{ "CMPDI   $crx, $src, #0 \t// convP2B"
10525             "LI      $dst, #0\n\t"
10526             "BEQ     $crx, done\n\t"
10527             "LI      $dst, #1\n"
10528             "done:" %}
10529   size(16);
10530   ins_encode( enc_convP2B_regP__cmove(dst, src, crx, 0x0, 0x1) );
10531   ins_pipe(pipe_class_compare);
10532 %}
10533 
10534 // ConvP2B + XorI
10535 instruct xorI_convP2Bool_reg__cntlz_Ex(iRegIdst dst, iRegP_N2P src, immI_1 mask) %{
10536   match(Set dst (XorI (Conv2B src) mask));
10537   predicate(UseCountLeadingZerosInstructionsPPC64);
10538   ins_cost(DEFAULT_COST);
10539 
10540   expand %{
10541     immI shiftAmount %{ 0x6 %}
10542     iRegIdst tmp1;
10543     countLeadingZerosP(tmp1, src);
10544     urShiftI_reg_imm(dst, tmp1, shiftAmount);
10545   %}
10546 %}
10547 
10548 instruct xorI_convP2Bool_reg_immIvalue1__cmove(iRegIdst dst, iRegP_N2P src, flagsReg crx, immI_1 mask) %{
10549   match(Set dst (XorI (Conv2B src) mask));
10550   effect(TEMP crx);
10551   predicate(!UseCountLeadingZerosInstructionsPPC64);
10552   ins_cost(DEFAULT_COST);
10553 
10554   format %{ "CMPDI   $crx, $src, #0 \t// XorI(convP2B($src), $mask)"
10555             "LI      $dst, #1\n\t"
10556             "BEQ     $crx, done\n\t"
10557             "LI      $dst, #0\n"
10558             "done:" %}
10559   size(16);
10560   ins_encode( enc_convP2B_regP__cmove(dst, src, crx, 0x1, 0x0) );
10561   ins_pipe(pipe_class_compare);
10562 %}
10563 
10564 // if src1 < src2, return -1 else return 0
10565 instruct cmpLTMask_reg_reg_Ex(iRegIdst dst, iRegIsrc src1, iRegIsrc src2) %{
10566   match(Set dst (CmpLTMask src1 src2));
10567   ins_cost(DEFAULT_COST*4);
10568 
10569   expand %{
10570     iRegLdst src1s;
10571     iRegLdst src2s;
10572     iRegLdst diff;
10573     convI2L_reg(src1s, src1); // Ensure proper sign extension.
10574     convI2L_reg(src2s, src2); // Ensure proper sign extension.
10575     subL_reg_reg(diff, src1s, src2s);
10576     // Need to consider >=33 bit result, therefore we need signmaskL.
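    // E.g. min_jint - max_jint = -(2^32 - 1): the low 32 bits are 1 (positive),
    // only the sign of the 64-bit difference yields the correct mask.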
10577     signmask64I_regL(dst, diff);
10578   %}
10579 %}
10580 
10581 instruct cmpLTMask_reg_immI0(iRegIdst dst, iRegIsrc src1, immI_0 src2) %{
10582   match(Set dst (CmpLTMask src1 src2)); // if src1 < src2, return -1 else return 0
10583   format %{ "SRAWI   $dst, $src1, $src2 \t// CmpLTMask" %}
10584   size(4);
10585   ins_encode %{
10586     __ srawi($dst$$Register, $src1$$Register, 0x1f);
10587   %}
10588   ins_pipe(pipe_class_default);
10589 %}
10590 
10591 //----------Arithmetic Conversion Instructions---------------------------------
10592 
10593 // Convert to Byte  -- nop
10594 // Convert to Short -- nop
10595 
10596 // Convert to Int
10597 
10598 instruct convB2I_reg(iRegIdst dst, iRegIsrc src, immI_24 amount) %{
10599   match(Set dst (RShiftI (LShiftI src amount) amount));
10600   format %{ "EXTSB   $dst, $src \t// byte->int" %}
10601   size(4);
10602   ins_encode %{
10603     __ extsb($dst$$Register, $src$$Register);
10604   %}
10605   ins_pipe(pipe_class_default);
10606 %}
10607 
10608 instruct extsh(iRegIdst dst, iRegIsrc src) %{
10609   effect(DEF dst, USE src);
10610 
10611   size(4);
10612   ins_encode %{
10613     __ extsh($dst$$Register, $src$$Register);
10614   %}
10615   ins_pipe(pipe_class_default);
10616 %}
10617 
10618 // LShiftI 16 + RShiftI 16 converts short to int.
10619 instruct convS2I_reg(iRegIdst dst, iRegIsrc src, immI_16 amount) %{
10620   match(Set dst (RShiftI (LShiftI src amount) amount));
10621   format %{ "EXTSH   $dst, $src \t// short->int" %}
10622   size(4);
10623   ins_encode %{
10624     __ extsh($dst$$Register, $src$$Register);
10625   %}
10626   ins_pipe(pipe_class_default);
10627 %}
10628 
10629 // ConvL2I + ConvI2L: Sign extend int in long register.
10630 instruct sxtI_L2L_reg(iRegLdst dst, iRegLsrc src) %{
10631   match(Set dst (ConvI2L (ConvL2I src)));
10632 
10633   format %{ "EXTSW   $dst, $src \t// long->long" %}
10634   size(4);
10635   ins_encode %{
10636     __ extsw($dst$$Register, $src$$Register);
10637   %}
10638   ins_pipe(pipe_class_default);
10639 %}
10640 
10641 instruct convL2I_reg(iRegIdst dst, iRegLsrc src) %{
10642   match(Set dst (ConvL2I src));
10643   format %{ "MR      $dst, $src \t// long->int" %}
10644   // variable size, 0 or 4
10645   ins_encode %{
10646     __ mr_if_needed($dst$$Register, $src$$Register);
10647   %}
10648   ins_pipe(pipe_class_default);
10649 %}
10650 
10651 instruct convD2IRaw_regD(regD dst, regD src) %{
10652   // no match-rule, false predicate
10653   effect(DEF dst, USE src);
10654   predicate(false);
10655 
10656   format %{ "FCTIWZ $dst, $src \t// convD2I, $src != NaN" %}
10657   size(4);
10658   ins_encode %{
10659     __ fctiwz($dst$$FloatRegister, $src$$FloatRegister);
10660   %}
10661   ins_pipe(pipe_class_default);
10662 %}
10663 
10664 instruct cmovI_bso_stackSlotL(iRegIdst dst, flagsRegSrc crx, stackSlotL src) %{
10665   // no match-rule, false predicate
10666   effect(DEF dst, USE crx, USE src);
10667   predicate(false);
10668 
10669   ins_variable_size_depending_on_alignment(true);
10670 
10671   format %{ "cmovI   $crx, $dst, $src" %}
10672   // Worst case is branch + move + stop, no stop without scheduler.
10673   size(8);
10674   ins_encode( enc_cmove_bso_stackSlotL(dst, crx, src) );
10675   ins_pipe(pipe_class_default);
10676 %}
10677 
10678 instruct cmovI_bso_reg(iRegIdst dst, flagsRegSrc crx, regD src) %{
10679   // no match-rule, false predicate
10680   effect(DEF dst, USE crx, USE src);
10681   predicate(false);
10682 
10683   ins_variable_size_depending_on_alignment(true);
10684 
10685   format %{ "cmovI   $crx, $dst, $src" %}
10686   // Worst case is branch + move + stop, no stop without scheduler.
10687   size(8);
10688   ins_encode( enc_cmove_bso_reg(dst, crx, src) );
10689   ins_pipe(pipe_class_default);
10690 %}
10691 
10692 instruct cmovI_bso_stackSlotL_conLvalue0_Ex(iRegIdst dst, flagsRegSrc crx, stackSlotL mem) %{
10693   // no match-rule, false predicate
10694   effect(DEF dst, USE crx, USE mem);
10695   predicate(false);
10696 
10697   format %{ "CmovI   $dst, $crx, $mem \t// postalloc expanded" %}
10698   postalloc_expand %{
10699     //
10700     // replaces
10701     //
10702     //   region  dst  crx  mem
10703     //    \       |    |   /
10704     //     dst=cmovI_bso_stackSlotL_conLvalue0
10705     //
10706     // with
10707     //
10708     //   region  dst
10709     //    \       /
10710     //     dst=loadConI16(0)
10711     //      |
10712     //      ^  region  dst  crx  mem
10713     //      |   \       |    |    /
10714     //      dst=cmovI_bso_stackSlotL
10715     //
10716 
10717     // Create new nodes.
10718     MachNode *m1 = new loadConI16Node();
10719     MachNode *m2 = new cmovI_bso_stackSlotLNode();
10720 
10721     // inputs for new nodes
10722     m1->add_req(n_region);
10723     m2->add_req(n_region, n_crx, n_mem);
10724 
10725     // precedences for new nodes
10726     m2->add_prec(m1);
10727 
10728     // operands for new nodes
10729     m1->_opnds[0] = op_dst;
10730     m1->_opnds[1] = new immI16Oper(0);
10731 
10732     m2->_opnds[0] = op_dst;
10733     m2->_opnds[1] = op_crx;
10734     m2->_opnds[2] = op_mem;
10735 
10736     // registers for new nodes
10737     ra_->set_pair(m1->_idx, ra_->get_reg_second(this), ra_->get_reg_first(this)); // dst
10738     ra_->set_pair(m2->_idx, ra_->get_reg_second(this), ra_->get_reg_first(this)); // dst
10739 
10740     // Insert new nodes.
10741     nodes->push(m1);
10742     nodes->push(m2);
10743   %}
10744 %}
10745 
10746 instruct cmovI_bso_reg_conLvalue0_Ex(iRegIdst dst, flagsRegSrc crx, regD src) %{
10747   // no match-rule, false predicate
10748   effect(DEF dst, USE crx, USE src);
10749   predicate(false);
10750 
10751   format %{ "CmovI   $dst, $crx, $src \t// postalloc expanded" %}
10752   postalloc_expand %{
10753     //
10754     // replaces
10755     //
10756     //   region  dst  crx  src
10757     //    \       |    |   /
10758     //     dst=cmovI_bso_reg_conLvalue0
10759     //
10760     // with
10761     //
10762     //   region  dst
10763     //    \       /
10764     //     dst=loadConI16(0)
10765     //      |
10766     //      ^  region  dst  crx  src
10767     //      |   \       |    |    /
10768     //      dst=cmovI_bso_reg
10769     //
10770 
10771     // Create new nodes.
10772     MachNode *m1 = new loadConI16Node();
10773     MachNode *m2 = new cmovI_bso_regNode();
10774 
10775     // inputs for new nodes
10776     m1->add_req(n_region);
10777     m2->add_req(n_region, n_crx, n_src);
10778 
10779     // precedences for new nodes
10780     m2->add_prec(m1);
10781 
10782     // operands for new nodes
10783     m1->_opnds[0] = op_dst;
10784     m1->_opnds[1] = new immI16Oper(0);
10785 
10786     m2->_opnds[0] = op_dst;
10787     m2->_opnds[1] = op_crx;
10788     m2->_opnds[2] = op_src;
10789 
10790     // registers for new nodes
10791     ra_->set_pair(m1->_idx, ra_->get_reg_second(this), ra_->get_reg_first(this)); // dst
10792     ra_->set_pair(m2->_idx, ra_->get_reg_second(this), ra_->get_reg_first(this)); // dst
10793 
10794     // Insert new nodes.
10795     nodes->push(m1);
10796     nodes->push(m2);
10797   %}
10798 %}
10799 
10800 // Double to Int conversion, NaN is mapped to 0.
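// Note: FCTIWZ converts NaN to min_jint, while Java defines (int)NaN == 0, hence
// the unordered self-compare and the conditional move of constant 0 in the
// expansions below (the F2I, F2L and D2L variants follow the same pattern).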
10801 instruct convD2I_reg_ExEx(iRegIdst dst, regD src) %{
10802   match(Set dst (ConvD2I src));
10803   predicate(!VM_Version::has_mtfprd());
10804   ins_cost(DEFAULT_COST);
10805 
10806   expand %{
10807     regD tmpD;
10808     stackSlotL tmpS;
10809     flagsReg crx;
10810     cmpDUnordered_reg_reg(crx, src, src);               // Check whether src is NaN.
    convD2IRaw_regD(tmpD, src);                         // Convert double to int (speculated).
    moveD2L_reg_stack(tmpS, tmpD);                      // Store double to stack (speculated).
10813     cmovI_bso_stackSlotL_conLvalue0_Ex(dst, crx, tmpS); // Cmove based on NaN check.
10814   %}
10815 %}
10816 
10817 // Double to Int conversion, NaN is mapped to 0. Special version for Power8.
10818 instruct convD2I_reg_mffprd_ExEx(iRegIdst dst, regD src) %{
10819   match(Set dst (ConvD2I src));
10820   predicate(VM_Version::has_mtfprd());
10821   ins_cost(DEFAULT_COST);
10822 
10823   expand %{
10824     regD tmpD;
10825     flagsReg crx;
10826     cmpDUnordered_reg_reg(crx, src, src);               // Check whether src is NaN.
    convD2IRaw_regD(tmpD, src);                         // Convert double to int (speculated).
10828     cmovI_bso_reg_conLvalue0_Ex(dst, crx, tmpD);        // Cmove based on NaN check.
10829   %}
10830 %}
10831 
10832 instruct convF2IRaw_regF(regF dst, regF src) %{
10833   // no match-rule, false predicate
10834   effect(DEF dst, USE src);
10835   predicate(false);
10836 
10837   format %{ "FCTIWZ $dst, $src \t// convF2I, $src != NaN" %}
10838   size(4);
10839   ins_encode %{
10840     __ fctiwz($dst$$FloatRegister, $src$$FloatRegister);
10841   %}
10842   ins_pipe(pipe_class_default);
10843 %}
10844 
10845 // Float to Int conversion, NaN is mapped to 0.
10846 instruct convF2I_regF_ExEx(iRegIdst dst, regF src) %{
10847   match(Set dst (ConvF2I src));
10848   predicate(!VM_Version::has_mtfprd());
10849   ins_cost(DEFAULT_COST);
10850 
10851   expand %{
10852     regF tmpF;
10853     stackSlotL tmpS;
10854     flagsReg crx;
10855     cmpFUnordered_reg_reg(crx, src, src);               // Check whether src is NaN.
10856     convF2IRaw_regF(tmpF, src);                         // Convert float to int (speculated).
10857     moveF2L_reg_stack(tmpS, tmpF);                      // Store float to stack (speculated).
10858     cmovI_bso_stackSlotL_conLvalue0_Ex(dst, crx, tmpS); // Cmove based on NaN check.
10859   %}
10860 %}
10861 
10862 // Float to Int conversion, NaN is mapped to 0. Special version for Power8.
10863 instruct convF2I_regF_mffprd_ExEx(iRegIdst dst, regF src) %{
10864   match(Set dst (ConvF2I src));
10865   predicate(VM_Version::has_mtfprd());
10866   ins_cost(DEFAULT_COST);
10867 
10868   expand %{
10869     regF tmpF;
10870     flagsReg crx;
10871     cmpFUnordered_reg_reg(crx, src, src);               // Check whether src is NaN.
10872     convF2IRaw_regF(tmpF, src);                         // Convert float to int (speculated).
10873     cmovI_bso_reg_conLvalue0_Ex(dst, crx, tmpF);        // Cmove based on NaN check.
10874   %}
10875 %}
10876 
10877 // Convert to Long
10878 
10879 instruct convI2L_reg(iRegLdst dst, iRegIsrc src) %{
10880   match(Set dst (ConvI2L src));
10881   format %{ "EXTSW   $dst, $src \t// int->long" %}
10882   size(4);
10883   ins_encode %{
10884     __ extsw($dst$$Register, $src$$Register);
10885   %}
10886   ins_pipe(pipe_class_default);
10887 %}
10888 
10889 // Zero-extend: convert unsigned int to long (convUI2L).
10890 instruct zeroExtendL_regI(iRegLdst dst, iRegIsrc src, immL_32bits mask) %{
10891   match(Set dst (AndL (ConvI2L src) mask));
10892   ins_cost(DEFAULT_COST);
10893 
10894   format %{ "CLRLDI  $dst, $src, #32 \t// zero-extend int to long" %}
10895   size(4);
10896   ins_encode %{
10897     __ clrldi($dst$$Register, $src$$Register, 32);
10898   %}
10899   ins_pipe(pipe_class_default);
10900 %}
10901 
10902 // Zero-extend: convert unsigned int to long in long register.
10903 instruct zeroExtendL_regL(iRegLdst dst, iRegLsrc src, immL_32bits mask) %{
10904   match(Set dst (AndL src mask));
10905   ins_cost(DEFAULT_COST);
10906 
10907   format %{ "CLRLDI  $dst, $src, #32 \t// zero-extend int to long" %}
10908   size(4);
10909   ins_encode %{
10910     __ clrldi($dst$$Register, $src$$Register, 32);
10911   %}
10912   ins_pipe(pipe_class_default);
10913 %}
10914 
10915 instruct convF2LRaw_regF(regF dst, regF src) %{
10916   // no match-rule, false predicate
10917   effect(DEF dst, USE src);
10918   predicate(false);
10919 
10920   format %{ "FCTIDZ $dst, $src \t// convF2L, $src != NaN" %}
10921   size(4);
10922   ins_encode %{
10923     __ fctidz($dst$$FloatRegister, $src$$FloatRegister);
10924   %}
10925   ins_pipe(pipe_class_default);
10926 %}
10927 
10928 instruct cmovL_bso_stackSlotL(iRegLdst dst, flagsRegSrc crx, stackSlotL src) %{
10929   // no match-rule, false predicate
10930   effect(DEF dst, USE crx, USE src);
10931   predicate(false);
10932 
10933   ins_variable_size_depending_on_alignment(true);
10934 
10935   format %{ "cmovL   $crx, $dst, $src" %}
10936   // Worst case is branch + move + stop, no stop without scheduler.
10937   size(8);
10938   ins_encode( enc_cmove_bso_stackSlotL(dst, crx, src) );
10939   ins_pipe(pipe_class_default);
10940 %}
10941 
10942 instruct cmovL_bso_reg(iRegLdst dst, flagsRegSrc crx, regD src) %{
10943   // no match-rule, false predicate
10944   effect(DEF dst, USE crx, USE src);
10945   predicate(false);
10946 
10947   ins_variable_size_depending_on_alignment(true);
10948 
10949   format %{ "cmovL   $crx, $dst, $src" %}
10950   // Worst case is branch + move + stop, no stop without scheduler.
10951   size(8);
10952   ins_encode( enc_cmove_bso_reg(dst, crx, src) );
10953   ins_pipe(pipe_class_default);
10954 %}
10955 
10956 instruct cmovL_bso_stackSlotL_conLvalue0_Ex(iRegLdst dst, flagsRegSrc crx, stackSlotL mem) %{
10957   // no match-rule, false predicate
10958   effect(DEF dst, USE crx, USE mem);
10959   predicate(false);
10960 
10961   format %{ "CmovL   $dst, $crx, $mem \t// postalloc expanded" %}
10962   postalloc_expand %{
10963     //
10964     // replaces
10965     //
10966     //   region  dst  crx  mem
10967     //    \       |    |   /
10968     //     dst=cmovL_bso_stackSlotL_conLvalue0
10969     //
10970     // with
10971     //
10972     //   region  dst
10973     //    \       /
10974     //     dst=loadConL16(0)
10975     //      |
10976     //      ^  region  dst  crx  mem
10977     //      |   \       |    |    /
10978     //      dst=cmovL_bso_stackSlotL
10979     //
10980 
10981     // Create new nodes.
10982     MachNode *m1 = new loadConL16Node();
10983     MachNode *m2 = new cmovL_bso_stackSlotLNode();
10984 
10985     // inputs for new nodes
10986     m1->add_req(n_region);
10987     m2->add_req(n_region, n_crx, n_mem);
10988     m2->add_prec(m1);
10989 
10990     // operands for new nodes
10991     m1->_opnds[0] = op_dst;
10992     m1->_opnds[1] = new immL16Oper(0);
10993     m2->_opnds[0] = op_dst;
10994     m2->_opnds[1] = op_crx;
10995     m2->_opnds[2] = op_mem;
10996 
10997     // registers for new nodes
10998     ra_->set_pair(m1->_idx, ra_->get_reg_second(this), ra_->get_reg_first(this)); // dst
10999     ra_->set_pair(m2->_idx, ra_->get_reg_second(this), ra_->get_reg_first(this)); // dst
11000 
11001     // Insert new nodes.
11002     nodes->push(m1);
11003     nodes->push(m2);
11004   %}
11005 %}
11006 
11007 instruct cmovL_bso_reg_conLvalue0_Ex(iRegLdst dst, flagsRegSrc crx, regD src) %{
11008   // no match-rule, false predicate
11009   effect(DEF dst, USE crx, USE src);
11010   predicate(false);
11011 
11012   format %{ "CmovL   $dst, $crx, $src \t// postalloc expanded" %}
11013   postalloc_expand %{
11014     //
11015     // replaces
11016     //
11017     //   region  dst  crx  src
11018     //    \       |    |   /
11019     //     dst=cmovL_bso_reg_conLvalue0
11020     //
11021     // with
11022     //
11023     //   region  dst
11024     //    \       /
11025     //     dst=loadConL16(0)
11026     //      |
11027     //      ^  region  dst  crx  src
11028     //      |   \       |    |    /
11029     //      dst=cmovL_bso_reg
11030     //
11031 
11032     // Create new nodes.
11033     MachNode *m1 = new loadConL16Node();
11034     MachNode *m2 = new cmovL_bso_regNode();
11035 
11036     // inputs for new nodes
11037     m1->add_req(n_region);
11038     m2->add_req(n_region, n_crx, n_src);
11039     m2->add_prec(m1);
11040 
11041     // operands for new nodes
11042     m1->_opnds[0] = op_dst;
11043     m1->_opnds[1] = new immL16Oper(0);
11044     m2->_opnds[0] = op_dst;
11045     m2->_opnds[1] = op_crx;
11046     m2->_opnds[2] = op_src;
11047 
11048     // registers for new nodes
11049     ra_->set_pair(m1->_idx, ra_->get_reg_second(this), ra_->get_reg_first(this)); // dst
11050     ra_->set_pair(m2->_idx, ra_->get_reg_second(this), ra_->get_reg_first(this)); // dst
11051 
11052     // Insert new nodes.
11053     nodes->push(m1);
11054     nodes->push(m2);
11055   %}
11056 %}
11057 
11058 // Float to Long conversion, NaN is mapped to 0.
11059 instruct convF2L_reg_ExEx(iRegLdst dst, regF src) %{
11060   match(Set dst (ConvF2L src));
11061   predicate(!VM_Version::has_mtfprd());
11062   ins_cost(DEFAULT_COST);
11063 
11064   expand %{
11065     regF tmpF;
11066     stackSlotL tmpS;
11067     flagsReg crx;
11068     cmpFUnordered_reg_reg(crx, src, src);               // Check whether src is NaN.
11069     convF2LRaw_regF(tmpF, src);                         // Convert float to long (speculated).
11070     moveF2L_reg_stack(tmpS, tmpF);                      // Store float to stack (speculated).
11071     cmovL_bso_stackSlotL_conLvalue0_Ex(dst, crx, tmpS); // Cmove based on NaN check.
11072   %}
11073 %}
11074 
11075 // Float to Long conversion, NaN is mapped to 0. Special version for Power8.
11076 instruct convF2L_reg_mffprd_ExEx(iRegLdst dst, regF src) %{
11077   match(Set dst (ConvF2L src));
11078   predicate(VM_Version::has_mtfprd());
11079   ins_cost(DEFAULT_COST);
11080 
11081   expand %{
11082     regF tmpF;
11083     flagsReg crx;
11084     cmpFUnordered_reg_reg(crx, src, src);               // Check whether src is NaN.
11085     convF2LRaw_regF(tmpF, src);                         // Convert float to long (speculated).
11086     cmovL_bso_reg_conLvalue0_Ex(dst, crx, tmpF);        // Cmove based on NaN check.
11087   %}
11088 %}
11089 
11090 instruct convD2LRaw_regD(regD dst, regD src) %{
11091   // no match-rule, false predicate
11092   effect(DEF dst, USE src);
11093   predicate(false);
11094 
11095   format %{ "FCTIDZ $dst, $src \t// convD2L $src != NaN" %}
11096   size(4);
11097   ins_encode %{
11098     __ fctidz($dst$$FloatRegister, $src$$FloatRegister);
11099   %}
11100   ins_pipe(pipe_class_default);
11101 %}
11102 
11103 // Double to Long conversion, NaN is mapped to 0.
11104 instruct convD2L_reg_ExEx(iRegLdst dst, regD src) %{
11105   match(Set dst (ConvD2L src));
11106   predicate(!VM_Version::has_mtfprd());
11107   ins_cost(DEFAULT_COST);
11108 
11109   expand %{
11110     regD tmpD;
11111     stackSlotL tmpS;
11112     flagsReg crx;
11113     cmpDUnordered_reg_reg(crx, src, src);               // Check whether src is NaN.
    convD2LRaw_regD(tmpD, src);                         // Convert double to long (speculated).
    moveD2L_reg_stack(tmpS, tmpD);                      // Store double to stack (speculated).
11116     cmovL_bso_stackSlotL_conLvalue0_Ex(dst, crx, tmpS); // Cmove based on NaN check.
11117   %}
11118 %}
11119 
11120 // Double to Long conversion, NaN is mapped to 0. Special version for Power8.
11121 instruct convD2L_reg_mffprd_ExEx(iRegLdst dst, regD src) %{
11122   match(Set dst (ConvD2L src));
11123   predicate(VM_Version::has_mtfprd());
11124   ins_cost(DEFAULT_COST);
11125 
11126   expand %{
11127     regD tmpD;
11128     flagsReg crx;
11129     cmpDUnordered_reg_reg(crx, src, src);               // Check whether src is NaN.
    convD2LRaw_regD(tmpD, src);                         // Convert double to long (speculated).
11131     cmovL_bso_reg_conLvalue0_Ex(dst, crx, tmpD);        // Cmove based on NaN check.
11132   %}
11133 %}
11134 
11135 // Convert to Float
11136 
11137 // Placed here as needed in expand.
11138 instruct convL2DRaw_regD(regD dst, regD src) %{
11139   // no match-rule, false predicate
11140   effect(DEF dst, USE src);
11141   predicate(false);
11142 
11143   format %{ "FCFID $dst, $src \t// convL2D" %}
11144   size(4);
11145   ins_encode %{
11146     __ fcfid($dst$$FloatRegister, $src$$FloatRegister);
11147   %}
11148   ins_pipe(pipe_class_default);
11149 %}
11150 
11151 // Placed here as needed in expand.
11152 instruct convD2F_reg(regF dst, regD src) %{
11153   match(Set dst (ConvD2F src));
11154   format %{ "FRSP    $dst, $src \t// convD2F" %}
11155   size(4);
11156   ins_encode %{
11157     __ frsp($dst$$FloatRegister, $src$$FloatRegister);
11158   %}
11159   ins_pipe(pipe_class_default);
11160 %}
11161 
11162 // Integer to Float conversion.
11163 instruct convI2F_ireg_Ex(regF dst, iRegIsrc src) %{
11164   match(Set dst (ConvI2F src));
11165   predicate(!VM_Version::has_fcfids());
11166   ins_cost(DEFAULT_COST);
11167 
11168   expand %{
11169     iRegLdst tmpL;
11170     stackSlotL tmpS;
11171     regD tmpD;
11172     regD tmpD2;
    convI2L_reg(tmpL, src);              // Sign-extend int to long.
11174     regL_to_stkL(tmpS, tmpL);            // Store long to stack.
11175     moveL2D_stack_reg(tmpD, tmpS);       // Load long into double register.
11176     convL2DRaw_regD(tmpD2, tmpD);        // Convert to double.
11177     convD2F_reg(dst, tmpD2);             // Convert double to float.
11178   %}
11179 %}
11180 
11181 instruct convL2FRaw_regF(regF dst, regD src) %{
11182   // no match-rule, false predicate
11183   effect(DEF dst, USE src);
11184   predicate(false);
11185 
11186   format %{ "FCFIDS $dst, $src \t// convL2F" %}
11187   size(4);
11188   ins_encode %{
11189     __ fcfids($dst$$FloatRegister, $src$$FloatRegister);
11190   %}
11191   ins_pipe(pipe_class_default);
11192 %}
11193 
11194 // Integer to Float conversion. Special version for Power7.
11195 instruct convI2F_ireg_fcfids_Ex(regF dst, iRegIsrc src) %{
11196   match(Set dst (ConvI2F src));
11197   predicate(VM_Version::has_fcfids() && !VM_Version::has_mtfprd());
11198   ins_cost(DEFAULT_COST);
11199 
11200   expand %{
11201     iRegLdst tmpL;
11202     stackSlotL tmpS;
11203     regD tmpD;
    convI2L_reg(tmpL, src);              // Sign-extend int to long.
11205     regL_to_stkL(tmpS, tmpL);            // Store long to stack.
11206     moveL2D_stack_reg(tmpD, tmpS);       // Load long into double register.
11207     convL2FRaw_regF(dst, tmpD);          // Convert to float.
11208   %}
11209 %}
11210 
11211 // Integer to Float conversion. Special version for Power8.
11212 instruct convI2F_ireg_mtfprd_Ex(regF dst, iRegIsrc src) %{
11213   match(Set dst (ConvI2F src));
11214   predicate(VM_Version::has_fcfids() && VM_Version::has_mtfprd());
11215   ins_cost(DEFAULT_COST);
11216 
11217   expand %{
11218     regD tmpD;
11219     moveI2D_reg(tmpD, src);
11220     convL2FRaw_regF(dst, tmpD);          // Convert to float.
11221   %}
11222 %}
11223 
11224 // L2F to avoid runtime call.
11225 instruct convL2F_ireg_fcfids_Ex(regF dst, iRegLsrc src) %{
11226   match(Set dst (ConvL2F src));
11227   predicate(VM_Version::has_fcfids() && !VM_Version::has_mtfprd());
11228   ins_cost(DEFAULT_COST);
11229 
11230   expand %{
11231     stackSlotL tmpS;
11232     regD tmpD;
11233     regL_to_stkL(tmpS, src);             // Store long to stack.
11234     moveL2D_stack_reg(tmpD, tmpS);       // Load long into double register.
11235     convL2FRaw_regF(dst, tmpD);          // Convert to float.
11236   %}
11237 %}
11238 
11239 // L2F to avoid runtime call.  Special version for Power8.
11240 instruct convL2F_ireg_mtfprd_Ex(regF dst, iRegLsrc src) %{
11241   match(Set dst (ConvL2F src));
11242   predicate(VM_Version::has_fcfids() && VM_Version::has_mtfprd());
11243   ins_cost(DEFAULT_COST);
11244 
11245   expand %{
11246     regD tmpD;
11247     moveL2D_reg(tmpD, src);
11248     convL2FRaw_regF(dst, tmpD);          // Convert to float.
11249   %}
11250 %}
11251 
11252 // Moved up as used in expand.
11253 //instruct convD2F_reg(regF dst, regD src) %{%}
11254 
11255 // Convert to Double
11256 
11257 // Integer to Double conversion.
11258 instruct convI2D_reg_Ex(regD dst, iRegIsrc src) %{
11259   match(Set dst (ConvI2D src));
11260   predicate(!VM_Version::has_mtfprd());
11261   ins_cost(DEFAULT_COST);
11262 
11263   expand %{
11264     iRegLdst tmpL;
11265     stackSlotL tmpS;
11266     regD tmpD;
    convI2L_reg(tmpL, src);              // Sign-extend int to long.
11268     regL_to_stkL(tmpS, tmpL);            // Store long to stack.
11269     moveL2D_stack_reg(tmpD, tmpS);       // Load long into double register.
11270     convL2DRaw_regD(dst, tmpD);          // Convert to double.
11271   %}
11272 %}
11273 
11274 // Integer to Double conversion. Special version for Power8.
11275 instruct convI2D_reg_mtfprd_Ex(regD dst, iRegIsrc src) %{
11276   match(Set dst (ConvI2D src));
11277   predicate(VM_Version::has_mtfprd());
11278   ins_cost(DEFAULT_COST);
11279 
11280   expand %{
11281     regD tmpD;
11282     moveI2D_reg(tmpD, src);
11283     convL2DRaw_regD(dst, tmpD);          // Convert to double.
11284   %}
11285 %}
11286 
11287 // Long to Double conversion
11288 instruct convL2D_reg_Ex(regD dst, stackSlotL src) %{
11289   match(Set dst (ConvL2D src));
11290   ins_cost(DEFAULT_COST + MEMORY_REF_COST);
11291 
11292   expand %{
11293     regD tmpD;
11294     moveL2D_stack_reg(tmpD, src);
11295     convL2DRaw_regD(dst, tmpD);
11296   %}
11297 %}
11298 
11299 // Long to Double conversion. Special version for Power8.
11300 instruct convL2D_reg_mtfprd_Ex(regD dst, iRegLsrc src) %{
11301   match(Set dst (ConvL2D src));
11302   predicate(VM_Version::has_mtfprd());
11303   ins_cost(DEFAULT_COST);
11304 
11305   expand %{
11306     regD tmpD;
11307     moveL2D_reg(tmpD, src);
11308     convL2DRaw_regD(dst, tmpD);          // Convert to double.
11309   %}
11310 %}
11311 
11312 instruct convF2D_reg(regD dst, regF src) %{
11313   match(Set dst (ConvF2D src));
11314   format %{ "FMR     $dst, $src \t// float->double" %}
11315   // variable size, 0 or 4
11316   ins_encode %{
11317     __ fmr_if_needed($dst$$FloatRegister, $src$$FloatRegister);
11318   %}
11319   ins_pipe(pipe_class_default);
11320 %}
11321 
11322 //----------Control Flow Instructions------------------------------------------
11323 // Compare Instructions
11324 
11325 // Compare Integers
11326 instruct cmpI_reg_reg(flagsReg crx, iRegIsrc src1, iRegIsrc src2) %{
11327   match(Set crx (CmpI src1 src2));
11328   size(4);
11329   format %{ "CMPW    $crx, $src1, $src2" %}
11330   ins_encode %{
11331     __ cmpw($crx$$CondRegister, $src1$$Register, $src2$$Register);
11332   %}
11333   ins_pipe(pipe_class_compare);
11334 %}
11335 
11336 instruct cmpI_reg_imm16(flagsReg crx, iRegIsrc src1, immI16 src2) %{
11337   match(Set crx (CmpI src1 src2));
11338   format %{ "CMPWI   $crx, $src1, $src2" %}
11339   size(4);
11340   ins_encode %{
11341     __ cmpwi($crx$$CondRegister, $src1$$Register, $src2$$constant);
11342   %}
11343   ins_pipe(pipe_class_compare);
11344 %}
11345 
11346 // (src1 & src2) == 0?
11347 instruct testI_reg_imm(flagsRegCR0 cr0, iRegIsrc src1, uimmI16 src2, immI_0 zero) %{
11348   match(Set cr0 (CmpI (AndI src1 src2) zero));
11349   // r0 is killed
11350   format %{ "ANDI    R0, $src1, $src2 \t// BTST int" %}
11351   size(4);
11352   ins_encode %{
11353     __ andi_(R0, $src1$$Register, $src2$$constant);
11354   %}
11355   ins_pipe(pipe_class_compare);
11356 %}
11357 
11358 instruct cmpL_reg_reg(flagsReg crx, iRegLsrc src1, iRegLsrc src2) %{
11359   match(Set crx (CmpL src1 src2));
11360   format %{ "CMPD    $crx, $src1, $src2" %}
11361   size(4);
11362   ins_encode %{
11363     __ cmpd($crx$$CondRegister, $src1$$Register, $src2$$Register);
11364   %}
11365   ins_pipe(pipe_class_compare);
11366 %}
11367 
11368 instruct cmpL_reg_imm16(flagsReg crx, iRegLsrc src1, immL16 src2) %{
11369   match(Set crx (CmpL src1 src2));
11370   format %{ "CMPDI   $crx, $src1, $src2" %}
11371   size(4);
11372   ins_encode %{
11373     __ cmpdi($crx$$CondRegister, $src1$$Register, $src2$$constant);
11374   %}
11375   ins_pipe(pipe_class_compare);
11376 %}
11377 
11378 // Added CmpUL for LoopPredicate.
11379 instruct cmpUL_reg_reg(flagsReg crx, iRegLsrc src1, iRegLsrc src2) %{
11380   match(Set crx (CmpUL src1 src2));
11381   format %{ "CMPLD   $crx, $src1, $src2" %}
11382   size(4);
11383   ins_encode %{
11384     __ cmpld($crx$$CondRegister, $src1$$Register, $src2$$Register);
11385   %}
11386   ins_pipe(pipe_class_compare);
11387 %}
11388 
11389 instruct cmpUL_reg_imm16(flagsReg crx, iRegLsrc src1, uimmL16 src2) %{
11390   match(Set crx (CmpUL src1 src2));
11391   format %{ "CMPLDI  $crx, $src1, $src2" %}
11392   size(4);
11393   ins_encode %{
11394     __ cmpldi($crx$$CondRegister, $src1$$Register, $src2$$constant);
11395   %}
11396   ins_pipe(pipe_class_compare);
11397 %}
11398 
11399 instruct testL_reg_reg(flagsRegCR0 cr0, iRegLsrc src1, iRegLsrc src2, immL_0 zero) %{
11400   match(Set cr0 (CmpL (AndL src1 src2) zero));
11401   // r0 is killed
11402   format %{ "AND     R0, $src1, $src2 \t// BTST long" %}
11403   size(4);
11404   ins_encode %{
11405     __ and_(R0, $src1$$Register, $src2$$Register);
11406   %}
11407   ins_pipe(pipe_class_compare);
11408 %}
11409 
11410 instruct testL_reg_imm(flagsRegCR0 cr0, iRegLsrc src1, uimmL16 src2, immL_0 zero) %{
11411   match(Set cr0 (CmpL (AndL src1 src2) zero));
11412   // r0 is killed
11413   format %{ "ANDI    R0, $src1, $src2 \t// BTST long" %}
11414   size(4);
11415   ins_encode %{
11416     __ andi_(R0, $src1$$Register, $src2$$constant);
11417   %}
11418   ins_pipe(pipe_class_compare);
11419 %}
11420 
11421 // Manifest a CmpL3 result in an integer register.
11422 instruct cmpL3_reg_reg(iRegIdst dst, iRegLsrc src1, iRegLsrc src2, flagsRegCR0 cr0) %{
11423   match(Set dst (CmpL3 src1 src2));
11424   effect(KILL cr0);
11425   ins_cost(DEFAULT_COST * 5);
11426   size(VM_Version::has_brw() ? 16 : 20);
11427 
11428   format %{ "cmpL3_reg_reg $dst, $src1, $src2" %}
11429 
11430   ins_encode %{
11431     __ cmpd(CCR0, $src1$$Register, $src2$$Register);
11432     __ set_cmp3($dst$$Register);
11433   %}
11434   ins_pipe(pipe_class_default);
11435 %}
11436 
11437 // Implicit range checks.
11438 // A range check in the ideal world has one of the following shapes:
11439 //  - (If le (CmpU length index)), (IfTrue  throw exception)
11440 //  - (If lt (CmpU index length)), (IfFalse throw exception)
11441 //
11442 // Match range check 'If le (CmpU length index)'.
11443 instruct rangeCheck_iReg_uimm15(cmpOp cmp, iRegIsrc src_length, uimmI15 index, label labl) %{
11444   match(If cmp (CmpU src_length index));
11445   effect(USE labl);
11446   predicate(TrapBasedRangeChecks &&
11447             _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le &&
11448             PROB_UNLIKELY(_leaf->as_If()->_prob) >= PROB_ALWAYS &&
11449             (Matcher::branches_to_uncommon_trap(_leaf)));
11450 
11451   ins_is_TrapBasedCheckNode(true);
11452 
11453   format %{ "TWI     $index $cmp $src_length \t// RangeCheck => trap $labl" %}
11454   size(4);
11455   ins_encode %{
11456     if ($cmp$$cmpcode == 0x1 /* less_equal */) {
11457       __ trap_range_check_le($src_length$$Register, $index$$constant);
11458     } else {
11459       // Both successors are uncommon traps, probability is 0.
11460       // Node got flipped during fixup flow.
11461       assert($cmp$$cmpcode == 0x9, "must be greater");
11462       __ trap_range_check_g($src_length$$Register, $index$$constant);
11463     }
11464   %}
11465   ins_pipe(pipe_class_trap);
11466 %}
11467 
11468 // Match range check 'If lt (CmpU index length)'.
11469 instruct rangeCheck_iReg_iReg(cmpOp cmp, iRegIsrc src_index, iRegIsrc src_length, label labl) %{
11470   match(If cmp (CmpU src_index src_length));
11471   effect(USE labl);
11472   predicate(TrapBasedRangeChecks &&
11473             _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt &&
11474             _leaf->as_If()->_prob >= PROB_ALWAYS &&
11475             (Matcher::branches_to_uncommon_trap(_leaf)));
11476 
11477   ins_is_TrapBasedCheckNode(true);
11478 
11479   format %{ "TW      $src_index $cmp $src_length \t// RangeCheck => trap $labl" %}
11480   size(4);
11481   ins_encode %{
11482     if ($cmp$$cmpcode == 0x0 /* greater_equal */) {
11483       __ trap_range_check_ge($src_index$$Register, $src_length$$Register);
11484     } else {
11485       // Both successors are uncommon traps, probability is 0.
11486       // Node got flipped during fixup flow.
11487       assert($cmp$$cmpcode == 0x8, "must be less");
11488       __ trap_range_check_l($src_index$$Register, $src_length$$Register);
11489     }
11490   %}
11491   ins_pipe(pipe_class_trap);
11492 %}
11493 
11494 // Match range check 'If lt (CmpU index length)'.
11495 instruct rangeCheck_uimm15_iReg(cmpOp cmp, iRegIsrc src_index, uimmI15 length, label labl) %{
11496   match(If cmp (CmpU src_index length));
11497   effect(USE labl);
11498   predicate(TrapBasedRangeChecks &&
11499             _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt &&
11500             _leaf->as_If()->_prob >= PROB_ALWAYS &&
11501             (Matcher::branches_to_uncommon_trap(_leaf)));
11502 
11503   ins_is_TrapBasedCheckNode(true);
11504 
11505   format %{ "TWI     $src_index $cmp $length \t// RangeCheck => trap $labl" %}
11506   size(4);
11507   ins_encode %{
11508     if ($cmp$$cmpcode == 0x0 /* greater_equal */) {
11509       __ trap_range_check_ge($src_index$$Register, $length$$constant);
11510     } else {
11511       // Both successors are uncommon traps, probability is 0.
11512       // Node got flipped during fixup flow.
11513       assert($cmp$$cmpcode == 0x8, "must be less");
11514       __ trap_range_check_l($src_index$$Register, $length$$constant);
11515     }
11516   %}
11517   ins_pipe(pipe_class_trap);
11518 %}
11519 
11520 instruct compU_reg_reg(flagsReg crx, iRegIsrc src1, iRegIsrc src2) %{
11521   match(Set crx (CmpU src1 src2));
11522   format %{ "CMPLW   $crx, $src1, $src2 \t// unsigned" %}
11523   size(4);
11524   ins_encode %{
11525     __ cmplw($crx$$CondRegister, $src1$$Register, $src2$$Register);
11526   %}
11527   ins_pipe(pipe_class_compare);
11528 %}
11529 
11530 instruct compU_reg_uimm16(flagsReg crx, iRegIsrc src1, uimmI16 src2) %{
11531   match(Set crx (CmpU src1 src2));
11532   size(4);
11533   format %{ "CMPLWI  $crx, $src1, $src2" %}
11534   ins_encode %{
11535     __ cmplwi($crx$$CondRegister, $src1$$Register, $src2$$constant);
11536   %}
11537   ins_pipe(pipe_class_compare);
11538 %}
11539 
11540 // Implicit zero checks (more implicit null checks).
11541 // No constant pool entries required.
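// The trap instruction raises a hardware trap when the value is zero; the
// signal handler then redirects execution to the uncommon trap at the given
// label, so the fast path needs neither an explicit compare nor a branch.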
11542 instruct zeroCheckN_iReg_imm0(cmpOp cmp, iRegNsrc value, immN_0 zero, label labl) %{
11543   match(If cmp (CmpN value zero));
11544   effect(USE labl);
11545   predicate(TrapBasedNullChecks &&
11546             _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne &&
11547             _leaf->as_If()->_prob >= PROB_LIKELY_MAG(4) &&
11548             Matcher::branches_to_uncommon_trap(_leaf));
11549   ins_cost(1);
11550 
11551   ins_is_TrapBasedCheckNode(true);
11552 
11553   format %{ "TDI     $value $cmp $zero \t// ZeroCheckN => trap $labl" %}
11554   size(4);
11555   ins_encode %{
11556     if ($cmp$$cmpcode == 0xA) {
11557       __ trap_null_check($value$$Register);
11558     } else {
11559       // Both successors are uncommon traps, probability is 0.
11560       // Node got flipped during fixup flow.
11561       assert($cmp$$cmpcode == 0x2 , "must be equal(0xA) or notEqual(0x2)");
11562       __ trap_null_check($value$$Register, Assembler::traptoGreaterThanUnsigned);
11563     }
11564   %}
11565   ins_pipe(pipe_class_trap);
11566 %}
11567 
11568 // Compare narrow oops.
11569 instruct cmpN_reg_reg(flagsReg crx, iRegNsrc src1, iRegNsrc src2) %{
11570   match(Set crx (CmpN src1 src2));
11571 
11572   size(4);
11573   ins_cost(2);
11574   format %{ "CMPLW   $crx, $src1, $src2 \t// compressed ptr" %}
11575   ins_encode %{
11576     __ cmplw($crx$$CondRegister, $src1$$Register, $src2$$Register);
11577   %}
11578   ins_pipe(pipe_class_compare);
11579 %}
11580 
11581 instruct cmpN_reg_imm0(flagsReg crx, iRegNsrc src1, immN_0 src2) %{
11582   match(Set crx (CmpN src1 src2));
11583   // Make this more expensive than zeroCheckN_iReg_imm0.
11584   ins_cost(2);
11585 
11586   format %{ "CMPLWI  $crx, $src1, $src2 \t// compressed ptr" %}
11587   size(4);
11588   ins_encode %{
11589     __ cmplwi($crx$$CondRegister, $src1$$Register, $src2$$constant);
11590   %}
11591   ins_pipe(pipe_class_compare);
11592 %}
11593 
11594 // Implicit zero checks (more implicit null checks).
11595 // No constant pool entries required.
11596 instruct zeroCheckP_reg_imm0(cmpOp cmp, iRegP_N2P value, immP_0 zero, label labl) %{
11597   match(If cmp (CmpP value zero));
11598   effect(USE labl);
11599   predicate(TrapBasedNullChecks &&
11600             _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne &&
11601             _leaf->as_If()->_prob >= PROB_LIKELY_MAG(4) &&
11602             Matcher::branches_to_uncommon_trap(_leaf));
11603   ins_cost(1); // Should not be cheaper than zeroCheckN.
11604 
11605   ins_is_TrapBasedCheckNode(true);
11606 
11607   format %{ "TDI     $value $cmp $zero \t// ZeroCheckP => trap $labl" %}
11608   size(4);
11609   ins_encode %{
11610     if ($cmp$$cmpcode == 0xA) {
11611       __ trap_null_check($value$$Register);
11612     } else {
11613       // Both successors are uncommon traps, probability is 0.
11614       // Node got flipped during fixup flow.
11615       assert($cmp$$cmpcode == 0x2 , "must be equal(0xA) or notEqual(0x2)");
11616       __ trap_null_check($value$$Register, Assembler::traptoGreaterThanUnsigned);
11617     }
11618   %}
11619   ins_pipe(pipe_class_trap);
11620 %}
11621 
11622 // Compare Pointers
11623 instruct cmpP_reg_reg(flagsReg crx, iRegP_N2P src1, iRegP_N2P src2) %{
11624   match(Set crx (CmpP src1 src2));
11625   format %{ "CMPLD   $crx, $src1, $src2 \t// ptr" %}
11626   size(4);
11627   ins_encode %{
11628     __ cmpld($crx$$CondRegister, $src1$$Register, $src2$$Register);
11629   %}
11630   ins_pipe(pipe_class_compare);
11631 %}
11632 
11633 instruct cmpP_reg_null(flagsReg crx, iRegP_N2P src1, immP_0or1 src2) %{
11634   match(Set crx (CmpP src1 src2));
11635   format %{ "CMPLDI   $crx, $src1, $src2 \t// ptr" %}
11636   size(4);
11637   ins_encode %{
11638     __ cmpldi($crx$$CondRegister, $src1$$Register, (int)((short)($src2$$constant & 0xFFFF)));
11639   %}
11640   ins_pipe(pipe_class_compare);
11641 %}
11642 
11643 // Used in postalloc expand.
11644 instruct cmpP_reg_imm16(flagsReg crx, iRegPsrc src1, immL16 src2) %{
  // This match rule prevents reordering of the node before a safepoint.
  // This only makes sense if this instruction is used exclusively
  // for the expansion of EncodeP!
11648   match(Set crx (CmpP src1 src2));
11649   predicate(false);
11650 
11651   format %{ "CMPDI   $crx, $src1, $src2" %}
11652   size(4);
11653   ins_encode %{
11654     __ cmpdi($crx$$CondRegister, $src1$$Register, $src2$$constant);
11655   %}
11656   ins_pipe(pipe_class_compare);
11657 %}
11658 
11659 //----------Float Compares----------------------------------------------------
11660 
11661 instruct cmpFUnordered_reg_reg(flagsReg crx, regF src1, regF src2) %{
11662   // Needs matchrule, see cmpDUnordered.
11663   match(Set crx (CmpF src1 src2));
  // False predicate, shall not be matched.
11665   predicate(false);
11666 
11667   format %{ "cmpFUrd $crx, $src1, $src2" %}
11668   size(4);
11669   ins_encode %{
11670     __ fcmpu($crx$$CondRegister, $src1$$FloatRegister, $src2$$FloatRegister);
11671   %}
11672   ins_pipe(pipe_class_default);
11673 %}
11674 
11675 instruct cmov_bns_less(flagsReg crx) %{
11676   // no match-rule, false predicate
11677   effect(DEF crx);
11678   predicate(false);
11679 
11680   ins_variable_size_depending_on_alignment(true);
11681 
11682   format %{ "cmov    $crx" %}
11683   // Worst case is branch + move + stop, no stop without scheduler.
11684   size(12);
11685   ins_encode %{
11686     Label done;
11687     __ bns($crx$$CondRegister, done);        // not unordered -> keep crx
11688     __ li(R0, 0);
11689     __ cmpwi($crx$$CondRegister, R0, 1);     // unordered -> set crx to 'less'
11690     __ bind(done);
11691   %}
11692   ins_pipe(pipe_class_default);
11693 %}
11694 
11695 // Compare floating, generate condition code.
11696 instruct cmpF_reg_reg_Ex(flagsReg crx, regF src1, regF src2) %{
  // FIXME: should we match '(If cmp (CmpF src1 src2))' ??
11698   //
11699   // The following code sequence occurs a lot in mpegaudio:
11700   //
11701   // block BXX:
11702   // 0: instruct cmpFUnordered_reg_reg (cmpF_reg_reg-0):
11703   //    cmpFUrd CCR6, F11, F9
11704   // 4: instruct cmov_bns_less (cmpF_reg_reg-1):
11705   //    cmov CCR6
11706   // 8: instruct branchConSched:
11707   //    B_FARle CCR6, B56  P=0.500000 C=-1.000000
11708   match(Set crx (CmpF src1 src2));
11709   ins_cost(DEFAULT_COST+BRANCH_COST);
11710 
11711   format %{ "CmpF    $crx, $src1, $src2 \t// postalloc expanded" %}
11712   postalloc_expand %{
11713     //
11714     // replaces
11715     //
11716     //   region  src1  src2
11717     //    \       |     |
11718     //     crx=cmpF_reg_reg
11719     //
11720     // with
11721     //
11722     //   region  src1  src2
11723     //    \       |     |
11724     //     crx=cmpFUnordered_reg_reg
11725     //      |
11726     //      ^  region
11727     //      |   \
11728     //      crx=cmov_bns_less
11729     //
11730 
11731     // Create new nodes.
11732     MachNode *m1 = new cmpFUnordered_reg_regNode();
11733     MachNode *m2 = new cmov_bns_lessNode();
11734 
11735     // inputs for new nodes
11736     m1->add_req(n_region, n_src1, n_src2);
11737     m2->add_req(n_region);
11738     m2->add_prec(m1);
11739 
11740     // operands for new nodes
11741     m1->_opnds[0] = op_crx;
11742     m1->_opnds[1] = op_src1;
11743     m1->_opnds[2] = op_src2;
11744     m2->_opnds[0] = op_crx;
11745 
11746     // registers for new nodes
11747     ra_->set_pair(m1->_idx, ra_->get_reg_second(this), ra_->get_reg_first(this)); // crx
11748     ra_->set_pair(m2->_idx, ra_->get_reg_second(this), ra_->get_reg_first(this)); // crx
11749 
11750     // Insert new nodes.
11751     nodes->push(m1);
11752     nodes->push(m2);
11753   %}
11754 %}
11755 
11756 // Compare float, generate -1,0,1
11757 instruct cmpF3_reg_reg(iRegIdst dst, regF src1, regF src2, flagsRegCR0 cr0) %{
11758   match(Set dst (CmpF3 src1 src2));
11759   effect(KILL cr0);
11760   ins_cost(DEFAULT_COST * 6);
11761   size(VM_Version::has_brw() ? 20 : 24);
11762 
11763   format %{ "cmpF3_reg_reg $dst, $src1, $src2" %}
11764 
11765   ins_encode %{
11766     __ fcmpu(CCR0, $src1$$FloatRegister, $src2$$FloatRegister);
11767     __ set_cmpu3($dst$$Register, true); // C2 requires unordered to get treated like less
11768   %}
11769   ins_pipe(pipe_class_default);
11770 %}
11771 
11772 instruct cmpDUnordered_reg_reg(flagsReg crx, regD src1, regD src2) %{
  // Needs a matchrule so that the ideal opcode is Cmp. This causes gcm to place the
  // node right before the conditional move that uses it.
  // In jck test api/java_awt/geom/QuadCurve2DFloat/index.html#SetCurveTesttestCase7,
  // compilation of java.awt.geom.RectangularShape::getBounds()Ljava/awt/Rectangle
  // crashed in register allocation where the flags register between cmpDUnordered and a
  // conditional move was supposed to be spilled.
11779   match(Set crx (CmpD src1 src2));
11780   // False predicate, shall not be matched.
11781   predicate(false);
11782 
11783   format %{ "cmpFUrd $crx, $src1, $src2" %}
11784   size(4);
11785   ins_encode %{
11786     __ fcmpu($crx$$CondRegister, $src1$$FloatRegister, $src2$$FloatRegister);
11787   %}
11788   ins_pipe(pipe_class_default);
11789 %}
11790 
11791 instruct cmpD_reg_reg_Ex(flagsReg crx, regD src1, regD src2) %{
11792   match(Set crx (CmpD src1 src2));
11793   ins_cost(DEFAULT_COST+BRANCH_COST);
11794 
11795   format %{ "CmpD    $crx, $src1, $src2 \t// postalloc expanded" %}
11796   postalloc_expand %{
11797     //
11798     // replaces
11799     //
11800     //   region  src1  src2
11801     //    \       |     |
11802     //     crx=cmpD_reg_reg
11803     //
11804     // with
11805     //
11806     //   region  src1  src2
11807     //    \       |     |
11808     //     crx=cmpDUnordered_reg_reg
11809     //      |
11810     //      ^  region
11811     //      |   \
11812     //      crx=cmov_bns_less
11813     //
11814 
11815     // create new nodes
11816     MachNode *m1 = new cmpDUnordered_reg_regNode();
11817     MachNode *m2 = new cmov_bns_lessNode();
11818 
11819     // inputs for new nodes
11820     m1->add_req(n_region, n_src1, n_src2);
11821     m2->add_req(n_region);
11822     m2->add_prec(m1);
11823 
11824     // operands for new nodes
11825     m1->_opnds[0] = op_crx;
11826     m1->_opnds[1] = op_src1;
11827     m1->_opnds[2] = op_src2;
11828     m2->_opnds[0] = op_crx;
11829 
11830     // registers for new nodes
11831     ra_->set_pair(m1->_idx, ra_->get_reg_second(this), ra_->get_reg_first(this)); // crx
11832     ra_->set_pair(m2->_idx, ra_->get_reg_second(this), ra_->get_reg_first(this)); // crx
11833 
11834     // Insert new nodes.
11835     nodes->push(m1);
11836     nodes->push(m2);
11837   %}
11838 %}
11839 
11840 // Compare double, generate -1,0,1
11841 instruct cmpD3_reg_reg(iRegIdst dst, regD src1, regD src2, flagsRegCR0 cr0) %{
11842   match(Set dst (CmpD3 src1 src2));
11843   effect(KILL cr0);
11844   ins_cost(DEFAULT_COST * 6);
11845   size(VM_Version::has_brw() ? 20 : 24);
11846 
11847   format %{ "cmpD3_reg_reg $dst, $src1, $src2" %}
11848 
11849   ins_encode %{
11850     __ fcmpu(CCR0, $src1$$FloatRegister, $src2$$FloatRegister);
11851     __ set_cmpu3($dst$$Register, true); // C2 requires unordered to get treated like less
11852   %}
11853   ins_pipe(pipe_class_default);
11854 %}
11855 
11856 // Compare char
11857 instruct cmprb_Digit_reg_reg(iRegIdst dst, iRegIsrc src1, iRegIsrc src2, flagsReg crx) %{
11858   match(Set dst (Digit src1));
11859   effect(TEMP src2, TEMP crx);
11860   ins_cost(3 * DEFAULT_COST);
11861 
11862   format %{ "LI      $src2, 0x3930\n\t"
11863             "CMPRB   $crx, 0, $src1, $src2\n\t"
11864             "SETB    $dst, $crx" %}
11865   size(12);
11866   ins_encode %{
11867     // 0x30: 0, 0x39: 9
11868     __ li($src2$$Register, 0x3930);
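    // cmprb reads the range from the low halfword of src2, packed as (upper << 8) | lower.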
11869     // compare src1 with ranges 0x30 to 0x39
11870     __ cmprb($crx$$CondRegister, 0, $src1$$Register, $src2$$Register);
11871     __ setb($dst$$Register, $crx$$CondRegister);
11872   %}
11873   ins_pipe(pipe_class_default);
11874 %}
11875 
11876 instruct cmprb_LowerCase_reg_reg(iRegIdst dst, iRegIsrc src1, iRegIsrc src2, flagsReg crx) %{
11877   match(Set dst (LowerCase src1));
11878   effect(TEMP src2, TEMP crx);
11879   ins_cost(12 * DEFAULT_COST);
11880 
11881   format %{ "LI      $src2, 0x7A61\n\t"
11882             "CMPRB   $crx, 0, $src1, $src2\n\t"
11883             "BGT     $crx, done\n\t"
11884             "LIS     $src2, (signed short)0xF6DF\n\t"
11885             "ORI     $src2, $src2, 0xFFF8\n\t"
11886             "CMPRB   $crx, 1, $src1, $src2\n\t"
11887             "BGT     $crx, done\n\t"
11888             "LIS     $src2, (signed short)0xAAB5\n\t"
11889             "ORI     $src2, $src2, 0xBABA\n\t"
11890             "INSRDI  $src2, $src2, 32, 0\n\t"
11891             "CMPEQB  $crx, 1, $src1, $src2\n"
11892             "done:\n\t"
11893             "SETB    $dst, $crx" %}
11894 
11895   size(48);
11896   ins_encode %{
11897     Label done;
11898     // 0x61: a, 0x7A: z
11899     __ li($src2$$Register, 0x7A61);
11900     // compare src1 with ranges 0x61 to 0x7A
11901     __ cmprb($crx$$CondRegister, 0, $src1$$Register, $src2$$Register);
11902     __ bgt($crx$$CondRegister, done);
11903 
11904     // 0xDF: sharp s, 0xFF: y with diaeresis; 0xF7 (division sign) is not a lower case character
11905     __ lis($src2$$Register, (signed short)0xF6DF);
11906     __ ori($src2$$Register, $src2$$Register, 0xFFF8);
11907     // compare src1 with ranges 0xDF to 0xF6 and 0xF8 to 0xFF
11908     __ cmprb($crx$$CondRegister, 1, $src1$$Register, $src2$$Register);
11909     __ bgt($crx$$CondRegister, done);
11910 
11911     // 0xAA: feminine ordinal indicator
11912     // 0xB5: micro sign
11913     // 0xBA: masculine ordinal indicator
11914     __ lis($src2$$Register, (signed short)0xAAB5);
11915     __ ori($src2$$Register, $src2$$Register, 0xBABA);
11916     __ insrdi($src2$$Register, $src2$$Register, 32, 0);
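    // Duplicate the 32-bit pattern into the upper half so that every byte of src2
    // holds one of the candidate values for cmpeqb.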
11917     // compare src1 with 0xAA, 0xB5, and 0xBA
11918     __ cmpeqb($crx$$CondRegister, $src1$$Register, $src2$$Register);
11919 
11920     __ bind(done);
11921     __ setb($dst$$Register, $crx$$CondRegister);
11922   %}
11923   ins_pipe(pipe_class_default);
11924 %}
11925 
11926 instruct cmprb_UpperCase_reg_reg(iRegIdst dst, iRegIsrc src1, iRegIsrc src2, flagsReg crx) %{
11927   match(Set dst (UpperCase src1));
11928   effect(TEMP src2, TEMP crx);
11929   ins_cost(7 * DEFAULT_COST);
11930 
11931   format %{ "LI      $src2, 0x5A41\n\t"
11932             "CMPRB   $crx, 0, $src1, $src2\n\t"
11933             "BGT     $crx, done\n\t"
11934             "LIS     $src2, (signed short)0xD6C0\n\t"
11935             "ORI     $src2, $src2, 0xDED8\n\t"
11936             "CMPRB   $crx, 1, $src1, $src2\n"
11937             "done:\n\t"
11938             "SETB    $dst, $crx" %}
11939 
11940   size(28);
11941   ins_encode %{
11942     Label done;
11943     // 0x41: A, 0x5A: Z
11944     __ li($src2$$Register, 0x5A41);
11945     // compare src1 with a range 0x41 to 0x5A
11946     __ cmprb($crx$$CondRegister, 0, $src1$$Register, $src2$$Register);
11947     __ bgt($crx$$CondRegister, done);
11948 
11949     // 0xC0: A with grave, 0xDE: capital thorn; 0xD7 (multiplication sign) is not an upper case character
11950     __ lis($src2$$Register, (signed short)0xD6C0);
11951     __ ori($src2$$Register, $src2$$Register, 0xDED8);
11952     // compare src1 with ranges 0xC0 to 0xD6 and 0xD8 to 0xDE
11953     __ cmprb($crx$$CondRegister, 1, $src1$$Register, $src2$$Register);
11954 
11955     __ bind(done);
11956     __ setb($dst$$Register, $crx$$CondRegister);
11957   %}
11958   ins_pipe(pipe_class_default);
11959 %}
11960 
11961 instruct cmprb_Whitespace_reg_reg(iRegIdst dst, iRegIsrc src1, iRegIsrc src2, flagsReg crx) %{
11962   match(Set dst (Whitespace src1));
11963   predicate(PowerArchitecturePPC64 <= 9);
11964   effect(TEMP src2, TEMP crx);
11965   ins_cost(4 * DEFAULT_COST);
11966 
11967   format %{ "LI      $src2, 0x0D09\n\t"
11968             "ADDIS   $src2, 0x201C\n\t"
11969             "CMPRB   $crx, 1, $src1, $src2\n\t"
11970             "SETB    $dst, $crx" %}
11971   size(16);
11972   ins_encode %{
11973     // 0x09 to 0x0D, 0x1C to 0x20
11974     __ li($src2$$Register, 0x0D09);
11975     __ addis($src2$$Register, $src2$$Register, 0x0201C);
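    // src2 now holds 0x201C0D09: two ranges packed per halfword as (upper << 8) | lower.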
11976     // compare src with ranges 0x09 to 0x0D and 0x1C to 0x20
11977     __ cmprb($crx$$CondRegister, 1, $src1$$Register, $src2$$Register);
11978     __ setb($dst$$Register, $crx$$CondRegister);
11979   %}
11980   ins_pipe(pipe_class_default);
11981 %}
11982 
11983 // Power 10 version, using prefixed addi to load 32-bit constant
11984 instruct cmprb_Whitespace_reg_reg_prefixed(iRegIdst dst, iRegIsrc src1, iRegIsrc src2, flagsReg crx) %{
11985   match(Set dst (Whitespace src1));
11986   predicate(PowerArchitecturePPC64 >= 10);
11987   effect(TEMP src2, TEMP crx);
11988   ins_cost(3 * DEFAULT_COST);
11989 
11990   format %{ "PLI     $src2, 0x201C0D09\n\t"
11991             "CMPRB   $crx, 1, $src1, $src2\n\t"
11992             "SETB    $dst, $crx" %}
11993   size(16);
11994   ins_encode %{
11995     // 0x09 to 0x0D, 0x1C to 0x20
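    // Prefixed instructions must not cross a 64-byte boundary (Power ISA 3.1);
    // ins_alignment(2) below keeps the 8-byte pli suitably aligned, which the assert verifies.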
11996     assert( ((intptr_t)(__ pc()) & 0x3c) != 0x3c, "Bad alignment for prefixed instruction at " INTPTR_FORMAT, (intptr_t)(__ pc()));
11997     __ pli($src2$$Register, 0x201C0D09);
11998     // compare src with ranges 0x09 to 0x0D and 0x1C to 0x20
11999     __ cmprb($crx$$CondRegister, 1, $src1$$Register, $src2$$Register);
12000     __ setb($dst$$Register, $crx$$CondRegister);
12001   %}
12002   ins_pipe(pipe_class_default);
12003   ins_alignment(2);
12004 %}
12005 
12006 //----------Branches---------------------------------------------------------
12007 // Jump
12008 
12009 // Direct Branch.
12010 instruct branch(label labl) %{
12011   match(Goto);
12012   effect(USE labl);
12013   ins_cost(BRANCH_COST);
12014 
12015   format %{ "B       $labl" %}
12016   size(4);
12017   ins_encode %{
12018      Label d;    // dummy
12019      __ bind(d);
12020      Label* p = $labl$$label;
12021      // `p' is `NULL' when this encoding class is used only to
12022      // determine the size of the encoded instruction.
12023      Label& l = (NULL == p)? d : *(p);
12024      __ b(l);
12025   %}
12026   ins_pipe(pipe_class_default);
12027 %}
12028 
12029 // Conditional Near Branch
12030 instruct branchCon(cmpOp cmp, flagsRegSrc crx, label lbl) %{
12031   // Same match rule as `branchConFar'.
12032   match(If cmp crx);
12033   effect(USE lbl);
12034   ins_cost(BRANCH_COST);
12035 
12036   // If set to 1 this indicates that the current instruction is a
12037   // short variant of a long branch. This avoids using this
12038   // instruction in first-pass matching. It will then only be used in
12039   // the `Shorten_branches' pass.
12040   ins_short_branch(1);
12041 
12042   format %{ "B$cmp     $crx, $lbl" %}
12043   size(4);
12044   ins_encode( enc_bc(crx, cmp, lbl) );
12045   ins_pipe(pipe_class_default);
12046 %}
12047 
12048 // This is for cases when the ppc64 `bc' instruction does not
12049 // reach far enough. So we emit a far branch here, which is more
12050 // expensive.
12051 //
12052 // Conditional Far Branch
12053 instruct branchConFar(cmpOp cmp, flagsRegSrc crx, label lbl) %{
12054   // Same match rule as `branchCon'.
12055   match(If cmp crx);
12056   effect(USE crx, USE lbl);
12057   // Higher cost than `branchCon'.
12058   ins_cost(5*BRANCH_COST);
12059 
12060   // This is not a short variant of a branch, but the long variant.
12061   ins_short_branch(0);
12062 
12063   format %{ "B_FAR$cmp $crx, $lbl" %}
12064   size(8);
12065   ins_encode( enc_bc_far(crx, cmp, lbl) );
12066   ins_pipe(pipe_class_default);
12067 %}
12068 
12069 instruct branchLoopEnd(cmpOp cmp, flagsRegSrc crx, label labl) %{
12070   match(CountedLoopEnd cmp crx);
12071   effect(USE labl);
12072   ins_cost(BRANCH_COST);
12073 
12074   // short variant.
12075   ins_short_branch(1);
12076 
12077   format %{ "B$cmp     $crx, $labl \t// counted loop end" %}
12078   size(4);
12079   ins_encode( enc_bc(crx, cmp, labl) );
12080   ins_pipe(pipe_class_default);
12081 %}
12082 
12083 instruct branchLoopEndFar(cmpOp cmp, flagsRegSrc crx, label labl) %{
12084   match(CountedLoopEnd cmp crx);
12085   effect(USE labl);
12086   ins_cost(BRANCH_COST);
12087 
12088   // Long variant.
12089   ins_short_branch(0);
12090 
12091   format %{ "B_FAR$cmp $crx, $labl \t// counted loop end" %}
12092   size(8);
12093   ins_encode( enc_bc_far(crx, cmp, labl) );
12094   ins_pipe(pipe_class_default);
12095 %}
12096 
12097 // ============================================================================
12098 // Java runtime operations, intrinsics and other complex operations.
12099 
12100 // The 2nd, slow half of a subtype check. Scan the subklass's secondary superklass
12101 // array for an instance of the superklass. Set a hidden internal cache on a
12102 // hit (the cache is checked with exposed code in gen_subtype_check()). Return
12103 // nonzero for a miss or zero for a hit. The encoding ALSO sets flags.
12104 //
12105 // GL TODO: Improve this.
12106 // - result should not be a TEMP
12107 // - Add match rule as on sparc avoiding additional Cmp.
12108 instruct partialSubtypeCheck(iRegPdst result, iRegP_N2P subklass, iRegP_N2P superklass,
12109                              iRegPdst tmp_klass, iRegPdst tmp_arrayptr) %{
12110   match(Set result (PartialSubtypeCheck subklass superklass));
12111   effect(TEMP_DEF result, TEMP tmp_klass, TEMP tmp_arrayptr);
12112   ins_cost(DEFAULT_COST*10);
12113 
12114   format %{ "PartialSubtypeCheck $result = ($subklass instanceOf $superklass) tmp: $tmp_klass, $tmp_arrayptr" %}
12115   ins_encode %{
12116     __ check_klass_subtype_slow_path($subklass$$Register, $superklass$$Register, $tmp_arrayptr$$Register,
12117                                      $tmp_klass$$Register, NULL, $result$$Register);
12118   %}
12119   ins_pipe(pipe_class_default);
12120 %}
12121 
12122 // inlined locking and unlocking
12123 
12124 instruct cmpFastLock(flagsReg crx, iRegPdst oop, iRegPdst box, iRegPdst tmp1, iRegPdst tmp2) %{
12125   match(Set crx (FastLock oop box));
12126   effect(TEMP tmp1, TEMP tmp2);
12127   predicate(!Compile::current()->use_rtm());
12128 
12129   format %{ "FASTLOCK  $oop, $box, $tmp1, $tmp2" %}
12130   ins_encode %{
12131     __ compiler_fast_lock_object($crx$$CondRegister, $oop$$Register, $box$$Register,
12132                                  $tmp1$$Register, $tmp2$$Register, /*tmp3*/ R0);
12133     // If locking was successful, crx should indicate 'EQ'.
12134     // The compiler generates a branch to the runtime call to
12135     // _complete_monitor_locking_Java for the case where crx is 'NE'.
12136   %}
12137   ins_pipe(pipe_class_compare);
12138 %}
12139 
12140 // Separate version for TM. Use bound register for box to enable USE_KILL.
12141 instruct cmpFastLock_tm(flagsReg crx, iRegPdst oop, rarg2RegP box, iRegPdst tmp1, iRegPdst tmp2, iRegPdst tmp3) %{
12142   match(Set crx (FastLock oop box));
12143   effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, USE_KILL box);
12144   predicate(Compile::current()->use_rtm());
12145 
12146   format %{ "FASTLOCK  $oop, $box, $tmp1, $tmp2, $tmp3 (TM)" %}
12147   ins_encode %{
12148     __ compiler_fast_lock_object($crx$$CondRegister, $oop$$Register, $box$$Register,
12149                                  $tmp1$$Register, $tmp2$$Register, $tmp3$$Register,
12150                                  _rtm_counters, _stack_rtm_counters,
12151                                  ((Method*)(ra_->C->method()->constant_encoding()))->method_data(),
12152                                  /*RTM*/ true, ra_->C->profile_rtm());
12153     // If locking was successful, crx should indicate 'EQ'.
12154     // The compiler generates a branch to the runtime call to
12155     // _complete_monitor_locking_Java for the case where crx is 'NE'.
12156   %}
12157   ins_pipe(pipe_class_compare);
12158 %}
12159 
12160 instruct cmpFastUnlock(flagsReg crx, iRegPdst oop, iRegPdst box, iRegPdst tmp1, iRegPdst tmp2, iRegPdst tmp3) %{
12161   match(Set crx (FastUnlock oop box));
12162   effect(TEMP tmp1, TEMP tmp2, TEMP tmp3);
12163   predicate(!Compile::current()->use_rtm());
12164 
12165   format %{ "FASTUNLOCK  $oop, $box, $tmp1, $tmp2" %}
12166   ins_encode %{
12167     __ compiler_fast_unlock_object($crx$$CondRegister, $oop$$Register, $box$$Register,
12168                                    $tmp1$$Register, $tmp2$$Register, $tmp3$$Register,
12169                                    false);
12170     // If unlocking was successful, crx should indicate 'EQ'.
12171     // The compiler generates a branch to the runtime call to
12172     // _complete_monitor_unlocking_Java for the case where crx is 'NE'.
12173   %}
12174   ins_pipe(pipe_class_compare);
12175 %}
12176 
12177 instruct cmpFastUnlock_tm(flagsReg crx, iRegPdst oop, iRegPdst box, iRegPdst tmp1, iRegPdst tmp2, iRegPdst tmp3) %{
12178   match(Set crx (FastUnlock oop box));
12179   effect(TEMP tmp1, TEMP tmp2, TEMP tmp3);
12180   predicate(Compile::current()->use_rtm());
12181 
12182   format %{ "FASTUNLOCK  $oop, $box, $tmp1, $tmp2 (TM)" %}
12183   ins_encode %{
12184     __ compiler_fast_unlock_object($crx$$CondRegister, $oop$$Register, $box$$Register,
12185                                    $tmp1$$Register, $tmp2$$Register, $tmp3$$Register,
12186                                    /*RTM*/ true);
12187     // If unlocking was successful, crx should indicate 'EQ'.
12188     // The compiler generates a branch to the runtime call to
12189     // _complete_monitor_unlocking_Java for the case where crx is 'NE'.
12190   %}
12191   ins_pipe(pipe_class_compare);
12192 %}
12193 
12194 // Align address.
12195 instruct align_addr(iRegPdst dst, iRegPsrc src, immLnegpow2 mask) %{
12196   match(Set dst (CastX2P (AndL (CastP2X src) mask)));
12197 
12198   format %{ "ANDDI   $dst, $src, $mask \t// next aligned address" %}
12199   size(4);
12200   ins_encode %{
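    // The mask is a negative power of two, so the AND is equivalent to clearing
    // the low log2(-mask) bits.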
12201     __ clrrdi($dst$$Register, $src$$Register, log2i_exact(-(julong)$mask$$constant));
12202   %}
12203   ins_pipe(pipe_class_default);
12204 %}
12205 
12206 // Array size computation.
12207 instruct array_size(iRegLdst dst, iRegPsrc end, iRegPsrc start) %{
12208   match(Set dst (SubL (CastP2X end) (CastP2X start)));
12209 
12210   format %{ "SUB     $dst, $end, $start \t// array size in bytes" %}
12211   size(4);
12212   ins_encode %{
12213     __ subf($dst$$Register, $start$$Register, $end$$Register);
12214   %}
12215   ins_pipe(pipe_class_default);
12216 %}
12217 
12218 // Clear-array with constant short array length. The versions below can use dcbz with cnt > 30.
12219 instruct inlineCallClearArrayShort(immLmax30 cnt, rarg2RegP base, Universe dummy, regCTR ctr) %{
12220   match(Set dummy (ClearArray cnt base));
12221   effect(USE_KILL base, KILL ctr);
12222   ins_cost(2 * MEMORY_REF_COST);
12223 
12224   format %{ "ClearArray $cnt, $base" %}
12225   ins_encode %{
12226     __ clear_memory_constlen($base$$Register, $cnt$$constant, R0); // kills base, R0
12227   %}
12228   ins_pipe(pipe_class_default);
12229 %}
12230 
12231 // Clear-array with constant large array length.
12232 instruct inlineCallClearArrayLarge(immL cnt, rarg2RegP base, Universe dummy, iRegLdst tmp, regCTR ctr) %{
12233   match(Set dummy (ClearArray cnt base));
12234   effect(USE_KILL base, TEMP tmp, KILL ctr);
12235   ins_cost(3 * MEMORY_REF_COST);
12236 
12237   format %{ "ClearArray $cnt, $base \t// KILL $tmp" %}
12238   ins_encode %{
12239     __ clear_memory_doubleword($base$$Register, $tmp$$Register, R0, $cnt$$constant); // kills base, R0
12240   %}
12241   ins_pipe(pipe_class_default);
12242 %}
12243 
12244 // Clear-array with dynamic array length.
12245 instruct inlineCallClearArray(rarg1RegL cnt, rarg2RegP base, Universe dummy, regCTR ctr) %{
12246   match(Set dummy (ClearArray cnt base));
12247   effect(USE_KILL cnt, USE_KILL base, KILL ctr);
12248   ins_cost(4 * MEMORY_REF_COST);
12249 
12250   format %{ "ClearArray $cnt, $base" %}
12251   ins_encode %{
12252     __ clear_memory_doubleword($base$$Register, $cnt$$Register, R0); // kills cnt, base, R0
12253   %}
12254   ins_pipe(pipe_class_default);
12255 %}
12256 
12257 instruct string_compareL(rarg1RegP str1, rarg2RegP str2, rarg3RegI cnt1, rarg4RegI cnt2, iRegIdst result,
12258                          iRegIdst tmp, regCTR ctr, flagsRegCR0 cr0) %{
12259   predicate(((StrCompNode*)n)->encoding() == StrIntrinsicNode::LL);
12260   match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
12261   effect(TEMP_DEF result, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL ctr, KILL cr0, TEMP tmp);
12262   ins_cost(300);
12263   format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result \t// KILL $tmp" %}
12264   ins_encode %{
12265     __ string_compare($str1$$Register, $str2$$Register,
12266                       $cnt1$$Register, $cnt2$$Register,
12267                       $tmp$$Register,
12268                       $result$$Register, StrIntrinsicNode::LL);
12269   %}
12270   ins_pipe(pipe_class_default);
12271 %}
12272 
12273 instruct string_compareU(rarg1RegP str1, rarg2RegP str2, rarg3RegI cnt1, rarg4RegI cnt2, iRegIdst result,
12274                          iRegIdst tmp, regCTR ctr, flagsRegCR0 cr0) %{
12275   predicate(((StrCompNode*)n)->encoding() == StrIntrinsicNode::UU);
12276   match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
12277   effect(TEMP_DEF result, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL ctr, KILL cr0, TEMP tmp);
12278   ins_cost(300);
12279   format %{ "String Compare char[] $str1,$cnt1,$str2,$cnt2 -> $result \t// KILL $tmp" %}
12280   ins_encode %{
12281     __ string_compare($str1$$Register, $str2$$Register,
12282                       $cnt1$$Register, $cnt2$$Register,
12283                       $tmp$$Register,
12284                       $result$$Register, StrIntrinsicNode::UU);
12285   %}
12286   ins_pipe(pipe_class_default);
12287 %}
12288 
12289 instruct string_compareLU(rarg1RegP str1, rarg2RegP str2, rarg3RegI cnt1, rarg4RegI cnt2, iRegIdst result,
12290                           iRegIdst tmp, regCTR ctr, flagsRegCR0 cr0) %{
12291   predicate(((StrCompNode*)n)->encoding() == StrIntrinsicNode::LU);
12292   match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
12293   effect(TEMP_DEF result, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL ctr, KILL cr0, TEMP tmp);
12294   ins_cost(300);
12295   format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result \t// KILL $tmp" %}
12296   ins_encode %{
12297     __ string_compare($str1$$Register, $str2$$Register,
12298                       $cnt1$$Register, $cnt2$$Register,
12299                       $tmp$$Register,
12300                       $result$$Register, StrIntrinsicNode::LU);
12301   %}
12302   ins_pipe(pipe_class_default);
12303 %}
12304 
12305 instruct string_compareUL(rarg1RegP str1, rarg2RegP str2, rarg3RegI cnt1, rarg4RegI cnt2, iRegIdst result,
12306                           iRegIdst tmp, regCTR ctr, flagsRegCR0 cr0) %{
12307   predicate(((StrCompNode*)n)->encoding() == StrIntrinsicNode::UL);
12308   match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
12309   effect(TEMP_DEF result, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL ctr, KILL cr0, TEMP tmp);
12310   ins_cost(300);
12311   format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result \t// KILL $tmp" %}
12312   ins_encode %{
12313     __ string_compare($str2$$Register, $str1$$Register,
12314                       $cnt2$$Register, $cnt1$$Register,
12315                       $tmp$$Register,
12316                       $result$$Register, StrIntrinsicNode::UL);
12317   %}
12318   ins_pipe(pipe_class_default);
12319 %}
12320 
12321 instruct string_equalsL(rarg1RegP str1, rarg2RegP str2, rarg3RegI cnt, iRegIdst result,
12322                         iRegIdst tmp, regCTR ctr, flagsRegCR0 cr0) %{
12323   predicate(((StrEqualsNode*)n)->encoding() == StrIntrinsicNode::LL);
12324   match(Set result (StrEquals (Binary str1 str2) cnt));
12325   effect(TEMP_DEF result, USE_KILL str1, USE_KILL str2, USE_KILL cnt, TEMP tmp, KILL ctr, KILL cr0);
12326   ins_cost(300);
12327   format %{ "String Equals byte[] $str1,$str2,$cnt -> $result \t// KILL $tmp" %}
12328   ins_encode %{
12329     __ array_equals(false, $str1$$Register, $str2$$Register,
12330                     $cnt$$Register, $tmp$$Register,
12331                     $result$$Register, true /* byte */);
12332   %}
12333   ins_pipe(pipe_class_default);
12334 %}
12335 
12336 instruct string_equalsU(rarg1RegP str1, rarg2RegP str2, rarg3RegI cnt, iRegIdst result,
12337                         iRegIdst tmp, regCTR ctr, flagsRegCR0 cr0) %{
12338   predicate(((StrEqualsNode*)n)->encoding() == StrIntrinsicNode::UU);
12339   match(Set result (StrEquals (Binary str1 str2) cnt));
12340   effect(TEMP_DEF result, USE_KILL str1, USE_KILL str2, USE_KILL cnt, TEMP tmp, KILL ctr, KILL cr0);
12341   ins_cost(300);
12342   format %{ "String Equals char[]  $str1,$str2,$cnt -> $result \t// KILL $tmp" %}
12343   ins_encode %{
12344     __ array_equals(false, $str1$$Register, $str2$$Register,
12345                     $cnt$$Register, $tmp$$Register,
12346                     $result$$Register, false /* byte */);
12347   %}
12348   ins_pipe(pipe_class_default);
12349 %}
12350 
12351 instruct array_equalsB(rarg1RegP ary1, rarg2RegP ary2, iRegIdst result,
12352                        iRegIdst tmp1, iRegIdst tmp2, regCTR ctr, flagsRegCR0 cr0, flagsRegCR0 cr1) %{
12353   predicate(((AryEqNode*)n)->encoding() == StrIntrinsicNode::LL);
12354   match(Set result (AryEq ary1 ary2));
12355   effect(TEMP_DEF result, USE_KILL ary1, USE_KILL ary2, TEMP tmp1, TEMP tmp2, KILL ctr, KILL cr0, KILL cr1);
12356   ins_cost(300);
12357   format %{ "Array Equals $ary1,$ary2 -> $result \t// KILL $tmp1,$tmp2" %}
12358   ins_encode %{
12359     __ array_equals(true, $ary1$$Register, $ary2$$Register,
12360                     $tmp1$$Register, $tmp2$$Register,
12361                     $result$$Register, true /* byte */);
12362   %}
12363   ins_pipe(pipe_class_default);
12364 %}
12365 
12366 instruct array_equalsC(rarg1RegP ary1, rarg2RegP ary2, iRegIdst result,
12367                        iRegIdst tmp1, iRegIdst tmp2, regCTR ctr, flagsRegCR0 cr0, flagsRegCR0 cr1) %{
12368   predicate(((AryEqNode*)n)->encoding() == StrIntrinsicNode::UU);
12369   match(Set result (AryEq ary1 ary2));
12370   effect(TEMP_DEF result, USE_KILL ary1, USE_KILL ary2, TEMP tmp1, TEMP tmp2, KILL ctr, KILL cr0, KILL cr1);
12371   ins_cost(300);
12372   format %{ "Array Equals $ary1,$ary2 -> $result \t// KILL $tmp1,$tmp2" %}
12373   ins_encode %{
12374     __ array_equals(true, $ary1$$Register, $ary2$$Register,
12375                     $tmp1$$Register, $tmp2$$Register,
12376                     $result$$Register, false /* byte */);
12377   %}
12378   ins_pipe(pipe_class_default);
12379 %}
12380 
12381 instruct indexOf_imm1_char_U(iRegIdst result, iRegPsrc haystack, iRegIsrc haycnt,
12382                              immP needleImm, immL offsetImm, immI_1 needlecntImm,
12383                              iRegIdst tmp1, iRegIdst tmp2,
12384                              flagsRegCR0 cr0, flagsRegCR1 cr1, regCTR ctr) %{
12385   match(Set result (StrIndexOf (Binary haystack haycnt) (Binary (AddP needleImm offsetImm) needlecntImm)));
12386   effect(TEMP tmp1, TEMP tmp2, KILL cr0, KILL cr1, KILL ctr);
12387   // Required for EA: check if it is still a type_array.
12388   predicate(((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UU);
12389   ins_cost(150);
12390 
12391   format %{ "String IndexOf CSCL1 $haystack[0..$haycnt], $needleImm+$offsetImm[0..$needlecntImm]"
12392             "-> $result \t// KILL $haycnt, $tmp1, $tmp2, $cr0, $cr1" %}
12393 
12394   ins_encode %{
12395     immPOper *needleOper = (immPOper *)$needleImm;
12396     const TypeOopPtr *t = needleOper->type()->isa_oopptr();
12397     ciTypeArray* needle_values = t->const_oop()->as_type_array();  // Pointer to live char *
12398     jchar chr;
12399 #ifdef VM_LITTLE_ENDIAN
12400     chr = (((jchar)(unsigned char)needle_values->element_value(1).as_byte()) << 8) |
12401            ((jchar)(unsigned char)needle_values->element_value(0).as_byte());
12402 #else
12403     chr = (((jchar)(unsigned char)needle_values->element_value(0).as_byte()) << 8) |
12404            ((jchar)(unsigned char)needle_values->element_value(1).as_byte());
12405 #endif
12406     __ string_indexof_char($result$$Register,
12407                            $haystack$$Register, $haycnt$$Register,
12408                            R0, chr,
12409                            $tmp1$$Register, $tmp2$$Register, false /*is_byte*/);
12410   %}
12411   ins_pipe(pipe_class_compare);
12412 %}
12413 
12414 instruct indexOf_imm1_char_L(iRegIdst result, iRegPsrc haystack, iRegIsrc haycnt,
12415                              immP needleImm, immL offsetImm, immI_1 needlecntImm,
12416                              iRegIdst tmp1, iRegIdst tmp2,
12417                              flagsRegCR0 cr0, flagsRegCR1 cr1, regCTR ctr) %{
12418   match(Set result (StrIndexOf (Binary haystack haycnt) (Binary (AddP needleImm offsetImm) needlecntImm)));
12419   effect(TEMP tmp1, TEMP tmp2, KILL cr0, KILL cr1, KILL ctr);
12420   // Required for EA: check if it is still a type_array.
12421   predicate(((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::LL);
12422   ins_cost(150);
12423 
12424   format %{ "String IndexOf CSCL1 $haystack[0..$haycnt], $needleImm+$offsetImm[0..$needlecntImm]"
12425             "-> $result \t// KILL $haycnt, $tmp1, $tmp2, $cr0, $cr1" %}
12426 
12427   ins_encode %{
12428     immPOper *needleOper = (immPOper *)$needleImm;
12429     const TypeOopPtr *t = needleOper->type()->isa_oopptr();
12430     ciTypeArray* needle_values = t->const_oop()->as_type_array();  // Pointer to live char *
12431     jchar chr = (jchar)needle_values->element_value(0).as_byte();
12432     __ string_indexof_char($result$$Register,
12433                            $haystack$$Register, $haycnt$$Register,
12434                            R0, chr,
12435                            $tmp1$$Register, $tmp2$$Register, true /*is_byte*/);
12436   %}
12437   ins_pipe(pipe_class_compare);
12438 %}
12439 
12440 instruct indexOf_imm1_char_UL(iRegIdst result, iRegPsrc haystack, iRegIsrc haycnt,
12441                               immP needleImm, immL offsetImm, immI_1 needlecntImm,
12442                               iRegIdst tmp1, iRegIdst tmp2,
12443                               flagsRegCR0 cr0, flagsRegCR1 cr1, regCTR ctr) %{
12444   match(Set result (StrIndexOf (Binary haystack haycnt) (Binary (AddP needleImm offsetImm) needlecntImm)));
12445   effect(TEMP tmp1, TEMP tmp2, KILL cr0, KILL cr1, KILL ctr);
12446   // Required for EA: check if it is still a type_array.
12447   predicate(((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UL);
12448   ins_cost(150);
12449 
12450   format %{ "String IndexOf CSCL1 $haystack[0..$haycnt], $needleImm+$offsetImm[0..$needlecntImm]"
12451             "-> $result \t// KILL $haycnt, $tmp1, $tmp2, $cr0, $cr1" %}
12452 
12453   ins_encode %{
12454     immPOper *needleOper = (immPOper *)$needleImm;
12455     const TypeOopPtr *t = needleOper->type()->isa_oopptr();
12456     ciTypeArray* needle_values = t->const_oop()->as_type_array();  // Pointer to live char *
12457     jchar chr = (jchar)needle_values->element_value(0).as_byte();
12458     __ string_indexof_char($result$$Register,
12459                            $haystack$$Register, $haycnt$$Register,
12460                            R0, chr,
12461                            $tmp1$$Register, $tmp2$$Register, false /*is_byte*/);
12462   %}
12463   ins_pipe(pipe_class_compare);
12464 %}
12465 
12466 instruct indexOf_imm1_U(iRegIdst result, iRegPsrc haystack, iRegIsrc haycnt,
12467                         rscratch2RegP needle, immI_1 needlecntImm,
12468                         iRegIdst tmp1, iRegIdst tmp2,
12469                         flagsRegCR0 cr0, flagsRegCR1 cr1, regCTR ctr) %{
12470   match(Set result (StrIndexOf (Binary haystack haycnt) (Binary needle needlecntImm)));
12471   effect(USE_KILL needle, TEMP tmp1, TEMP tmp2, KILL cr0, KILL cr1, KILL ctr);
12472   // Required for EA: check if it is still a type_array.
12473   predicate(((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UU &&
12474             n->in(3)->in(1)->bottom_type()->is_aryptr()->const_oop() &&
12475             n->in(3)->in(1)->bottom_type()->is_aryptr()->const_oop()->is_type_array());
12476   ins_cost(180);
12477 
12478   format %{ "String IndexOf SCL1 $haystack[0..$haycnt], $needle[0..$needlecntImm]"
12479             " -> $result \t// KILL $haycnt, $needle, $tmp1, $tmp2, $cr0, $cr1" %}
12480   ins_encode %{
12481     Node *ndl = in(operand_index($needle));  // The node that defines needle.
12482     ciTypeArray* needle_values = ndl->bottom_type()->is_aryptr()->const_oop()->as_type_array();
12483     guarantee(needle_values, "sanity");
12484     jchar chr;
12485 #ifdef VM_LITTLE_ENDIAN
12486     chr = (((jchar)(unsigned char)needle_values->element_value(1).as_byte()) << 8) |
12487            ((jchar)(unsigned char)needle_values->element_value(0).as_byte());
12488 #else
12489     chr = (((jchar)(unsigned char)needle_values->element_value(0).as_byte()) << 8) |
12490            ((jchar)(unsigned char)needle_values->element_value(1).as_byte());
12491 #endif
12492     __ string_indexof_char($result$$Register,
12493                            $haystack$$Register, $haycnt$$Register,
12494                            R0, chr,
12495                            $tmp1$$Register, $tmp2$$Register, false /*is_byte*/);
12496   %}
12497   ins_pipe(pipe_class_compare);
12498 %}
12499 
12500 instruct indexOf_imm1_L(iRegIdst result, iRegPsrc haystack, iRegIsrc haycnt,
12501                         rscratch2RegP needle, immI_1 needlecntImm,
12502                         iRegIdst tmp1, iRegIdst tmp2,
12503                         flagsRegCR0 cr0, flagsRegCR1 cr1, regCTR ctr) %{
12504   match(Set result (StrIndexOf (Binary haystack haycnt) (Binary needle needlecntImm)));
12505   effect(USE_KILL needle, TEMP tmp1, TEMP tmp2, KILL cr0, KILL cr1, KILL ctr);
12506   // Required for EA: check if it is still a type_array.
12507   predicate(((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::LL &&
12508             n->in(3)->in(1)->bottom_type()->is_aryptr()->const_oop() &&
12509             n->in(3)->in(1)->bottom_type()->is_aryptr()->const_oop()->is_type_array());
12510   ins_cost(180);
12511 
12512   format %{ "String IndexOf SCL1 $haystack[0..$haycnt], $needle[0..$needlecntImm]"
12513             " -> $result \t// KILL $haycnt, $needle, $tmp1, $tmp2, $cr0, $cr1" %}
12514   ins_encode %{
12515     Node *ndl = in(operand_index($needle));  // The node that defines needle.
12516     ciTypeArray* needle_values = ndl->bottom_type()->is_aryptr()->const_oop()->as_type_array();
12517     guarantee(needle_values, "sanity");
12518     jchar chr = (jchar)needle_values->element_value(0).as_byte();
12519     __ string_indexof_char($result$$Register,
12520                            $haystack$$Register, $haycnt$$Register,
12521                            R0, chr,
12522                            $tmp1$$Register, $tmp2$$Register, true /*is_byte*/);
12523   %}
12524   ins_pipe(pipe_class_compare);
12525 %}
12526 
12527 instruct indexOf_imm1_UL(iRegIdst result, iRegPsrc haystack, iRegIsrc haycnt,
12528                          rscratch2RegP needle, immI_1 needlecntImm,
12529                          iRegIdst tmp1, iRegIdst tmp2,
12530                          flagsRegCR0 cr0, flagsRegCR1 cr1, regCTR ctr) %{
12531   match(Set result (StrIndexOf (Binary haystack haycnt) (Binary needle needlecntImm)));
12532   effect(USE_KILL needle, TEMP tmp1, TEMP tmp2, KILL cr0, KILL cr1, KILL ctr);
12533   // Required for EA: check if it is still a type_array.
12534   predicate(((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UL &&
12535             n->in(3)->in(1)->bottom_type()->is_aryptr()->const_oop() &&
12536             n->in(3)->in(1)->bottom_type()->is_aryptr()->const_oop()->is_type_array());
12537   ins_cost(180);
12538 
12539   format %{ "String IndexOf SCL1 $haystack[0..$haycnt], $needle[0..$needlecntImm]"
12540             " -> $result \t// KILL $haycnt, $needle, $tmp1, $tmp2, $cr0, $cr1" %}
12541   ins_encode %{
12542     Node *ndl = in(operand_index($needle));  // The node that defines needle.
12543     ciTypeArray* needle_values = ndl->bottom_type()->is_aryptr()->const_oop()->as_type_array();
12544     guarantee(needle_values, "sanity");
12545     jchar chr = (jchar)needle_values->element_value(0).as_byte();
12546     __ string_indexof_char($result$$Register,
12547                            $haystack$$Register, $haycnt$$Register,
12548                            R0, chr,
12549                            $tmp1$$Register, $tmp2$$Register, false /*is_byte*/);
12550   %}
12551   ins_pipe(pipe_class_compare);
12552 %}
12553 
12554 instruct indexOfChar_U(iRegIdst result, iRegPsrc haystack, iRegIsrc haycnt,
12555                        iRegIsrc ch, iRegIdst tmp1, iRegIdst tmp2,
12556                        flagsRegCR0 cr0, flagsRegCR1 cr1, regCTR ctr) %{
12557   match(Set result (StrIndexOfChar (Binary haystack haycnt) ch));
12558   effect(TEMP tmp1, TEMP tmp2, KILL cr0, KILL cr1, KILL ctr);
12559   predicate(((StrIndexOfCharNode*)n)->encoding() == StrIntrinsicNode::U);
12560   ins_cost(180);
12561 
12562   format %{ "StringUTF16 IndexOfChar $haystack[0..$haycnt], $ch"
12563             " -> $result \t// KILL $haycnt, $tmp1, $tmp2, $cr0, $cr1" %}
12564   ins_encode %{
12565     __ string_indexof_char($result$$Register,
12566                            $haystack$$Register, $haycnt$$Register,
12567                            $ch$$Register, 0 /* this is not used if the character is already in a register */,
12568                            $tmp1$$Register, $tmp2$$Register, false /*is_byte*/);
12569   %}
12570   ins_pipe(pipe_class_compare);
12571 %}
12572 
12573 instruct indexOfChar_L(iRegIdst result, iRegPsrc haystack, iRegIsrc haycnt,
12574                        iRegIsrc ch, iRegIdst tmp1, iRegIdst tmp2,
12575                        flagsRegCR0 cr0, flagsRegCR1 cr1, regCTR ctr) %{
12576   match(Set result (StrIndexOfChar (Binary haystack haycnt) ch));
12577   effect(TEMP tmp1, TEMP tmp2, KILL cr0, KILL cr1, KILL ctr);
12578   predicate(((StrIndexOfCharNode*)n)->encoding() == StrIntrinsicNode::L);
12579   ins_cost(180);
12580 
12581   format %{ "StringLatin1 IndexOfChar $haystack[0..$haycnt], $ch"
12582             " -> $result \t// KILL $haycnt, $tmp1, $tmp2, $cr0, $cr1" %}
12583   ins_encode %{
12584     __ string_indexof_char($result$$Register,
12585                            $haystack$$Register, $haycnt$$Register,
12586                            $ch$$Register, 0 /* this is not used if the character is already in a register */,
12587                            $tmp1$$Register, $tmp2$$Register, true /*is_byte*/);
12588   %}
12589   ins_pipe(pipe_class_compare);
12590 %}
12591 
12592 instruct indexOf_imm_U(iRegIdst result, iRegPsrc haystack, rscratch1RegI haycnt,
12593                        iRegPsrc needle, uimmI15 needlecntImm,
12594                        iRegIdst tmp1, iRegIdst tmp2, iRegIdst tmp3, iRegIdst tmp4, iRegIdst tmp5,
12595                        flagsRegCR0 cr0, flagsRegCR1 cr1, flagsRegCR6 cr6, regCTR ctr) %{
12596   match(Set result (StrIndexOf (Binary haystack haycnt) (Binary needle needlecntImm)));
12597   effect(USE_KILL haycnt, /* better: TDEF haycnt, */ TEMP_DEF result,
12598          TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, TEMP tmp5, KILL cr0, KILL cr1, KILL cr6, KILL ctr);
12599   // Required for EA: check if it is still a type_array.
12600   predicate(((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UU &&
12601             n->in(3)->in(1)->bottom_type()->is_aryptr()->const_oop() &&
12602             n->in(3)->in(1)->bottom_type()->is_aryptr()->const_oop()->is_type_array());
12603   ins_cost(250);
12604 
12605   format %{ "String IndexOf SCL $haystack[0..$haycnt], $needle[0..$needlecntImm]"
12606             " -> $result \t// KILL $haycnt, $tmp1, $tmp2, $tmp3, $tmp4, $tmp5, $cr0, $cr1" %}
12607   ins_encode %{
12608     Node *ndl = in(operand_index($needle));  // The node that defines needle.
12609     ciTypeArray* needle_values = ndl->bottom_type()->is_aryptr()->const_oop()->as_type_array();
12610 
12611     __ string_indexof($result$$Register,
12612                       $haystack$$Register, $haycnt$$Register,
12613                       $needle$$Register, needle_values, $tmp5$$Register, $needlecntImm$$constant,
12614                       $tmp1$$Register, $tmp2$$Register, $tmp3$$Register, $tmp4$$Register, StrIntrinsicNode::UU);
12615   %}
12616   ins_pipe(pipe_class_compare);
12617 %}
12618 
12619 instruct indexOf_imm_L(iRegIdst result, iRegPsrc haystack, rscratch1RegI haycnt,
12620                        iRegPsrc needle, uimmI15 needlecntImm,
12621                        iRegIdst tmp1, iRegIdst tmp2, iRegIdst tmp3, iRegIdst tmp4, iRegIdst tmp5,
12622                        flagsRegCR0 cr0, flagsRegCR1 cr1, flagsRegCR6 cr6, regCTR ctr) %{
12623   match(Set result (StrIndexOf (Binary haystack haycnt) (Binary needle needlecntImm)));
12624   effect(USE_KILL haycnt, /* better: TDEF haycnt, */ TEMP_DEF result,
12625          TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, TEMP tmp5, KILL cr0, KILL cr1, KILL cr6, KILL ctr);
12626   // Required for EA: check if it is still a type_array.
12627   predicate(((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::LL &&
12628             n->in(3)->in(1)->bottom_type()->is_aryptr()->const_oop() &&
12629             n->in(3)->in(1)->bottom_type()->is_aryptr()->const_oop()->is_type_array());
12630   ins_cost(250);
12631 
12632   format %{ "String IndexOf SCL $haystack[0..$haycnt], $needle[0..$needlecntImm]"
12633             " -> $result \t// KILL $haycnt, $tmp1, $tmp2, $tmp3, $tmp4, $tmp5, $cr0, $cr1" %}
12634   ins_encode %{
12635     Node *ndl = in(operand_index($needle));  // The node that defines needle.
12636     ciTypeArray* needle_values = ndl->bottom_type()->is_aryptr()->const_oop()->as_type_array();
12637 
12638     __ string_indexof($result$$Register,
12639                       $haystack$$Register, $haycnt$$Register,
12640                       $needle$$Register, needle_values, $tmp5$$Register, $needlecntImm$$constant,
12641                       $tmp1$$Register, $tmp2$$Register, $tmp3$$Register, $tmp4$$Register, StrIntrinsicNode::LL);
12642   %}
12643   ins_pipe(pipe_class_compare);
12644 %}
12645 
12646 instruct indexOf_imm_UL(iRegIdst result, iRegPsrc haystack, rscratch1RegI haycnt,
12647                         iRegPsrc needle, uimmI15 needlecntImm,
12648                         iRegIdst tmp1, iRegIdst tmp2, iRegIdst tmp3, iRegIdst tmp4, iRegIdst tmp5,
12649                         flagsRegCR0 cr0, flagsRegCR1 cr1, flagsRegCR6 cr6, regCTR ctr) %{
12650   match(Set result (StrIndexOf (Binary haystack haycnt) (Binary needle needlecntImm)));
12651   effect(USE_KILL haycnt, /* better: TDEF haycnt, */ TEMP_DEF result,
12652          TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, TEMP tmp5, KILL cr0, KILL cr1, KILL cr6, KILL ctr);
12653   // Required for EA: check if it is still a type_array.
12654   predicate(((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UL &&
12655             n->in(3)->in(1)->bottom_type()->is_aryptr()->const_oop() &&
12656             n->in(3)->in(1)->bottom_type()->is_aryptr()->const_oop()->is_type_array());
12657   ins_cost(250);
12658 
12659   format %{ "String IndexOf SCL $haystack[0..$haycnt], $needle[0..$needlecntImm]"
12660             " -> $result \t// KILL $haycnt, $tmp1, $tmp2, $tmp3, $tmp4, $tmp5, $cr0, $cr1" %}
12661   ins_encode %{
12662     Node *ndl = in(operand_index($needle));  // The node that defines needle.
12663     ciTypeArray* needle_values = ndl->bottom_type()->is_aryptr()->const_oop()->as_type_array();
12664 
12665     __ string_indexof($result$$Register,
12666                       $haystack$$Register, $haycnt$$Register,
12667                       $needle$$Register, needle_values, $tmp5$$Register, $needlecntImm$$constant,
12668                       $tmp1$$Register, $tmp2$$Register, $tmp3$$Register, $tmp4$$Register, StrIntrinsicNode::UL);
12669   %}
12670   ins_pipe(pipe_class_compare);
12671 %}
12672 
12673 instruct indexOf_U(iRegIdst result, iRegPsrc haystack, rscratch1RegI haycnt, iRegPsrc needle, rscratch2RegI needlecnt,
12674                    iRegLdst tmp1, iRegLdst tmp2, iRegLdst tmp3, iRegLdst tmp4,
12675                    flagsRegCR0 cr0, flagsRegCR1 cr1, flagsRegCR6 cr6, regCTR ctr) %{
12676   match(Set result (StrIndexOf (Binary haystack haycnt) (Binary needle needlecnt)));
12677   effect(USE_KILL haycnt, USE_KILL needlecnt, /*better: TDEF haycnt, TDEF needlecnt,*/
12678          TEMP_DEF result,
12679          TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, KILL cr0, KILL cr1, KILL cr6, KILL ctr);
12680   predicate(((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UU);
12681   ins_cost(300);
12682 
12683   format %{ "String IndexOf $haystack[0..$haycnt], $needle[0..$needlecnt]"
12684              " -> $result \t// KILL $haycnt, $needlecnt, $tmp1, $tmp2, $tmp3, $tmp4, $cr0, $cr1" %}
12685   ins_encode %{
12686     __ string_indexof($result$$Register,
12687                       $haystack$$Register, $haycnt$$Register,
12688                       $needle$$Register, NULL, $needlecnt$$Register, 0,  // needlecnt not constant.
12689                       $tmp1$$Register, $tmp2$$Register, $tmp3$$Register, $tmp4$$Register, StrIntrinsicNode::UU);
12690   %}
12691   ins_pipe(pipe_class_compare);
12692 %}
12693 
12694 instruct indexOf_L(iRegIdst result, iRegPsrc haystack, rscratch1RegI haycnt, iRegPsrc needle, rscratch2RegI needlecnt,
12695                    iRegLdst tmp1, iRegLdst tmp2, iRegLdst tmp3, iRegLdst tmp4,
12696                    flagsRegCR0 cr0, flagsRegCR1 cr1, flagsRegCR6 cr6, regCTR ctr) %{
12697   match(Set result (StrIndexOf (Binary haystack haycnt) (Binary needle needlecnt)));
12698   effect(USE_KILL haycnt, USE_KILL needlecnt, /*better: TDEF haycnt, TDEF needlecnt,*/
12699          TEMP_DEF result,
12700          TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, KILL cr0, KILL cr1, KILL cr6, KILL ctr);
12701   predicate(((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::LL);
12702   ins_cost(300);
12703 
12704   format %{ "String IndexOf $haystack[0..$haycnt], $needle[0..$needlecnt]"
12705              " -> $result \t// KILL $haycnt, $needlecnt, $tmp1, $tmp2, $tmp3, $tmp4, $cr0, $cr1" %}
12706   ins_encode %{
12707     __ string_indexof($result$$Register,
12708                       $haystack$$Register, $haycnt$$Register,
12709                       $needle$$Register, NULL, $needlecnt$$Register, 0,  // needlecnt not constant.
12710                       $tmp1$$Register, $tmp2$$Register, $tmp3$$Register, $tmp4$$Register, StrIntrinsicNode::LL);
12711   %}
12712   ins_pipe(pipe_class_compare);
12713 %}
12714 
12715 instruct indexOf_UL(iRegIdst result, iRegPsrc haystack, rscratch1RegI haycnt, iRegPsrc needle, rscratch2RegI needlecnt,
12716                     iRegLdst tmp1, iRegLdst tmp2, iRegLdst tmp3, iRegLdst tmp4,
12717                     flagsRegCR0 cr0, flagsRegCR1 cr1, flagsRegCR6 cr6, regCTR ctr) %{
12718   match(Set result (StrIndexOf (Binary haystack haycnt) (Binary needle needlecnt)));
12719   effect(USE_KILL haycnt, USE_KILL needlecnt, /*better: TDEF haycnt, TDEF needlecnt,*/
12720          TEMP_DEF result,
12721          TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, KILL cr0, KILL cr1, KILL cr6, KILL ctr);
12722   predicate(((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UL);
12723   ins_cost(300);
12724 
12725   format %{ "String IndexOf $haystack[0..$haycnt], $needle[0..$needlecnt]"
12726              " -> $result \t// KILL $haycnt, $needlecnt, $tmp1, $tmp2, $tmp3, $tmp4, $cr0, $cr1" %}
12727   ins_encode %{
12728     __ string_indexof($result$$Register,
12729                       $haystack$$Register, $haycnt$$Register,
12730                       $needle$$Register, NULL, $needlecnt$$Register, 0,  // needlecnt not constant.
12731                       $tmp1$$Register, $tmp2$$Register, $tmp3$$Register, $tmp4$$Register, StrIntrinsicNode::UL);
12732   %}
12733   ins_pipe(pipe_class_compare);
12734 %}
12735 
12736 // char[] to byte[] compression
12737 instruct string_compress(rarg1RegP src, rarg2RegP dst, iRegIsrc len, iRegIdst result, iRegLdst tmp1,
12738                          iRegLdst tmp2, iRegLdst tmp3, iRegLdst tmp4, iRegLdst tmp5, regCTR ctr, flagsRegCR0 cr0) %{
12739   match(Set result (StrCompressedCopy src (Binary dst len)));
12740   effect(TEMP_DEF result, TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, TEMP tmp5,
12741          USE_KILL src, USE_KILL dst, KILL ctr, KILL cr0);
12742   ins_cost(300);
12743   format %{ "String Compress $src,$dst,$len -> $result \t// KILL $tmp1, $tmp2, $tmp3, $tmp4, $tmp5" %}
12744   ins_encode %{
12745     Label Lskip, Ldone;
12746     __ li($result$$Register, 0);
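    // The result stays 0 if a char > 0xFF is encountered (the helpers branch to Ldone);
    // on full success it is set to len below.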
12747     __ string_compress_16($src$$Register, $dst$$Register, $len$$Register, $tmp1$$Register,
12748                           $tmp2$$Register, $tmp3$$Register, $tmp4$$Register, $tmp5$$Register, Ldone);
12749     __ rldicl_($tmp1$$Register, $len$$Register, 0, 64-3); // Remaining characters.
12750     __ beq(CCR0, Lskip);
12751     __ string_compress($src$$Register, $dst$$Register, $tmp1$$Register, $tmp2$$Register, Ldone);
12752     __ bind(Lskip);
12753     __ mr($result$$Register, $len$$Register);
12754     __ bind(Ldone);
12755   %}
12756   ins_pipe(pipe_class_default);
12757 %}
12758 
12759 // byte[] to char[] inflation
12760 instruct string_inflate(Universe dummy, rarg1RegP src, rarg2RegP dst, iRegIsrc len, iRegLdst tmp1,
12761                         iRegLdst tmp2, iRegLdst tmp3, iRegLdst tmp4, iRegLdst tmp5, regCTR ctr, flagsRegCR0 cr0) %{
12762   match(Set dummy (StrInflatedCopy src (Binary dst len)));
12763   effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, TEMP tmp5, USE_KILL src, USE_KILL dst, KILL ctr, KILL cr0);
12764   ins_cost(300);
12765   format %{ "String Inflate $src,$dst,$len \t// KILL $tmp1, $tmp2, $tmp3, $tmp4, $tmp5" %}
12766   ins_encode %{
12767     Label Ldone;
12768     __ string_inflate_16($src$$Register, $dst$$Register, $len$$Register, $tmp1$$Register,
12769                          $tmp2$$Register, $tmp3$$Register, $tmp4$$Register, $tmp5$$Register);
12770     __ rldicl_($tmp1$$Register, $len$$Register, 0, 64-3); // Remaining characters.
12771     __ beq(CCR0, Ldone);
12772     __ string_inflate($src$$Register, $dst$$Register, $tmp1$$Register, $tmp2$$Register);
12773     __ bind(Ldone);
12774   %}
12775   ins_pipe(pipe_class_default);
12776 %}
12777 
12778 // StringCoding.java intrinsics
12779 instruct has_negatives(rarg1RegP ary1, iRegIsrc len, iRegIdst result, iRegLdst tmp1, iRegLdst tmp2,
12780                        regCTR ctr, flagsRegCR0 cr0)
12781 %{
12782   match(Set result (HasNegatives ary1 len));
12783   effect(TEMP_DEF result, USE_KILL ary1, TEMP tmp1, TEMP tmp2, KILL ctr, KILL cr0);
12784   ins_cost(300);
12785   format %{ "has negatives byte[] $ary1,$len -> $result \t// KILL $tmp1, $tmp2" %}
12786   ins_encode %{
12787     __ has_negatives($ary1$$Register, $len$$Register, $result$$Register,
12788                      $tmp1$$Register, $tmp2$$Register);
12789   %}
12790   ins_pipe(pipe_class_default);
12791 %}
12792 
12793 // encode char[] to byte[] in ISO_8859_1
12794 instruct encode_iso_array(rarg1RegP src, rarg2RegP dst, iRegIsrc len, iRegIdst result, iRegLdst tmp1,
12795                           iRegLdst tmp2, iRegLdst tmp3, iRegLdst tmp4, iRegLdst tmp5, regCTR ctr, flagsRegCR0 cr0) %{
12796   predicate(!((EncodeISOArrayNode*)n)->is_ascii());
12797   match(Set result (EncodeISOArray src (Binary dst len)));
12798   effect(TEMP_DEF result, TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, TEMP tmp5,
12799          USE_KILL src, USE_KILL dst, KILL ctr, KILL cr0);
12800   ins_cost(300);
12801   format %{ "Encode array $src,$dst,$len -> $result \t// KILL $tmp1, $tmp2, $tmp3, $tmp4, $tmp5" %}
12802   ins_encode %{
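    // The result is the number of characters actually encoded: the failure paths
    // compute the remaining (unencoded) count, and Ldone returns len - remaining.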
12803     Label Lslow, Lfailure1, Lfailure2, Ldone;
12804     __ string_compress_16($src$$Register, $dst$$Register, $len$$Register, $tmp1$$Register,
12805                           $tmp2$$Register, $tmp3$$Register, $tmp4$$Register, $tmp5$$Register, Lfailure1);
12806     __ rldicl_($result$$Register, $len$$Register, 0, 64-3); // Remaining characters.
12807     __ beq(CCR0, Ldone);
12808     __ bind(Lslow);
12809     __ string_compress($src$$Register, $dst$$Register, $result$$Register, $tmp2$$Register, Lfailure2);
12810     __ li($result$$Register, 0);
12811     __ b(Ldone);
12812 
12813     __ bind(Lfailure1);
12814     __ mr($result$$Register, $len$$Register);
12815     __ mfctr($tmp1$$Register);
12816     __ rldimi_($result$$Register, $tmp1$$Register, 3, 0); // Remaining characters.
12817     __ beq(CCR0, Ldone);
12818     __ b(Lslow);
12819 
12820     __ bind(Lfailure2);
12821     __ mfctr($result$$Register); // Remaining characters.
12822 
12823     __ bind(Ldone);
12824     __ subf($result$$Register, $result$$Register, $len$$Register);
12825   %}
12826   ins_pipe(pipe_class_default);
12827 %}
12828 
12829 
12830 //---------- Min/Max Instructions ---------------------------------------------
12831 
12832 instruct minI_reg_reg_Ex(iRegIdst dst, iRegIsrc src1, iRegIsrc src2) %{
12833   match(Set dst (MinI src1 src2));
12834   ins_cost(DEFAULT_COST*6);
12835 
12836   expand %{
12837     iRegLdst src1s;
12838     iRegLdst src2s;
12839     iRegLdst diff;
12840     iRegLdst sm;
12841     iRegLdst doz; // difference or zero
12842     convI2L_reg(src1s, src1); // Ensure proper sign extension.
12843     convI2L_reg(src2s, src2); // Ensure proper sign extension.
12844     subL_reg_reg(diff, src2s, src1s);
12845     // Need to consider >=33 bit result, therefore we need signmaskL.
12846     signmask64L_regL(sm, diff);
12847     andL_reg_reg(doz, diff, sm); // <=0
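    // dst = src1 + min(src2 - src1, 0) = min(src1, src2).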
12848     addI_regL_regL(dst, doz, src1s);
12849   %}
12850 %}
12851 
12852 instruct minI_reg_reg_isel(iRegIdst dst, iRegIsrc src1, iRegIsrc src2, flagsRegCR0 cr0) %{
12853   match(Set dst (MinI src1 src2));
12854   effect(KILL cr0);
12855   predicate(VM_Version::has_isel());
12856   ins_cost(DEFAULT_COST*2);
12857 
12858   ins_encode %{
12859     __ cmpw(CCR0, $src1$$Register, $src2$$Register);
12860     __ isel($dst$$Register, CCR0, Assembler::less, /*invert*/false, $src1$$Register, $src2$$Register);
12861   %}
12862   ins_pipe(pipe_class_default);
12863 %}
12864 
12865 instruct maxI_reg_reg_Ex(iRegIdst dst, iRegIsrc src1, iRegIsrc src2) %{
12866   match(Set dst (MaxI src1 src2));
12867   ins_cost(DEFAULT_COST*6);
12868 
12869   expand %{
12870     iRegLdst src1s;
12871     iRegLdst src2s;
12872     iRegLdst diff;
12873     iRegLdst sm;
12874     iRegLdst doz; // difference or zero
12875     convI2L_reg(src1s, src1); // Ensure proper sign extension.
12876     convI2L_reg(src2s, src2); // Ensure proper sign extension.
12877     subL_reg_reg(diff, src2s, src1s);
12878     // Need to consider >=33 bit result, therefore we need signmaskL.
12879     signmask64L_regL(sm, diff);
12880     andcL_reg_reg(doz, diff, sm); // >=0
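    // dst = src1 + max(src2 - src1, 0) = max(src1, src2).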
12881     addI_regL_regL(dst, doz, src1s);
12882   %}
12883 %}
12884 
12885 instruct maxI_reg_reg_isel(iRegIdst dst, iRegIsrc src1, iRegIsrc src2, flagsRegCR0 cr0) %{
12886   match(Set dst (MaxI src1 src2));
12887   effect(KILL cr0);
12888   predicate(VM_Version::has_isel());
12889   ins_cost(DEFAULT_COST*2);
12890 
12891   ins_encode %{
12892     __ cmpw(CCR0, $src1$$Register, $src2$$Register);
12893     __ isel($dst$$Register, CCR0, Assembler::greater, /*invert*/false, $src1$$Register, $src2$$Register);
12894   %}
12895   ins_pipe(pipe_class_default);
12896 %}
12897 
12898 //---------- Population Count Instructions ------------------------------------
12899 
12900 // Popcnt for Power7.
12901 instruct popCountI(iRegIdst dst, iRegIsrc src) %{
12902   match(Set dst (PopCountI src));
12903   predicate(UsePopCountInstruction && VM_Version::has_popcntw());
12904   ins_cost(DEFAULT_COST);
12905 
12906   format %{ "POPCNTW $dst, $src" %}
12907   size(4);
12908   ins_encode %{
12909     __ popcntw($dst$$Register, $src$$Register);
12910   %}
12911   ins_pipe(pipe_class_default);
12912 %}
12913 
12914 // Popcnt for Power7.
12915 instruct popCountL(iRegIdst dst, iRegLsrc src) %{
12916   predicate(UsePopCountInstruction && VM_Version::has_popcntw());
12917   match(Set dst (PopCountL src));
12918   ins_cost(DEFAULT_COST);
12919 
12920   format %{ "POPCNTD $dst, $src" %}
12921   size(4);
12922   ins_encode %{
12923     __ popcntd($dst$$Register, $src$$Register);
12924   %}
12925   ins_pipe(pipe_class_default);
12926 %}
12927 
12928 instruct countLeadingZerosI(iRegIdst dst, iRegIsrc src) %{
12929   match(Set dst (CountLeadingZerosI src));
12930   predicate(UseCountLeadingZerosInstructionsPPC64);  // See Matcher::match_rule_supported.
12931   ins_cost(DEFAULT_COST);
12932 
12933   format %{ "CNTLZW  $dst, $src" %}
12934   size(4);
12935   ins_encode %{
12936     __ cntlzw($dst$$Register, $src$$Register);
12937   %}
12938   ins_pipe(pipe_class_default);
12939 %}
12940 
12941 instruct countLeadingZerosL(iRegIdst dst, iRegLsrc src) %{
12942   match(Set dst (CountLeadingZerosL src));
12943   predicate(UseCountLeadingZerosInstructionsPPC64);  // See Matcher::match_rule_supported.
12944   ins_cost(DEFAULT_COST);
12945 
12946   format %{ "CNTLZD  $dst, $src" %}
12947   size(4);
12948   ins_encode %{
12949     __ cntlzd($dst$$Register, $src$$Register);
12950   %}
12951   ins_pipe(pipe_class_default);
12952 %}
12953 
12954 instruct countLeadingZerosP(iRegIdst dst, iRegPsrc src) %{
12955   // no match-rule, false predicate
12956   effect(DEF dst, USE src);
12957   predicate(false);
12958 
12959   format %{ "CNTLZD  $dst, $src" %}
12960   size(4);
12961   ins_encode %{
12962     __ cntlzd($dst$$Register, $src$$Register);
12963   %}
12964   ins_pipe(pipe_class_default);
12965 %}
12966 
12967 instruct countTrailingZerosI_Ex(iRegIdst dst, iRegIsrc src) %{
12968   match(Set dst (CountTrailingZerosI src));
12969   predicate(UseCountLeadingZerosInstructionsPPC64 && !UseCountTrailingZerosInstructionsPPC64);
12970   ins_cost(DEFAULT_COST);
12971 
12972   expand %{
12973     immI16 imm1 %{ (int)-1 %}
12974     immI16 imm2 %{ (int)32 %}
12975     immI_minus1 m1 %{ -1 %}
12976     iRegIdst tmpI1;
12977     iRegIdst tmpI2;
12978     iRegIdst tmpI3;
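    // ~src & (src - 1) has exactly the trailing-zero bits of src set,
    // so ctz(src) = 32 - clz(~src & (src - 1)).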
12979     addI_reg_imm16(tmpI1, src, imm1);
12980     andcI_reg_reg(tmpI2, src, m1, tmpI1);
12981     countLeadingZerosI(tmpI3, tmpI2);
12982     subI_imm16_reg(dst, imm2, tmpI3);
12983   %}
12984 %}
12985 
12986 instruct countTrailingZerosI_cnttzw(iRegIdst dst, iRegIsrc src) %{
12987   match(Set dst (CountTrailingZerosI src));
12988   predicate(UseCountTrailingZerosInstructionsPPC64);
12989   ins_cost(DEFAULT_COST);
12990 
12991   format %{ "CNTTZW  $dst, $src" %}
12992   size(4);
12993   ins_encode %{
12994     __ cnttzw($dst$$Register, $src$$Register);
12995   %}
12996   ins_pipe(pipe_class_default);
12997 %}
12998 
12999 instruct countTrailingZerosL_Ex(iRegIdst dst, iRegLsrc src) %{
13000   match(Set dst (CountTrailingZerosL src));
13001   predicate(UseCountLeadingZerosInstructionsPPC64 && !UseCountTrailingZerosInstructionsPPC64);
13002   ins_cost(DEFAULT_COST);
13003 
13004   expand %{
13005     immL16 imm1 %{ (long)-1 %}
13006     immI16 imm2 %{ (int)64 %}
13007     iRegLdst tmpL1;
13008     iRegLdst tmpL2;
13009     iRegIdst tmpL3;
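    // (src - 1) & ~src has exactly the trailing-zero bits of src set,
    // so ctz(src) = 64 - clz((src - 1) & ~src).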
13010     addL_reg_imm16(tmpL1, src, imm1);
13011     andcL_reg_reg(tmpL2, tmpL1, src);
13012     countLeadingZerosL(tmpL3, tmpL2);
13013     subI_imm16_reg(dst, imm2, tmpL3);
13014  %}
13015 %}
13016 
13017 instruct countTrailingZerosL_cnttzd(iRegIdst dst, iRegLsrc src) %{
13018   match(Set dst (CountTrailingZerosL src));
13019   predicate(UseCountTrailingZerosInstructionsPPC64);
13020   ins_cost(DEFAULT_COST);
13021 
13022   format %{ "CNTTZD  $dst, $src" %}
13023   size(4);
13024   ins_encode %{
13025     __ cnttzd($dst$$Register, $src$$Register);
13026   %}
13027   ins_pipe(pipe_class_default);
13028 %}
13029 
13030 // Expand nodes for byte_reverse_int.
13031 instruct insrwi_a(iRegIdst dst, iRegIsrc src, immI16 pos, immI16 shift) %{
13032   effect(DEF dst, USE src, USE pos, USE shift);
13033   predicate(false);
13034 
13035   format %{ "INSRWI  $dst, $src, $pos, $shift" %}
13036   size(4);
13037   ins_encode %{
13038     __ insrwi($dst$$Register, $src$$Register, $shift$$constant, $pos$$constant);
13039   %}
13040   ins_pipe(pipe_class_default);
13041 %}
13042 
13043 // As insrwi_a, but with USE_DEF.
13044 instruct insrwi(iRegIdst dst, iRegIsrc src, immI16 pos, immI16 shift) %{
13045   effect(USE_DEF dst, USE src, USE pos, USE shift);
13046   predicate(false);
13047 
13048   format %{ "INSRWI  $dst, $src, $pos, $shift" %}
13049   size(4);
13050   ins_encode %{
13051     __ insrwi($dst$$Register, $src$$Register, $shift$$constant, $pos$$constant);
13052   %}
13053   ins_pipe(pipe_class_default);
13054 %}
13055 
13056 // Just slightly faster than the Java implementation.
13057 instruct bytes_reverse_int_Ex(iRegIdst dst, iRegIsrc src) %{
13058   match(Set dst (ReverseBytesI src));
13059   predicate(!UseByteReverseInstructions);
13060   ins_cost(7*DEFAULT_COST);
13061 
13062   expand %{
13063     immI16 imm24 %{ (int) 24 %}
13064     immI16 imm16 %{ (int) 16 %}
13065     immI16  imm8 %{ (int)  8 %}
13066     immI16  imm4 %{ (int)  4 %}
13067     immI16  imm0 %{ (int)  0 %}
13068     iRegLdst tmpI1;
13069     iRegLdst tmpI2;
13070     iRegLdst tmpI3;
13071 
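          // Byte movement sketch, src = |a|b|c|d| (leftmost byte is the MSB):
          //   after insrwi_a:    dst = | | | |a|
          //   after 1st insrwi:  dst = | |a|b|a|
          //   after 2nd insrwi:  dst = | |c|b|a|
          //   after 3rd insrwi:  dst = |d|c|b|a|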
13072     urShiftI_reg_imm(tmpI1, src, imm24);
13073     insrwi_a(dst, tmpI1, imm24, imm8);
13074     urShiftI_reg_imm(tmpI2, src, imm16);
13075     insrwi(dst, tmpI2, imm8, imm16);
13076     urShiftI_reg_imm(tmpI3, src, imm8);
13077     insrwi(dst, tmpI3, imm8, imm8);
13078     insrwi(dst, src, imm0, imm8);
13079   %}
13080 %}
13081 
13082 instruct bytes_reverse_int_vec(iRegIdst dst, iRegIsrc src, vecX tmpV) %{
13083   match(Set dst (ReverseBytesI src));
13084   predicate(UseVectorByteReverseInstructionsPPC64);
13085   effect(TEMP tmpV);
13086   ins_cost(DEFAULT_COST*3);
13087   size(12);
13088   format %{ "MTVSRWZ $tmpV, $src\n"
13089             "\tXXBRW   $tmpV, $tmpV\n"
13090             "\tMFVSRWZ $dst, $tmpV" %}
13091 
13092   ins_encode %{
13093     __ mtvsrwz($tmpV$$VectorSRegister, $src$$Register);
13094     __ xxbrw($tmpV$$VectorSRegister, $tmpV$$VectorSRegister);
13095     __ mfvsrwz($dst$$Register, $tmpV$$VectorSRegister);
13096   %}
13097   ins_pipe(pipe_class_default);
13098 %}
13099 
13100 instruct bytes_reverse_int(iRegIdst dst, iRegIsrc src) %{
13101   match(Set dst (ReverseBytesI src));
13102   predicate(UseByteReverseInstructions);
13103   ins_cost(DEFAULT_COST);
13104   size(4);
13105 
13106   format %{ "BRW  $dst, $src" %}
13107 
13108   ins_encode %{
13109     __ brw($dst$$Register, $src$$Register);
13110   %}
13111   ins_pipe(pipe_class_default);
13112 %}
13113 
13114 instruct bytes_reverse_long_Ex(iRegLdst dst, iRegLsrc src) %{
13115   match(Set dst (ReverseBytesL src));
13116   predicate(!UseByteReverseInstructions);
13117   ins_cost(15*DEFAULT_COST);
13118 
13119   expand %{
13120     immI16 imm56 %{ (int) 56 %}
13121     immI16 imm48 %{ (int) 48 %}
13122     immI16 imm40 %{ (int) 40 %}
13123     immI16 imm32 %{ (int) 32 %}
13124     immI16 imm24 %{ (int) 24 %}
13125     immI16 imm16 %{ (int) 16 %}
13126     immI16  imm8 %{ (int)  8 %}
13127     immI16  imm0 %{ (int)  0 %}
13128     iRegLdst tmpL1;
13129     iRegLdst tmpL2;
13130     iRegLdst tmpL3;
13131     iRegLdst tmpL4;
13132     iRegLdst tmpL5;
13133     iRegLdst tmpL6;
13134 
13135                                         // src   : |a|b|c|d|e|f|g|h|
13136     rldicl(tmpL1, src, imm8, imm24);    // tmpL1 : | | | |e|f|g|h|a|
13137     rldicl(tmpL2, tmpL1, imm32, imm24); // tmpL2 : | | | |a| | | |e|
13138     rldicl(tmpL3, tmpL2, imm32, imm0);  // tmpL3 : | | | |e| | | |a|
13139     rldicl(tmpL1, src, imm16, imm24);   // tmpL1 : | | | |f|g|h|a|b|
13140     rldicl(tmpL2, tmpL1, imm32, imm24); // tmpL2 : | | | |b| | | |f|
13141     rldicl(tmpL4, tmpL2, imm40, imm0);  // tmpL4 : | | |f| | | |b| |
13142     orL_reg_reg(tmpL5, tmpL3, tmpL4);   // tmpL5 : | | |f|e| | |b|a|
13143     rldicl(tmpL1, src, imm24, imm24);   // tmpL1 : | | | |g|h|a|b|c|
13144     rldicl(tmpL2, tmpL1, imm32, imm24); // tmpL2 : | | | |c| | | |g|
13145     rldicl(tmpL3, tmpL2, imm48, imm0);  // tmpL3 : | |g| | | |c| | |
13146     rldicl(tmpL1, src, imm32, imm24);   // tmpL1 : | | | |h|a|b|c|d|
13147     rldicl(tmpL2, tmpL1, imm32, imm24); // tmpL2 : | | | |d| | | |h|
13148     rldicl(tmpL4, tmpL2, imm56, imm0);  // tmpL4 : |h| | | |d| | | |
13149     orL_reg_reg(tmpL6, tmpL3, tmpL4);   // tmpL6 : |h|g| | |d|c| | |
13150     orL_reg_reg(dst, tmpL5, tmpL6);     // dst   : |h|g|f|e|d|c|b|a|
13151   %}
13152 %}
13153 
13154 instruct bytes_reverse_long_vec(iRegLdst dst, iRegLsrc src, vecX tmpV) %{
13155   match(Set dst (ReverseBytesL src));
13156   predicate(UseVectorByteReverseInstructionsPPC64);
13157   effect(TEMP tmpV);
13158   ins_cost(DEFAULT_COST*3);
13159   size(12);
13160   format %{ "MTVSRD  $tmpV, $src\n"
13161             "\tXXBRD   $tmpV, $tmpV\n"
13162             "\tMFVSRD  $dst, $tmpV" %}
13163 
13164   ins_encode %{
13165     __ mtvsrd($tmpV$$VectorSRegister, $src$$Register);
13166     __ xxbrd($tmpV$$VectorSRegister, $tmpV$$VectorSRegister);
13167     __ mfvsrd($dst$$Register, $tmpV$$VectorSRegister);
13168   %}
13169   ins_pipe(pipe_class_default);
13170 %}
13171 
13172 instruct bytes_reverse_long(iRegLdst dst, iRegLsrc src) %{
13173   match(Set dst (ReverseBytesL src));
13174   predicate(UseByteReverseInstructions);
13175   ins_cost(DEFAULT_COST);
13176   size(4);
13177 
13178   format %{ "BRD  $dst, $src" %}
13179 
13180   ins_encode %{
13181     __ brd($dst$$Register, $src$$Register);
13182   %}
13183   ins_pipe(pipe_class_default);
13184 %}
13185 
13186 instruct bytes_reverse_ushort_Ex(iRegIdst dst, iRegIsrc src) %{
13187   match(Set dst (ReverseBytesUS src));
13188   predicate(!UseByteReverseInstructions);
13189   ins_cost(2*DEFAULT_COST);
13190 
13191   expand %{
13192     immI16  imm16 %{ (int) 16 %}
13193     immI16   imm8 %{ (int)  8 %}
13194 
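          // For a char value src = |0|0|h|l|, the shift yields |0|0|0|h|; the
          // insert then places the low byte of src (l) at byte 2, giving |0|0|l|h|.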
13195     urShiftI_reg_imm(dst, src, imm8);
13196     insrwi(dst, src, imm16, imm8);
13197   %}
13198 %}
13199 
13200 instruct bytes_reverse_ushort(iRegIdst dst, iRegIsrc src) %{
13201   match(Set dst (ReverseBytesUS src));
13202   predicate(UseByteReverseInstructions);
13203   ins_cost(DEFAULT_COST);
13204   size(4);
13205 
13206   format %{ "BRH  $dst, $src" %}
13207 
13208   ins_encode %{
13209     __ brh($dst$$Register, $src$$Register);
13210   %}
13211   ins_pipe(pipe_class_default);
13212 %}
13213 
13214 instruct bytes_reverse_short_Ex(iRegIdst dst, iRegIsrc src) %{
13215   match(Set dst (ReverseBytesS src));
13216   predicate(!UseByteReverseInstructions);
13217   ins_cost(3*DEFAULT_COST);
13218 
13219   expand %{
13220     immI16  imm16 %{ (int) 16 %}
13221     immI16   imm8 %{ (int)  8 %}
13222     iRegLdst tmpI1;
13223 
13224     urShiftI_reg_imm(tmpI1, src, imm8);
13225     insrwi(tmpI1, src, imm16, imm8);
13226     extsh(dst, tmpI1);
13227   %}
13228 %}
13229 
13230 instruct bytes_reverse_short(iRegIdst dst, iRegIsrc src) %{
13231   match(Set dst (ReverseBytesS src));
13232   predicate(UseByteReverseInstructions);
13233   ins_cost(DEFAULT_COST);
13234   size(8);
13235 
13236   format %{ "BRH   $dst, $src\n\t"
13237             "EXTSH $dst, $dst" %}
13238 
13239   ins_encode %{
13240     __ brh($dst$$Register, $src$$Register);
13241     __ extsh($dst$$Register, $dst$$Register);
13242   %}
13243   ins_pipe(pipe_class_default);
13244 %}
13245 
13246 // Load Integer reversed byte order
13247 instruct loadI_reversed(iRegIdst dst, indirect mem) %{
13248   match(Set dst (ReverseBytesI (LoadI mem)));
13249   predicate(n->in(1)->as_Load()->is_unordered() || followed_by_acquire(n->in(1)));
13250   ins_cost(MEMORY_REF_COST);
13251 
13252   size(4);
13253   ins_encode %{
13254     __ lwbrx($dst$$Register, $mem$$Register);
13255   %}
13256   ins_pipe(pipe_class_default);
13257 %}
13258 
13259 instruct loadI_reversed_acquire(iRegIdst dst, indirect mem) %{
13260   match(Set dst (ReverseBytesI (LoadI mem)));
13261   ins_cost(2 * MEMORY_REF_COST);
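        // The twi_0/isync sequence after the load provides acquire semantics: the
        // artificial dependency plus isync keeps subsequent accesses from being
        // performed before the load completes.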
13262 
13263   size(12);
13264   ins_encode %{
13265     __ lwbrx($dst$$Register, $mem$$Register);
13266     __ twi_0($dst$$Register);
13267     __ isync();
13268   %}
13269   ins_pipe(pipe_class_default);
13270 %}
13271 
13272 // Load Long - aligned and reversed
13273 instruct loadL_reversed(iRegLdst dst, indirect mem) %{
13274   match(Set dst (ReverseBytesL (LoadL mem)));
13275   predicate(VM_Version::has_ldbrx() && (n->in(1)->as_Load()->is_unordered() || followed_by_acquire(n->in(1))));
13276   ins_cost(MEMORY_REF_COST);
13277 
13278   size(4);
13279   ins_encode %{
13280     __ ldbrx($dst$$Register, $mem$$Register);
13281   %}
13282   ins_pipe(pipe_class_default);
13283 %}
13284 
13285 instruct loadL_reversed_acquire(iRegLdst dst, indirect mem) %{
13286   match(Set dst (ReverseBytesL (LoadL mem)));
13287   predicate(VM_Version::has_ldbrx());
13288   ins_cost(2 * MEMORY_REF_COST);
13289 
13290   size(12);
13291   ins_encode %{
13292     __ ldbrx($dst$$Register, $mem$$Register);
13293     __ twi_0($dst$$Register);
13294     __ isync();
13295   %}
13296   ins_pipe(pipe_class_default);
13297 %}
13298 
13299 // Load unsigned short / char reversed byte order
13300 instruct loadUS_reversed(iRegIdst dst, indirect mem) %{
13301   match(Set dst (ReverseBytesUS (LoadUS mem)));
13302   predicate(n->in(1)->as_Load()->is_unordered() || followed_by_acquire(n->in(1)));
13303   ins_cost(MEMORY_REF_COST);
13304 
13305   size(4);
13306   ins_encode %{
13307     __ lhbrx($dst$$Register, $mem$$Register);
13308   %}
13309   ins_pipe(pipe_class_default);
13310 %}
13311 
13312 instruct loadUS_reversed_acquire(iRegIdst dst, indirect mem) %{
13313   match(Set dst (ReverseBytesUS (LoadUS mem)));
13314   ins_cost(2 * MEMORY_REF_COST);
13315 
13316   size(12);
13317   ins_encode %{
13318     __ lhbrx($dst$$Register, $mem$$Register);
13319     __ twi_0($dst$$Register);
13320     __ isync();
13321   %}
13322   ins_pipe(pipe_class_default);
13323 %}
13324 
13325 // Load short reversed byte order
13326 instruct loadS_reversed(iRegIdst dst, indirect mem) %{
13327   match(Set dst (ReverseBytesS (LoadS mem)));
13328   predicate(n->in(1)->as_Load()->is_unordered() || followed_by_acquire(n->in(1)));
13329   ins_cost(MEMORY_REF_COST + DEFAULT_COST);
13330 
13331   size(8);
13332   ins_encode %{
13333     __ lhbrx($dst$$Register, $mem$$Register);
13334     __ extsh($dst$$Register, $dst$$Register);
13335   %}
13336   ins_pipe(pipe_class_default);
13337 %}
13338 
13339 instruct loadS_reversed_acquire(iRegIdst dst, indirect mem) %{
13340   match(Set dst (ReverseBytesS (LoadS mem)));
13341   ins_cost(2 * MEMORY_REF_COST + DEFAULT_COST);
13342 
13343   size(16);
13344   ins_encode %{
13345     __ lhbrx($dst$$Register, $mem$$Register);
13346     __ twi_0($dst$$Register);
13347     __ extsh($dst$$Register, $dst$$Register);
13348     __ isync();
13349   %}
13350   ins_pipe(pipe_class_default);
13351 %}
13352 
13353 // Store Integer reversed byte order
13354 instruct storeI_reversed(iRegIsrc src, indirect mem) %{
13355   match(Set mem (StoreI mem (ReverseBytesI src)));
13356   ins_cost(MEMORY_REF_COST);
13357 
13358   size(4);
13359   ins_encode %{
13360     __ stwbrx($src$$Register, $mem$$Register);
13361   %}
13362   ins_pipe(pipe_class_default);
13363 %}
13364 
13365 // Store Long reversed byte order
13366 instruct storeL_reversed(iRegLsrc src, indirect mem) %{
13367   match(Set mem (StoreL mem (ReverseBytesL src)));
13368   predicate(VM_Version::has_stdbrx());
13369   ins_cost(MEMORY_REF_COST);
13370 
13371   size(4);
13372   ins_encode %{
13373     __ stdbrx($src$$Register, $mem$$Register);
13374   %}
13375   ins_pipe(pipe_class_default);
13376 %}
13377 
13378 // Store unsigned short / char reversed byte order
13379 instruct storeUS_reversed(iRegIsrc src, indirect mem) %{
13380   match(Set mem (StoreC mem (ReverseBytesUS src)));
13381   ins_cost(MEMORY_REF_COST);
13382 
13383   size(4);
13384   ins_encode %{
13385     __ sthbrx($src$$Register, $mem$$Register);
13386   %}
13387   ins_pipe(pipe_class_default);
13388 %}
13389 
13390 // Store short reversed byte order
13391 instruct storeS_reversed(iRegIsrc src, indirect mem) %{
13392   match(Set mem (StoreC mem (ReverseBytesS src)));
13393   ins_cost(MEMORY_REF_COST);
13394 
13395   size(4);
13396   ins_encode %{
13397     __ sthbrx($src$$Register, $mem$$Register);
13398   %}
13399   ins_pipe(pipe_class_default);
13400 %}
13401 
13402 instruct mtvsrwz(vecX temp1, iRegIsrc src) %{
13403   effect(DEF temp1, USE src);
13404 
13405   format %{ "MTVSRWZ $temp1, $src \t// Move to 16-byte register" %}
13406   size(4);
13407   ins_encode %{
13408     __ mtvsrwz($temp1$$VectorSRegister, $src$$Register);
13409   %}
13410   ins_pipe(pipe_class_default);
13411 %}
13412 
13413 instruct xxspltw(vecX dst, vecX src, immI8 imm1) %{
13414   effect(DEF dst, USE src, USE imm1);
13415 
13416   format %{ "XXSPLTW $dst, $src, $imm1 \t// Splat word" %}
13417   size(4);
13418   ins_encode %{
13419     __ xxspltw($dst$$VectorSRegister, $src$$VectorSRegister, $imm1$$constant);
13420   %}
13421   ins_pipe(pipe_class_default);
13422 %}
13423 
13424 instruct xscvdpspn_regF(vecX dst, regF src) %{
13425   effect(DEF dst, USE src);
13426 
13427   format %{ "XSCVDPSPN $dst, $src \t// Convert scalar single precision to vector single precision" %}
13428   size(4);
13429   ins_encode %{
13430     __ xscvdpspn($dst$$VectorSRegister, $src$$FloatRegister->to_vsr());
13431   %}
13432   ins_pipe(pipe_class_default);
13433 %}
13434 
13435 //---------- Replicate Vector Instructions ------------------------------------
13436 
13437 // Insrdi does replicate if src == dst.
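      // E.g. repl32 executes insrdi(dst, dst, 32, 0), which copies the low word of
      // dst into the high word and thereby replicates it across the doubleword.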
13438 instruct repl32(iRegLdst dst) %{
13439   predicate(false);
13440   effect(USE_DEF dst);
13441 
13442   format %{ "INSRDI  $dst, #0, $dst, #32 \t// replicate" %}
13443   size(4);
13444   ins_encode %{
13445     __ insrdi($dst$$Register, $dst$$Register, 32, 0);
13446   %}
13447   ins_pipe(pipe_class_default);
13448 %}
13449 
13450 // Insrdi does replicate if src == dst.
13451 instruct repl48(iRegLdst dst) %{
13452   predicate(false);
13453   effect(USE_DEF dst);
13454 
13455   format %{ "INSRDI  $dst, #0, $dst, #48 \t// replicate" %}
13456   size(4);
13457   ins_encode %{
13458     __ insrdi($dst$$Register, $dst$$Register, 48, 0);
13459   %}
13460   ins_pipe(pipe_class_default);
13461 %}
13462 
13463 // Insrdi does replicate if src == dst.
13464 instruct repl56(iRegLdst dst) %{
13465   predicate(false);
13466   effect(USE_DEF dst);
13467 
13468   format %{ "INSRDI  $dst, #0, $dst, #56 \t// replicate" %}
13469   size(4);
13470   ins_encode %{
13471     __ insrdi($dst$$Register, $dst$$Register, 56, 0);
13472   %}
13473   ins_pipe(pipe_class_default);
13474 %}
13475 
13476 instruct repl8B_reg_Ex(iRegLdst dst, iRegIsrc src) %{
13477   match(Set dst (ReplicateB src));
13478   predicate(n->as_Vector()->length() == 8);
13479   expand %{
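          // Each insrdi step doubles the replicated region: repl56 leaves two
          // copies of the byte in the low halfword, repl48 four copies in the low
          // word, repl32 eight copies in the full doubleword.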
13480     moveReg(dst, src);
13481     repl56(dst);
13482     repl48(dst);
13483     repl32(dst);
13484   %}
13485 %}
13486 
13487 instruct repl8B_immI0(iRegLdst dst, immI_0 zero) %{
13488   match(Set dst (ReplicateB zero));
13489   predicate(n->as_Vector()->length() == 8);
13490   format %{ "LI      $dst, #0 \t// replicate8B" %}
13491   size(4);
13492   ins_encode %{
13493     __ li($dst$$Register, (int)((short)($zero$$constant & 0xFFFF)));
13494   %}
13495   ins_pipe(pipe_class_default);
13496 %}
13497 
13498 instruct repl8B_immIminus1(iRegLdst dst, immI_minus1 src) %{
13499   match(Set dst (ReplicateB src));
13500   predicate(n->as_Vector()->length() == 8);
13501   format %{ "LI      $dst, #-1 \t// replicate8B" %}
13502   size(4);
13503   ins_encode %{
13504     __ li($dst$$Register, (int)((short)($src$$constant & 0xFFFF)));
13505   %}
13506   ins_pipe(pipe_class_default);
13507 %}
13508 
13509 instruct repl16B_reg_Ex(vecX dst, iRegIsrc src) %{
13510   match(Set dst (ReplicateB src));
13511   predicate(n->as_Vector()->length() == 16);
13512 
13513   expand %{
13514     iRegLdst tmpL;
13515     vecX tmpV;
13516     immI8  imm1 %{ (int)  1 %}
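          // After repl56/repl48 the low word of tmpL holds four copies of the
          // byte; mtvsrwz places that word in word element 1 of tmpV, hence the
          // splat index of 1.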
13517     moveReg(tmpL, src);
13518     repl56(tmpL);
13519     repl48(tmpL);
13520     mtvsrwz(tmpV, tmpL);
13521     xxspltw(dst, tmpV, imm1);
13522   %}
13523 %}
13524 
13525 instruct repl16B_immI0(vecX dst, immI_0 zero) %{
13526   match(Set dst (ReplicateB zero));
13527   predicate(n->as_Vector()->length() == 16);
13528 
13529   format %{ "XXLXOR      $dst, $zero \t// replicate16B" %}
13530   size(4);
13531   ins_encode %{
13532     __ xxlxor($dst$$VectorSRegister, $dst$$VectorSRegister, $dst$$VectorSRegister);
13533   %}
13534   ins_pipe(pipe_class_default);
13535 %}
13536 
13537 instruct repl16B_immIminus1(vecX dst, immI_minus1 src) %{
13538   match(Set dst (ReplicateB src));
13539   predicate(n->as_Vector()->length() == 16);
13540 
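        // XXLEQV of a register with itself yields all ones (eqv == ~(a ^ b)).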
13541   format %{ "XXLEQV      $dst, $src \t// replicate16B" %}
13542   size(4);
13543   ins_encode %{
13544     __ xxleqv($dst$$VectorSRegister, $dst$$VectorSRegister, $dst$$VectorSRegister);
13545   %}
13546   ins_pipe(pipe_class_default);
13547 %}
13548 
13549 instruct repl4S_reg_Ex(iRegLdst dst, iRegIsrc src) %{
13550   match(Set dst (ReplicateS src));
13551   predicate(n->as_Vector()->length() == 4);
13552   expand %{
13553     moveReg(dst, src);
13554     repl48(dst);
13555     repl32(dst);
13556   %}
13557 %}
13558 
13559 instruct repl4S_immI0(iRegLdst dst, immI_0 zero) %{
13560   match(Set dst (ReplicateS zero));
13561   predicate(n->as_Vector()->length() == 4);
13562   format %{ "LI      $dst, #0 \t// replicate4S" %}
13563   size(4);
13564   ins_encode %{
13565     __ li($dst$$Register, (int)((short)($zero$$constant & 0xFFFF)));
13566   %}
13567   ins_pipe(pipe_class_default);
13568 %}
13569 
13570 instruct repl4S_immIminus1(iRegLdst dst, immI_minus1 src) %{
13571   match(Set dst (ReplicateS src));
13572   predicate(n->as_Vector()->length() == 4);
13573   format %{ "LI      $dst, -1 \t// replicate4S" %}
13574   size(4);
13575   ins_encode %{
13576     __ li($dst$$Register, (int)((short)($src$$constant & 0xFFFF)));
13577   %}
13578   ins_pipe(pipe_class_default);
13579 %}
13580 
13581 instruct repl8S_reg_Ex(vecX dst, iRegIsrc src) %{
13582   match(Set dst (ReplicateS src));
13583   predicate(n->as_Vector()->length() == 8);
13584 
13585   expand %{
13586     iRegLdst tmpL;
13587     vecX tmpV;
13588     immI8  zero %{ (int)  0 %}
13589     moveReg(tmpL, src);
13590     repl48(tmpL);
13591     repl32(tmpL);
13592     mtvsrd(tmpV, tmpL);
13593     xxpermdi(dst, tmpV, tmpV, zero);
13594   %}
13595 %}
13596 
13597 instruct repl8S_immI0(vecX dst, immI_0 zero) %{
13598   match(Set dst (ReplicateS zero));
13599   predicate(n->as_Vector()->length() == 8);
13600 
13601   format %{ "XXLXOR      $dst, $zero \t// replicate8S" %}
13602   size(4);
13603   ins_encode %{
13604     __ xxlxor($dst$$VectorSRegister, $dst$$VectorSRegister, $dst$$VectorSRegister);
13605   %}
13606   ins_pipe(pipe_class_default);
13607 %}
13608 
13609 instruct repl8S_immIminus1(vecX dst, immI_minus1 src) %{
13610   match(Set dst (ReplicateS src));
13611   predicate(n->as_Vector()->length() == 8);
13612 
13613   format %{ "XXLEQV      $dst, $src \t// replicate8S" %}
13614   size(4);
13615   ins_encode %{
13616     __ xxleqv($dst$$VectorSRegister, $dst$$VectorSRegister, $dst$$VectorSRegister);
13617   %}
13618   ins_pipe(pipe_class_default);
13619 %}
13620 
13621 instruct repl2I_reg_Ex(iRegLdst dst, iRegIsrc src) %{
13622   match(Set dst (ReplicateI src));
13623   predicate(n->as_Vector()->length() == 2);
13624   ins_cost(2 * DEFAULT_COST);
13625   expand %{
13626     moveReg(dst, src);
13627     repl32(dst);
13628   %}
13629 %}
13630 
13631 instruct repl2I_immI0(iRegLdst dst, immI_0 zero) %{
13632   match(Set dst (ReplicateI zero));
13633   predicate(n->as_Vector()->length() == 2);
13634   format %{ "LI      $dst, #0 \t// replicate2I" %}
13635   size(4);
13636   ins_encode %{
13637     __ li($dst$$Register, (int)((short)($zero$$constant & 0xFFFF)));
13638   %}
13639   ins_pipe(pipe_class_default);
13640 %}
13641 
13642 instruct repl2I_immIminus1(iRegLdst dst, immI_minus1 src) %{
13643   match(Set dst (ReplicateI src));
13644   predicate(n->as_Vector()->length() == 2);
13645   format %{ "LI      $dst, -1 \t// replicate2I" %}
13646   size(4);
13647   ins_encode %{
13648     __ li($dst$$Register, (int)((short)($src$$constant & 0xFFFF)));
13649   %}
13650   ins_pipe(pipe_class_default);
13651 %}
13652 
13653 instruct repl4I_reg_Ex(vecX dst, iRegIsrc src) %{
13654   match(Set dst (ReplicateI src));
13655   predicate(n->as_Vector()->length() == 4);
13656   ins_cost(2 * DEFAULT_COST);
13657 
13658   expand %{
13659     iRegLdst tmpL;
13660     vecX tmpV;
13661     immI8  zero %{ (int)  0 %}
13662     moveReg(tmpL, src);
13663     repl32(tmpL);
13664     mtvsrd(tmpV, tmpL);
13665     xxpermdi(dst, tmpV, tmpV, zero);
13666   %}
13667 %}
13668 
13669 instruct repl4I_immI0(vecX dst, immI_0 zero) %{
13670   match(Set dst (ReplicateI zero));
13671   predicate(n->as_Vector()->length() == 4);
13672 
13673   format %{ "XXLXOR      $dst, $zero \t// replicate4I" %}
13674   size(4);
13675   ins_encode %{
13676     __ xxlxor($dst$$VectorSRegister, $dst$$VectorSRegister, $dst$$VectorSRegister);
13677   %}
13678   ins_pipe(pipe_class_default);
13679 %}
13680 
13681 instruct repl4I_immIminus1(vecX dst, immI_minus1 src) %{
13682   match(Set dst (ReplicateI src));
13683   predicate(n->as_Vector()->length() == 4);
13684 
13685   format %{ "XXLEQV      $dst, $dst, $dst \t// replicate4I" %}
13686   size(4);
13687   ins_encode %{
13688     __ xxleqv($dst$$VectorSRegister, $dst$$VectorSRegister, $dst$$VectorSRegister);
13689   %}
13690   ins_pipe(pipe_class_default);
13691 %}
13692 
13693 // Move float to int register via stack, replicate.
13694 instruct repl2F_reg_Ex(iRegLdst dst, regF src) %{
13695   match(Set dst (ReplicateF src));
13696   predicate(n->as_Vector()->length() == 2);
13697   ins_cost(2 * MEMORY_REF_COST + DEFAULT_COST);
13698   expand %{
13699     stackSlotL tmpS;
13700     iRegIdst tmpI;
13701     moveF2I_reg_stack(tmpS, src);   // Move float to stack.
13702     moveF2I_stack_reg(tmpI, tmpS);  // Move stack to int reg.
13703     moveReg(dst, tmpI);             // Move int to long reg.
13704     repl32(dst);                    // Replicate bitpattern.
13705   %}
13706 %}
13707 
13708 // Replicate scalar constant to packed float values in Double register
13709 instruct repl2F_immF_Ex(iRegLdst dst, immF src) %{
13710   match(Set dst (ReplicateF src));
13711   predicate(n->as_Vector()->length() == 2);
13712   ins_cost(5 * DEFAULT_COST);
13713 
13714   format %{ "LD      $dst, offset, $constanttablebase\t// load replicated float $src $src from table, postalloc expanded" %}
13715   postalloc_expand( postalloc_expand_load_replF_constant(dst, src, constanttablebase) );
13716 %}
13717 
13718 // Replicate scalar zero constant to packed float values in Double register
13719 instruct repl2F_immF0(iRegLdst dst, immF_0 zero) %{
13720   match(Set dst (ReplicateF zero));
13721   predicate(n->as_Vector()->length() == 2);
13722 
13723   format %{ "LI      $dst, #0 \t// replicate2F" %}
13724   ins_encode %{
13725     __ li($dst$$Register, 0x0);
13726   %}
13727   ins_pipe(pipe_class_default);
13728 %}
13729 
13730 
13731 //----------Vector Arithmetic Instructions--------------------------------------
13732 
13733 // Vector Addition Instructions
13734 
13735 instruct vadd16B_reg(vecX dst, vecX src1, vecX src2) %{
13736   match(Set dst (AddVB src1 src2));
13737   predicate(n->as_Vector()->length() == 16);
13738   format %{ "VADDUBM  $dst,$src1,$src2\t// add packed16B" %}
13739   size(4);
13740   ins_encode %{
13741     __ vaddubm($dst$$VectorSRegister->to_vr(), $src1$$VectorSRegister->to_vr(), $src2$$VectorSRegister->to_vr());
13742   %}
13743   ins_pipe(pipe_class_default);
13744 %}
13745 
13746 instruct vadd8S_reg(vecX dst, vecX src1, vecX src2) %{
13747   match(Set dst (AddVS src1 src2));
13748   predicate(n->as_Vector()->length() == 8);
13749   format %{ "VADDUHM  $dst,$src1,$src2\t// add packed8S" %}
13750   size(4);
13751   ins_encode %{
13752     __ vadduhm($dst$$VectorSRegister->to_vr(), $src1$$VectorSRegister->to_vr(), $src2$$VectorSRegister->to_vr());
13753   %}
13754   ins_pipe(pipe_class_default);
13755 %}
13756 
13757 instruct vadd4I_reg(vecX dst, vecX src1, vecX src2) %{
13758   match(Set dst (AddVI src1 src2));
13759   predicate(n->as_Vector()->length() == 4);
13760   format %{ "VADDUWM  $dst,$src1,$src2\t// add packed4I" %}
13761   size(4);
13762   ins_encode %{
13763     __ vadduwm($dst$$VectorSRegister->to_vr(), $src1$$VectorSRegister->to_vr(), $src2$$VectorSRegister->to_vr());
13764   %}
13765   ins_pipe(pipe_class_default);
13766 %}
13767 
13768 instruct vadd4F_reg(vecX dst, vecX src1, vecX src2) %{
13769   match(Set dst (AddVF src1 src2));
13770   predicate(n->as_Vector()->length() == 4);
13771   format %{ "VADDFP  $dst,$src1,$src2\t// add packed4F" %}
13772   size(4);
13773   ins_encode %{
13774     __ vaddfp($dst$$VectorSRegister->to_vr(), $src1$$VectorSRegister->to_vr(), $src2$$VectorSRegister->to_vr());
13775   %}
13776   ins_pipe(pipe_class_default);
13777 %}
13778 
13779 instruct vadd2L_reg(vecX dst, vecX src1, vecX src2) %{
13780   match(Set dst (AddVL src1 src2));
13781   predicate(n->as_Vector()->length() == 2);
13782   format %{ "VADDUDM  $dst,$src1,$src2\t// add packed2L" %}
13783   size(4);
13784   ins_encode %{
13785     __ vaddudm($dst$$VectorSRegister->to_vr(), $src1$$VectorSRegister->to_vr(), $src2$$VectorSRegister->to_vr());
13786   %}
13787   ins_pipe(pipe_class_default);
13788 %}
13789 
13790 instruct vadd2D_reg(vecX dst, vecX src1, vecX src2) %{
13791   match(Set dst (AddVD src1 src2));
13792   predicate(n->as_Vector()->length() == 2);
13793   format %{ "XVADDDP  $dst,$src1,$src2\t// add packed2D" %}
13794   size(4);
13795   ins_encode %{
13796     __ xvadddp($dst$$VectorSRegister, $src1$$VectorSRegister, $src2$$VectorSRegister);
13797   %}
13798   ins_pipe(pipe_class_default);
13799 %}
13800 
13801 // Vector Subtraction Instructions
13802 
13803 instruct vsub16B_reg(vecX dst, vecX src1, vecX src2) %{
13804   match(Set dst (SubVB src1 src2));
13805   predicate(n->as_Vector()->length() == 16);
13806   format %{ "VSUBUBM  $dst,$src1,$src2\t// sub packed16B" %}
13807   size(4);
13808   ins_encode %{
13809     __ vsububm($dst$$VectorSRegister->to_vr(), $src1$$VectorSRegister->to_vr(), $src2$$VectorSRegister->to_vr());
13810   %}
13811   ins_pipe(pipe_class_default);
13812 %}
13813 
13814 instruct vsub8S_reg(vecX dst, vecX src1, vecX src2) %{
13815   match(Set dst (SubVS src1 src2));
13816   predicate(n->as_Vector()->length() == 8);
13817   format %{ "VSUBUHM  $dst,$src1,$src2\t// sub packed8S" %}
13818   size(4);
13819   ins_encode %{
13820     __ vsubuhm($dst$$VectorSRegister->to_vr(), $src1$$VectorSRegister->to_vr(), $src2$$VectorSRegister->to_vr());
13821   %}
13822   ins_pipe(pipe_class_default);
13823 %}
13824 
13825 instruct vsub4I_reg(vecX dst, vecX src1, vecX src2) %{
13826   match(Set dst (SubVI src1 src2));
13827   predicate(n->as_Vector()->length() == 4);
13828   format %{ "VSUBUWM  $dst,$src1,$src2\t// sub packed4I" %}
13829   size(4);
13830   ins_encode %{
13831     __ vsubuwm($dst$$VectorSRegister->to_vr(), $src1$$VectorSRegister->to_vr(), $src2$$VectorSRegister->to_vr());
13832   %}
13833   ins_pipe(pipe_class_default);
13834 %}
13835 
13836 instruct vsub4F_reg(vecX dst, vecX src1, vecX src2) %{
13837   match(Set dst (SubVF src1 src2));
13838   predicate(n->as_Vector()->length() == 4);
13839   format %{ "VSUBFP  $dst,$src1,$src2\t// sub packed4F" %}
13840   size(4);
13841   ins_encode %{
13842     __ vsubfp($dst$$VectorSRegister->to_vr(), $src1$$VectorSRegister->to_vr(), $src2$$VectorSRegister->to_vr());
13843   %}
13844   ins_pipe(pipe_class_default);
13845 %}
13846 
13847 instruct vsub2L_reg(vecX dst, vecX src1, vecX src2) %{
13848   match(Set dst (SubVL src1 src2));
13849   predicate(n->as_Vector()->length() == 2);
13850   format %{ "VSUBUDM  $dst,$src1,$src2\t// sub packed2L" %}
13851   size(4);
13852   ins_encode %{
13853     __ vsubudm($dst$$VectorSRegister->to_vr(), $src1$$VectorSRegister->to_vr(), $src2$$VectorSRegister->to_vr());
13854   %}
13855   ins_pipe(pipe_class_default);
13856 %}
13857 
13858 instruct vsub2D_reg(vecX dst, vecX src1, vecX src2) %{
13859   match(Set dst (SubVD src1 src2));
13860   predicate(n->as_Vector()->length() == 2);
13861   format %{ "XVSUBDP  $dst,$src1,$src2\t// sub packed2D" %}
13862   size(4);
13863   ins_encode %{
13864     __ xvsubdp($dst$$VectorSRegister, $src1$$VectorSRegister, $src2$$VectorSRegister);
13865   %}
13866   ins_pipe(pipe_class_default);
13867 %}
13868 
13869 // Vector Multiplication Instructions
13870 
13871 instruct vmul8S_reg(vecX dst, vecX src1, vecX src2, vecX tmp) %{
13872   match(Set dst (MulVS src1 src2));
13873   predicate(n->as_Vector()->length() == 8);
13874   effect(TEMP tmp);
13875   format %{ "VSPLTISH  $tmp,0\t// mul packed8S\n\t"
13876             "VMLADDUHM $dst,$src1,$src2,$tmp" %}
13877   size(8);
13878   ins_encode %{
13879     __ vspltish($tmp$$VectorSRegister->to_vr(), 0);
13880     __ vmladduhm($dst$$VectorSRegister->to_vr(), $src1$$VectorSRegister->to_vr(), $src2$$VectorSRegister->to_vr(), $tmp$$VectorSRegister->to_vr());
13881   %}
13882   ins_pipe(pipe_class_default);
13883 %}
13884 
13885 instruct vmul4I_reg(vecX dst, vecX src1, vecX src2) %{
13886   match(Set dst (MulVI src1 src2));
13887   predicate(n->as_Vector()->length() == 4);
13888   format %{ "VMULUWM  $dst,$src1,$src2\t// mul packed4I" %}
13889   size(4);
13890   ins_encode %{
13891     __ vmuluwm($dst$$VectorSRegister->to_vr(), $src1$$VectorSRegister->to_vr(), $src2$$VectorSRegister->to_vr());
13892   %}
13893   ins_pipe(pipe_class_default);
13894 %}
13895 
13896 instruct vmul4F_reg(vecX dst, vecX src1, vecX src2) %{
13897   match(Set dst (MulVF src1 src2));
13898   predicate(n->as_Vector()->length() == 4);
13899   format %{ "XVMULSP  $dst,$src1,$src2\t// mul packed4F" %}
13900   size(4);
13901   ins_encode %{
13902     __ xvmulsp($dst$$VectorSRegister, $src1$$VectorSRegister, $src2$$VectorSRegister);
13903   %}
13904   ins_pipe(pipe_class_default);
13905 %}
13906 
13907 instruct vmul2D_reg(vecX dst, vecX src1, vecX src2) %{
13908   match(Set dst (MulVD src1 src2));
13909   predicate(n->as_Vector()->length() == 2);
13910   format %{ "XVMULDP  $dst,$src1,$src2\t// mul packed2D" %}
13911   size(4);
13912   ins_encode %{
13913     __ xvmuldp($dst$$VectorSRegister, $src1$$VectorSRegister, $src2$$VectorSRegister);
13914   %}
13915   ins_pipe(pipe_class_default);
13916 %}
13917 
13918 // Vector Division Instructions
13919 
13920 instruct vdiv4F_reg(vecX dst, vecX src1, vecX src2) %{
13921   match(Set dst (DivVF src1 src2));
13922   predicate(n->as_Vector()->length() == 4);
13923   format %{ "XVDIVSP  $dst,$src1,$src2\t// div packed4F" %}
13924   size(4);
13925   ins_encode %{
13926     __ xvdivsp($dst$$VectorSRegister, $src1$$VectorSRegister, $src2$$VectorSRegister);
13927   %}
13928   ins_pipe(pipe_class_default);
13929 %}
13930 
13931 instruct vdiv2D_reg(vecX dst, vecX src1, vecX src2) %{
13932   match(Set dst (DivVD src1 src2));
13933   predicate(n->as_Vector()->length() == 2);
13934   format %{ "XVDIVDP  $dst,$src1,$src2\t// div packed2D" %}
13935   size(4);
13936   ins_encode %{
13937     __ xvdivdp($dst$$VectorSRegister, $src1$$VectorSRegister, $src2$$VectorSRegister);
13938   %}
13939   ins_pipe(pipe_class_default);
13940 %}
13941 
13942 // Vector Absolute Instructions
13943 
13944 instruct vabs4F_reg(vecX dst, vecX src) %{
13945   match(Set dst (AbsVF src));
13946   predicate(n->as_Vector()->length() == 4);
13947   format %{ "XVABSSP $dst,$src\t// absolute packed4F" %}
13948   size(4);
13949   ins_encode %{
13950     __ xvabssp($dst$$VectorSRegister, $src$$VectorSRegister);
13951   %}
13952   ins_pipe(pipe_class_default);
13953 %}
13954 
13955 instruct vabs2D_reg(vecX dst, vecX src) %{
13956   match(Set dst (AbsVD src));
13957   predicate(n->as_Vector()->length() == 2);
13958   format %{ "XVABSDP $dst,$src\t// absolute packed2D" %}
13959   size(4);
13960   ins_encode %{
13961     __ xvabsdp($dst$$VectorSRegister, $src$$VectorSRegister);
13962   %}
13963   ins_pipe(pipe_class_default);
13964 %}
13965 
13966 // Round Instructions
13967 instruct roundD_reg(regD dst, regD src, immI8 rmode) %{
13968   match(Set dst (RoundDoubleMode src rmode));
13969   format %{ "RoundDoubleMode $src,$rmode" %}
13970   size(4);
13971   ins_encode %{
13972     switch ($rmode$$constant) {
13973       case RoundDoubleModeNode::rmode_rint:
13974         __ xvrdpic($dst$$FloatRegister->to_vsr(), $src$$FloatRegister->to_vsr());
13975         break;
13976       case RoundDoubleModeNode::rmode_floor:
13977         __ frim($dst$$FloatRegister, $src$$FloatRegister);
13978         break;
13979       case RoundDoubleModeNode::rmode_ceil:
13980         __ frip($dst$$FloatRegister, $src$$FloatRegister);
13981         break;
13982       default:
13983         ShouldNotReachHere();
13984     }
13985   %}
13986   ins_pipe(pipe_class_default);
13987 %}
13988 
13989 // Vector Round Instructions
13990 instruct vround2D_reg(vecX dst, vecX src, immI8 rmode) %{
13991   match(Set dst (RoundDoubleModeV src rmode));
13992   predicate(n->as_Vector()->length() == 2);
13993   format %{ "RoundDoubleModeV $src,$rmode" %}
13994   size(4);
13995   ins_encode %{
13996     switch ($rmode$$constant) {
13997       case RoundDoubleModeNode::rmode_rint:
13998         __ xvrdpic($dst$$VectorSRegister, $src$$VectorSRegister);
13999         break;
14000       case RoundDoubleModeNode::rmode_floor:
14001         __ xvrdpim($dst$$VectorSRegister, $src$$VectorSRegister);
14002         break;
14003       case RoundDoubleModeNode::rmode_ceil:
14004         __ xvrdpip($dst$$VectorSRegister, $src$$VectorSRegister);
14005         break;
14006       default:
14007         ShouldNotReachHere();
14008     }
14009   %}
14010   ins_pipe(pipe_class_default);
14011 %}
14012 
14013 // Vector Negate Instructions
14014 
14015 instruct vneg4F_reg(vecX dst, vecX src) %{
14016   match(Set dst (NegVF src));
14017   predicate(n->as_Vector()->length() == 4);
14018   format %{ "XVNEGSP $dst,$src\t// negate packed4F" %}
14019   size(4);
14020   ins_encode %{
14021     __ xvnegsp($dst$$VectorSRegister, $src$$VectorSRegister);
14022   %}
14023   ins_pipe(pipe_class_default);
14024 %}
14025 
14026 instruct vneg2D_reg(vecX dst, vecX src) %{
14027   match(Set dst (NegVD src));
14028   predicate(n->as_Vector()->length() == 2);
14029   format %{ "XVNEGDP $dst,$src\t// negate packed2D" %}
14030   size(4);
14031   ins_encode %{
14032     __ xvnegdp($dst$$VectorSRegister, $src$$VectorSRegister);
14033   %}
14034   ins_pipe(pipe_class_default);
14035 %}
14036 
14037 // Vector Square Root Instructions
14038 
14039 instruct vsqrt4F_reg(vecX dst, vecX src) %{
14040   match(Set dst (SqrtVF src));
14041   predicate(n->as_Vector()->length() == 4);
14042   format %{ "XVSQRTSP $dst,$src\t// sqrt packed4F" %}
14043   size(4);
14044   ins_encode %{
14045     __ xvsqrtsp($dst$$VectorSRegister, $src$$VectorSRegister);
14046   %}
14047   ins_pipe(pipe_class_default);
14048 %}
14049 
14050 instruct vsqrt2D_reg(vecX dst, vecX src) %{
14051   match(Set dst (SqrtVD src));
14052   predicate(n->as_Vector()->length() == 2);
14053   format %{ "XVSQRTDP  $dst,$src\t// sqrt packed2D" %}
14054   size(4);
14055   ins_encode %{
14056     __ xvsqrtdp($dst$$VectorSRegister, $src$$VectorSRegister);
14057   %}
14058   ins_pipe(pipe_class_default);
14059 %}
14060 
14061 // Vector Population Count Instructions
14062 
14063 instruct vpopcnt4I_reg(vecX dst, vecX src) %{
14064   match(Set dst (PopCountVI src));
14065   predicate(n->as_Vector()->length() == 4);
14066   format %{ "VPOPCNTW $dst,$src\t// pop count packed4I" %}
14067   size(4);
14068   ins_encode %{
14069     __ vpopcntw($dst$$VectorSRegister->to_vr(), $src$$VectorSRegister->to_vr());
14070   %}
14071   ins_pipe(pipe_class_default);
14072 %}
14073 
14074 // --------------------------------- FMA --------------------------------------
14075 // dst + src1 * src2
14076 instruct vfma4F(vecX dst, vecX src1, vecX src2) %{
14077   match(Set dst (FmaVF dst (Binary src1 src2)));
14078   predicate(n->as_Vector()->length() == 4);
14079 
14080   format %{ "XVMADDASP   $dst, $src1, $src2" %}
14081 
14082   size(4);
14083   ins_encode %{
14084     __ xvmaddasp($dst$$VectorSRegister, $src1$$VectorSRegister, $src2$$VectorSRegister);
14085   %}
14086   ins_pipe(pipe_class_default);
14087 %}
14088 
14089 // dst - src1 * src2
14090 instruct vfma4F_neg1(vecX dst, vecX src1, vecX src2) %{
14091   match(Set dst (FmaVF dst (Binary (NegVF src1) src2)));
14092   match(Set dst (FmaVF dst (Binary src1 (NegVF src2))));
14093   predicate(n->as_Vector()->length() == 4);
14094 
14095   format %{ "XVNMSUBASP   $dst, $src1, $src2" %}
14096 
14097   size(4);
14098   ins_encode %{
14099     __ xvnmsubasp($dst$$VectorSRegister, $src1$$VectorSRegister, $src2$$VectorSRegister);
14100   %}
14101   ins_pipe(pipe_class_default);
14102 %}
14103 
14104 // - dst + src1 * src2
14105 instruct vfma4F_neg2(vecX dst, vecX src1, vecX src2) %{
14106   match(Set dst (FmaVF (NegVF dst) (Binary src1 src2)));
14107   predicate(n->as_Vector()->length() == 4);
14108 
14109   format %{ "XVMSUBASP   $dst, $src1, $src2" %}
14110 
14111   size(4);
14112   ins_encode %{
14113     __ xvmsubasp($dst$$VectorSRegister, $src1$$VectorSRegister, $src2$$VectorSRegister);
14114   %}
14115   ins_pipe(pipe_class_default);
14116 %}
14117 
14118 // dst + src1 * src2
14119 instruct vfma2D(vecX dst, vecX src1, vecX src2) %{
14120   match(Set dst (FmaVD  dst (Binary src1 src2)));
14121   predicate(n->as_Vector()->length() == 2);
14122 
14123   format %{ "XVMADDADP   $dst, $src1, $src2" %}
14124 
14125   size(4);
14126   ins_encode %{
14127     __ xvmaddadp($dst$$VectorSRegister, $src1$$VectorSRegister, $src2$$VectorSRegister);
14128   %}
14129   ins_pipe(pipe_class_default);
14130 %}
14131 
14132 // dst - src1 * src2
14133 instruct vfma2D_neg1(vecX dst, vecX src1, vecX src2) %{
14134   match(Set dst (FmaVD  dst (Binary (NegVD src1) src2)));
14135   match(Set dst (FmaVD  dst (Binary src1 (NegVD src2))));
14136   predicate(n->as_Vector()->length() == 2);
14137 
14138   format %{ "XVNMSUBADP   $dst, $src1, $src2" %}
14139 
14140   size(4);
14141   ins_encode %{
14142     __ xvnmsubadp($dst$$VectorSRegister, $src1$$VectorSRegister, $src2$$VectorSRegister);
14143   %}
14144   ins_pipe(pipe_class_default);
14145 %}
14146 
14147 // - dst + src1 * src2
14148 instruct vfma2D_neg2(vecX dst, vecX src1, vecX src2) %{
14149   match(Set dst (FmaVD (NegVD dst) (Binary src1 src2)));
14150   predicate(n->as_Vector()->length() == 2);
14151 
14152   format %{ "XVMSUBADP   $dst, $src1, $src2" %}
14153 
14154   size(4);
14155   ins_encode %{
14156     __ xvmsubadp($dst$$VectorSRegister, $src1$$VectorSRegister, $src2$$VectorSRegister);
14157   %}
14158   ins_pipe(pipe_class_default);
14159 %}
14160 
14161 //----------Overflow Math Instructions-----------------------------------------
14162 
14163 // Note that we have to make sure that XER.SO is reset before using overflow instructions.
14164 // Simple Overflow operations can be matched by very few instructions (e.g. addExact: xor, and_, bc).
14165 // Seems like only the Long intrinsics have an advantage. (The only expensive one is OverflowMulL.)
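      // The record forms used below (addo_, subfo_, nego_, mulldo_) copy XER.SO
      // into CR0, which is why each encoding first clears XER.SO via li/mtxer.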
14166 
14167 instruct overflowAddL_reg_reg(flagsRegCR0 cr0, iRegLsrc op1, iRegLsrc op2) %{
14168   match(Set cr0 (OverflowAddL op1 op2));
14169 
14170   format %{ "add_    $op1, $op2\t# overflow check long" %}
14171   ins_encode %{
14172     __ li(R0, 0);
14173     __ mtxer(R0); // clear XER.SO
14174     __ addo_(R0, $op1$$Register, $op2$$Register);
14175   %}
14176   ins_pipe(pipe_class_default);
14177 %}
14178 
14179 instruct overflowSubL_reg_reg(flagsRegCR0 cr0, iRegLsrc op1, iRegLsrc op2) %{
14180   match(Set cr0 (OverflowSubL op1 op2));
14181 
14182   format %{ "subfo_  R0, $op2, $op1\t# overflow check long" %}
14183   ins_encode %{
14184     __ li(R0, 0);
14185     __ mtxer(R0); // clear XER.SO
14186     __ subfo_(R0, $op2$$Register, $op1$$Register);
14187   %}
14188   ins_pipe(pipe_class_default);
14189 %}
14190 
14191 instruct overflowNegL_reg(flagsRegCR0 cr0, immL_0 zero, iRegLsrc op2) %{
14192   match(Set cr0 (OverflowSubL zero op2));
14193 
14194   format %{ "nego_   R0, $op2\t# overflow check long" %}
14195   ins_encode %{
14196     __ li(R0, 0);
14197     __ mtxer(R0); // clear XER.SO
14198     __ nego_(R0, $op2$$Register);
14199   %}
14200   ins_pipe(pipe_class_default);
14201 %}
14202 
14203 instruct overflowMulL_reg_reg(flagsRegCR0 cr0, iRegLsrc op1, iRegLsrc op2) %{
14204   match(Set cr0 (OverflowMulL op1 op2));
14205 
14206   format %{ "mulldo_ R0, $op1, $op2\t# overflow check long" %}
14207   ins_encode %{
14208     __ li(R0, 0);
14209     __ mtxer(R0); // clear XER.SO
14210     __ mulldo_(R0, $op1$$Register, $op2$$Register);
14211   %}
14212   ins_pipe(pipe_class_default);
14213 %}
14214 
14215 instruct repl4F_reg_Ex(vecX dst, regF src) %{
14216   match(Set dst (ReplicateF src));
14217   predicate(n->as_Vector()->length() == 4);
14218   ins_cost(DEFAULT_COST);
14219   expand %{
14220     vecX tmpV;
14221     immI8  zero %{ (int)  0 %}
14222 
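          // xscvdpspn leaves the converted single-precision value in word
          // element 0 of tmpV; xxspltw then splats that element across dst.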
14223     xscvdpspn_regF(tmpV, src);
14224     xxspltw(dst, tmpV, zero);
14225   %}
14226 %}
14227 
14228 instruct repl4F_immF_Ex(vecX dst, immF src, iRegLdst tmp) %{
14229   match(Set dst (ReplicateF src));
14230   predicate(n->as_Vector()->length() == 4);
14231   effect(TEMP tmp);
14232   ins_cost(10 * DEFAULT_COST);
14233 
14234   postalloc_expand( postalloc_expand_load_replF_constant_vsx(dst, src, constanttablebase, tmp) );
14235 %}
14236 
14237 instruct repl4F_immF0(vecX dst, immF_0 zero) %{
14238   match(Set dst (ReplicateF zero));
14239   predicate(n->as_Vector()->length() == 4);
14240 
14241   format %{ "XXLXOR      $dst, $zero \t// replicate4F" %}
14242   ins_encode %{
14243     __ xxlxor($dst$$VectorSRegister, $dst$$VectorSRegister, $dst$$VectorSRegister);
14244   %}
14245   ins_pipe(pipe_class_default);
14246 %}
14247 
14248 instruct repl2D_reg_Ex(vecX dst, regD src) %{
14249   match(Set dst (ReplicateD src));
14250   predicate(n->as_Vector()->length() == 2);
14251 
14252   format %{ "XXPERMDI      $dst, $src, $src, 0 \t// Splat doubleword" %}
14253   size(4);
14254   ins_encode %{
14255     __ xxpermdi($dst$$VectorSRegister, $src$$FloatRegister->to_vsr(), $src$$FloatRegister->to_vsr(), 0);
14256   %}
14257   ins_pipe(pipe_class_default);
14258 %}
14259 
14260 instruct repl2D_immD0(vecX dst, immD_0 zero) %{
14261   match(Set dst (ReplicateD zero));
14262   predicate(n->as_Vector()->length() == 2);
14263 
14264   format %{ "XXLXOR      $dst, $zero \t// replicate2D" %}
14265   size(4);
14266   ins_encode %{
14267     __ xxlxor($dst$$VectorSRegister, $dst$$VectorSRegister, $dst$$VectorSRegister);
14268   %}
14269   ins_pipe(pipe_class_default);
14270 %}
14271 
14272 instruct mtvsrd(vecX dst, iRegLsrc src) %{
14273   predicate(false);
14274   effect(DEF dst, USE src);
14275 
14276   format %{ "MTVSRD      $dst, $src \t// Move to 16-byte register" %}
14277   size(4);
14278   ins_encode %{
14279     __ mtvsrd($dst$$VectorSRegister, $src$$Register);
14280   %}
14281   ins_pipe(pipe_class_default);
14282 %}
14283 
14284 instruct xxspltd(vecX dst, vecX src, immI8 zero) %{
14285   effect(DEF dst, USE src, USE zero);
14286 
14287   format %{ "XXSPLTD       $dst, $src, $zero \t// Splat doubleword" %}
14288   size(4);
14289   ins_encode %{
14290     __ xxpermdi($dst$$VectorSRegister, $src$$VectorSRegister, $src$$VectorSRegister, $zero$$constant);
14291   %}
14292   ins_pipe(pipe_class_default);
14293 %}
14294 
14295 instruct xxpermdi(vecX dst, vecX src1, vecX src2, immI8 zero) %{
14296   effect(DEF dst, USE src1, USE src2, USE zero);
14297 
14298   format %{ "XXPERMDI      $dst, $src1, $src2, $zero \t// Splat doubleword" %}
14299   size(4);
14300   ins_encode %{
14301     __ xxpermdi($dst$$VectorSRegister, $src1$$VectorSRegister, $src2$$VectorSRegister, $zero$$constant);
14302   %}
14303   ins_pipe(pipe_class_default);
14304 %}
14305 
14306 instruct repl2L_reg_Ex(vecX dst, iRegLsrc src) %{
14307   match(Set dst (ReplicateL src));
14308   predicate(n->as_Vector()->length() == 2);
14309   expand %{
14310     vecX tmpV;
14311     immI8  zero %{ (int)  0 %}
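          // mtvsrd puts src into doubleword 0 of tmpV; xxpermdi with selector 0
          // then copies doubleword 0 into both halves of dst.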
14312     mtvsrd(tmpV, src);
14313     xxpermdi(dst, tmpV, tmpV, zero);
14314   %}
14315 %}
14316 
14317 instruct repl2L_immI0(vecX dst, immI_0 zero) %{
14318   match(Set dst (ReplicateL zero));
14319   predicate(n->as_Vector()->length() == 2);
14320 
14321   format %{ "XXLXOR      $dst, $zero \t// replicate2L" %}
14322   size(4);
14323   ins_encode %{
14324     __ xxlxor($dst$$VectorSRegister, $dst$$VectorSRegister, $dst$$VectorSRegister);
14325   %}
14326   ins_pipe(pipe_class_default);
14327 %}
14328 
14329 instruct repl2L_immIminus1(vecX dst, immI_minus1 src) %{
14330   match(Set dst (ReplicateL src));
14331   predicate(n->as_Vector()->length() == 2);
14332 
14333   format %{ "XXLEQV      $dst, $src \t// replicate2L" %}
14334   size(4);
14335   ins_encode %{
14336     __ xxleqv($dst$$VectorSRegister, $dst$$VectorSRegister, $dst$$VectorSRegister);
14337   %}
14338   ins_pipe(pipe_class_default);
14339 %}
14340 
14341 // ============================================================================
14342 // Safepoint Instruction
14343 
14344 instruct safePoint_poll(iRegPdst poll) %{
14345   match(SafePoint poll);
14346 
14347   // Adding the effect that R0 is killed caused problems; however, the effect
14348   // no longer needs to be mentioned, since R0 is not contained in
14349   // a reg_class.
14350 
14351   format %{ "LD      R0, #0, $poll \t// Safepoint poll for GC" %}
14352   size(4);
14353   ins_encode( enc_poll(0x0, poll) );
14354   ins_pipe(pipe_class_default);
14355 %}
14356 
14357 // ============================================================================
14358 // Call Instructions
14359 
14360 // Call Java Static Instruction
14361 
14362 // Schedulable version of call static node.
14363 instruct CallStaticJavaDirect(method meth) %{
14364   match(CallStaticJava);
14365   effect(USE meth);
14366   ins_cost(CALL_COST);
14367 
14368   ins_num_consts(3 /* up to 3 patchable constants: inline cache, 2 call targets. */);
14369 
14370   format %{ "CALL,static $meth \t// ==> " %}
14371   size(4);
14372   ins_encode( enc_java_static_call(meth) );
14373   ins_pipe(pipe_class_call);
14374 %}
14375 
14376 // Call Java Dynamic Instruction
14377 
14378 // Used by postalloc expand of CallDynamicJavaDirectSchedEx (actual call).
14379 // Loading of IC was postalloc expanded. The nodes loading the IC are reachable
14380 // via fields ins_field_load_ic_hi_node and ins_field_load_ic_node.
14381 // The call destination must still be placed in the constant pool.
14382 instruct CallDynamicJavaDirectSched(method meth) %{
14383   match(CallDynamicJava); // To get all the data fields we need ...
14384   effect(USE meth);
14385   predicate(false);       // ... but never match.
14386 
14387   ins_field_load_ic_hi_node(loadConL_hiNode*);
14388   ins_field_load_ic_node(loadConLNode*);
14389   ins_num_consts(1 /* 1 patchable constant: call destination */);
14390 
14391   format %{ "BL        \t// dynamic $meth ==> " %}
14392   size(4);
14393   ins_encode( enc_java_dynamic_call_sched(meth) );
14394   ins_pipe(pipe_class_call);
14395 %}
14396 
14397 // Schedulable (i.e. postalloc expanded) version of call dynamic java.
14398 // We use postalloc expanded calls if we use inline caches
14399 // and do not update method data.
14400 //
14401 // This instruction has two constants: inline cache (IC) and call destination.
14402 // Loading the inline cache will be postalloc expanded, thus leaving a call with
14403 // one constant.
14404 instruct CallDynamicJavaDirectSched_Ex(method meth) %{
14405   match(CallDynamicJava);
14406   effect(USE meth);
14407   predicate(UseInlineCaches);
14408   ins_cost(CALL_COST);
14409 
14410   ins_num_consts(2 /* 2 patchable constants: inline cache, call destination. */);
14411 
14412   format %{ "CALL,dynamic $meth \t// postalloc expanded" %}
14413   postalloc_expand( postalloc_expand_java_dynamic_call_sched(meth, constanttablebase) );
14414 %}
14415 
14416 // Compound version of call dynamic java
14417 // We use postalloc expanded calls if we use inline caches
14418 // and do not update method data.
14419 instruct CallDynamicJavaDirect(method meth) %{
14420   match(CallDynamicJava);
14421   effect(USE meth);
14422   predicate(!UseInlineCaches);
14423   ins_cost(CALL_COST);
14424 
14425   // Enc_java_to_runtime_call needs up to 4 constants (method data oop).
14426   ins_num_consts(4);
14427 
14428   format %{ "CALL,dynamic $meth \t// ==> " %}
14429   ins_encode( enc_java_dynamic_call(meth, constanttablebase) );
14430   ins_pipe(pipe_class_call);
14431 %}
14432 
14433 // Call Runtime Instruction
14434 
14435 instruct CallRuntimeDirect(method meth) %{
14436   match(CallRuntime);
14437   effect(USE meth);
14438   ins_cost(CALL_COST);
14439 
14440   // Enc_java_to_runtime_call needs up to 3 constants: call target,
14441   // env for callee, C-toc.
14442   ins_num_consts(3);
14443 
14444   format %{ "CALL,runtime" %}
14445   ins_encode( enc_java_to_runtime_call(meth) );
14446   ins_pipe(pipe_class_call);
14447 %}
14448 
14449 // Call Leaf
14450 
14451 // Used by postalloc expand of CallLeafDirect_Ex (mtctr).
14452 instruct CallLeafDirect_mtctr(iRegLdst dst, iRegLsrc src) %{
14453   effect(DEF dst, USE src);
14454 
14455   ins_num_consts(1);
14456 
14457   format %{ "MTCTR   $src" %}
14458   size(4);
14459   ins_encode( enc_leaf_call_mtctr(src) );
14460   ins_pipe(pipe_class_default);
14461 %}
14462 
14463 // Used by postalloc expand of CallLeafDirect_Ex (actual call).
14464 instruct CallLeafDirect(method meth) %{
14465   match(CallLeaf);   // To get all the data fields we need ...
14466   effect(USE meth);
14467   predicate(false);  // ... but never match.
14468 
14469   format %{ "BCTRL     \t// leaf call $meth ==> " %}
14470   size(4);
14471   ins_encode %{
14472     __ bctrl();
14473   %}
14474   ins_pipe(pipe_class_call);
14475 %}
14476 
14477 // postalloc expand of CallLeafDirect.
14478 // Load address to call from TOC, then bl to it.
14479 instruct CallLeafDirect_Ex(method meth) %{
14480   match(CallLeaf);
14481   effect(USE meth);
14482   ins_cost(CALL_COST);
14483 
14484   // Postalloc_expand_java_to_runtime_call needs up to 3 constants: call target,
14485   // env for callee, C-toc.
14486   ins_num_consts(3);
14487 
14488   format %{ "CALL,runtime leaf $meth \t// postalloc expanded" %}
14489   postalloc_expand( postalloc_expand_java_to_runtime_call(meth, constanttablebase) );
14490 %}
14491 
14492 // Call runtime without safepoint - same as CallLeaf.
14493 // postalloc expand of CallLeafNoFPDirect.
14494 // Load address to call from TOC, then bl to it.
14495 instruct CallLeafNoFPDirect_Ex(method meth) %{
14496   match(CallLeafNoFP);
14497   effect(USE meth);
14498   ins_cost(CALL_COST);
14499 
14500   // Enc_java_to_runtime_call needs up to 3 constants: call target,
14501   // env for callee, C-toc.
14502   ins_num_consts(3);
14503 
14504   format %{ "CALL,runtime leaf nofp $meth \t// postalloc expanded" %}
14505   postalloc_expand( postalloc_expand_java_to_runtime_call(meth, constanttablebase) );
14506 %}
14507 
14508 // Tail Call; Jump from runtime stub to Java code.
14509 // Also known as an 'interprocedural jump'.
14510 // Target of jump will eventually return to caller.
14511 // TailJump below removes the return address.
14512 instruct TailCalljmpInd(iRegPdstNoScratch jump_target, inline_cache_regP method_ptr) %{
14513   match(TailCall jump_target method_ptr);
14514   ins_cost(CALL_COST);
14515 
14516   format %{ "MTCTR   $jump_target \t// $method_ptr holds method\n\t"
14517             "BCTR         \t// tail call" %}
14518   size(8);
14519   ins_encode %{
14520     __ mtctr($jump_target$$Register);
14521     __ bctr();
14522   %}
14523   ins_pipe(pipe_class_call);
14524 %}
14525 
14526 // Return Instruction
14527 instruct Ret() %{
14528   match(Return);
14529   format %{ "BLR      \t// branch to link register" %}
14530   size(4);
14531   ins_encode %{
14532     // LR is restored in MachEpilogNode. Just do the RET here.
14533     __ blr();
14534   %}
14535   ins_pipe(pipe_class_default);
14536 %}
14537 
14538 // Tail Jump; remove the return address; jump to target.
14539 // TailCall above leaves the return address around.
14540 // TailJump is used in only one place, the rethrow_Java stub (fancy_jump=2).
14541 // ex_oop (Exception Oop) is needed in the first argument register (rarg1RegP)
14542 // at the jump; the issuing pc is reloaded from the LR save slot of the ABI
14543 // area into R4_ARG2 (see the encoding below).
14544 instruct tailjmpInd(iRegPdstNoScratch jump_target, rarg1RegP ex_oop) %{
14545   match(TailJump jump_target ex_oop);
14546   ins_cost(CALL_COST);
14547 
14548   format %{ "LD      R4_ARG2 = LR\n\t"
14549             "MTCTR   $jump_target\n\t"
14550             "BCTR     \t// TailJump, exception oop: $ex_oop" %}
14551   size(12);
14552   ins_encode %{
14553     __ ld(R4_ARG2/* issuing pc */, _abi0(lr), R1_SP);
14554     __ mtctr($jump_target$$Register);
14555     __ bctr();
14556   %}
14557   ins_pipe(pipe_class_call);
14558 %}
14559 
14560 // Create exception oop: created by stack-crawling runtime code.
14561 // Created exception is now available to this handler, and is set up
14562 // just prior to jumping to this handler. No code emitted.
instruct CreateException(rarg1RegP ex_oop) %{
  match(Set ex_oop (CreateEx));
  ins_cost(0);

  format %{ " -- \t// exception oop; no code emitted" %}
  size(0);
  ins_encode( /*empty*/ );
  ins_pipe(pipe_class_default);
%}

// Rethrow exception: The exception oop will come in the first
// argument position. Then JUMP (not call) to the rethrow stub code.
instruct RethrowException() %{
  match(Rethrow);
  ins_cost(CALL_COST);

  format %{ "Jmp     rethrow_stub" %}
  ins_encode %{
    cbuf.set_insts_mark();
    __ b64_patchable((address)OptoRuntime::rethrow_stub(), relocInfo::runtime_call_type);
  %}
  ins_pipe(pipe_class_call);
%}

// Die now.
instruct ShouldNotReachHere() %{
  match(Halt);
  ins_cost(CALL_COST);

  format %{ "ShouldNotReachHere" %}
  ins_encode %{
    if (is_reachable()) {
      __ stop(_halt_reason);
    }
  %}
  ins_pipe(pipe_class_default);
%}

// This name is KNOWN by the ADLC and cannot be changed.  The ADLC
// forces a 'TypeRawPtr::BOTTOM' output type for this guy.
// Get a DEF on threadRegP, no costs, no encoding, use
// 'ins_should_rematerialize(true)' to avoid spilling.
instruct tlsLoadP(threadRegP dst) %{
  match(Set dst (ThreadLocal));
  ins_cost(0);

  ins_should_rematerialize(true);

  format %{ " -- \t// $dst=Thread::current(), empty" %}
  size(0);
  ins_encode( /*empty*/ );
  ins_pipe(pipe_class_empty);
%}

//---Some PPC specific nodes---------------------------------------------------

// Stop a group.
instruct endGroup() %{
  ins_cost(0);

  ins_is_nop(true);

  format %{ "End Bundle (ori r1, r1, 0)" %}
  size(4);
  ins_encode %{
    __ endgroup();
  %}
  ins_pipe(pipe_class_default);
%}

// Nop instructions

instruct fxNop() %{
  ins_cost(0);

  ins_is_nop(true);

  format %{ "fxNop" %}
  size(4);
  ins_encode %{
    __ nop();
  %}
  ins_pipe(pipe_class_default);
%}

instruct fpNop0() %{
  ins_cost(0);

  ins_is_nop(true);

  format %{ "fpNop0" %}
  size(4);
  ins_encode %{
    __ fpnop0();
  %}
  ins_pipe(pipe_class_default);
%}

instruct fpNop1() %{
  ins_cost(0);

  ins_is_nop(true);

  format %{ "fpNop1" %}
  size(4);
  ins_encode %{
    __ fpnop1();
  %}
  ins_pipe(pipe_class_default);
%}

instruct brNop0() %{
  ins_cost(0);
  size(4);
  format %{ "brNop0" %}
  ins_encode %{
    __ brnop0();
  %}
  ins_is_nop(true);
  ins_pipe(pipe_class_default);
%}

instruct brNop1() %{
  ins_cost(0);

  ins_is_nop(true);

  format %{ "brNop1" %}
  size(4);
  ins_encode %{
    __ brnop1();
  %}
  ins_pipe(pipe_class_default);
%}

instruct brNop2() %{
  ins_cost(0);

  ins_is_nop(true);

  format %{ "brNop2" %}
  size(4);
  ins_encode %{
    __ brnop2();
  %}
  ins_pipe(pipe_class_default);
%}

instruct cacheWB(indirect addr)
%{
  match(CacheWB addr);

  ins_cost(100);
  format %{ "cache writeback, address = $addr" %}
  ins_encode %{
    assert($addr->index_position() < 0, "should be"); // no index register expected
    assert($addr$$disp == 0, "should be");             // no displacement expected
    __ cache_wb(Address($addr$$base$$Register));
  %}
  ins_pipe(pipe_class_default);
%}

instruct cacheWBPreSync()
%{
  match(CacheWBPreSync);

  ins_cost(0);
  format %{ "cache writeback presync" %}
  ins_encode %{
    __ cache_wbsync(true);
  %}
  ins_pipe(pipe_class_default);
%}

instruct cacheWBPostSync()
%{
  match(CacheWBPostSync);

  ins_cost(100);
  format %{ "cache writeback postsync" %}
  ins_encode %{
    __ cache_wbsync(false);
  %}
  ins_pipe(pipe_class_default);
%}

//----------PEEPHOLE RULES-----------------------------------------------------
// These must follow all instruction definitions as they use the names
// defined in the instruction definitions.
//
// peepmatch ( root_instr_name [preceding_instruction]* );
//
// peepconstraint %{
// (instruction_number.operand_name relational_op instruction_number.operand_name
//  [, ...] );
// // instruction numbers are zero-based using left to right order in peepmatch
//
// peepreplace ( instr_name ( [instruction_number.operand_name]* ) );
// // provide an instruction_number.operand_name for each operand that appears
// // in the replacement instruction's match rule
//
// ---------VM FLAGS-----------------------------------------------------------
//
// All peephole optimizations can be turned off using -XX:-OptoPeephole
//
// Each peephole rule is given an identifying number starting with zero and
// increasing by one in the order seen by the parser. An individual peephole
// can be enabled, and all others disabled, by using -XX:OptoPeepholeAt=#
// on the command-line.
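//
// A minimal usage sketch of the flags above (illustrative only; "HelloWorld"
// is a placeholder class name, and these flags may only be available in
// debug builds of the VM):
//
//   java -XX:-OptoPeephole HelloWorld       // disable all peephole rules
//   java -XX:OptoPeepholeAt=0 HelloWorld    // enable only peephole rule #0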
//
// ---------CURRENT LIMITATIONS------------------------------------------------
//
// Only match adjacent instructions in same basic block
// Only equality constraints
// Only constraints between operands, not (0.dest_reg == EAX_enc)
// Only one replacement instruction
//
// ---------EXAMPLE-------------------------------------------------------------
//
// // pertinent parts of existing instructions in architecture description
// instruct movI(eRegI dst, eRegI src) %{
//   match(Set dst (CopyI src));
// %}
//
// instruct incI_eReg(eRegI dst, immI1 src, eFlagsReg cr) %{
//   match(Set dst (AddI dst src));
//   effect(KILL cr);
// %}
//
// // Change (inc mov) to lea
// peephole %{
//   // increment preceded by register-register move
//   peepmatch ( incI_eReg movI );
//   // require that the destination register of the increment
//   // match the destination register of the move
//   peepconstraint ( 0.dst == 1.dst );
//   // construct a replacement instruction that sets
//   // the destination to ( move's source register + one )
//   peepreplace ( leaI_eReg_immI( 0.dst 1.src 0.src ) );
// %}
//
// Implementation no longer uses movX instructions since
// machine-independent system no longer uses CopyX nodes.
//
// peephole %{
//   peepmatch ( incI_eReg movI );
//   peepconstraint ( 0.dst == 1.dst );
//   peepreplace ( leaI_eReg_immI( 0.dst 1.src 0.src ) );
// %}
//
// peephole %{
//   peepmatch ( decI_eReg movI );
//   peepconstraint ( 0.dst == 1.dst );
//   peepreplace ( leaI_eReg_immI( 0.dst 1.src 0.src ) );
// %}
//
// peephole %{
//   peepmatch ( addI_eReg_imm movI );
//   peepconstraint ( 0.dst == 1.dst );
//   peepreplace ( leaI_eReg_immI( 0.dst 1.src 0.src ) );
// %}
//
// peephole %{
//   peepmatch ( addP_eReg_imm movP );
//   peepconstraint ( 0.dst == 1.dst );
//   peepreplace ( leaP_eReg_immI( 0.dst 1.src 0.src ) );
// %}

// // Change load of spilled value to only a spill
// instruct storeI(memory mem, eRegI src) %{
//   match(Set mem (StoreI mem src));
// %}
//
// instruct loadI(eRegI dst, memory mem) %{
//   match(Set dst (LoadI mem));
// %}
//
peephole %{
  peepmatch ( loadI storeI );
  peepconstraint ( 1.src == 0.dst, 1.mem == 0.mem );
  peepreplace ( storeI( 1.mem 1.mem 1.src ) );
%}

peephole %{
  peepmatch ( loadL storeL );
  peepconstraint ( 1.src == 0.dst, 1.mem == 0.mem );
  peepreplace ( storeL( 1.mem 1.mem 1.src ) );
%}

peephole %{
  peepmatch ( loadP storeP );
  peepconstraint ( 1.src == 0.dst, 1.mem == 0.mem );
  peepreplace ( storeP( 1.mem 1.mem 1.src ) );
%}

//----------SMARTSPILL RULES---------------------------------------------------
// These must follow all instruction definitions as they use the names
// defined in the instruction definitions.