1 //
    2 // Copyright (c) 2011, 2021, Oracle and/or its affiliates. All rights reserved.
    3 // Copyright (c) 2012, 2021 SAP SE. All rights reserved.
    4 // DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
    5 //
    6 // This code is free software; you can redistribute it and/or modify it
    7 // under the terms of the GNU General Public License version 2 only, as
    8 // published by the Free Software Foundation.
    9 //
   10 // This code is distributed in the hope that it will be useful, but WITHOUT
   11 // ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
   12 // FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
   13 // version 2 for more details (a copy is included in the LICENSE file that
   14 // accompanied this code).
   15 //
   16 // You should have received a copy of the GNU General Public License version
   17 // 2 along with this work; if not, write to the Free Software Foundation,
   18 // Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
   19 //
   20 // Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
   21 // or visit www.oracle.com if you need additional information or have any
   22 // questions.
   23 //
   24 //
   25 
   26 //
   27 // PPC64 Architecture Description File
   28 //
   29 
   30 //----------REGISTER DEFINITION BLOCK------------------------------------------
   31 // This information is used by the matcher and the register allocator to
   32 // describe individual registers and classes of registers within the target
   33 // architecture.
   34 register %{
   35 //----------Architecture Description Register Definitions----------------------
   36 // General Registers
   37 // "reg_def"  name (register save type, C convention save type,
   38 //                  ideal register type, encoding);
   39 //
   40 // Register Save Types:
   41 //
   42 //   NS  = No-Save:     The register allocator assumes that these registers
   43 //                      can be used without saving upon entry to the method, &
   44 //                      that they do not need to be saved at call sites.
   45 //
   46 //   SOC = Save-On-Call: The register allocator assumes that these registers
   47 //                      can be used without saving upon entry to the method,
   48 //                      but that they must be saved at call sites.
   49 //                      These are called "volatiles" on ppc.
   50 //
   51 //   SOE = Save-On-Entry: The register allocator assumes that these registers
   52 //                      must be saved before using them upon entry to the
   53 //                      method, but they do not need to be saved at call
   54 //                      sites.
   55 //                      These are called "nonvolatiles" on ppc.
   56 //
   57 //   AS  = Always-Save:   The register allocator assumes that these registers
   58 //                      must be saved before using them upon entry to the
   59 //                      method, & that they must be saved at call sites.
   60 //
   61 // Ideal Register Type is used to determine how to save & restore a
   62 // register. Op_RegI will get spilled with LoadI/StoreI, Op_RegP will get
   63 // spilled with LoadP/StoreP. If the register supports both, use Op_RegI.
   64 //
   65 // The encoding number is the actual bit-pattern placed into the opcodes.
   66 //
   67 // PPC64 register definitions, based on the 64-bit PowerPC ELF ABI
   68 // Supplement Version 1.7 as of 2003-10-29.
   69 //
   70 // For each 64-bit register we must define two registers: the register
// itself, e.g. R3, and a corresponding virtual (32-bit) 'high half',
// e.g. R3_H, which is needed by the allocator but is not used
// for loads, stores, etc.
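//
// For example, R3 below is declared (SOC, SOC): a volatile that is neither
// preserved across calls nor on method entry. It spills as Op_RegI and its
// encoding 3 is the bit pattern placed into opcodes; the companion R3_H
// covers the other 32-bit half for the allocator (its encoding 99 is just
// a placeholder, since the half is never used in loads or stores).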
   74 
   75 // ----------------------------
   76 // Integer/Long Registers
   77 // ----------------------------
   78 
   79   // PPC64 has 32 64-bit integer registers.
   80 
   81   // types: v = volatile, nv = non-volatile, s = system
   82   reg_def R0   ( SOC, SOC, Op_RegI,  0, R0->as_VMReg()         );  // v   used in prologs
   83   reg_def R0_H ( SOC, SOC, Op_RegI, 99, R0->as_VMReg()->next() );
   84   reg_def R1   ( NS,  NS,  Op_RegI,  1, R1->as_VMReg()         );  // s   SP
   85   reg_def R1_H ( NS,  NS,  Op_RegI, 99, R1->as_VMReg()->next() );
   86   reg_def R2   ( SOC, SOC, Op_RegI,  2, R2->as_VMReg()         );  // v   TOC
   87   reg_def R2_H ( SOC, SOC, Op_RegI, 99, R2->as_VMReg()->next() );
   88   reg_def R3   ( SOC, SOC, Op_RegI,  3, R3->as_VMReg()         );  // v   iarg1 & iret
   89   reg_def R3_H ( SOC, SOC, Op_RegI, 99, R3->as_VMReg()->next() );
  reg_def R4   ( SOC, SOC, Op_RegI,  4, R4->as_VMReg()         );  // v   iarg2
   91   reg_def R4_H ( SOC, SOC, Op_RegI, 99, R4->as_VMReg()->next() );
   92   reg_def R5   ( SOC, SOC, Op_RegI,  5, R5->as_VMReg()         );  // v   iarg3
   93   reg_def R5_H ( SOC, SOC, Op_RegI, 99, R5->as_VMReg()->next() );
   94   reg_def R6   ( SOC, SOC, Op_RegI,  6, R6->as_VMReg()         );  // v   iarg4
   95   reg_def R6_H ( SOC, SOC, Op_RegI, 99, R6->as_VMReg()->next() );
   96   reg_def R7   ( SOC, SOC, Op_RegI,  7, R7->as_VMReg()         );  // v   iarg5
   97   reg_def R7_H ( SOC, SOC, Op_RegI, 99, R7->as_VMReg()->next() );
   98   reg_def R8   ( SOC, SOC, Op_RegI,  8, R8->as_VMReg()         );  // v   iarg6
   99   reg_def R8_H ( SOC, SOC, Op_RegI, 99, R8->as_VMReg()->next() );
  100   reg_def R9   ( SOC, SOC, Op_RegI,  9, R9->as_VMReg()         );  // v   iarg7
  101   reg_def R9_H ( SOC, SOC, Op_RegI, 99, R9->as_VMReg()->next() );
  102   reg_def R10  ( SOC, SOC, Op_RegI, 10, R10->as_VMReg()        );  // v   iarg8
  103   reg_def R10_H( SOC, SOC, Op_RegI, 99, R10->as_VMReg()->next());
  104   reg_def R11  ( SOC, SOC, Op_RegI, 11, R11->as_VMReg()        );  // v   ENV / scratch
  105   reg_def R11_H( SOC, SOC, Op_RegI, 99, R11->as_VMReg()->next());
  106   reg_def R12  ( SOC, SOC, Op_RegI, 12, R12->as_VMReg()        );  // v   scratch
  107   reg_def R12_H( SOC, SOC, Op_RegI, 99, R12->as_VMReg()->next());
  108   reg_def R13  ( NS,  NS,  Op_RegI, 13, R13->as_VMReg()        );  // s   system thread id
  109   reg_def R13_H( NS,  NS,  Op_RegI, 99, R13->as_VMReg()->next());
  110   reg_def R14  ( SOC, SOE, Op_RegI, 14, R14->as_VMReg()        );  // nv
  111   reg_def R14_H( SOC, SOE, Op_RegI, 99, R14->as_VMReg()->next());
  112   reg_def R15  ( SOC, SOE, Op_RegI, 15, R15->as_VMReg()        );  // nv
  113   reg_def R15_H( SOC, SOE, Op_RegI, 99, R15->as_VMReg()->next());
  114   reg_def R16  ( SOC, SOE, Op_RegI, 16, R16->as_VMReg()        );  // nv
  115   reg_def R16_H( SOC, SOE, Op_RegI, 99, R16->as_VMReg()->next());
  116   reg_def R17  ( SOC, SOE, Op_RegI, 17, R17->as_VMReg()        );  // nv
  117   reg_def R17_H( SOC, SOE, Op_RegI, 99, R17->as_VMReg()->next());
  118   reg_def R18  ( SOC, SOE, Op_RegI, 18, R18->as_VMReg()        );  // nv
  119   reg_def R18_H( SOC, SOE, Op_RegI, 99, R18->as_VMReg()->next());
  120   reg_def R19  ( SOC, SOE, Op_RegI, 19, R19->as_VMReg()        );  // nv
  121   reg_def R19_H( SOC, SOE, Op_RegI, 99, R19->as_VMReg()->next());
  122   reg_def R20  ( SOC, SOE, Op_RegI, 20, R20->as_VMReg()        );  // nv
  123   reg_def R20_H( SOC, SOE, Op_RegI, 99, R20->as_VMReg()->next());
  124   reg_def R21  ( SOC, SOE, Op_RegI, 21, R21->as_VMReg()        );  // nv
  125   reg_def R21_H( SOC, SOE, Op_RegI, 99, R21->as_VMReg()->next());
  126   reg_def R22  ( SOC, SOE, Op_RegI, 22, R22->as_VMReg()        );  // nv
  127   reg_def R22_H( SOC, SOE, Op_RegI, 99, R22->as_VMReg()->next());
  128   reg_def R23  ( SOC, SOE, Op_RegI, 23, R23->as_VMReg()        );  // nv
  129   reg_def R23_H( SOC, SOE, Op_RegI, 99, R23->as_VMReg()->next());
  130   reg_def R24  ( SOC, SOE, Op_RegI, 24, R24->as_VMReg()        );  // nv
  131   reg_def R24_H( SOC, SOE, Op_RegI, 99, R24->as_VMReg()->next());
  132   reg_def R25  ( SOC, SOE, Op_RegI, 25, R25->as_VMReg()        );  // nv
  133   reg_def R25_H( SOC, SOE, Op_RegI, 99, R25->as_VMReg()->next());
  134   reg_def R26  ( SOC, SOE, Op_RegI, 26, R26->as_VMReg()        );  // nv
  135   reg_def R26_H( SOC, SOE, Op_RegI, 99, R26->as_VMReg()->next());
  136   reg_def R27  ( SOC, SOE, Op_RegI, 27, R27->as_VMReg()        );  // nv
  137   reg_def R27_H( SOC, SOE, Op_RegI, 99, R27->as_VMReg()->next());
  138   reg_def R28  ( SOC, SOE, Op_RegI, 28, R28->as_VMReg()        );  // nv
  139   reg_def R28_H( SOC, SOE, Op_RegI, 99, R28->as_VMReg()->next());
  140   reg_def R29  ( SOC, SOE, Op_RegI, 29, R29->as_VMReg()        );  // nv
  141   reg_def R29_H( SOC, SOE, Op_RegI, 99, R29->as_VMReg()->next());
  142   reg_def R30  ( SOC, SOE, Op_RegI, 30, R30->as_VMReg()        );  // nv
  143   reg_def R30_H( SOC, SOE, Op_RegI, 99, R30->as_VMReg()->next());
  144   reg_def R31  ( SOC, SOE, Op_RegI, 31, R31->as_VMReg()        );  // nv
  145   reg_def R31_H( SOC, SOE, Op_RegI, 99, R31->as_VMReg()->next());
  146 
  147 
  148 // ----------------------------
  149 // Float/Double Registers
  150 // ----------------------------
  151 
  152   // Double Registers
  153   // The rules of ADL require that double registers be defined in pairs.
  154   // Each pair must be two 32-bit values, but not necessarily a pair of
  155   // single float registers. In each pair, ADLC-assigned register numbers
  156   // must be adjacent, with the lower number even. Finally, when the
  157   // CPU stores such a register pair to memory, the word associated with
  158   // the lower ADLC-assigned number must be stored to the lower address.
  159 
  160   // PPC64 has 32 64-bit floating-point registers. Each can store a single
  161   // or double precision floating-point value.
  162 
  163   // types: v = volatile, nv = non-volatile, s = system
  164   reg_def F0   ( SOC, SOC, Op_RegF,  0, F0->as_VMReg()         );  // v   scratch
  165   reg_def F0_H ( SOC, SOC, Op_RegF, 99, F0->as_VMReg()->next() );
  166   reg_def F1   ( SOC, SOC, Op_RegF,  1, F1->as_VMReg()         );  // v   farg1 & fret
  167   reg_def F1_H ( SOC, SOC, Op_RegF, 99, F1->as_VMReg()->next() );
  168   reg_def F2   ( SOC, SOC, Op_RegF,  2, F2->as_VMReg()         );  // v   farg2
  169   reg_def F2_H ( SOC, SOC, Op_RegF, 99, F2->as_VMReg()->next() );
  170   reg_def F3   ( SOC, SOC, Op_RegF,  3, F3->as_VMReg()         );  // v   farg3
  171   reg_def F3_H ( SOC, SOC, Op_RegF, 99, F3->as_VMReg()->next() );
  172   reg_def F4   ( SOC, SOC, Op_RegF,  4, F4->as_VMReg()         );  // v   farg4
  173   reg_def F4_H ( SOC, SOC, Op_RegF, 99, F4->as_VMReg()->next() );
  174   reg_def F5   ( SOC, SOC, Op_RegF,  5, F5->as_VMReg()         );  // v   farg5
  175   reg_def F5_H ( SOC, SOC, Op_RegF, 99, F5->as_VMReg()->next() );
  176   reg_def F6   ( SOC, SOC, Op_RegF,  6, F6->as_VMReg()         );  // v   farg6
  177   reg_def F6_H ( SOC, SOC, Op_RegF, 99, F6->as_VMReg()->next() );
  178   reg_def F7   ( SOC, SOC, Op_RegF,  7, F7->as_VMReg()         );  // v   farg7
  179   reg_def F7_H ( SOC, SOC, Op_RegF, 99, F7->as_VMReg()->next() );
  180   reg_def F8   ( SOC, SOC, Op_RegF,  8, F8->as_VMReg()         );  // v   farg8
  181   reg_def F8_H ( SOC, SOC, Op_RegF, 99, F8->as_VMReg()->next() );
  182   reg_def F9   ( SOC, SOC, Op_RegF,  9, F9->as_VMReg()         );  // v   farg9
  183   reg_def F9_H ( SOC, SOC, Op_RegF, 99, F9->as_VMReg()->next() );
  184   reg_def F10  ( SOC, SOC, Op_RegF, 10, F10->as_VMReg()        );  // v   farg10
  185   reg_def F10_H( SOC, SOC, Op_RegF, 99, F10->as_VMReg()->next());
  186   reg_def F11  ( SOC, SOC, Op_RegF, 11, F11->as_VMReg()        );  // v   farg11
  187   reg_def F11_H( SOC, SOC, Op_RegF, 99, F11->as_VMReg()->next());
  188   reg_def F12  ( SOC, SOC, Op_RegF, 12, F12->as_VMReg()        );  // v   farg12
  189   reg_def F12_H( SOC, SOC, Op_RegF, 99, F12->as_VMReg()->next());
  190   reg_def F13  ( SOC, SOC, Op_RegF, 13, F13->as_VMReg()        );  // v   farg13
  191   reg_def F13_H( SOC, SOC, Op_RegF, 99, F13->as_VMReg()->next());
  192   reg_def F14  ( SOC, SOE, Op_RegF, 14, F14->as_VMReg()        );  // nv
  193   reg_def F14_H( SOC, SOE, Op_RegF, 99, F14->as_VMReg()->next());
  194   reg_def F15  ( SOC, SOE, Op_RegF, 15, F15->as_VMReg()        );  // nv
  195   reg_def F15_H( SOC, SOE, Op_RegF, 99, F15->as_VMReg()->next());
  196   reg_def F16  ( SOC, SOE, Op_RegF, 16, F16->as_VMReg()        );  // nv
  197   reg_def F16_H( SOC, SOE, Op_RegF, 99, F16->as_VMReg()->next());
  198   reg_def F17  ( SOC, SOE, Op_RegF, 17, F17->as_VMReg()        );  // nv
  199   reg_def F17_H( SOC, SOE, Op_RegF, 99, F17->as_VMReg()->next());
  200   reg_def F18  ( SOC, SOE, Op_RegF, 18, F18->as_VMReg()        );  // nv
  201   reg_def F18_H( SOC, SOE, Op_RegF, 99, F18->as_VMReg()->next());
  202   reg_def F19  ( SOC, SOE, Op_RegF, 19, F19->as_VMReg()        );  // nv
  203   reg_def F19_H( SOC, SOE, Op_RegF, 99, F19->as_VMReg()->next());
  204   reg_def F20  ( SOC, SOE, Op_RegF, 20, F20->as_VMReg()        );  // nv
  205   reg_def F20_H( SOC, SOE, Op_RegF, 99, F20->as_VMReg()->next());
  206   reg_def F21  ( SOC, SOE, Op_RegF, 21, F21->as_VMReg()        );  // nv
  207   reg_def F21_H( SOC, SOE, Op_RegF, 99, F21->as_VMReg()->next());
  208   reg_def F22  ( SOC, SOE, Op_RegF, 22, F22->as_VMReg()        );  // nv
  209   reg_def F22_H( SOC, SOE, Op_RegF, 99, F22->as_VMReg()->next());
  210   reg_def F23  ( SOC, SOE, Op_RegF, 23, F23->as_VMReg()        );  // nv
  211   reg_def F23_H( SOC, SOE, Op_RegF, 99, F23->as_VMReg()->next());
  212   reg_def F24  ( SOC, SOE, Op_RegF, 24, F24->as_VMReg()        );  // nv
  213   reg_def F24_H( SOC, SOE, Op_RegF, 99, F24->as_VMReg()->next());
  214   reg_def F25  ( SOC, SOE, Op_RegF, 25, F25->as_VMReg()        );  // nv
  215   reg_def F25_H( SOC, SOE, Op_RegF, 99, F25->as_VMReg()->next());
  216   reg_def F26  ( SOC, SOE, Op_RegF, 26, F26->as_VMReg()        );  // nv
  217   reg_def F26_H( SOC, SOE, Op_RegF, 99, F26->as_VMReg()->next());
  218   reg_def F27  ( SOC, SOE, Op_RegF, 27, F27->as_VMReg()        );  // nv
  219   reg_def F27_H( SOC, SOE, Op_RegF, 99, F27->as_VMReg()->next());
  220   reg_def F28  ( SOC, SOE, Op_RegF, 28, F28->as_VMReg()        );  // nv
  221   reg_def F28_H( SOC, SOE, Op_RegF, 99, F28->as_VMReg()->next());
  222   reg_def F29  ( SOC, SOE, Op_RegF, 29, F29->as_VMReg()        );  // nv
  223   reg_def F29_H( SOC, SOE, Op_RegF, 99, F29->as_VMReg()->next());
  224   reg_def F30  ( SOC, SOE, Op_RegF, 30, F30->as_VMReg()        );  // nv
  225   reg_def F30_H( SOC, SOE, Op_RegF, 99, F30->as_VMReg()->next());
  226   reg_def F31  ( SOC, SOE, Op_RegF, 31, F31->as_VMReg()        );  // nv
  227   reg_def F31_H( SOC, SOE, Op_RegF, 99, F31->as_VMReg()->next());
  228 
  229 // ----------------------------
  230 // Special Registers
  231 // ----------------------------
  232 
  233 // Condition Codes Flag Registers
  234 
  235   // PPC64 has 8 condition code "registers" which are all contained
  236   // in the CR register.
  237 
  238   // types: v = volatile, nv = non-volatile, s = system
  239   reg_def CCR0(SOC, SOC, Op_RegFlags, 0, CCR0->as_VMReg());  // v
  240   reg_def CCR1(SOC, SOC, Op_RegFlags, 1, CCR1->as_VMReg());  // v
  241   reg_def CCR2(SOC, SOC, Op_RegFlags, 2, CCR2->as_VMReg());  // nv
  242   reg_def CCR3(SOC, SOC, Op_RegFlags, 3, CCR3->as_VMReg());  // nv
  243   reg_def CCR4(SOC, SOC, Op_RegFlags, 4, CCR4->as_VMReg());  // nv
  244   reg_def CCR5(SOC, SOC, Op_RegFlags, 5, CCR5->as_VMReg());  // v
  245   reg_def CCR6(SOC, SOC, Op_RegFlags, 6, CCR6->as_VMReg());  // v
  246   reg_def CCR7(SOC, SOC, Op_RegFlags, 7, CCR7->as_VMReg());  // v
  247 
  248   // Special registers of PPC64
  249 
  250   reg_def SR_XER(    SOC, SOC, Op_RegP, 0, SR_XER->as_VMReg());     // v
  251   reg_def SR_LR(     SOC, SOC, Op_RegP, 1, SR_LR->as_VMReg());      // v
  252   reg_def SR_CTR(    SOC, SOC, Op_RegP, 2, SR_CTR->as_VMReg());     // v
  253   reg_def SR_VRSAVE( SOC, SOC, Op_RegP, 3, SR_VRSAVE->as_VMReg());  // v
  254   reg_def SR_SPEFSCR(SOC, SOC, Op_RegP, 4, SR_SPEFSCR->as_VMReg()); // v
  255   reg_def SR_PPR(    SOC, SOC, Op_RegP, 5, SR_PPR->as_VMReg());     // v
  256 
  257 // ----------------------------
  258 // Vector-Scalar Registers
  259 // ----------------------------
  260   reg_def VSR0 ( SOC, SOC, Op_VecX, 0, NULL);
  261   reg_def VSR1 ( SOC, SOC, Op_VecX, 1, NULL);
  262   reg_def VSR2 ( SOC, SOC, Op_VecX, 2, NULL);
  263   reg_def VSR3 ( SOC, SOC, Op_VecX, 3, NULL);
  264   reg_def VSR4 ( SOC, SOC, Op_VecX, 4, NULL);
  265   reg_def VSR5 ( SOC, SOC, Op_VecX, 5, NULL);
  266   reg_def VSR6 ( SOC, SOC, Op_VecX, 6, NULL);
  267   reg_def VSR7 ( SOC, SOC, Op_VecX, 7, NULL);
  268   reg_def VSR8 ( SOC, SOC, Op_VecX, 8, NULL);
  269   reg_def VSR9 ( SOC, SOC, Op_VecX, 9, NULL);
  270   reg_def VSR10 ( SOC, SOC, Op_VecX, 10, NULL);
  271   reg_def VSR11 ( SOC, SOC, Op_VecX, 11, NULL);
  272   reg_def VSR12 ( SOC, SOC, Op_VecX, 12, NULL);
  273   reg_def VSR13 ( SOC, SOC, Op_VecX, 13, NULL);
  274   reg_def VSR14 ( SOC, SOC, Op_VecX, 14, NULL);
  275   reg_def VSR15 ( SOC, SOC, Op_VecX, 15, NULL);
  276   reg_def VSR16 ( SOC, SOC, Op_VecX, 16, NULL);
  277   reg_def VSR17 ( SOC, SOC, Op_VecX, 17, NULL);
  278   reg_def VSR18 ( SOC, SOC, Op_VecX, 18, NULL);
  279   reg_def VSR19 ( SOC, SOC, Op_VecX, 19, NULL);
  280   reg_def VSR20 ( SOC, SOC, Op_VecX, 20, NULL);
  281   reg_def VSR21 ( SOC, SOC, Op_VecX, 21, NULL);
  282   reg_def VSR22 ( SOC, SOC, Op_VecX, 22, NULL);
  283   reg_def VSR23 ( SOC, SOC, Op_VecX, 23, NULL);
  284   reg_def VSR24 ( SOC, SOC, Op_VecX, 24, NULL);
  285   reg_def VSR25 ( SOC, SOC, Op_VecX, 25, NULL);
  286   reg_def VSR26 ( SOC, SOC, Op_VecX, 26, NULL);
  287   reg_def VSR27 ( SOC, SOC, Op_VecX, 27, NULL);
  288   reg_def VSR28 ( SOC, SOC, Op_VecX, 28, NULL);
  289   reg_def VSR29 ( SOC, SOC, Op_VecX, 29, NULL);
  290   reg_def VSR30 ( SOC, SOC, Op_VecX, 30, NULL);
  291   reg_def VSR31 ( SOC, SOC, Op_VecX, 31, NULL);
  292   reg_def VSR32 ( SOC, SOC, Op_VecX, 32, NULL);
  293   reg_def VSR33 ( SOC, SOC, Op_VecX, 33, NULL);
  294   reg_def VSR34 ( SOC, SOC, Op_VecX, 34, NULL);
  295   reg_def VSR35 ( SOC, SOC, Op_VecX, 35, NULL);
  296   reg_def VSR36 ( SOC, SOC, Op_VecX, 36, NULL);
  297   reg_def VSR37 ( SOC, SOC, Op_VecX, 37, NULL);
  298   reg_def VSR38 ( SOC, SOC, Op_VecX, 38, NULL);
  299   reg_def VSR39 ( SOC, SOC, Op_VecX, 39, NULL);
  300   reg_def VSR40 ( SOC, SOC, Op_VecX, 40, NULL);
  301   reg_def VSR41 ( SOC, SOC, Op_VecX, 41, NULL);
  302   reg_def VSR42 ( SOC, SOC, Op_VecX, 42, NULL);
  303   reg_def VSR43 ( SOC, SOC, Op_VecX, 43, NULL);
  304   reg_def VSR44 ( SOC, SOC, Op_VecX, 44, NULL);
  305   reg_def VSR45 ( SOC, SOC, Op_VecX, 45, NULL);
  306   reg_def VSR46 ( SOC, SOC, Op_VecX, 46, NULL);
  307   reg_def VSR47 ( SOC, SOC, Op_VecX, 47, NULL);
  308   reg_def VSR48 ( SOC, SOC, Op_VecX, 48, NULL);
  309   reg_def VSR49 ( SOC, SOC, Op_VecX, 49, NULL);
  310   reg_def VSR50 ( SOC, SOC, Op_VecX, 50, NULL);
  311   reg_def VSR51 ( SOC, SOC, Op_VecX, 51, NULL);
  312   reg_def VSR52 ( SOC, SOC, Op_VecX, 52, NULL);
  313   reg_def VSR53 ( SOC, SOC, Op_VecX, 53, NULL);
  314   reg_def VSR54 ( SOC, SOC, Op_VecX, 54, NULL);
  315   reg_def VSR55 ( SOC, SOC, Op_VecX, 55, NULL);
  316   reg_def VSR56 ( SOC, SOC, Op_VecX, 56, NULL);
  317   reg_def VSR57 ( SOC, SOC, Op_VecX, 57, NULL);
  318   reg_def VSR58 ( SOC, SOC, Op_VecX, 58, NULL);
  319   reg_def VSR59 ( SOC, SOC, Op_VecX, 59, NULL);
  320   reg_def VSR60 ( SOC, SOC, Op_VecX, 60, NULL);
  321   reg_def VSR61 ( SOC, SOC, Op_VecX, 61, NULL);
  322   reg_def VSR62 ( SOC, SOC, Op_VecX, 62, NULL);
  323   reg_def VSR63 ( SOC, SOC, Op_VecX, 63, NULL);
  324 
  325 // ----------------------------
  326 // Specify priority of register selection within phases of register
  327 // allocation. Highest priority is first. A useful heuristic is to
  328 // give registers a low priority when they are required by machine
  329 // instructions, like EAX and EDX on I486, and choose no-save registers
  330 // before save-on-call, & save-on-call before save-on-entry. Registers
  331 // which participate in fixed calling sequences should come last.
  332 // Registers which are used as pairs must fall on an even boundary.
  333 
  334 // It's worth about 1% on SPEC geomean to get this right.
  335 
  336 // Chunk0, chunk1, and chunk2 form the MachRegisterNumbers enumeration
  337 // in adGlobals_ppc.hpp which defines the <register>_num values, e.g.
  338 // R3_num. Therefore, R3_num may not be (and in reality is not)
  339 // the same as R3->encoding()! Furthermore, we cannot make any
  340 // assumptions on ordering, e.g. R3_num may be less than R2_num.
  341 // Additionally, the function
  342 //   static enum RC rc_class(OptoReg::Name reg )
  343 // maps a given <register>_num value to its chunk type (except for flags)
  344 // and its current implementation relies on chunk0 and chunk1 having a
  345 // size of 64 each.
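
// As an illustrative sketch only (not the actual source), rc_class
// discriminates by chunk along these lines, which is why it depends on
// chunk0 and chunk1 having 64 entries each:
//   if (OptoReg::is_stack(reg)) return rc_stack;
//   if ((int)reg <  64)         return rc_int;    // chunk0: GPR halves
//   if ((int)reg < 128)         return rc_float;  // chunk1: FPR halves
//   ...                                           // flags / VSRs follow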
  346 
  347 // If you change this allocation class, please have a look at the
  348 // default values for the parameters RoundRobinIntegerRegIntervalStart
  349 // and RoundRobinFloatRegIntervalStart
  350 
  351 alloc_class chunk0 (
  // Chunk0 contains *all* 64 integer register halves.
  353 
  354   // "non-volatile" registers
  355   R14, R14_H,
  356   R15, R15_H,
  357   R17, R17_H,
  358   R18, R18_H,
  359   R19, R19_H,
  360   R20, R20_H,
  361   R21, R21_H,
  362   R22, R22_H,
  363   R23, R23_H,
  364   R24, R24_H,
  365   R25, R25_H,
  366   R26, R26_H,
  367   R27, R27_H,
  368   R28, R28_H,
  369   R29, R29_H,
  370   R30, R30_H,
  371   R31, R31_H,
  372 
  373   // scratch/special registers
  374   R11, R11_H,
  375   R12, R12_H,
  376 
  377   // argument registers
  378   R10, R10_H,
  379   R9,  R9_H,
  380   R8,  R8_H,
  381   R7,  R7_H,
  382   R6,  R6_H,
  383   R5,  R5_H,
  384   R4,  R4_H,
  385   R3,  R3_H,
  386 
  387   // special registers, not available for allocation
  388   R16, R16_H,     // R16_thread
  389   R13, R13_H,     // system thread id
  390   R2,  R2_H,      // may be used for TOC
  391   R1,  R1_H,      // SP
  392   R0,  R0_H       // R0 (scratch)
  393 );
  394 
  395 // If you change this allocation class, please have a look at the
  396 // default values for the parameters RoundRobinIntegerRegIntervalStart
  397 // and RoundRobinFloatRegIntervalStart
  398 
  399 alloc_class chunk1 (
  // Chunk1 contains *all* 64 floating-point register halves.
  401 
  402   // scratch register
  403   F0,  F0_H,
  404 
  405   // argument registers
  406   F13, F13_H,
  407   F12, F12_H,
  408   F11, F11_H,
  409   F10, F10_H,
  410   F9,  F9_H,
  411   F8,  F8_H,
  412   F7,  F7_H,
  413   F6,  F6_H,
  414   F5,  F5_H,
  415   F4,  F4_H,
  416   F3,  F3_H,
  417   F2,  F2_H,
  418   F1,  F1_H,
  419 
  420   // non-volatile registers
  421   F14, F14_H,
  422   F15, F15_H,
  423   F16, F16_H,
  424   F17, F17_H,
  425   F18, F18_H,
  426   F19, F19_H,
  427   F20, F20_H,
  428   F21, F21_H,
  429   F22, F22_H,
  430   F23, F23_H,
  431   F24, F24_H,
  432   F25, F25_H,
  433   F26, F26_H,
  434   F27, F27_H,
  435   F28, F28_H,
  436   F29, F29_H,
  437   F30, F30_H,
  438   F31, F31_H
  439 );
  440 
  441 alloc_class chunk2 (
  442   // Chunk2 contains *all* 8 condition code registers.
  443 
  444   CCR0,
  445   CCR1,
  446   CCR2,
  447   CCR3,
  448   CCR4,
  449   CCR5,
  450   CCR6,
  451   CCR7
  452 );
  453 
  454 alloc_class chunk3 (
  455   VSR0,
  456   VSR1,
  457   VSR2,
  458   VSR3,
  459   VSR4,
  460   VSR5,
  461   VSR6,
  462   VSR7,
  463   VSR8,
  464   VSR9,
  465   VSR10,
  466   VSR11,
  467   VSR12,
  468   VSR13,
  469   VSR14,
  470   VSR15,
  471   VSR16,
  472   VSR17,
  473   VSR18,
  474   VSR19,
  475   VSR20,
  476   VSR21,
  477   VSR22,
  478   VSR23,
  479   VSR24,
  480   VSR25,
  481   VSR26,
  482   VSR27,
  483   VSR28,
  484   VSR29,
  485   VSR30,
  486   VSR31,
  487   VSR32,
  488   VSR33,
  489   VSR34,
  490   VSR35,
  491   VSR36,
  492   VSR37,
  493   VSR38,
  494   VSR39,
  495   VSR40,
  496   VSR41,
  497   VSR42,
  498   VSR43,
  499   VSR44,
  500   VSR45,
  501   VSR46,
  502   VSR47,
  503   VSR48,
  504   VSR49,
  505   VSR50,
  506   VSR51,
  507   VSR52,
  508   VSR53,
  509   VSR54,
  510   VSR55,
  511   VSR56,
  512   VSR57,
  513   VSR58,
  514   VSR59,
  515   VSR60,
  516   VSR61,
  517   VSR62,
  518   VSR63
  519 );
  520 
  521 alloc_class chunk4 (
  522   // special registers
  523   // These registers are not allocated, but used for nodes generated by postalloc expand.
  524   SR_XER,
  525   SR_LR,
  526   SR_CTR,
  527   SR_VRSAVE,
  528   SR_SPEFSCR,
  529   SR_PPR
  530 );
  531 
  532 //-------Architecture Description Register Classes-----------------------
  533 
  534 // Several register classes are automatically defined based upon
  535 // information in this architecture description.
  536 
  537 // 1) reg_class inline_cache_reg           ( as defined in frame section )
  538 // 2) reg_class stack_slots( /* one chunk of stack-based "registers" */ )
  539 //
  540 
  541 // ----------------------------
  542 // 32 Bit Register Classes
  543 // ----------------------------
  544 
  545 // We specify registers twice, once as read/write, and once read-only.
  546 // We use the read-only registers for source operands. With this, we
  547 // can include preset read only registers in this class, as a hard-coded
  548 // '0'-register. (We used to simulate this on ppc.)
  549 
// 32 bit registers that can be read and written, i.e. these registers
// can be the dest (or src) of normal instructions.
  552 reg_class bits32_reg_rw(
  553 /*R0*/              // R0
  554 /*R1*/              // SP
  555   R2,               // TOC
  556   R3,
  557   R4,
  558   R5,
  559   R6,
  560   R7,
  561   R8,
  562   R9,
  563   R10,
  564   R11,
  565   R12,
  566 /*R13*/             // system thread id
  567   R14,
  568   R15,
  569 /*R16*/             // R16_thread
  570   R17,
  571   R18,
  572   R19,
  573   R20,
  574   R21,
  575   R22,
  576   R23,
  577   R24,
  578   R25,
  579   R26,
  580   R27,
  581   R28,
  582 /*R29,*/             // global TOC
  583   R30,
  584   R31
  585 );
  586 
// 32 bit registers that can only be read, i.e. these registers can
// only be used as the src of instructions.
  589 reg_class bits32_reg_ro(
  590 /*R0*/              // R0
  591 /*R1*/              // SP
  R2,               // TOC
  593   R3,
  594   R4,
  595   R5,
  596   R6,
  597   R7,
  598   R8,
  599   R9,
  600   R10,
  601   R11,
  602   R12,
  603 /*R13*/             // system thread id
  604   R14,
  605   R15,
  606 /*R16*/             // R16_thread
  607   R17,
  608   R18,
  609   R19,
  610   R20,
  611   R21,
  612   R22,
  613   R23,
  614   R24,
  615   R25,
  616   R26,
  617   R27,
  618   R28,
  619 /*R29,*/
  620   R30,
  621   R31
  622 );
  623 
  624 reg_class rscratch1_bits32_reg(R11);
  625 reg_class rscratch2_bits32_reg(R12);
  626 reg_class rarg1_bits32_reg(R3);
  627 reg_class rarg2_bits32_reg(R4);
  628 reg_class rarg3_bits32_reg(R5);
  629 reg_class rarg4_bits32_reg(R6);
  630 
  631 // ----------------------------
  632 // 64 Bit Register Classes
  633 // ----------------------------
  634 // 64-bit build means 64-bit pointers means hi/lo pairs
  635 
  636 reg_class rscratch1_bits64_reg(R11_H, R11);
  637 reg_class rscratch2_bits64_reg(R12_H, R12);
  638 reg_class rarg1_bits64_reg(R3_H, R3);
  639 reg_class rarg2_bits64_reg(R4_H, R4);
  640 reg_class rarg3_bits64_reg(R5_H, R5);
  641 reg_class rarg4_bits64_reg(R6_H, R6);
  642 // Thread register, 'written' by tlsLoadP, see there.
  643 reg_class thread_bits64_reg(R16_H, R16);
  644 
  645 reg_class r19_bits64_reg(R19_H, R19);
  646 
// 64 bit registers that can be read and written, i.e. these registers
// can be the dest (or src) of normal instructions.
  649 reg_class bits64_reg_rw(
  650 /*R0_H,  R0*/     // R0
  651 /*R1_H,  R1*/     // SP
  652   R2_H,  R2,      // TOC
  653   R3_H,  R3,
  654   R4_H,  R4,
  655   R5_H,  R5,
  656   R6_H,  R6,
  657   R7_H,  R7,
  658   R8_H,  R8,
  659   R9_H,  R9,
  660   R10_H, R10,
  661   R11_H, R11,
  662   R12_H, R12,
  663 /*R13_H, R13*/   // system thread id
  664   R14_H, R14,
  665   R15_H, R15,
  666 /*R16_H, R16*/   // R16_thread
  667   R17_H, R17,
  668   R18_H, R18,
  669   R19_H, R19,
  670   R20_H, R20,
  671   R21_H, R21,
  672   R22_H, R22,
  673   R23_H, R23,
  674   R24_H, R24,
  675   R25_H, R25,
  676   R26_H, R26,
  677   R27_H, R27,
  678   R28_H, R28,
  679 /*R29_H, R29,*/
  680   R30_H, R30,
  681   R31_H, R31
  682 );
  683 
// 64 bit registers, excluding R2, R11 and R12.
// Used to hold the TOC to avoid collisions with expanded LeafCall, which uses
// R2, R11 and R12 internally.
  687 reg_class bits64_reg_leaf_call(
  688 /*R0_H,  R0*/     // R0
  689 /*R1_H,  R1*/     // SP
  690 /*R2_H,  R2*/     // TOC
  691   R3_H,  R3,
  692   R4_H,  R4,
  693   R5_H,  R5,
  694   R6_H,  R6,
  695   R7_H,  R7,
  696   R8_H,  R8,
  697   R9_H,  R9,
  698   R10_H, R10,
  699 /*R11_H, R11*/
  700 /*R12_H, R12*/
  701 /*R13_H, R13*/   // system thread id
  702   R14_H, R14,
  703   R15_H, R15,
  704 /*R16_H, R16*/   // R16_thread
  705   R17_H, R17,
  706   R18_H, R18,
  707   R19_H, R19,
  708   R20_H, R20,
  709   R21_H, R21,
  710   R22_H, R22,
  711   R23_H, R23,
  712   R24_H, R24,
  713   R25_H, R25,
  714   R26_H, R26,
  715   R27_H, R27,
  716   R28_H, R28,
  717 /*R29_H, R29,*/
  718   R30_H, R30,
  719   R31_H, R31
  720 );
  721 
  722 // Used to hold the TOC to avoid collisions with expanded DynamicCall
  723 // which uses r19 as inline cache internally and expanded LeafCall which uses
  724 // r2, r11 and r12 internally.
  725 reg_class bits64_constant_table_base(
  726 /*R0_H,  R0*/     // R0
  727 /*R1_H,  R1*/     // SP
  728 /*R2_H,  R2*/     // TOC
  729   R3_H,  R3,
  730   R4_H,  R4,
  731   R5_H,  R5,
  732   R6_H,  R6,
  733   R7_H,  R7,
  734   R8_H,  R8,
  735   R9_H,  R9,
  736   R10_H, R10,
  737 /*R11_H, R11*/
  738 /*R12_H, R12*/
  739 /*R13_H, R13*/   // system thread id
  740   R14_H, R14,
  741   R15_H, R15,
  742 /*R16_H, R16*/   // R16_thread
  743   R17_H, R17,
  744   R18_H, R18,
  745 /*R19_H, R19*/
  746   R20_H, R20,
  747   R21_H, R21,
  748   R22_H, R22,
  749   R23_H, R23,
  750   R24_H, R24,
  751   R25_H, R25,
  752   R26_H, R26,
  753   R27_H, R27,
  754   R28_H, R28,
  755 /*R29_H, R29,*/
  756   R30_H, R30,
  757   R31_H, R31
  758 );
  759 
// 64 bit registers that can only be read, i.e. these registers can
// only be used as the src of instructions.
  762 reg_class bits64_reg_ro(
  763 /*R0_H,  R0*/     // R0
  764   R1_H,  R1,
  765   R2_H,  R2,       // TOC
  766   R3_H,  R3,
  767   R4_H,  R4,
  768   R5_H,  R5,
  769   R6_H,  R6,
  770   R7_H,  R7,
  771   R8_H,  R8,
  772   R9_H,  R9,
  773   R10_H, R10,
  774   R11_H, R11,
  775   R12_H, R12,
  776 /*R13_H, R13*/   // system thread id
  777   R14_H, R14,
  778   R15_H, R15,
  779   R16_H, R16,    // R16_thread
  780   R17_H, R17,
  781   R18_H, R18,
  782   R19_H, R19,
  783   R20_H, R20,
  784   R21_H, R21,
  785   R22_H, R22,
  786   R23_H, R23,
  787   R24_H, R24,
  788   R25_H, R25,
  789   R26_H, R26,
  790   R27_H, R27,
  791   R28_H, R28,
  792 /*R29_H, R29,*/ // TODO: let allocator handle TOC!!
  793   R30_H, R30,
  794   R31_H, R31
  795 );
  796 
  797 
  798 // ----------------------------
  799 // Special Class for Condition Code Flags Register
  800 
  801 reg_class int_flags(
  802 /*CCR0*/             // scratch
  803 /*CCR1*/             // scratch
  804 /*CCR2*/             // nv!
  805 /*CCR3*/             // nv!
  806 /*CCR4*/             // nv!
  807   CCR5,
  808   CCR6,
  809   CCR7
  810 );
  811 
  812 reg_class int_flags_ro(
  813   CCR0,
  814   CCR1,
  815   CCR2,
  816   CCR3,
  817   CCR4,
  818   CCR5,
  819   CCR6,
  820   CCR7
  821 );
  822 
  823 reg_class int_flags_CR0(CCR0);
  824 reg_class int_flags_CR1(CCR1);
  825 reg_class int_flags_CR6(CCR6);
  826 reg_class ctr_reg(SR_CTR);
  827 
  828 // ----------------------------
  829 // Float Register Classes
  830 // ----------------------------
  831 
  832 reg_class flt_reg(
  833   F0,
  834   F1,
  835   F2,
  836   F3,
  837   F4,
  838   F5,
  839   F6,
  840   F7,
  841   F8,
  842   F9,
  843   F10,
  844   F11,
  845   F12,
  846   F13,
  847   F14,              // nv!
  848   F15,              // nv!
  849   F16,              // nv!
  850   F17,              // nv!
  851   F18,              // nv!
  852   F19,              // nv!
  853   F20,              // nv!
  854   F21,              // nv!
  855   F22,              // nv!
  856   F23,              // nv!
  857   F24,              // nv!
  858   F25,              // nv!
  859   F26,              // nv!
  860   F27,              // nv!
  861   F28,              // nv!
  862   F29,              // nv!
  863   F30,              // nv!
  864   F31               // nv!
  865 );
  866 
  867 // Double precision float registers have virtual `high halves' that
  868 // are needed by the allocator.
  869 reg_class dbl_reg(
  870   F0,  F0_H,
  871   F1,  F1_H,
  872   F2,  F2_H,
  873   F3,  F3_H,
  874   F4,  F4_H,
  875   F5,  F5_H,
  876   F6,  F6_H,
  877   F7,  F7_H,
  878   F8,  F8_H,
  879   F9,  F9_H,
  880   F10, F10_H,
  881   F11, F11_H,
  882   F12, F12_H,
  883   F13, F13_H,
  884   F14, F14_H,    // nv!
  885   F15, F15_H,    // nv!
  886   F16, F16_H,    // nv!
  887   F17, F17_H,    // nv!
  888   F18, F18_H,    // nv!
  889   F19, F19_H,    // nv!
  890   F20, F20_H,    // nv!
  891   F21, F21_H,    // nv!
  892   F22, F22_H,    // nv!
  893   F23, F23_H,    // nv!
  894   F24, F24_H,    // nv!
  895   F25, F25_H,    // nv!
  896   F26, F26_H,    // nv!
  897   F27, F27_H,    // nv!
  898   F28, F28_H,    // nv!
  899   F29, F29_H,    // nv!
  900   F30, F30_H,    // nv!
  901   F31, F31_H     // nv!
  902 );
  903 
  904 // ----------------------------
  905 // Vector-Scalar Register Class
  906 // ----------------------------
  907 
  908 reg_class vs_reg(
  909   // Attention: Only these ones are saved & restored at safepoint by RegisterSaver.
  910   VSR32,
  911   VSR33,
  912   VSR34,
  913   VSR35,
  914   VSR36,
  915   VSR37,
  916   VSR38,
  917   VSR39,
  918   VSR40,
  919   VSR41,
  920   VSR42,
  921   VSR43,
  922   VSR44,
  923   VSR45,
  924   VSR46,
  925   VSR47,
  926   VSR48,
  927   VSR49,
  928   VSR50,
  929   VSR51
  930   // VSR52-VSR63 // nv!
  931 );
  932 
  933  %}
  934 
  935 //----------DEFINITION BLOCK---------------------------------------------------
  936 // Define name --> value mappings to inform the ADLC of an integer valued name
  937 // Current support includes integer values in the range [0, 0x7FFFFFFF]
  938 // Format:
  939 //        int_def  <name>         ( <int_value>, <expression>);
  940 // Generated Code in ad_<arch>.hpp
  941 //        #define  <name>   (<expression>)
  942 //        // value == <int_value>
  943 // Generated code in ad_<arch>.cpp adlc_verification()
  944 //        assert( <name> == <int_value>, "Expect (<expression>) to equal <int_value>");
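//
// For example (illustrative), the DEFAULT_COST entry below yields roughly:
//        #define  DEFAULT_COST   (100)
//        // value == 100
// and a corresponding assert(DEFAULT_COST == 100, ...) in adlc_verification().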
  945 //
  946 definitions %{
  947   // The default cost (of an ALU instruction).
  948   int_def DEFAULT_COST_LOW        (     30,      30);
  949   int_def DEFAULT_COST            (    100,     100);
  950   int_def HUGE_COST               (1000000, 1000000);
  951 
  952   // Memory refs
  953   int_def MEMORY_REF_COST_LOW     (    200, DEFAULT_COST * 2);
  954   int_def MEMORY_REF_COST         (    300, DEFAULT_COST * 3);
  955 
  956   // Branches are even more expensive.
  957   int_def BRANCH_COST             (    900, DEFAULT_COST * 9);
  958   int_def CALL_COST               (   1300, DEFAULT_COST * 13);
  959 %}
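
// These cost names are referenced from the instruct rules further below,
// e.g. (illustrative) an instruction form may declare ins_cost(MEMORY_REF_COST)
// or ins_cost(DEFAULT_COST) to guide instruction selection.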
  960 
  961 
  962 //----------SOURCE BLOCK-------------------------------------------------------
  963 // This is a block of C++ code which provides values, functions, and
  964 // definitions necessary in the rest of the architecture description.
  965 source_hpp %{
  966   // Header information of the source block.
  967   // Method declarations/definitions which are used outside
  968   // the ad-scope can conveniently be defined here.
  969   //
  970   // To keep related declarations/definitions/uses close together,
  971   // we switch between source %{ }% and source_hpp %{ }% freely as needed.
  972 
  973 #include "opto/convertnode.hpp"
  974 
  975   // Returns true if Node n is followed by a MemBar node that
  976   // will do an acquire. If so, this node must not do the acquire
  977   // operation.
  978   bool followed_by_acquire(const Node *n);
  979 %}
  980 
  981 source %{
  982 
  983 #include "oops/klass.inline.hpp"
  984 
  985 void PhaseOutput::pd_perform_mach_node_analysis() {
  986 }
  987 
  988 int MachNode::pd_alignment_required() const {
  989   return 1;
  990 }
  991 
  992 int MachNode::compute_padding(int current_offset) const {
  993   return 0;
  994 }
  995 
  996 // Should the matcher clone input 'm' of node 'n'?
  997 bool Matcher::pd_clone_node(Node* n, Node* m, Matcher::MStack& mstack) {
  998   return false;
  999 }
 1000 
 1001 // Should the Matcher clone shifts on addressing modes, expecting them
 1002 // to be subsumed into complex addressing expressions or compute them
 1003 // into registers?
 1004 bool Matcher::pd_clone_address_expressions(AddPNode* m, Matcher::MStack& mstack, VectorSet& address_visited) {
 1005   return clone_base_plus_offset_address(m, mstack, address_visited);
 1006 }
 1007 
 1008 // Optimize load-acquire.
 1009 //
 1010 // Check if acquire is unnecessary due to following operation that does
 1011 // acquire anyways.
 1012 // Walk the pattern:
 1013 //
 1014 //      n: Load.acq
 1015 //           |
 1016 //      MemBarAcquire
 1017 //       |         |
 1018 //  Proj(ctrl)  Proj(mem)
 1019 //       |         |
 1020 //   MemBarRelease/Volatile
 1021 //
 1022 bool followed_by_acquire(const Node *load) {
 1023   assert(load->is_Load(), "So far implemented only for loads.");
 1024 
 1025   // Find MemBarAcquire.
 1026   const Node *mba = NULL;
 1027   for (DUIterator_Fast imax, i = load->fast_outs(imax); i < imax; i++) {
 1028     const Node *out = load->fast_out(i);
 1029     if (out->Opcode() == Op_MemBarAcquire) {
 1030       if (out->in(0) == load) continue; // Skip control edge, membar should be found via precedence edge.
 1031       mba = out;
 1032       break;
 1033     }
 1034   }
 1035   if (!mba) return false;
 1036 
 1037   // Find following MemBar node.
 1038   //
  // The following node must be reachable by control AND memory
  // edge to ensure that no other operations are in between the two nodes.
 1041   //
 1042   // So first get the Proj node, mem_proj, to use it to iterate forward.
 1043   Node *mem_proj = NULL;
 1044   for (DUIterator_Fast imax, i = mba->fast_outs(imax); i < imax; i++) {
 1045     mem_proj = mba->fast_out(i);      // Runs out of bounds and asserts if Proj not found.
 1046     assert(mem_proj->is_Proj(), "only projections here");
 1047     ProjNode *proj = mem_proj->as_Proj();
 1048     if (proj->_con == TypeFunc::Memory &&
 1049         !Compile::current()->node_arena()->contains(mem_proj)) // Unmatched old-space only
 1050       break;
 1051   }
 1052   assert(mem_proj->as_Proj()->_con == TypeFunc::Memory, "Graph broken");
 1053 
  // Search the MemBar behind the Proj. If there are other memory operations
  // behind the Proj, we give up.
 1056   for (DUIterator_Fast jmax, j = mem_proj->fast_outs(jmax); j < jmax; j++) {
 1057     Node *x = mem_proj->fast_out(j);
 1058     // Proj might have an edge to a store or load node which precedes the membar.
 1059     if (x->is_Mem()) return false;
 1060 
 1061     // On PPC64 release and volatile are implemented by an instruction
 1062     // that also has acquire semantics. I.e. there is no need for an
 1063     // acquire before these.
 1064     int xop = x->Opcode();
 1065     if (xop == Op_MemBarRelease || xop == Op_MemBarVolatile) {
 1066       // Make sure we're not missing Call/Phi/MergeMem by checking
 1067       // control edges. The control edge must directly lead back
 1068       // to the MemBarAcquire
 1069       Node *ctrl_proj = x->in(0);
 1070       if (ctrl_proj->is_Proj() && ctrl_proj->in(0) == mba) {
 1071         return true;
 1072       }
 1073     }
 1074   }
 1075 
 1076   return false;
 1077 }
 1078 
 1079 #define __ _masm.
 1080 
 1081 // Tertiary op of a LoadP or StoreP encoding.
 1082 #define REGP_OP true
 1083 
 1084 // ****************************************************************************
 1085 
 1086 // REQUIRED FUNCTIONALITY
 1087 
// !!!!! Special hack to get all types of calls to specify the byte offset
 1089 //       from the start of the call to the point where the return address
 1090 //       will point.
 1091 
 1092 // PPC port: Removed use of lazy constant construct.
 1093 
 1094 int MachCallStaticJavaNode::ret_addr_offset() {
 1095   // It's only a single branch-and-link instruction.
 1096   return 4;
 1097 }
 1098 
 1099 int MachCallDynamicJavaNode::ret_addr_offset() {
 1100   // Offset is 4 with postalloc expanded calls (bl is one instruction). We use
 1101   // postalloc expanded calls if we use inline caches and do not update method data.
 1102   if (UseInlineCaches) return 4;
 1103 
 1104   int vtable_index = this->_vtable_index;
 1105   if (vtable_index < 0) {
 1106     // Must be invalid_vtable_index, not nonvirtual_vtable_index.
 1107     assert(vtable_index == Method::invalid_vtable_index, "correct sentinel value");
 1108     return 12;
 1109   } else {
 1110     return 24 + MacroAssembler::instr_size_for_decode_klass_not_null();
 1111   }
 1112 }
 1113 
 1114 int MachCallRuntimeNode::ret_addr_offset() {
 1115   if (rule() == CallRuntimeDirect_rule) {
 1116     // CallRuntimeDirectNode uses call_c.
 1117 #if defined(ABI_ELFv2)
 1118     return 28;
 1119 #else
 1120     return 40;
 1121 #endif
 1122   }
 1123   assert(rule() == CallLeafDirect_rule, "unexpected node with rule %u", rule());
 1124   // CallLeafDirectNode uses bl.
 1125   return 4;
 1126 }
 1127 
 1128 int MachCallNativeNode::ret_addr_offset() {
 1129   Unimplemented();
 1130   return -1;
 1131 }
 1132 
 1133 //=============================================================================
 1134 
 1135 // condition code conversions
 1136 
// Compute the BO field value: branch on the selected CR bit being clear
// or set, selected by bit 3 of the condition code.
static int cc_to_boint(int cc) {
  return Assembler::bcondCRbiIs0 | (cc & 8);
}

// Same as above, but with the branch sense inverted.
static int cc_to_inverse_boint(int cc) {
  return Assembler::bcondCRbiIs0 | (8-(cc & 8));
}

// Build the BI field: 4 * CR field number + bit index within the field
// (the low two bits of cc select lt/gt/eq/so).
static int cc_to_biint(int cc, int flags_reg) {
  return (flags_reg << 2) | (cc & 3);
}
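
// Worked example (illustrative): a test for "equal" on CCR0 uses CR bit 2
// within field 0, so cc_to_biint(2, 0) == 2; the same test on CCR6 gives
// (6 << 2) | 2 == 26.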
 1148 
 1149 //=============================================================================
 1150 
 1151 // Compute padding required for nodes which need alignment. The padding
 1152 // is the number of bytes (not instructions) which will be inserted before
 1153 // the instruction. The padding must match the size of a NOP instruction.
 1154 
 1155 // Add nop if a prefixed (two-word) instruction is going to cross a 64-byte boundary.
 1156 // (See Section 1.6 of Power ISA Version 3.1)
 1157 static int compute_prefix_padding(int current_offset) {
 1158   assert(PowerArchitecturePPC64 >= 10 && (CodeEntryAlignment & 63) == 0,
 1159          "Code buffer must be aligned to a multiple of 64 bytes");
 1160   if (is_aligned(current_offset + BytesPerInstWord, 64)) {
 1161     return BytesPerInstWord;
 1162   }
 1163   return 0;
 1164 }
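
// Worked example (illustrative): at current_offset == 60 the two words of a
// prefixed instruction would occupy bytes 60..67 and straddle the 64-byte
// boundary, so one 4-byte nop is inserted first; at offset 56 the instruction
// fits entirely below the boundary and no padding is needed.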
 1165 
 1166 int loadConI32Node::compute_padding(int current_offset) const {
 1167   return compute_prefix_padding(current_offset);
 1168 }
 1169 
 1170 int loadConL34Node::compute_padding(int current_offset) const {
 1171   return compute_prefix_padding(current_offset);
 1172 }
 1173 
 1174 int addI_reg_imm32Node::compute_padding(int current_offset) const {
 1175   return compute_prefix_padding(current_offset);
 1176 }
 1177 
 1178 int addL_reg_imm34Node::compute_padding(int current_offset) const {
 1179   return compute_prefix_padding(current_offset);
 1180 }
 1181 
 1182 int addP_reg_imm34Node::compute_padding(int current_offset) const {
 1183   return compute_prefix_padding(current_offset);
 1184 }
 1185 
 1186 int cmprb_Whitespace_reg_reg_prefixedNode::compute_padding(int current_offset) const {
 1187   return compute_prefix_padding(current_offset);
 1188 }
 1189 
 1190 
 1191 //=============================================================================
 1192 
 1193 // Emit an interrupt that is caught by the debugger (for debugging compiler).
 1194 void emit_break(CodeBuffer &cbuf) {
 1195   C2_MacroAssembler _masm(&cbuf);
 1196   __ illtrap();
 1197 }
 1198 
 1199 #ifndef PRODUCT
 1200 void MachBreakpointNode::format(PhaseRegAlloc *ra_, outputStream *st) const {
 1201   st->print("BREAKPOINT");
 1202 }
 1203 #endif
 1204 
 1205 void MachBreakpointNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
 1206   emit_break(cbuf);
 1207 }
 1208 
 1209 uint MachBreakpointNode::size(PhaseRegAlloc *ra_) const {
 1210   return MachNode::size(ra_);
 1211 }
 1212 
 1213 //=============================================================================
 1214 
 1215 void emit_nop(CodeBuffer &cbuf) {
 1216   C2_MacroAssembler _masm(&cbuf);
 1217   __ nop();
 1218 }
 1219 
 1220 static inline void emit_long(CodeBuffer &cbuf, int value) {
 1221   *((int*)(cbuf.insts_end())) = value;
 1222   cbuf.set_insts_end(cbuf.insts_end() + BytesPerInstWord);
 1223 }
 1224 
 1225 //=============================================================================
 1226 
 1227 %} // interrupt source
 1228 
 1229 source_hpp %{ // Header information of the source block.
 1230 
 1231 //--------------------------------------------------------------
 1232 //---<  Used for optimization in Compile::Shorten_branches  >---
 1233 //--------------------------------------------------------------
 1234 
 1235 class C2_MacroAssembler;
 1236 
 1237 class CallStubImpl {
 1238 
 1239  public:
 1240 
 1241   // Emit call stub, compiled java to interpreter.
 1242   static void emit_trampoline_stub(C2_MacroAssembler &_masm, int destination_toc_offset, int insts_call_instruction_offset);
 1243 
 1244   // Size of call trampoline stub.
 1245   // This doesn't need to be accurate to the byte, but it
 1246   // must be larger than or equal to the real size of the stub.
 1247   static uint size_call_trampoline() {
 1248     return MacroAssembler::trampoline_stub_size;
 1249   }
 1250 
 1251   // number of relocations needed by a call trampoline stub
 1252   static uint reloc_call_trampoline() {
 1253     return 5;
 1254   }
 1255 
 1256 };
 1257 
 1258 %} // end source_hpp
 1259 
 1260 source %{
 1261 
 1262 // Emit a trampoline stub for a call to a target which is too far away.
 1263 //
 1264 // code sequences:
 1265 //
 1266 // call-site:
 1267 //   branch-and-link to <destination> or <trampoline stub>
 1268 //
 1269 // Related trampoline stub for this call-site in the stub section:
 1270 //   load the call target from the constant pool
 1271 //   branch via CTR (LR/link still points to the call-site above)
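//
// Illustrative shape of such a stub (a sketch, not the exact emitted code):
//   ld    R12, <destination_toc_offset>(R29_TOC)  // load target from constant pool
//   mtctr R12
//   bctr                                          // LR still holds the return address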
 1272 
 1273 void CallStubImpl::emit_trampoline_stub(C2_MacroAssembler &_masm, int destination_toc_offset, int insts_call_instruction_offset) {
 1274   address stub = __ emit_trampoline_stub(destination_toc_offset, insts_call_instruction_offset);
 1275   if (stub == NULL) {
 1276     ciEnv::current()->record_out_of_memory_failure();
 1277   }
 1278 }
 1279 
 1280 //=============================================================================
 1281 
 1282 // Emit an inline branch-and-link call and a related trampoline stub.
 1283 //
 1284 // code sequences:
 1285 //
 1286 // call-site:
 1287 //   branch-and-link to <destination> or <trampoline stub>
 1288 //
 1289 // Related trampoline stub for this call-site in the stub section:
 1290 //   load the call target from the constant pool
 1291 //   branch via CTR (LR/link still points to the call-site above)
 1292 //
 1293 
 1294 typedef struct {
 1295   int insts_call_instruction_offset;
 1296   int ret_addr_offset;
 1297 } EmitCallOffsets;
 1298 
 1299 // Emit a branch-and-link instruction that branches to a trampoline.
 1300 // - Remember the offset of the branch-and-link instruction.
 1301 // - Add a relocation at the branch-and-link instruction.
 1302 // - Emit a branch-and-link.
 1303 // - Remember the return pc offset.
 1304 EmitCallOffsets emit_call_with_trampoline_stub(C2_MacroAssembler &_masm, address entry_point, relocInfo::relocType rtype) {
 1305   EmitCallOffsets offsets = { -1, -1 };
 1306   const int start_offset = __ offset();
 1307   offsets.insts_call_instruction_offset = __ offset();
 1308 
 1309   // No entry point given, use the current pc.
 1310   if (entry_point == NULL) entry_point = __ pc();
 1311 
 1312   // Put the entry point as a constant into the constant pool.
 1313   const address entry_point_toc_addr   = __ address_constant(entry_point, RelocationHolder::none);
 1314   if (entry_point_toc_addr == NULL) {
 1315     ciEnv::current()->record_out_of_memory_failure();
 1316     return offsets;
 1317   }
 1318   const int     entry_point_toc_offset = __ offset_to_method_toc(entry_point_toc_addr);
 1319 
 1320   // Emit the trampoline stub which will be related to the branch-and-link below.
 1321   CallStubImpl::emit_trampoline_stub(_masm, entry_point_toc_offset, offsets.insts_call_instruction_offset);
 1322   if (ciEnv::current()->failing()) { return offsets; } // Code cache may be full.
 1323   __ relocate(rtype);
 1324 
 1325   // Note: At this point we do not have the address of the trampoline
 1326   // stub, and the entry point might be too far away for bl, so __ pc()
 1327   // serves as dummy and the bl will be patched later.
 1328   __ bl((address) __ pc());
 1329 
 1330   offsets.ret_addr_offset = __ offset() - start_offset;
 1331 
 1332   return offsets;
 1333 }
 1334 
 1335 //=============================================================================
 1336 
 1337 // Factory for creating loadConL* nodes for large/small constant pool.
 1338 
 1339 static inline jlong replicate_immF(float con) {
 1340   // Replicate float con 2 times and pack into vector.
 1341   int val = *((int*)&con);
 1342   jlong lval = val;
 1343   lval = (lval << 32) | (lval & 0xFFFFFFFFl);
 1344   return lval;
 1345 }
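
// Worked example (illustrative): replicate_immF(1.0f) takes the IEEE-754 bit
// pattern 0x3F800000 and returns 0x3F8000003F800000.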
 1346 
 1347 //=============================================================================
 1348 
 1349 const RegMask& MachConstantBaseNode::_out_RegMask = BITS64_CONSTANT_TABLE_BASE_mask();
 1350 int ConstantTable::calculate_table_base_offset() const {
 1351   return 0;  // absolute addressing, no offset
 1352 }
 1353 
 1354 bool MachConstantBaseNode::requires_postalloc_expand() const { return true; }
 1355 void MachConstantBaseNode::postalloc_expand(GrowableArray <Node *> *nodes, PhaseRegAlloc *ra_) {
 1356   iRegPdstOper *op_dst = new iRegPdstOper();
 1357   MachNode *m1 = new loadToc_hiNode();
 1358   MachNode *m2 = new loadToc_loNode();
 1359 
 1360   m1->add_req(NULL);
 1361   m2->add_req(NULL, m1);
 1362   m1->_opnds[0] = op_dst;
 1363   m2->_opnds[0] = op_dst;
 1364   m2->_opnds[1] = op_dst;
 1365   ra_->set_pair(m1->_idx, ra_->get_reg_second(this), ra_->get_reg_first(this));
 1366   ra_->set_pair(m2->_idx, ra_->get_reg_second(this), ra_->get_reg_first(this));
 1367   nodes->push(m1);
 1368   nodes->push(m2);
 1369 }
 1370 
 1371 void MachConstantBaseNode::emit(CodeBuffer& cbuf, PhaseRegAlloc* ra_) const {
 1372   // Is postalloc expanded.
 1373   ShouldNotReachHere();
 1374 }
 1375 
 1376 uint MachConstantBaseNode::size(PhaseRegAlloc* ra_) const {
 1377   return 0;
 1378 }
 1379 
 1380 #ifndef PRODUCT
 1381 void MachConstantBaseNode::format(PhaseRegAlloc* ra_, outputStream* st) const {
 1382   st->print("-- \t// MachConstantBaseNode (empty encoding)");
 1383 }
 1384 #endif
 1385 
 1386 //=============================================================================
 1387 
 1388 #ifndef PRODUCT
 1389 void MachPrologNode::format(PhaseRegAlloc *ra_, outputStream *st) const {
 1390   Compile* C = ra_->C;
 1391   const long framesize = C->output()->frame_slots() << LogBytesPerInt;
 1392 
 1393   st->print("PROLOG\n\t");
 1394   if (C->output()->need_stack_bang(framesize)) {
 1395     st->print("stack_overflow_check\n\t");
 1396   }
 1397 
 1398   if (!false /* TODO: PPC port C->is_frameless_method()*/) {
 1399     st->print("save return pc\n\t");
 1400     st->print("push frame %ld\n\t", -framesize);
 1401   }
 1402 
 1403   if (C->stub_function() == NULL) {
 1404     st->print("nmethod entry barrier\n\t");
 1405   }
 1406 }
 1407 #endif
 1408 
 1409 void MachPrologNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
 1410   Compile* C = ra_->C;
 1411   C2_MacroAssembler _masm(&cbuf);
 1412 
 1413   const long framesize = C->output()->frame_size_in_bytes();
 1414   assert(framesize % (2 * wordSize) == 0, "must preserve 2*wordSize alignment");
 1415 
 1416   const bool method_is_frameless      = false /* TODO: PPC port C->is_frameless_method()*/;
 1417 
 1418   const Register return_pc            = R20; // Must match return_addr() in frame section.
 1419   const Register callers_sp           = R21;
 1420   const Register push_frame_temp      = R22;
 1421   const Register toc_temp             = R23;
 1422   assert_different_registers(R11, return_pc, callers_sp, push_frame_temp, toc_temp);
 1423 
 1424   if (method_is_frameless) {
 1425     // Add nop at beginning of all frameless methods to prevent any
 1426     // oop instructions from getting overwritten by make_not_entrant
 1427     // (patching attempt would fail).
 1428     __ nop();
 1429   } else {
 1430     // Get return pc.
 1431     __ mflr(return_pc);
 1432   }
 1433 
 1434   if (C->clinit_barrier_on_entry()) {
 1435     assert(!C->method()->holder()->is_not_initialized(), "initialization should have been started");
 1436 
 1437     Label L_skip_barrier;
 1438     Register klass = toc_temp;
 1439 
 1440     // Notify OOP recorder (don't need the relocation)
 1441     AddressLiteral md = __ constant_metadata_address(C->method()->holder()->constant_encoding());
 1442     __ load_const_optimized(klass, md.value(), R0);
 1443     __ clinit_barrier(klass, R16_thread, &L_skip_barrier /*L_fast_path*/);
 1444 
 1445     __ load_const_optimized(klass, SharedRuntime::get_handle_wrong_method_stub(), R0);
 1446     __ mtctr(klass);
 1447     __ bctr();
 1448 
 1449     __ bind(L_skip_barrier);
 1450   }
 1451 
 1452   // Calls to C2R adapters often do not accept exceptional returns.
 1453   // We require that their callers must bang for them. But be
 1454   // careful, because some VM calls (such as call site linkage) can
 1455   // use several kilobytes of stack. But the stack safety zone should
 1456   // account for that. See bugs 4446381, 4468289, 4497237.
 1457 
 1458   int bangsize = C->output()->bang_size_in_bytes();
 1459   assert(bangsize >= framesize || bangsize <= 0, "stack bang size incorrect");
 1460   if (C->output()->need_stack_bang(bangsize)) {
 1461     // Unfortunately we cannot use the function provided in
 1462     // assembler.cpp as we have to emulate the pipes. So I had to
 1463     // insert the code of generate_stack_overflow_check(), see
 1464     // assembler.cpp for some illuminative comments.
 1465     const int page_size = os::vm_page_size();
 1466     int bang_end = StackOverflow::stack_shadow_zone_size();
 1467 
 1468     // This is how far the previous frame's stack banging extended.
 1469     const int bang_end_safe = bang_end;
 1470 
 1471     if (bangsize > page_size) {
 1472       bang_end += bangsize;
 1473     }
 1474 
 1475     int bang_offset = bang_end_safe;
 1476 
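    // Illustrative walk (example values, not the real zone sizes): with a
    // 4 KB page and a 64 KB shadow zone, a 12 KB bang size is covered at
    // offsets 64 KB, 68 KB, 72 KB and 76 KB below SP before the frame is pushed.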
 1477     while (bang_offset <= bang_end) {
 1478       // Need at least one stack bang at end of shadow zone.
 1479 
 1480       // Again I had to copy code, this time from assembler_ppc.cpp,
 1481       // bang_stack_with_offset - see there for comments.
 1482 
 1483       // Stack grows down, caller passes positive offset.
 1484       assert(bang_offset > 0, "must bang with positive offset");
 1485 
 1486       long stdoffset = -bang_offset;
 1487 
 1488       if (Assembler::is_simm(stdoffset, 16)) {
 1489         // Signed 16 bit offset, a simple std is ok.
 1490         if (UseLoadInstructionsForStackBangingPPC64) {
 1491           __ ld(R0,  (int)(signed short)stdoffset, R1_SP);
 1492         } else {
 1493           __ std(R0, (int)(signed short)stdoffset, R1_SP);
 1494         }
 1495       } else if (Assembler::is_simm(stdoffset, 31)) {
 1496         // Use largeoffset calculations for addis & ld/std.
 1497         const int hi = MacroAssembler::largeoffset_si16_si16_hi(stdoffset);
 1498         const int lo = MacroAssembler::largeoffset_si16_si16_lo(stdoffset);
 1499 
 1500         Register tmp = R11;
 1501         __ addis(tmp, R1_SP, hi);
 1502         if (UseLoadInstructionsForStackBangingPPC64) {
 1503           __ ld(R0, lo, tmp);
 1504         } else {
 1505           __ std(R0, lo, tmp);
 1506         }
 1507       } else {
 1508         ShouldNotReachHere();
 1509       }
 1510 
 1511       bang_offset += page_size;
 1512     }
 1513     // R11 trashed
 1514 } // C->output()->need_stack_bang(bangsize)
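
        // Illustrative sketch (example numbers only, not emitted verbatim):
        // assuming a 4K page size and a 16K shadow zone, a bang size that is
        // <= page_size results in a single probe such as
        //   std R0, -16384(R1_SP)   (or ld, with UseLoadInstructionsForStackBangingPPC64)
        // while e.g. a 24K bang size probes every page from -16K down to -40K
        // in 4K steps, switching to the addis + std/ld form (trashing R11)
        // once the offset no longer fits a signed 16-bit displacement.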
 1515 
 1516   unsigned int bytes = (unsigned int)framesize;
 1517   long offset = Assembler::align_addr(bytes, frame::alignment_in_bytes);
 1518   ciMethod *currMethod = C->method();
 1519 
 1520   if (!method_is_frameless) {
 1521     // Get callers sp.
 1522     __ mr(callers_sp, R1_SP);
 1523 
 1524     // Push method's frame, modifies SP.
 1525     assert(Assembler::is_uimm(framesize, 32U), "wrong type");
 1526     // The ABI is already accounted for in 'framesize' via the
 1527     // 'out_preserve' area.
 1528     Register tmp = push_frame_temp;
 1529     // Had to insert code of push_frame((unsigned int)framesize, push_frame_temp).
 1530     if (Assembler::is_simm(-offset, 16)) {
 1531       __ stdu(R1_SP, -offset, R1_SP);
 1532     } else {
 1533       long x = -offset;
 1534       // Had to insert load_const(tmp, -offset).
 1535       __ lis( tmp, (int)((signed short)(((x >> 32) & 0xffff0000) >> 16)));
 1536       __ ori( tmp, tmp, ((x >> 32) & 0x0000ffff));
 1537       __ sldi(tmp, tmp, 32);
 1538       __ oris(tmp, tmp, (x & 0xffff0000) >> 16);
 1539       __ ori( tmp, tmp, (x & 0x0000ffff));
 1540 
 1541       __ stdux(R1_SP, R1_SP, tmp);
 1542     }
 1543   }
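
        // Illustrative sketch (informational, not emitted verbatim): when the
        // aligned frame size fits a signed 16-bit immediate, the push above is
        // the single instruction
        //   stdu R1_SP, -framesize(R1_SP)
        // otherwise -framesize is first materialized in push_frame_temp with
        // the lis/ori/sldi/oris/ori sequence and the frame is pushed with
        //   stdux R1_SP, R1_SP, push_frame_temp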
 1544 #if 0 // TODO: PPC port
 1545   // For testing large constant pools, emit a lot of constants to constant pool.
 1546   // "Randomize" const_size.
 1547   if (ConstantsALot) {
 1548     const int num_consts = const_size();
 1549     for (int i = 0; i < num_consts; i++) {
 1550       __ long_constant(0xB0B5B00BBABE);
 1551     }
 1552   }
 1553 #endif
 1554   if (!method_is_frameless) {
 1555     // Save return pc.
 1556     __ std(return_pc, _abi0(lr), callers_sp);
 1557   }
 1558 
 1559   if (C->stub_function() == NULL) {
 1560     BarrierSetAssembler* bs = BarrierSet::barrier_set()->barrier_set_assembler();
 1561     bs->nmethod_entry_barrier(&_masm, push_frame_temp);
 1562   }
 1563 
 1564   C->output()->set_frame_complete(cbuf.insts_size());
 1565 }
 1566 
 1567 uint MachPrologNode::size(PhaseRegAlloc *ra_) const {
 1568   // Variable size. Determine dynamically.
 1569   return MachNode::size(ra_);
 1570 }
 1571 
 1572 int MachPrologNode::reloc() const {
 1573   // Return number of relocatable values contained in this instruction.
 1574   return 1; // 1 reloc entry for load_const(toc).
 1575 }
 1576 
 1577 //=============================================================================
 1578 
 1579 #ifndef PRODUCT
 1580 void MachEpilogNode::format(PhaseRegAlloc *ra_, outputStream *st) const {
 1581   Compile* C = ra_->C;
 1582 
 1583   st->print("EPILOG\n\t");
 1584   st->print("restore return pc\n\t");
 1585   st->print("pop frame\n\t");
 1586 
 1587   if (do_polling() && C->is_method_compilation()) {
 1588     st->print("safepoint poll\n\t");
 1589   }
 1590 }
 1591 #endif
 1592 
 1593 void MachEpilogNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
 1594   Compile* C = ra_->C;
 1595   C2_MacroAssembler _masm(&cbuf);
 1596 
 1597   const long framesize = ((long)C->output()->frame_slots()) << LogBytesPerInt;
 1598   assert(framesize >= 0, "negative frame-size?");
 1599 
 1600   const bool method_needs_polling = do_polling() && C->is_method_compilation();
 1601   const bool method_is_frameless  = false /* TODO: PPC port C->is_frameless_method()*/;
 1602   const Register return_pc        = R31;  // Must survive C-call to enable_stack_reserved_zone().
 1603   const Register temp             = R12;
 1604 
 1605   if (!method_is_frameless) {
 1606     // Restore return pc relative to callers' sp.
 1607     __ ld(return_pc, ((int)framesize) + _abi0(lr), R1_SP);
 1608     // Move return pc to LR.
 1609     __ mtlr(return_pc);
 1610     // Pop frame (fixed frame-size).
 1611     __ addi(R1_SP, R1_SP, (int)framesize);
 1612   }
 1613 
 1614   if (StackReservedPages > 0 && C->has_reserved_stack_access()) {
 1615     __ reserved_stack_check(return_pc);
 1616   }
 1617 
 1618   if (method_needs_polling) {
 1619     Label dummy_label;
 1620     Label* code_stub = &dummy_label;
 1621     if (!UseSIGTRAP && !C->output()->in_scratch_emit_size()) {
 1622       code_stub = &C->output()->safepoint_poll_table()->add_safepoint(__ offset());
 1623       __ relocate(relocInfo::poll_return_type);
 1624     }
 1625     __ safepoint_poll(*code_stub, temp, true /* at_return */, true /* in_nmethod */);
 1626   }
 1627 }
 1628 
 1629 uint MachEpilogNode::size(PhaseRegAlloc *ra_) const {
 1630   // Variable size. Determine dynamically.
 1631   return MachNode::size(ra_);
 1632 }
 1633 
 1634 int MachEpilogNode::reloc() const {
 1635   // Return number of relocatable values contained in this instruction.
 1636   return 1; // 1 for load_from_polling_page.
 1637 }
 1638 
 1639 const Pipeline * MachEpilogNode::pipeline() const {
 1640   return MachNode::pipeline_class();
 1641 }
 1642 
 1643 // =============================================================================
 1644 
 1645 // Figure out which register class each belongs in: rc_int, rc_float, rc_vs or
 1646 // rc_stack.
 1647 enum RC { rc_bad, rc_int, rc_float, rc_vs, rc_stack };
 1648 
 1649 static enum RC rc_class(OptoReg::Name reg) {
 1650   // Return the register class for the given register. The given register
 1651   // reg is a <register>_num value, which is an index into the MachRegisterNumbers
 1652   // enumeration in adGlobals_ppc.hpp.
 1653 
 1654   if (reg == OptoReg::Bad) return rc_bad;
 1655 
 1656   // We have 64 integer register halves, starting at index 0.
 1657   if (reg < 64) return rc_int;
 1658 
 1659   // We have 64 floating-point register halves, starting at index 64.
 1660   if (reg < 64+64) return rc_float;
 1661 
 1662   // We have 64 vector-scalar registers, starting at index 128.
 1663   if (reg < 64+64+64) return rc_vs;
 1664 
 1665   // Between the vector-scalar regs & stack are the flags regs.
 1666   assert(OptoReg::is_stack(reg) || reg < 64+64+64, "blow up if spilling flags");
 1667 
 1668   return rc_stack;
 1669 }
 1670 
 1671 static int ld_st_helper(CodeBuffer *cbuf, const char *op_str, uint opcode, int reg, int offset,
 1672                         bool do_print, Compile* C, outputStream *st) {
 1673 
 1674   assert(opcode == Assembler::LD_OPCODE   ||
 1675          opcode == Assembler::STD_OPCODE  ||
 1676          opcode == Assembler::LWZ_OPCODE  ||
 1677          opcode == Assembler::STW_OPCODE  ||
 1678          opcode == Assembler::LFD_OPCODE  ||
 1679          opcode == Assembler::STFD_OPCODE ||
 1680          opcode == Assembler::LFS_OPCODE  ||
 1681          opcode == Assembler::STFS_OPCODE,
 1682          "opcode not supported");
 1683 
 1684   if (cbuf) {
 1685     int d =
 1686       (Assembler::LD_OPCODE == opcode || Assembler::STD_OPCODE == opcode) ?
 1687         Assembler::ds(offset+0 /* TODO: PPC port C->frame_slots_sp_bias_in_bytes()*/)
 1688       : Assembler::d1(offset+0 /* TODO: PPC port C->frame_slots_sp_bias_in_bytes()*/); // Makes no difference in opt build.
 1689     emit_long(*cbuf, opcode | Assembler::rt(Matcher::_regEncode[reg]) | d | Assembler::ra(R1_SP));
 1690   }
 1691 #ifndef PRODUCT
 1692   else if (do_print) {
 1693     st->print("%-7s %s, [R1_SP + #%d+%d] \t// spill copy",
 1694               op_str,
 1695               Matcher::regName[reg],
 1696               offset, 0 /* TODO: PPC port C->frame_slots_sp_bias_in_bytes()*/);
 1697   }
 1698 #endif
 1699   return 4; // size
 1700 }
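
      // For example (informational), a call like
      //   ld_st_helper(cbuf, "LD  ", Assembler::LD_OPCODE, R0_num, src_offset, !do_size, C, st)
      // either emits or, in the formatting case, prints the single instruction
      //   ld R0, src_offset(R1_SP)
      // and always reports a size of 4 bytes.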
 1701 
 1702 uint MachSpillCopyNode::implementation(CodeBuffer *cbuf, PhaseRegAlloc *ra_, bool do_size, outputStream *st) const {
 1703   Compile* C = ra_->C;
 1704 
 1705   // Get registers to move.
 1706   OptoReg::Name src_hi = ra_->get_reg_second(in(1));
 1707   OptoReg::Name src_lo = ra_->get_reg_first(in(1));
 1708   OptoReg::Name dst_hi = ra_->get_reg_second(this);
 1709   OptoReg::Name dst_lo = ra_->get_reg_first(this);
 1710 
 1711   enum RC src_hi_rc = rc_class(src_hi);
 1712   enum RC src_lo_rc = rc_class(src_lo);
 1713   enum RC dst_hi_rc = rc_class(dst_hi);
 1714   enum RC dst_lo_rc = rc_class(dst_lo);
 1715 
 1716   assert(src_lo != OptoReg::Bad && dst_lo != OptoReg::Bad, "must move at least 1 register");
 1717   if (src_hi != OptoReg::Bad)
 1718     assert((src_lo&1)==0 && src_lo+1==src_hi &&
 1719            (dst_lo&1)==0 && dst_lo+1==dst_hi,
 1720            "expected aligned-adjacent pairs");
 1721   // Generate spill code!
 1722   int size = 0;
 1723 
 1724   if (src_lo == dst_lo && src_hi == dst_hi)
 1725     return size;            // Self copy, no move.
 1726 
 1727   if (bottom_type()->isa_vect() != NULL && ideal_reg() == Op_VecX) {
 1728     // Memory->Memory Spill.
 1729     if (src_lo_rc == rc_stack && dst_lo_rc == rc_stack) {
 1730       int src_offset = ra_->reg2offset(src_lo);
 1731       int dst_offset = ra_->reg2offset(dst_lo);
 1732       if (cbuf) {
 1733         C2_MacroAssembler _masm(cbuf);
 1734         __ ld(R0, src_offset, R1_SP);
 1735         __ std(R0, dst_offset, R1_SP);
 1736         __ ld(R0, src_offset+8, R1_SP);
 1737         __ std(R0, dst_offset+8, R1_SP);
 1738       }
 1739       size += 16;
 1740     }
 1741     // VectorSRegister->Memory Spill.
 1742     else if (src_lo_rc == rc_vs && dst_lo_rc == rc_stack) {
 1743       VectorSRegister Rsrc = as_VectorSRegister(Matcher::_regEncode[src_lo]);
 1744       int dst_offset = ra_->reg2offset(dst_lo);
 1745       if (cbuf) {
 1746         C2_MacroAssembler _masm(cbuf);
 1747         __ addi(R0, R1_SP, dst_offset);
 1748         __ stxvd2x(Rsrc, R0);
 1749       }
 1750       size += 8;
 1751     }
 1752     // Memory->VectorSRegister Spill.
 1753     else if (src_lo_rc == rc_stack && dst_lo_rc == rc_vs) {
 1754       VectorSRegister Rdst = as_VectorSRegister(Matcher::_regEncode[dst_lo]);
 1755       int src_offset = ra_->reg2offset(src_lo);
 1756       if (cbuf) {
 1757         C2_MacroAssembler _masm(cbuf);
 1758         __ addi(R0, R1_SP, src_offset);
 1759         __ lxvd2x(Rdst, R0);
 1760       }
 1761       size += 8;
 1762     }
 1763     // VectorSRegister->VectorSRegister.
 1764     else if (src_lo_rc == rc_vs && dst_lo_rc == rc_vs) {
 1765       VectorSRegister Rsrc = as_VectorSRegister(Matcher::_regEncode[src_lo]);
 1766       VectorSRegister Rdst = as_VectorSRegister(Matcher::_regEncode[dst_lo]);
 1767       if (cbuf) {
 1768         C2_MacroAssembler _masm(cbuf);
 1769         __ xxlor(Rdst, Rsrc, Rsrc);
 1770       }
 1771       size += 4;
 1772     }
 1773     else {
 1774       ShouldNotReachHere(); // No VSR spill.
 1775     }
 1776     return size;
 1777   }
 1778 
 1779   // --------------------------------------
 1780   // Memory->Memory Spill. Use R0 to hold the value.
 1781   if (src_lo_rc == rc_stack && dst_lo_rc == rc_stack) {
 1782     int src_offset = ra_->reg2offset(src_lo);
 1783     int dst_offset = ra_->reg2offset(dst_lo);
 1784     if (src_hi != OptoReg::Bad) {
 1785       assert(src_hi_rc==rc_stack && dst_hi_rc==rc_stack,
 1786              "expected same type of move for high parts");
 1787       size += ld_st_helper(cbuf, "LD  ", Assembler::LD_OPCODE,  R0_num, src_offset, !do_size, C, st);
 1788       if (!cbuf && !do_size) st->print("\n\t");
 1789       size += ld_st_helper(cbuf, "STD ", Assembler::STD_OPCODE, R0_num, dst_offset, !do_size, C, st);
 1790     } else {
 1791       size += ld_st_helper(cbuf, "LWZ ", Assembler::LWZ_OPCODE, R0_num, src_offset, !do_size, C, st);
 1792       if (!cbuf && !do_size) st->print("\n\t");
 1793       size += ld_st_helper(cbuf, "STW ", Assembler::STW_OPCODE, R0_num, dst_offset, !do_size, C, st);
 1794     }
 1795     return size;
 1796   }
 1797 
 1798   // --------------------------------------
 1799   // Check for float->int copy; requires a trip through memory.
 1800   if (src_lo_rc == rc_float && dst_lo_rc == rc_int) {
 1801     Unimplemented();
 1802   }
 1803 
 1804   // --------------------------------------
 1805   // Check for integer reg-reg copy.
 1806   if (src_lo_rc == rc_int && dst_lo_rc == rc_int) {
 1807       Register Rsrc = as_Register(Matcher::_regEncode[src_lo]);
 1808       Register Rdst = as_Register(Matcher::_regEncode[dst_lo]);
 1809       size = (Rsrc != Rdst) ? 4 : 0;
 1810 
 1811       if (cbuf) {
 1812         C2_MacroAssembler _masm(cbuf);
 1813         if (size) {
 1814           __ mr(Rdst, Rsrc);
 1815         }
 1816       }
 1817 #ifndef PRODUCT
 1818       else if (!do_size) {
 1819         if (size) {
 1820           st->print("%-7s %s, %s \t// spill copy", "MR", Matcher::regName[dst_lo], Matcher::regName[src_lo]);
 1821         } else {
 1822           st->print("%-7s %s, %s \t// spill copy", "MR-NOP", Matcher::regName[dst_lo], Matcher::regName[src_lo]);
 1823         }
 1824       }
 1825 #endif
 1826       return size;
 1827   }
 1828 
 1829   // Check for integer store.
 1830   if (src_lo_rc == rc_int && dst_lo_rc == rc_stack) {
 1831     int dst_offset = ra_->reg2offset(dst_lo);
 1832     if (src_hi != OptoReg::Bad) {
 1833       assert(src_hi_rc==rc_int && dst_hi_rc==rc_stack,
 1834              "expected same type of move for high parts");
 1835       size += ld_st_helper(cbuf, "STD ", Assembler::STD_OPCODE, src_lo, dst_offset, !do_size, C, st);
 1836     } else {
 1837       size += ld_st_helper(cbuf, "STW ", Assembler::STW_OPCODE, src_lo, dst_offset, !do_size, C, st);
 1838     }
 1839     return size;
 1840   }
 1841 
 1842   // Check for integer load.
 1843   if (dst_lo_rc == rc_int && src_lo_rc == rc_stack) {
 1844     int src_offset = ra_->reg2offset(src_lo);
 1845     if (src_hi != OptoReg::Bad) {
 1846       assert(dst_hi_rc==rc_int && src_hi_rc==rc_stack,
 1847              "expected same type of move for high parts");
 1848       size += ld_st_helper(cbuf, "LD  ", Assembler::LD_OPCODE, dst_lo, src_offset, !do_size, C, st);
 1849     } else {
 1850       size += ld_st_helper(cbuf, "LWZ ", Assembler::LWZ_OPCODE, dst_lo, src_offset, !do_size, C, st);
 1851     }
 1852     return size;
 1853   }
 1854 
 1855   // Check for float reg-reg copy.
 1856   if (src_lo_rc == rc_float && dst_lo_rc == rc_float) {
 1857     if (cbuf) {
 1858       C2_MacroAssembler _masm(cbuf);
 1859       FloatRegister Rsrc = as_FloatRegister(Matcher::_regEncode[src_lo]);
 1860       FloatRegister Rdst = as_FloatRegister(Matcher::_regEncode[dst_lo]);
 1861       __ fmr(Rdst, Rsrc);
 1862     }
 1863 #ifndef PRODUCT
 1864     else if (!do_size) {
 1865       st->print("%-7s %s, %s \t// spill copy", "FMR", Matcher::regName[dst_lo], Matcher::regName[src_lo]);
 1866     }
 1867 #endif
 1868     return 4;
 1869   }
 1870 
 1871   // Check for float store.
 1872   if (src_lo_rc == rc_float && dst_lo_rc == rc_stack) {
 1873     int dst_offset = ra_->reg2offset(dst_lo);
 1874     if (src_hi != OptoReg::Bad) {
 1875       assert(src_hi_rc==rc_float && dst_hi_rc==rc_stack,
 1876              "expected same type of move for high parts");
 1877       size += ld_st_helper(cbuf, "STFD", Assembler::STFD_OPCODE, src_lo, dst_offset, !do_size, C, st);
 1878     } else {
 1879       size += ld_st_helper(cbuf, "STFS", Assembler::STFS_OPCODE, src_lo, dst_offset, !do_size, C, st);
 1880     }
 1881     return size;
 1882   }
 1883 
 1884   // Check for float load.
 1885   if (dst_lo_rc == rc_float && src_lo_rc == rc_stack) {
 1886     int src_offset = ra_->reg2offset(src_lo);
 1887     if (src_hi != OptoReg::Bad) {
 1888       assert(dst_hi_rc==rc_float && src_hi_rc==rc_stack,
 1889              "expected same type of move for high parts");
 1890       size += ld_st_helper(cbuf, "LFD ", Assembler::LFD_OPCODE, dst_lo, src_offset, !do_size, C, st);
 1891     } else {
 1892       size += ld_st_helper(cbuf, "LFS ", Assembler::LFS_OPCODE, dst_lo, src_offset, !do_size, C, st);
 1893     }
 1894     return size;
 1895   }
 1896 
 1897   // --------------------------------------------------------------------
 1898   // Check for hi bits still needing moving. Only happens for misaligned
 1899   // arguments to native calls.
 1900   if (src_hi == dst_hi)
 1901     return size;               // Self copy; no move.
 1902 
 1903   assert(src_hi_rc != rc_bad && dst_hi_rc != rc_bad, "src_hi & dst_hi cannot be Bad");
 1904   ShouldNotReachHere(); // Unimplemented
 1905   return 0;
 1906 }
 1907 
 1908 #ifndef PRODUCT
 1909 void MachSpillCopyNode::format(PhaseRegAlloc *ra_, outputStream *st) const {
 1910   if (!ra_)
 1911     st->print("N%d = SpillCopy(N%d)", _idx, in(1)->_idx);
 1912   else
 1913     implementation(NULL, ra_, false, st);
 1914 }
 1915 #endif
 1916 
 1917 void MachSpillCopyNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
 1918   implementation(&cbuf, ra_, false, NULL);
 1919 }
 1920 
 1921 uint MachSpillCopyNode::size(PhaseRegAlloc *ra_) const {
 1922   return implementation(NULL, ra_, true, NULL);
 1923 }
 1924 
 1925 #ifndef PRODUCT
 1926 void MachNopNode::format(PhaseRegAlloc *ra_, outputStream *st) const {
 1927   st->print("NOP \t// %d nops to pad for loops or prefixed instructions.", _count);
 1928 }
 1929 #endif
 1930 
 1931 void MachNopNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *) const {
 1932   C2_MacroAssembler _masm(&cbuf);
 1933   // _count contains the number of nops needed for padding.
 1934   for (int i = 0; i < _count; i++) {
 1935     __ nop();
 1936   }
 1937 }
 1938 
 1939 uint MachNopNode::size(PhaseRegAlloc *ra_) const {
 1940   return _count * 4;
 1941 }
 1942 
 1943 #ifndef PRODUCT
 1944 void BoxLockNode::format(PhaseRegAlloc *ra_, outputStream *st) const {
 1945   int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
 1946   char reg_str[128];
 1947   ra_->dump_register(this, reg_str);
 1948   st->print("ADDI    %s, SP, %d \t// box node", reg_str, offset);
 1949 }
 1950 #endif
 1951 
 1952 void BoxLockNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
 1953   C2_MacroAssembler _masm(&cbuf);
 1954 
 1955   int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
 1956   int reg    = ra_->get_encode(this);
 1957 
 1958   if (Assembler::is_simm(offset, 16)) {
 1959     __ addi(as_Register(reg), R1, offset);
 1960   } else {
 1961     ShouldNotReachHere();
 1962   }
 1963 }
 1964 
 1965 uint BoxLockNode::size(PhaseRegAlloc *ra_) const {
 1966   // BoxLockNode is not a MachNode, so we can't just call MachNode::size(ra_).
 1967   return 4;
 1968 }
 1969 
 1970 #ifndef PRODUCT
 1971 void MachUEPNode::format(PhaseRegAlloc *ra_, outputStream *st) const {
 1972   st->print_cr("---- MachUEPNode ----");
 1973   st->print_cr("...");
 1974 }
 1975 #endif
 1976 
 1977 void MachUEPNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
 1978   // This is the unverified entry point.
 1979   C2_MacroAssembler _masm(&cbuf);
 1980 
 1981   // Inline_cache contains a klass.
 1982   Register ic_klass       = as_Register(Matcher::inline_cache_reg_encode());
 1983   Register receiver_klass = R12_scratch2;  // tmp
 1984 
 1985   assert_different_registers(ic_klass, receiver_klass, R11_scratch1, R3_ARG1);
 1986   assert(R11_scratch1 == R11, "need prologue scratch register");
 1987 
 1988   // Check for NULL argument if we don't have implicit null checks.
 1989   if (!ImplicitNullChecks || !os::zero_page_read_protected()) {
 1990     if (TrapBasedNullChecks) {
 1991       __ trap_null_check(R3_ARG1);
 1992     } else {
 1993       Label valid;
 1994       __ cmpdi(CCR0, R3_ARG1, 0);
 1995       __ bne_predict_taken(CCR0, valid);
 1996       // We have a null argument, branch to ic_miss_stub.
 1997       __ b64_patchable((address)SharedRuntime::get_ic_miss_stub(),
 1998                            relocInfo::runtime_call_type);
 1999       __ bind(valid);
 2000     }
 2001   }
 2002   // Assume argument is not NULL, load klass from receiver.
 2003   __ load_klass(receiver_klass, R3_ARG1);
 2004 
 2005   if (TrapBasedICMissChecks) {
 2006     __ trap_ic_miss_check(receiver_klass, ic_klass);
 2007   } else {
 2008     Label valid;
 2009     __ cmpd(CCR0, receiver_klass, ic_klass);
 2010     __ beq_predict_taken(CCR0, valid);
 2011     // We have an unexpected klass, branch to ic_miss_stub.
 2012     __ b64_patchable((address)SharedRuntime::get_ic_miss_stub(),
 2013                          relocInfo::runtime_call_type);
 2014     __ bind(valid);
 2015   }
 2016 
 2017   // Argument is valid and klass is as expected, continue.
 2018 }
 2019 
 2020 uint MachUEPNode::size(PhaseRegAlloc *ra_) const {
 2021   // Variable size. Determine dynamically.
 2022   return MachNode::size(ra_);
 2023 }
 2024 
 2025 //=============================================================================
 2026 
 2027 %} // Interrupt the source block here; it is continued below.
 2028 
 2029 source_hpp %{ // Header information of the source block.
 2030 
 2031 class HandlerImpl {
 2032 
 2033  public:
 2034 
 2035   static int emit_exception_handler(CodeBuffer &cbuf);
 2036   static int emit_deopt_handler(CodeBuffer& cbuf);
 2037 
 2038   static uint size_exception_handler() {
 2039     // The exception_handler is a b64_patchable.
 2040     return MacroAssembler::b64_patchable_size;
 2041   }
 2042 
 2043   static uint size_deopt_handler() {
 2044     // The deopt_handler is a bl64_patchable.
 2045     return MacroAssembler::bl64_patchable_size;
 2046   }
 2047 
 2048 };
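
      // Note (informational): these sizes must match the stubs emitted by
      // HandlerImpl::emit_exception_handler / emit_deopt_handler in the source
      // block below: a single b64_patchable and a single bl64_patchable,
      // respectively (both emitters assert this).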
 2049 
 2050 class Node::PD {
 2051 public:
 2052   enum NodeFlags {
 2053     _last_flag = Node::_last_flag
 2054   };
 2055 };
 2056 
 2057 %} // end source_hpp
 2058 
 2059 source %{
 2060 
 2061 int HandlerImpl::emit_exception_handler(CodeBuffer &cbuf) {
 2062   C2_MacroAssembler _masm(&cbuf);
 2063 
 2064   address base = __ start_a_stub(size_exception_handler());
 2065   if (base == NULL) return 0; // CodeBuffer::expand failed
 2066 
 2067   int offset = __ offset();
 2068   __ b64_patchable((address)OptoRuntime::exception_blob()->content_begin(),
 2069                        relocInfo::runtime_call_type);
 2070   assert(__ offset() - offset == (int)size_exception_handler(), "must be fixed size");
 2071   __ end_a_stub();
 2072 
 2073   return offset;
 2074 }
 2075 
 2076 // The deopt_handler is like the exception handler, but it calls to
 2077 // the deoptimization blob instead of jumping to the exception blob.
 2078 int HandlerImpl::emit_deopt_handler(CodeBuffer& cbuf) {
 2079   C2_MacroAssembler _masm(&cbuf);
 2080 
 2081   address base = __ start_a_stub(size_deopt_handler());
 2082   if (base == NULL) return 0; // CodeBuffer::expand failed
 2083 
 2084   int offset = __ offset();
 2085   __ bl64_patchable((address)SharedRuntime::deopt_blob()->unpack(),
 2086                         relocInfo::runtime_call_type);
 2087   assert(__ offset() - offset == (int) size_deopt_handler(), "must be fixed size");
 2088   __ end_a_stub();
 2089 
 2090   return offset;
 2091 }
 2092 
 2093 //=============================================================================
 2094 
 2095 // Use a frame slots bias for frameless methods if accessing the stack.
 2096 static int frame_slots_bias(int reg_enc, PhaseRegAlloc* ra_) {
 2097   if (as_Register(reg_enc) == R1_SP) {
 2098     return 0; // TODO: PPC port ra_->C->frame_slots_sp_bias_in_bytes();
 2099   }
 2100   return 0;
 2101 }
 2102 
 2103 const bool Matcher::match_rule_supported(int opcode) {
 2104   if (!has_match_rule(opcode)) {
 2105     return false; // no match rule present
 2106   }
 2107 
 2108   switch (opcode) {
 2109     case Op_SqrtD:
 2110       return VM_Version::has_fsqrt();
 2111     case Op_RoundDoubleMode:
 2112       return VM_Version::has_vsx();
 2113     case Op_CountLeadingZerosI:
 2114     case Op_CountLeadingZerosL:
 2115       return UseCountLeadingZerosInstructionsPPC64;
 2116     case Op_CountTrailingZerosI:
 2117     case Op_CountTrailingZerosL:
 2118       return (UseCountLeadingZerosInstructionsPPC64 || UseCountTrailingZerosInstructionsPPC64);
 2119     case Op_PopCountI:
 2120     case Op_PopCountL:
 2121       return (UsePopCountInstruction && VM_Version::has_popcntw());
 2122 
 2123     case Op_AddVB:
 2124     case Op_AddVS:
 2125     case Op_AddVI:
 2126     case Op_AddVF:
 2127     case Op_AddVD:
 2128     case Op_SubVB:
 2129     case Op_SubVS:
 2130     case Op_SubVI:
 2131     case Op_SubVF:
 2132     case Op_SubVD:
 2133     case Op_MulVS:
 2134     case Op_MulVF:
 2135     case Op_MulVD:
 2136     case Op_DivVF:
 2137     case Op_DivVD:
 2138     case Op_AbsVF:
 2139     case Op_AbsVD:
 2140     case Op_NegVF:
 2141     case Op_NegVD:
 2142     case Op_SqrtVF:
 2143     case Op_SqrtVD:
 2144     case Op_AddVL:
 2145     case Op_SubVL:
 2146     case Op_MulVI:
 2147     case Op_RoundDoubleModeV:
 2148       return SuperwordUseVSX;
 2149     case Op_PopCountVI:
 2150       return (SuperwordUseVSX && UsePopCountInstruction);
 2151     case Op_FmaVF:
 2152     case Op_FmaVD:
 2153       return (SuperwordUseVSX && UseFMA);
 2154 
 2155     case Op_Digit:
 2156       return vmIntrinsics::is_intrinsic_available(vmIntrinsics::_isDigit);
 2157     case Op_LowerCase:
 2158       return vmIntrinsics::is_intrinsic_available(vmIntrinsics::_isLowerCase);
 2159     case Op_UpperCase:
 2160       return vmIntrinsics::is_intrinsic_available(vmIntrinsics::_isUpperCase);
 2161     case Op_Whitespace:
 2162       return vmIntrinsics::is_intrinsic_available(vmIntrinsics::_isWhitespace);
 2163 
 2164     case Op_CacheWB:
 2165     case Op_CacheWBPreSync:
 2166     case Op_CacheWBPostSync:
 2167       return VM_Version::supports_data_cache_line_flush();
 2168   }
 2169 
 2170   return true; // By default, match rules are supported.
 2171 }
 2172 
 2173 const bool Matcher::match_rule_supported_vector(int opcode, int vlen, BasicType bt) {
 2174   if (!match_rule_supported(opcode) || !vector_size_supported(bt, vlen)) {
 2175     return false;
 2176   }
 2177   return true; // By default, match rules are supported.
 2178 }
 2179 
 2180 const RegMask* Matcher::predicate_reg_mask(void) {
 2181   return NULL;
 2182 }
 2183 
 2184 const TypeVect* Matcher::predicate_reg_type(const Type* elemTy, int length) {
 2185   return NULL;
 2186 }
 2187 
 2188 // Vector calling convention not yet implemented.
 2189 const bool Matcher::supports_vector_calling_convention(void) {
 2190   return false;
 2191 }
 2192 
 2193 OptoRegPair Matcher::vector_return_value(uint ideal_reg) {
 2194   Unimplemented();
 2195   return OptoRegPair(0, 0);
 2196 }
 2197 
 2198 // Vector width in bytes.
 2199 const int Matcher::vector_width_in_bytes(BasicType bt) {
 2200   if (SuperwordUseVSX) {
 2201     assert(MaxVectorSize == 16, "");
 2202     return 16;
 2203   } else {
 2204     assert(MaxVectorSize == 8, "");
 2205     return 8;
 2206   }
 2207 }
 2208 
 2209 // Vector ideal reg.
 2210 const uint Matcher::vector_ideal_reg(int size) {
 2211   if (SuperwordUseVSX) {
 2212     assert(MaxVectorSize == 16 && size == 16, "");
 2213     return Op_VecX;
 2214   } else {
 2215     assert(MaxVectorSize == 8 && size == 8, "");
 2216     return Op_RegL;
 2217   }
 2218 }
 2219 
 2220 // Limits on vector size (number of elements) loaded into vector.
 2221 const int Matcher::max_vector_size(const BasicType bt) {
 2222   assert(is_java_primitive(bt), "only primitive type vectors");
 2223   return vector_width_in_bytes(bt)/type2aelembytes(bt);
 2224 }
 2225 
 2226 const int Matcher::min_vector_size(const BasicType bt) {
 2227   return max_vector_size(bt); // Same as max.
 2228 }
 2229 
 2230 const int Matcher::scalable_vector_reg_size(const BasicType bt) {
 2231   return -1;
 2232 }
 2233 
 2234 // RETURNS: whether this branch offset is short enough that a short
 2235 // branch can be used.
 2236 //
 2237 // If the platform does not provide any short branch variants, then
 2238 // this method should return `false' for offset 0.
 2239 //
 2240 // `Compile::Fill_buffer' will decide on the basis of this information
 2241 // whether to do the pass `Compile::Shorten_branches' at all.
 2242 //
 2243 // And `Compile::Shorten_branches' will decide on the basis of this
 2244 // information whether to replace particular branch sites by short
 2245 // ones.
 2246 bool Matcher::is_short_branch_offset(int rule, int br_size, int offset) {
 2247   // Is the offset within the range of a ppc64 pc relative branch?
 2248   bool b;
 2249 
 2250   const int safety_zone = 3 * BytesPerInstWord;
 2251   b = Assembler::is_simm((offset<0 ? offset-safety_zone : offset+safety_zone),
 2252                          29 - 16 + 1 + 2);
 2253   return b;
 2254 }
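
      // Worked out (informational): a conditional branch (bc) encodes a 14-bit
      // BD field in bits 16..29, shifted left by 2, i.e. a signed 16-bit byte
      // displacement; the expression 29 - 16 + 1 + 2 above is exactly those
      // 16 bits. Adding a safety zone of 3 instruction words keeps the check
      // conservative.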
 2255 
 2256 /* TODO: PPC port
 2257 // Make a new machine dependent decode node (with its operands).
 2258 MachTypeNode *Matcher::make_decode_node() {
 2259   assert(CompressedOops::base() == NULL && CompressedOops::shift() == 0,
 2260          "This method is only implemented for unscaled cOops mode so far");
 2261   MachTypeNode *decode = new decodeN_unscaledNode();
 2262   decode->set_opnd_array(0, new iRegPdstOper());
 2263   decode->set_opnd_array(1, new iRegNsrcOper());
 2264   return decode;
 2265 }
 2266 */
 2267 
 2268 MachOper* Matcher::pd_specialize_generic_vector_operand(MachOper* original_opnd, uint ideal_reg, bool is_temp) {
 2269   ShouldNotReachHere(); // generic vector operands not supported
 2270   return NULL;
 2271 }
 2272 
 2273 bool Matcher::is_reg2reg_move(MachNode* m) {
 2274   ShouldNotReachHere();  // generic vector operands not supported
 2275   return false;
 2276 }
 2277 
 2278 bool Matcher::is_generic_vector(MachOper* opnd)  {
 2279   ShouldNotReachHere();  // generic vector operands not supported
 2280   return false;
 2281 }
 2282 
 2283 // Constants for c2c and c calling conventions.
 2284 
 2285 const MachRegisterNumbers iarg_reg[8] = {
 2286   R3_num, R4_num, R5_num, R6_num,
 2287   R7_num, R8_num, R9_num, R10_num
 2288 };
 2289 
 2290 const MachRegisterNumbers farg_reg[13] = {
 2291   F1_num, F2_num, F3_num, F4_num,
 2292   F5_num, F6_num, F7_num, F8_num,
 2293   F9_num, F10_num, F11_num, F12_num,
 2294   F13_num
 2295 };
 2296 
 2297 const MachRegisterNumbers vsarg_reg[64] = {
 2298   VSR0_num, VSR1_num, VSR2_num, VSR3_num,
 2299   VSR4_num, VSR5_num, VSR6_num, VSR7_num,
 2300   VSR8_num, VSR9_num, VSR10_num, VSR11_num,
 2301   VSR12_num, VSR13_num, VSR14_num, VSR15_num,
 2302   VSR16_num, VSR17_num, VSR18_num, VSR19_num,
 2303   VSR20_num, VSR21_num, VSR22_num, VSR23_num,
 2304   VSR24_num, VSR25_num, VSR26_num, VSR27_num,
 2305   VSR28_num, VSR29_num, VSR30_num, VSR31_num,
 2306   VSR32_num, VSR33_num, VSR34_num, VSR35_num,
 2307   VSR36_num, VSR37_num, VSR38_num, VSR39_num,
 2308   VSR40_num, VSR41_num, VSR42_num, VSR43_num,
 2309   VSR44_num, VSR45_num, VSR46_num, VSR47_num,
 2310   VSR48_num, VSR49_num, VSR50_num, VSR51_num,
 2311   VSR52_num, VSR53_num, VSR54_num, VSR55_num,
 2312   VSR56_num, VSR57_num, VSR58_num, VSR59_num,
 2313   VSR60_num, VSR61_num, VSR62_num, VSR63_num
 2314 };
 2315 
 2316 const int num_iarg_registers = sizeof(iarg_reg) / sizeof(iarg_reg[0]);
 2317 
 2318 const int num_farg_registers = sizeof(farg_reg) / sizeof(farg_reg[0]);
 2319 
 2320 const int num_vsarg_registers = sizeof(vsarg_reg) / sizeof(vsarg_reg[0]);
 2321 
 2322 // Return whether or not this register is ever used as an argument. This
 2323 // function is used on startup to build the trampoline stubs in generateOptoStub.
 2324 // Registers not mentioned will be killed by the VM call in the trampoline, and
 2325 // arguments in those registers will not be available to the callee.
 2326 bool Matcher::can_be_java_arg(int reg) {
 2327   // We return true for all registers contained in iarg_reg[] and
 2328   // farg_reg[] and their virtual halves.
 2329   // We must include the virtual halves in order to get STDs and LDs
 2330   // instead of STWs and LWs in the trampoline stubs.
 2331 
 2332   if (   reg == R3_num  || reg == R3_H_num
 2333       || reg == R4_num  || reg == R4_H_num
 2334       || reg == R5_num  || reg == R5_H_num
 2335       || reg == R6_num  || reg == R6_H_num
 2336       || reg == R7_num  || reg == R7_H_num
 2337       || reg == R8_num  || reg == R8_H_num
 2338       || reg == R9_num  || reg == R9_H_num
 2339       || reg == R10_num || reg == R10_H_num)
 2340     return true;
 2341 
 2342   if (   reg == F1_num  || reg == F1_H_num
 2343       || reg == F2_num  || reg == F2_H_num
 2344       || reg == F3_num  || reg == F3_H_num
 2345       || reg == F4_num  || reg == F4_H_num
 2346       || reg == F5_num  || reg == F5_H_num
 2347       || reg == F6_num  || reg == F6_H_num
 2348       || reg == F7_num  || reg == F7_H_num
 2349       || reg == F8_num  || reg == F8_H_num
 2350       || reg == F9_num  || reg == F9_H_num
 2351       || reg == F10_num || reg == F10_H_num
 2352       || reg == F11_num || reg == F11_H_num
 2353       || reg == F12_num || reg == F12_H_num
 2354       || reg == F13_num || reg == F13_H_num)
 2355     return true;
 2356 
 2357   return false;
 2358 }
 2359 
 2360 bool Matcher::is_spillable_arg(int reg) {
 2361   return can_be_java_arg(reg);
 2362 }
 2363 
 2364 uint Matcher::int_pressure_limit()
 2365 {
 2366   return (INTPRESSURE == -1) ? 26 : INTPRESSURE;
 2367 }
 2368 
 2369 uint Matcher::float_pressure_limit()
 2370 {
 2371   return (FLOATPRESSURE == -1) ? 28 : FLOATPRESSURE;
 2372 }
 2373 
 2374 bool Matcher::use_asm_for_ldiv_by_con(jlong divisor) {
 2375   return false;
 2376 }
 2377 
 2378 // Register for DIVI projection of divmodI.
 2379 RegMask Matcher::divI_proj_mask() {
 2380   ShouldNotReachHere();
 2381   return RegMask();
 2382 }
 2383 
 2384 // Register for MODI projection of divmodI.
 2385 RegMask Matcher::modI_proj_mask() {
 2386   ShouldNotReachHere();
 2387   return RegMask();
 2388 }
 2389 
 2390 // Register for DIVL projection of divmodL.
 2391 RegMask Matcher::divL_proj_mask() {
 2392   ShouldNotReachHere();
 2393   return RegMask();
 2394 }
 2395 
 2396 // Register for MODL projection of divmodL.
 2397 RegMask Matcher::modL_proj_mask() {
 2398   ShouldNotReachHere();
 2399   return RegMask();
 2400 }
 2401 
 2402 const RegMask Matcher::method_handle_invoke_SP_save_mask() {
 2403   return RegMask();
 2404 }
 2405 
 2406 %}
 2407 
 2408 //----------ENCODING BLOCK-----------------------------------------------------
 2409 // This block specifies the encoding classes used by the compiler to output
 2410 // byte streams. Encoding classes are parameterized macros used by
 2411 // Machine Instruction Nodes in order to generate the bit encoding of the
 2412 // instruction. Operands specify their base encoding interface with the
 2413 // interface keyword. Four interfaces are currently supported:
 2414 // REG_INTER, CONST_INTER, MEMORY_INTER, & COND_INTER. REG_INTER causes an
 2415 // operand to generate a function which returns its register number when
 2416 // queried. CONST_INTER causes an operand to generate a function which
 2417 // returns the value of the constant when queried. MEMORY_INTER causes an
 2418 // operand to generate four functions which return the Base Register, the
 2419 // Index Register, the Scale Value, and the Offset Value of the operand when
 2420 // queried. COND_INTER causes an operand to generate six functions which
 2421 // return the encoding code (i.e. the encoding bits for the instruction)
 2422 // associated with each basic boolean condition for a conditional instruction.
 2423 //
 2424 // Instructions specify two basic values for encoding. Again, a function
 2425 // is available to check if the constant displacement is an oop. They use the
 2426 // ins_encode keyword to specify their encoding classes (which must be
 2427 // a sequence of enc_class names, and their parameters, specified in
 2428 // the encoding block), and they use the
 2429 // opcode keyword to specify, in order, their primary, secondary, and
 2430 // tertiary opcode. Only the opcode sections which a particular instruction
 2431 // needs for encoding need to be specified.
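      //
      // For example (illustrative only), an instruct definition refers to one
      // of the enc_classes below via something like
      //   ins_encode( enc_lwz(dst, mem) );
      // and the operands are then available as $dst / $mem inside the
      // enc_class body.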
 2432 encode %{
 2433   enc_class enc_unimplemented %{
 2434     C2_MacroAssembler _masm(&cbuf);
 2435     __ unimplemented("Unimplemented mach node encoding in AD file.", 13);
 2436   %}
 2437 
 2438   enc_class enc_untested %{
 2439 #ifdef ASSERT
 2440     C2_MacroAssembler _masm(&cbuf);
 2441     __ untested("Untested mach node encoding in AD file.");
 2442 #else
 2443 #endif
 2444   %}
 2445 
 2446   enc_class enc_lbz(iRegIdst dst, memory mem) %{
 2447     C2_MacroAssembler _masm(&cbuf);
 2448     int Idisp = $mem$$disp + frame_slots_bias($mem$$base, ra_);
 2449     __ lbz($dst$$Register, Idisp, $mem$$base$$Register);
 2450   %}
 2451 
 2452   // Load acquire.
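        // (Informational) The twi_0 + isync pair after the load acts as an
        // inexpensive acquire barrier on PPC: the never-taken trap adds a
        // control dependency on the loaded value and isync keeps subsequent
        // accesses from being performed early.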
 2453   enc_class enc_lbz_ac(iRegIdst dst, memory mem) %{
 2454     C2_MacroAssembler _masm(&cbuf);
 2455     int Idisp = $mem$$disp + frame_slots_bias($mem$$base, ra_);
 2456     __ lbz($dst$$Register, Idisp, $mem$$base$$Register);
 2457     __ twi_0($dst$$Register);
 2458     __ isync();
 2459   %}
 2460 
 2461   enc_class enc_lhz(iRegIdst dst, memory mem) %{
 2462 
 2463     C2_MacroAssembler _masm(&cbuf);
 2464     int Idisp = $mem$$disp + frame_slots_bias($mem$$base, ra_);
 2465     __ lhz($dst$$Register, Idisp, $mem$$base$$Register);
 2466   %}
 2467 
 2468   // Load acquire.
 2469   enc_class enc_lhz_ac(iRegIdst dst, memory mem) %{
 2470 
 2471     C2_MacroAssembler _masm(&cbuf);
 2472     int Idisp = $mem$$disp + frame_slots_bias($mem$$base, ra_);
 2473     __ lhz($dst$$Register, Idisp, $mem$$base$$Register);
 2474     __ twi_0($dst$$Register);
 2475     __ isync();
 2476   %}
 2477 
 2478   enc_class enc_lwz(iRegIdst dst, memory mem) %{
 2479 
 2480     C2_MacroAssembler _masm(&cbuf);
 2481     int Idisp = $mem$$disp + frame_slots_bias($mem$$base, ra_);
 2482     __ lwz($dst$$Register, Idisp, $mem$$base$$Register);
 2483   %}
 2484 
 2485   // Load acquire.
 2486   enc_class enc_lwz_ac(iRegIdst dst, memory mem) %{
 2487 
 2488     C2_MacroAssembler _masm(&cbuf);
 2489     int Idisp = $mem$$disp + frame_slots_bias($mem$$base, ra_);
 2490     __ lwz($dst$$Register, Idisp, $mem$$base$$Register);
 2491     __ twi_0($dst$$Register);
 2492     __ isync();
 2493   %}
 2494 
 2495   enc_class enc_ld(iRegLdst dst, memoryAlg4 mem) %{
 2496     C2_MacroAssembler _masm(&cbuf);
 2497     int Idisp = $mem$$disp + frame_slots_bias($mem$$base, ra_);
 2498     // Operand 'ds' requires 4-alignment.
 2499     assert((Idisp & 0x3) == 0, "unaligned offset");
 2500     __ ld($dst$$Register, Idisp, $mem$$base$$Register);
 2501   %}
 2502 
 2503   // Load acquire.
 2504   enc_class enc_ld_ac(iRegLdst dst, memoryAlg4 mem) %{
 2505     C2_MacroAssembler _masm(&cbuf);
 2506     int Idisp = $mem$$disp + frame_slots_bias($mem$$base, ra_);
 2507     // Operand 'ds' requires 4-alignment.
 2508     assert((Idisp & 0x3) == 0, "unaligned offset");
 2509     __ ld($dst$$Register, Idisp, $mem$$base$$Register);
 2510     __ twi_0($dst$$Register);
 2511     __ isync();
 2512   %}
 2513 
 2514   enc_class enc_lfd(RegF dst, memory mem) %{
 2515     C2_MacroAssembler _masm(&cbuf);
 2516     int Idisp = $mem$$disp + frame_slots_bias($mem$$base, ra_);
 2517     __ lfd($dst$$FloatRegister, Idisp, $mem$$base$$Register);
 2518   %}
 2519 
 2520   enc_class enc_load_long_constL(iRegLdst dst, immL src, iRegLdst toc) %{
 2521 
 2522     C2_MacroAssembler _masm(&cbuf);
 2523     int toc_offset = 0;
 2524 
 2525     address const_toc_addr;
 2526     // Create a non-oop constant, no relocation needed.
 2527     // If it is an IC, it has a virtual_call_Relocation.
 2528     const_toc_addr = __ long_constant((jlong)$src$$constant);
 2529     if (const_toc_addr == NULL) {
 2530       ciEnv::current()->record_out_of_memory_failure();
 2531       return;
 2532     }
 2533 
 2534     // Get the constant's TOC offset.
 2535     toc_offset = __ offset_to_method_toc(const_toc_addr);
 2536 
 2537     // Keep the current instruction offset in mind.
 2538     ((loadConLNode*)this)->_cbuf_insts_offset = __ offset();
 2539 
 2540     __ ld($dst$$Register, toc_offset, $toc$$Register);
 2541   %}
 2542 
 2543   enc_class enc_load_long_constL_hi(iRegLdst dst, iRegLdst toc, immL src) %{
 2544 
 2545     C2_MacroAssembler _masm(&cbuf);
 2546 
 2547     if (!ra_->C->output()->in_scratch_emit_size()) {
 2548       address const_toc_addr;
 2549       // Create a non-oop constant, no relocation needed.
 2550       // If it is an IC, it has a virtual_call_Relocation.
 2551       const_toc_addr = __ long_constant((jlong)$src$$constant);
 2552       if (const_toc_addr == NULL) {
 2553         ciEnv::current()->record_out_of_memory_failure();
 2554         return;
 2555       }
 2556 
 2557       // Get the constant's TOC offset.
 2558       const int toc_offset = __ offset_to_method_toc(const_toc_addr);
 2559       // Store the toc offset of the constant.
 2560       ((loadConL_hiNode*)this)->_const_toc_offset = toc_offset;
 2561 
 2562       // Also keep the current instruction offset in mind.
 2563       ((loadConL_hiNode*)this)->_cbuf_insts_offset = __ offset();
 2564     }
 2565 
 2566     __ addis($dst$$Register, $toc$$Register, MacroAssembler::largeoffset_si16_si16_hi(_const_toc_offset));
 2567   %}
 2568 
 2569 %} // encode
 2570 
 2571 source %{
 2572 
 2573 typedef struct {
 2574   loadConL_hiNode *_large_hi;
 2575   loadConL_loNode *_large_lo;
 2576   loadConLNode    *_small;
 2577   MachNode        *_last;
 2578 } loadConLNodesTuple;
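
      // Informational sketch of what loadConLNodesTuple_create builds: with a
      // large constant pool it creates an addis-from-TOC node (loadConL_hi)
      // followed by an ld node (loadConL_lo); otherwise a single toc-relative
      // ld node (loadConL). The tuple hands the created nodes back to the
      // postalloc-expand emitters below.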
 2579 
 2580 loadConLNodesTuple loadConLNodesTuple_create(PhaseRegAlloc *ra_, Node *toc, immLOper *immSrc,
 2581                                              OptoReg::Name reg_second, OptoReg::Name reg_first) {
 2582   loadConLNodesTuple nodes;
 2583 
 2584   const bool large_constant_pool = true; // TODO: PPC port C->cfg()->_consts_size > 4000;
 2585   if (large_constant_pool) {
 2586     // Create new nodes.
 2587     loadConL_hiNode *m1 = new loadConL_hiNode();
 2588     loadConL_loNode *m2 = new loadConL_loNode();
 2589 
 2590     // inputs for new nodes
 2591     m1->add_req(NULL, toc);
 2592     m2->add_req(NULL, m1);
 2593 
 2594     // operands for new nodes
 2595     m1->_opnds[0] = new iRegLdstOper(); // dst
 2596     m1->_opnds[1] = immSrc;             // src
 2597     m1->_opnds[2] = new iRegPdstOper(); // toc
 2598     m2->_opnds[0] = new iRegLdstOper(); // dst
 2599     m2->_opnds[1] = immSrc;             // src
 2600     m2->_opnds[2] = new iRegLdstOper(); // base
 2601 
 2602     // Initialize ins_attrib TOC fields.
 2603     m1->_const_toc_offset = -1;
 2604     m2->_const_toc_offset_hi_node = m1;
 2605 
 2606     // Initialize ins_attrib instruction offset.
 2607     m1->_cbuf_insts_offset = -1;
 2608 
 2609     // register allocation for new nodes
 2610     ra_->set_pair(m1->_idx, reg_second, reg_first);
 2611     ra_->set_pair(m2->_idx, reg_second, reg_first);
 2612 
 2613     // Create result.
 2614     nodes._large_hi = m1;
 2615     nodes._large_lo = m2;
 2616     nodes._small = NULL;
 2617     nodes._last = nodes._large_lo;
 2618     assert(m2->bottom_type()->isa_long(), "must be long");
 2619   } else {
 2620     loadConLNode *m2 = new loadConLNode();
 2621 
 2622     // inputs for new nodes
 2623     m2->add_req(NULL, toc);
 2624 
 2625     // operands for new nodes
 2626     m2->_opnds[0] = new iRegLdstOper(); // dst
 2627     m2->_opnds[1] = immSrc;             // src
 2628     m2->_opnds[2] = new iRegPdstOper(); // toc
 2629 
 2630     // Initialize ins_attrib instruction offset.
 2631     m2->_cbuf_insts_offset = -1;
 2632 
 2633     // register allocation for new nodes
 2634     ra_->set_pair(m2->_idx, reg_second, reg_first);
 2635 
 2636     // Create result.
 2637     nodes._large_hi = NULL;
 2638     nodes._large_lo = NULL;
 2639     nodes._small = m2;
 2640     nodes._last = nodes._small;
 2641     assert(m2->bottom_type()->isa_long(), "must be long");
 2642   }
 2643 
 2644   return nodes;
 2645 }
 2646 
 2647 typedef struct {
 2648   loadConL_hiNode *_large_hi;
 2649   loadConL_loNode *_large_lo;
 2650   mtvsrdNode      *_moved;
 2651   xxspltdNode     *_replicated;
 2652   loadConLNode    *_small;
 2653   MachNode        *_last;
 2654 } loadConLReplicatedNodesTuple;
 2655 
 2656 loadConLReplicatedNodesTuple loadConLReplicatedNodesTuple_create(Compile *C, PhaseRegAlloc *ra_, Node *toc, immLOper *immSrc,
 2657                                                  vecXOper *dst, immI_0Oper *zero,
 2658                                                  OptoReg::Name reg_second, OptoReg::Name reg_first,
 2659                                                  OptoReg::Name reg_vec_second, OptoReg::Name reg_vec_first) {
 2660   loadConLReplicatedNodesTuple nodes;
 2661 
 2662   const bool large_constant_pool = true; // TODO: PPC port C->cfg()->_consts_size > 4000;
 2663   if (large_constant_pool) {
 2664     // Create new nodes.
 2665     loadConL_hiNode *m1 = new  loadConL_hiNode();
 2666     loadConL_loNode *m2 = new  loadConL_loNode();
 2667     mtvsrdNode *m3 = new  mtvsrdNode();
 2668     xxspltdNode *m4 = new  xxspltdNode();
 2669 
 2670     // inputs for new nodes
 2671     m1->add_req(NULL, toc);
 2672     m2->add_req(NULL, m1);
 2673     m3->add_req(NULL, m2);
 2674     m4->add_req(NULL, m3);
 2675 
 2676     // operands for new nodes
 2677     m1->_opnds[0] = new  iRegLdstOper(); // dst
 2678     m1->_opnds[1] = immSrc;              // src
 2679     m1->_opnds[2] = new  iRegPdstOper(); // toc
 2680 
 2681     m2->_opnds[0] = new  iRegLdstOper(); // dst
 2682     m2->_opnds[1] = immSrc;              // src
 2683     m2->_opnds[2] = new  iRegLdstOper(); // base
 2684 
 2685     m3->_opnds[0] = new  vecXOper();     // dst
 2686     m3->_opnds[1] = new  iRegLdstOper(); // src
 2687 
 2688     m4->_opnds[0] = new  vecXOper();     // dst
 2689     m4->_opnds[1] = new  vecXOper();     // src
 2690     m4->_opnds[2] = zero;
 2691 
 2692     // Initialize ins_attrib TOC fields.
 2693     m1->_const_toc_offset = -1;
 2694     m2->_const_toc_offset_hi_node = m1;
 2695 
 2696     // Initialize ins_attrib instruction offset.
 2697     m1->_cbuf_insts_offset = -1;
 2698 
 2699     // register allocation for new nodes
 2700     ra_->set_pair(m1->_idx, reg_second, reg_first);
 2701     ra_->set_pair(m2->_idx, reg_second, reg_first);
 2702     ra_->set1(m3->_idx, reg_second);
 2703     ra_->set2(m3->_idx, reg_vec_first);
 2704     ra_->set_pair(m4->_idx, reg_vec_second, reg_vec_first);
 2705 
 2706     // Create result.
 2707     nodes._large_hi = m1;
 2708     nodes._large_lo = m2;
 2709     nodes._moved = m3;
 2710     nodes._replicated = m4;
 2711     nodes._small = NULL;
 2712     nodes._last = nodes._replicated;
 2713     assert(m2->bottom_type()->isa_long(), "must be long");
 2714   } else {
 2715     loadConLNode *m2 = new  loadConLNode();
 2716     mtvsrdNode *m3 = new  mtvsrdNode();
 2717     xxspltdNode *m4 = new  xxspltdNode();
 2718 
 2719     // inputs for new nodes
 2720     m2->add_req(NULL, toc);
 2721 
 2722     // operands for new nodes
 2723     m2->_opnds[0] = new  iRegLdstOper(); // dst
 2724     m2->_opnds[1] = immSrc;              // src
 2725     m2->_opnds[2] = new  iRegPdstOper(); // toc
 2726 
 2727     m3->_opnds[0] = new  vecXOper();     // dst
 2728     m3->_opnds[1] = new  iRegLdstOper(); // src
 2729 
 2730     m4->_opnds[0] = new  vecXOper();     // dst
 2731     m4->_opnds[1] = new  vecXOper();     // src
 2732     m4->_opnds[2] = zero;
 2733 
 2734     // Initialize ins_attrib instruction offset.
 2735     m2->_cbuf_insts_offset = -1;
 2736     ra_->set1(m3->_idx, reg_second);
 2737     ra_->set2(m3->_idx, reg_vec_first);
 2738     ra_->set_pair(m4->_idx, reg_vec_second, reg_vec_first);
 2739 
 2740     // register allocation for new nodes
 2741     ra_->set_pair(m2->_idx, reg_second, reg_first);
 2742 
 2743     // Create result.
 2744     nodes._large_hi = NULL;
 2745     nodes._large_lo = NULL;
 2746     nodes._small = m2;
 2747     nodes._moved = m3;
 2748     nodes._replicated = m4;
 2749     nodes._last = nodes._replicated;
 2750     assert(m2->bottom_type()->isa_long(), "must be long");
 2751   }
 2752 
 2753   return nodes;
 2754 }
 2755 
 2756 %} // source
 2757 
 2758 encode %{
 2759   // Postalloc expand emitter for loading a long constant from the method's TOC.
 2760   // An enc_class is needed because constanttablebase is not supported by
 2761   // postalloc expand.
 2762   enc_class postalloc_expand_load_long_constant(iRegLdst dst, immL src, iRegLdst toc) %{
 2763     // Create new nodes.
 2764     loadConLNodesTuple loadConLNodes =
 2765       loadConLNodesTuple_create(ra_, n_toc, op_src,
 2766                                 ra_->get_reg_second(this), ra_->get_reg_first(this));
 2767 
 2768     // Push new nodes.
 2769     if (loadConLNodes._large_hi) nodes->push(loadConLNodes._large_hi);
 2770     if (loadConLNodes._last)     nodes->push(loadConLNodes._last);
 2771 
 2772     // some asserts
 2773     assert(nodes->length() >= 1, "must have created at least 1 node");
 2774     assert(loadConLNodes._last->bottom_type()->isa_long(), "must be long");
 2775   %}
 2776 
 2777   enc_class enc_load_long_constP(iRegLdst dst, immP src, iRegLdst toc) %{
 2778 
 2779     C2_MacroAssembler _masm(&cbuf);
 2780     int toc_offset = 0;
 2781 
 2782     intptr_t val = $src$$constant;
 2783     relocInfo::relocType constant_reloc = $src->constant_reloc();  // src
 2784     address const_toc_addr;
 2785     if (constant_reloc == relocInfo::oop_type) {
 2786       // Create an oop constant and a corresponding relocation.
 2787       AddressLiteral a = __ allocate_oop_address((jobject)val);
 2788       const_toc_addr = __ address_constant((address)a.value(), RelocationHolder::none);
 2789       __ relocate(a.rspec());
 2790     } else if (constant_reloc == relocInfo::metadata_type) {
 2791       AddressLiteral a = __ constant_metadata_address((Metadata *)val);
 2792       const_toc_addr = __ address_constant((address)a.value(), RelocationHolder::none);
 2793       __ relocate(a.rspec());
 2794     } else {
 2795       // Create a non-oop constant, no relocation needed.
 2796       const_toc_addr = __ long_constant((jlong)$src$$constant);
 2797     }
 2798 
 2799     if (const_toc_addr == NULL) {
 2800       ciEnv::current()->record_out_of_memory_failure();
 2801       return;
 2802     }
 2803     // Get the constant's TOC offset.
 2804     toc_offset = __ offset_to_method_toc(const_toc_addr);
 2805 
 2806     __ ld($dst$$Register, toc_offset, $toc$$Register);
 2807   %}
 2808 
 2809   enc_class enc_load_long_constP_hi(iRegLdst dst, immP src, iRegLdst toc) %{
 2810 
 2811     C2_MacroAssembler _masm(&cbuf);
 2812     if (!ra_->C->output()->in_scratch_emit_size()) {
 2813       intptr_t val = $src$$constant;
 2814       relocInfo::relocType constant_reloc = $src->constant_reloc();  // src
 2815       address const_toc_addr;
 2816       if (constant_reloc == relocInfo::oop_type) {
 2817         // Create an oop constant and a corresponding relocation.
 2818         AddressLiteral a = __ allocate_oop_address((jobject)val);
 2819         const_toc_addr = __ address_constant((address)a.value(), RelocationHolder::none);
 2820         __ relocate(a.rspec());
 2821       } else if (constant_reloc == relocInfo::metadata_type) {
 2822         AddressLiteral a = __ constant_metadata_address((Metadata *)val);
 2823         const_toc_addr = __ address_constant((address)a.value(), RelocationHolder::none);
 2824         __ relocate(a.rspec());
 2825       } else {  // non-oop pointers, e.g. card mark base, heap top
 2826         // Create a non-oop constant, no relocation needed.
 2827         const_toc_addr = __ long_constant((jlong)$src$$constant);
 2828       }
 2829 
 2830       if (const_toc_addr == NULL) {
 2831         ciEnv::current()->record_out_of_memory_failure();
 2832         return;
 2833       }
 2834       // Get the constant's TOC offset.
 2835       const int toc_offset = __ offset_to_method_toc(const_toc_addr);
 2836       // Store the toc offset of the constant.
 2837       ((loadConP_hiNode*)this)->_const_toc_offset = toc_offset;
 2838     }
 2839 
 2840     __ addis($dst$$Register, $toc$$Register, MacroAssembler::largeoffset_si16_si16_hi(_const_toc_offset));
 2841   %}
 2842 
 2843   // Postalloc expand emitter for loading a ptr constant from the method's TOC.
 2844   // An enc_class is needed because constanttablebase is not supported by
 2845   // postalloc expand.
 2846   enc_class postalloc_expand_load_ptr_constant(iRegPdst dst, immP src, iRegLdst toc) %{
 2847     const bool large_constant_pool = true; // TODO: PPC port C->cfg()->_consts_size > 4000;
 2848     if (large_constant_pool) {
 2849       // Create new nodes.
 2850       loadConP_hiNode *m1 = new loadConP_hiNode();
 2851       loadConP_loNode *m2 = new loadConP_loNode();
 2852 
 2853       // inputs for new nodes
 2854       m1->add_req(NULL, n_toc);
 2855       m2->add_req(NULL, m1);
 2856 
 2857       // operands for new nodes
 2858       m1->_opnds[0] = new iRegPdstOper(); // dst
 2859       m1->_opnds[1] = op_src;             // src
 2860       m1->_opnds[2] = new iRegPdstOper(); // toc
 2861       m2->_opnds[0] = new iRegPdstOper(); // dst
 2862       m2->_opnds[1] = op_src;             // src
 2863       m2->_opnds[2] = new iRegLdstOper(); // base
 2864 
 2865       // Initialize ins_attrib TOC fields.
 2866       m1->_const_toc_offset = -1;
 2867       m2->_const_toc_offset_hi_node = m1;
 2868 
 2869       // Register allocation for new nodes.
 2870       ra_->set_pair(m1->_idx, ra_->get_reg_second(this), ra_->get_reg_first(this));
 2871       ra_->set_pair(m2->_idx, ra_->get_reg_second(this), ra_->get_reg_first(this));
 2872 
 2873       nodes->push(m1);
 2874       nodes->push(m2);
 2875       assert(m2->bottom_type()->isa_ptr(), "must be ptr");
 2876     } else {
 2877       loadConPNode *m2 = new loadConPNode();
 2878 
 2879       // inputs for new nodes
 2880       m2->add_req(NULL, n_toc);
 2881 
 2882       // operands for new nodes
 2883       m2->_opnds[0] = new iRegPdstOper(); // dst
 2884       m2->_opnds[1] = op_src;             // src
 2885       m2->_opnds[2] = new iRegPdstOper(); // toc
 2886 
 2887       // Register allocation for new nodes.
 2888       ra_->set_pair(m2->_idx, ra_->get_reg_second(this), ra_->get_reg_first(this));
 2889 
 2890       nodes->push(m2);
 2891       assert(m2->bottom_type()->isa_ptr(), "must be ptr");
 2892     }
 2893   %}
 2894 
 2895   // An enc_class is needed because constanttablebase is not supported by
 2896   // postalloc expand.
 2897   enc_class postalloc_expand_load_float_constant(regF dst, immF src, iRegLdst toc) %{
 2898     bool large_constant_pool = true; // TODO: PPC port C->cfg()->_consts_size > 4000;
 2899 
 2900     MachNode *m2;
 2901     if (large_constant_pool) {
 2902       m2 = new loadConFCompNode();
 2903     } else {
 2904       m2 = new loadConFNode();
 2905     }
 2906     // inputs for new nodes
 2907     m2->add_req(NULL, n_toc);
 2908 
 2909     // operands for new nodes
 2910     m2->_opnds[0] = op_dst;
 2911     m2->_opnds[1] = op_src;
 2912     m2->_opnds[2] = new iRegPdstOper(); // constanttablebase
 2913 
 2914     // register allocation for new nodes
 2915     ra_->set_pair(m2->_idx, ra_->get_reg_second(this), ra_->get_reg_first(this));
 2916     nodes->push(m2);
 2917   %}
 2918 
 2919   // An enc_class is needed because constanttablebase is not supported by
 2920   // postalloc expand.
 2921   enc_class postalloc_expand_load_double_constant(regD dst, immD src, iRegLdst toc) %{
 2922     bool large_constant_pool = true; // TODO: PPC port C->cfg()->_consts_size > 4000;
 2923 
 2924     MachNode *m2;
 2925     if (large_constant_pool) {
 2926       m2 = new loadConDCompNode();
 2927     } else {
 2928       m2 = new loadConDNode();
 2929     }
 2930     // inputs for new nodes
 2931     m2->add_req(NULL, n_toc);
 2932 
 2933     // operands for new nodes
 2934     m2->_opnds[0] = op_dst;
 2935     m2->_opnds[1] = op_src;
 2936     m2->_opnds[2] = new iRegPdstOper(); // constanttablebase
 2937 
 2938     // register allocation for new nodes
 2939     ra_->set_pair(m2->_idx, ra_->get_reg_second(this), ra_->get_reg_first(this));
 2940     nodes->push(m2);
 2941   %}
 2942 
 2943   enc_class enc_stw(iRegIsrc src, memory mem) %{
 2944     C2_MacroAssembler _masm(&cbuf);
 2945     int Idisp = $mem$$disp + frame_slots_bias($mem$$base, ra_);
 2946     __ stw($src$$Register, Idisp, $mem$$base$$Register);
 2947   %}
 2948 
 2949   enc_class enc_std(iRegIsrc src, memoryAlg4 mem) %{
 2950     C2_MacroAssembler _masm(&cbuf);
 2951     int Idisp = $mem$$disp + frame_slots_bias($mem$$base, ra_);
 2952     // Operand 'ds' requires 4-byte alignment.
 2953     assert((Idisp & 0x3) == 0, "unaligned offset");
 2954     __ std($src$$Register, Idisp, $mem$$base$$Register);
 2955   %}
 2956 
 2957   enc_class enc_stfs(RegF src, memory mem) %{
 2958     C2_MacroAssembler _masm(&cbuf);
 2959     int Idisp = $mem$$disp + frame_slots_bias($mem$$base, ra_);
 2960     __ stfs($src$$FloatRegister, Idisp, $mem$$base$$Register);
 2961   %}
 2962 
 2963   enc_class enc_stfd(RegF src, memory mem) %{
 2964     C2_MacroAssembler _masm(&cbuf);
 2965     int Idisp = $mem$$disp + frame_slots_bias($mem$$base, ra_);
 2966     __ stfd($src$$FloatRegister, Idisp, $mem$$base$$Register);
 2967   %}
 2968 
 2969   enc_class postalloc_expand_encode_oop(iRegNdst dst, iRegPdst src, flagsReg crx) %{
 2970 
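          // A sketch of what the expanded nodes compute below (assuming the
          // usual compressed-oops encoding):
          //   dst = (src == NULL) ? 0 : (src - base) >> shift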
 2971     if (VM_Version::has_isel()) {
 2972       // Use the isel instruction (available since Power7).
 2973       cmpP_reg_imm16Node *n_compare  = new cmpP_reg_imm16Node();
 2974       encodeP_subNode    *n_sub_base = new encodeP_subNode();
 2975       encodeP_shiftNode  *n_shift    = new encodeP_shiftNode();
 2976       cond_set_0_oopNode *n_cond_set = new cond_set_0_oopNode();
 2977 
 2978       n_compare->add_req(n_region, n_src);
 2979       n_compare->_opnds[0] = op_crx;
 2980       n_compare->_opnds[1] = op_src;
 2981       n_compare->_opnds[2] = new immL16Oper(0);
 2982 
 2983       n_sub_base->add_req(n_region, n_src);
 2984       n_sub_base->_opnds[0] = op_dst;
 2985       n_sub_base->_opnds[1] = op_src;
 2986       n_sub_base->_bottom_type = _bottom_type;
 2987 
 2988       n_shift->add_req(n_region, n_sub_base);
 2989       n_shift->_opnds[0] = op_dst;
 2990       n_shift->_opnds[1] = op_dst;
 2991       n_shift->_bottom_type = _bottom_type;
 2992 
 2993       n_cond_set->add_req(n_region, n_compare, n_shift);
 2994       n_cond_set->_opnds[0] = op_dst;
 2995       n_cond_set->_opnds[1] = op_crx;
 2996       n_cond_set->_opnds[2] = op_dst;
 2997       n_cond_set->_bottom_type = _bottom_type;
 2998 
 2999       ra_->set_pair(n_compare->_idx, ra_->get_reg_second(n_crx), ra_->get_reg_first(n_crx));
 3000       ra_->set_pair(n_sub_base->_idx, ra_->get_reg_second(this), ra_->get_reg_first(this));
 3001       ra_->set_pair(n_shift->_idx, ra_->get_reg_second(this), ra_->get_reg_first(this));
 3002       ra_->set_pair(n_cond_set->_idx, ra_->get_reg_second(this), ra_->get_reg_first(this));
 3003 
 3004       nodes->push(n_compare);
 3005       nodes->push(n_sub_base);
 3006       nodes->push(n_shift);
 3007       nodes->push(n_cond_set);
 3008 
 3009     } else {
 3010       // Before Power7 (no isel available).
 3011       moveRegNode        *n_move     = new moveRegNode();
 3012       cmpP_reg_imm16Node *n_compare  = new cmpP_reg_imm16Node();
 3013       encodeP_shiftNode  *n_shift    = new encodeP_shiftNode();
 3014       cond_sub_baseNode  *n_sub_base = new cond_sub_baseNode();
 3015 
 3016       n_move->add_req(n_region, n_src);
 3017       n_move->_opnds[0] = op_dst;
 3018       n_move->_opnds[1] = op_src;
 3019       ra_->set_oop(n_move, true); // Until here, 'n_move' still produces an oop.
 3020 
 3021       n_compare->add_req(n_region, n_src);
 3022       n_compare->add_prec(n_move);
 3023 
 3024       n_compare->_opnds[0] = op_crx;
 3025       n_compare->_opnds[1] = op_src;
 3026       n_compare->_opnds[2] = new immL16Oper(0);
 3027 
 3028       n_sub_base->add_req(n_region, n_compare, n_src);
 3029       n_sub_base->_opnds[0] = op_dst;
 3030       n_sub_base->_opnds[1] = op_crx;
 3031       n_sub_base->_opnds[2] = op_src;
 3032       n_sub_base->_bottom_type = _bottom_type;
 3033 
 3034       n_shift->add_req(n_region, n_sub_base);
 3035       n_shift->_opnds[0] = op_dst;
 3036       n_shift->_opnds[1] = op_dst;
 3037       n_shift->_bottom_type = _bottom_type;
 3038 
 3039       ra_->set_pair(n_shift->_idx, ra_->get_reg_second(this), ra_->get_reg_first(this));
 3040       ra_->set_pair(n_compare->_idx, ra_->get_reg_second(n_crx), ra_->get_reg_first(n_crx));
 3041       ra_->set_pair(n_sub_base->_idx, ra_->get_reg_second(this), ra_->get_reg_first(this));
 3042       ra_->set_pair(n_move->_idx, ra_->get_reg_second(this), ra_->get_reg_first(this));
 3043 
 3044       nodes->push(n_move);
 3045       nodes->push(n_compare);
 3046       nodes->push(n_sub_base);
 3047       nodes->push(n_shift);
 3048     }
 3049 
 3050     assert(!(ra_->is_oop(this)), "sanity"); // This is not supposed to be GC'ed.
 3051   %}
 3052 
 3053   enc_class postalloc_expand_encode_oop_not_null(iRegNdst dst, iRegPdst src) %{
 3054 
 3055     encodeP_subNode *n1 = new encodeP_subNode();
 3056     n1->add_req(n_region, n_src);
 3057     n1->_opnds[0] = op_dst;
 3058     n1->_opnds[1] = op_src;
 3059     n1->_bottom_type = _bottom_type;
 3060 
 3061     encodeP_shiftNode *n2 = new encodeP_shiftNode();
 3062     n2->add_req(n_region, n1);
 3063     n2->_opnds[0] = op_dst;
 3064     n2->_opnds[1] = op_dst;
 3065     n2->_bottom_type = _bottom_type;
 3066     ra_->set_pair(n1->_idx, ra_->get_reg_second(this), ra_->get_reg_first(this));
 3067     ra_->set_pair(n2->_idx, ra_->get_reg_second(this), ra_->get_reg_first(this));
 3068 
 3069     nodes->push(n1);
 3070     nodes->push(n2);
 3071     assert(!(ra_->is_oop(this)), "sanity"); // This is not supposed to be GC'ed.
 3072   %}
 3073 
 3074   enc_class postalloc_expand_decode_oop(iRegPdst dst, iRegNsrc src, flagsReg crx) %{
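          // A sketch of what the expanded nodes compute below (assuming the
          // usual compressed-oops decoding):
          //   dst = (src == 0) ? NULL : ((long)src << shift) + base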
 3075     decodeN_shiftNode *n_shift    = new decodeN_shiftNode();
 3076     cmpN_reg_imm0Node *n_compare  = new cmpN_reg_imm0Node();
 3077 
 3078     n_compare->add_req(n_region, n_src);
 3079     n_compare->_opnds[0] = op_crx;
 3080     n_compare->_opnds[1] = op_src;
 3081     n_compare->_opnds[2] = new immN_0Oper(TypeNarrowOop::NULL_PTR);
 3082 
 3083     n_shift->add_req(n_region, n_src);
 3084     n_shift->_opnds[0] = op_dst;
 3085     n_shift->_opnds[1] = op_src;
 3086     n_shift->_bottom_type = _bottom_type;
 3087 
 3088     if (VM_Version::has_isel()) {
 3089       // Use the isel instruction (available since Power7).
 3090 
 3091       decodeN_addNode *n_add_base = new decodeN_addNode();
 3092       n_add_base->add_req(n_region, n_shift);
 3093       n_add_base->_opnds[0] = op_dst;
 3094       n_add_base->_opnds[1] = op_dst;
 3095       n_add_base->_bottom_type = _bottom_type;
 3096 
 3097       cond_set_0_ptrNode *n_cond_set = new cond_set_0_ptrNode();
 3098       n_cond_set->add_req(n_region, n_compare, n_add_base);
 3099       n_cond_set->_opnds[0] = op_dst;
 3100       n_cond_set->_opnds[1] = op_crx;
 3101       n_cond_set->_opnds[2] = op_dst;
 3102       n_cond_set->_bottom_type = _bottom_type;
 3103 
 3104       assert(ra_->is_oop(this) == true, "A decodeN node must produce an oop!");
 3105       ra_->set_oop(n_cond_set, true);
 3106 
 3107       ra_->set_pair(n_shift->_idx, ra_->get_reg_second(this), ra_->get_reg_first(this));
 3108       ra_->set_pair(n_compare->_idx, ra_->get_reg_second(n_crx), ra_->get_reg_first(n_crx));
 3109       ra_->set_pair(n_add_base->_idx, ra_->get_reg_second(this), ra_->get_reg_first(this));
 3110       ra_->set_pair(n_cond_set->_idx, ra_->get_reg_second(this), ra_->get_reg_first(this));
 3111 
 3112       nodes->push(n_compare);
 3113       nodes->push(n_shift);
 3114       nodes->push(n_add_base);
 3115       nodes->push(n_cond_set);
 3116 
 3117     } else {
 3118       // Before Power7 (no isel available).
 3119       cond_add_baseNode *n_add_base = new cond_add_baseNode();
 3120 
 3121       n_add_base->add_req(n_region, n_compare, n_shift);
 3122       n_add_base->_opnds[0] = op_dst;
 3123       n_add_base->_opnds[1] = op_crx;
 3124       n_add_base->_opnds[2] = op_dst;
 3125       n_add_base->_bottom_type = _bottom_type;
 3126 
 3127       assert(ra_->is_oop(this) == true, "A decodeN node must produce an oop!");
 3128       ra_->set_oop(n_add_base, true);
 3129 
 3130       ra_->set_pair(n_shift->_idx, ra_->get_reg_second(this), ra_->get_reg_first(this));
 3131       ra_->set_pair(n_compare->_idx, ra_->get_reg_second(n_crx), ra_->get_reg_first(n_crx));
 3132       ra_->set_pair(n_add_base->_idx, ra_->get_reg_second(this), ra_->get_reg_first(this));
 3133 
 3134       nodes->push(n_compare);
 3135       nodes->push(n_shift);
 3136       nodes->push(n_add_base);
 3137     }
 3138   %}
 3139 
 3140   enc_class postalloc_expand_decode_oop_not_null(iRegPdst dst, iRegNsrc src) %{
 3141     decodeN_shiftNode *n1 = new decodeN_shiftNode();
 3142     n1->add_req(n_region, n_src);
 3143     n1->_opnds[0] = op_dst;
 3144     n1->_opnds[1] = op_src;
 3145     n1->_bottom_type = _bottom_type;
 3146 
 3147     decodeN_addNode *n2 = new decodeN_addNode();
 3148     n2->add_req(n_region, n1);
 3149     n2->_opnds[0] = op_dst;
 3150     n2->_opnds[1] = op_dst;
 3151     n2->_bottom_type = _bottom_type;
 3152     ra_->set_pair(n1->_idx, ra_->get_reg_second(this), ra_->get_reg_first(this));
 3153     ra_->set_pair(n2->_idx, ra_->get_reg_second(this), ra_->get_reg_first(this));
 3154 
 3155     assert(ra_->is_oop(this) == true, "A decodeN node must produce an oop!");
 3156     ra_->set_oop(n2, true);
 3157 
 3158     nodes->push(n1);
 3159     nodes->push(n2);
 3160   %}
 3161 
 3162   enc_class enc_cmove_reg(iRegIdst dst, flagsRegSrc crx, iRegIsrc src, cmpOp cmp) %{
 3163 
 3164     C2_MacroAssembler _masm(&cbuf);
 3165     int cc        = $cmp$$cmpcode;
 3166     int flags_reg = $crx$$reg;
 3167     Label done;
 3168     assert((Assembler::bcondCRbiIs1 & ~Assembler::bcondCRbiIs0) == 8, "check encoding");
 3169     // Branch if not (cmp crx).
 3170     __ bc(cc_to_inverse_boint(cc), cc_to_biint(cc, flags_reg), done);
 3171     __ mr($dst$$Register, $src$$Register);
 3172     __ bind(done);
 3173   %}
 3174 
 3175   enc_class enc_cmove_imm(iRegIdst dst, flagsRegSrc crx, immI16 src, cmpOp cmp) %{
 3176 
 3177     C2_MacroAssembler _masm(&cbuf);
 3178     Label done;
 3179     assert((Assembler::bcondCRbiIs1 & ~Assembler::bcondCRbiIs0) == 8, "check encoding");
 3180     // Branch if not (cmp crx).
 3181     __ bc(cc_to_inverse_boint($cmp$$cmpcode), cc_to_biint($cmp$$cmpcode, $crx$$reg), done);
 3182     __ li($dst$$Register, $src$$constant);
 3183     __ bind(done);
 3184   %}
 3185 
 3186   // This enc_class is needed so that the scheduler gets a proper
 3187   // input mapping for latency computation.
 3188   enc_class enc_andc(iRegIdst dst, iRegIsrc src1, iRegIsrc src2) %{
 3189     C2_MacroAssembler _masm(&cbuf);
 3190     __ andc($dst$$Register, $src1$$Register, $src2$$Register);
 3191   %}
 3192 
 3193   enc_class enc_convI2B_regI__cmove(iRegIdst dst, iRegIsrc src, flagsReg crx, immI16 zero, immI16 notzero) %{
 3194 
 3195     C2_MacroAssembler _masm(&cbuf);
 3196 
 3197     Label done;
 3198     __ cmpwi($crx$$CondRegister, $src$$Register, 0);
 3199     __ li($dst$$Register, $zero$$constant);
 3200     __ beq($crx$$CondRegister, done);
 3201     __ li($dst$$Register, $notzero$$constant);
 3202     __ bind(done);
 3203   %}
 3204 
 3205   enc_class enc_convP2B_regP__cmove(iRegIdst dst, iRegPsrc src, flagsReg crx, immI16 zero, immI16 notzero) %{
 3206 
 3207     C2_MacroAssembler _masm(&cbuf);
 3208 
 3209     Label done;
 3210     __ cmpdi($crx$$CondRegister, $src$$Register, 0);
 3211     __ li($dst$$Register, $zero$$constant);
 3212     __ beq($crx$$CondRegister, done);
 3213     __ li($dst$$Register, $notzero$$constant);
 3214     __ bind(done);
 3215   %}
 3216 
 3217   enc_class enc_cmove_bso_stackSlotL(iRegLdst dst, flagsRegSrc crx, stackSlotL mem ) %{
 3218 
 3219     C2_MacroAssembler _masm(&cbuf);
 3220     int Idisp = $mem$$disp + frame_slots_bias($mem$$base, ra_);
 3221     Label done;
 3222     __ bso($crx$$CondRegister, done);
 3223     __ ld($dst$$Register, Idisp, $mem$$base$$Register);
 3224     __ bind(done);
 3225   %}
 3226 
 3227   enc_class enc_cmove_bso_reg(iRegLdst dst, flagsRegSrc crx, regD src) %{
 3228 
 3229     C2_MacroAssembler _masm(&cbuf);
 3230     Label done;
 3231     __ bso($crx$$CondRegister, done);
 3232     __ mffprd($dst$$Register, $src$$FloatRegister);
 3233     __ bind(done);
 3234   %}
 3235 
 3236   enc_class enc_bc(flagsRegSrc crx, cmpOp cmp, Label lbl) %{
 3237 
 3238     C2_MacroAssembler _masm(&cbuf);
 3239     Label d;   // dummy
 3240     __ bind(d);
 3241     Label* p = ($lbl$$label);
 3242     // `p' is `NULL' when this encoding class is used only to
 3243     // determine the size of the encoded instruction.
 3244     Label& l = (NULL == p)? d : *(p);
 3245     int cc = $cmp$$cmpcode;
 3246     int flags_reg = $crx$$reg;
 3247     assert((Assembler::bcondCRbiIs1 & ~Assembler::bcondCRbiIs0) == 8, "check encoding");
 3248     int bhint = Assembler::bhintNoHint;
 3249 
 3250     if (UseStaticBranchPredictionForUncommonPathsPPC64) {
 3251       if (_prob <= PROB_NEVER) {
 3252         bhint = Assembler::bhintIsNotTaken;
 3253       } else if (_prob >= PROB_ALWAYS) {
 3254         bhint = Assembler::bhintIsTaken;
 3255       }
 3256     }
 3257 
 3258     __ bc(Assembler::add_bhint_to_boint(bhint, cc_to_boint(cc)),
 3259           cc_to_biint(cc, flags_reg),
 3260           l);
 3261   %}
 3262 
 3263   enc_class enc_bc_far(flagsRegSrc crx, cmpOp cmp, Label lbl) %{
 3264     // The scheduler doesn't know about branch shortening, so we set the opcode
 3265     // to ppc64Opcode_bc in order to hide this detail from the scheduler.
 3266 
 3267     C2_MacroAssembler _masm(&cbuf);
 3268     Label d;    // dummy
 3269     __ bind(d);
 3270     Label* p = ($lbl$$label);
 3271     // `p' is `NULL' when this encoding class is used only to
 3272     // determine the size of the encoded instruction.
 3273     Label& l = (NULL == p)? d : *(p);
 3274     int cc = $cmp$$cmpcode;
 3275     int flags_reg = $crx$$reg;
 3276     int bhint = Assembler::bhintNoHint;
 3277 
 3278     if (UseStaticBranchPredictionForUncommonPathsPPC64) {
 3279       if (_prob <= PROB_NEVER) {
 3280         bhint = Assembler::bhintIsNotTaken;
 3281       } else if (_prob >= PROB_ALWAYS) {
 3282         bhint = Assembler::bhintIsTaken;
 3283       }
 3284     }
 3285 
 3286     // Tell the conditional far branch to optimize itself when being relocated.
 3287     __ bc_far(Assembler::add_bhint_to_boint(bhint, cc_to_boint(cc)),
 3288                   cc_to_biint(cc, flags_reg),
 3289                   l,
 3290                   MacroAssembler::bc_far_optimize_on_relocate);
 3291   %}
 3292 
 3293   // Postalloc expand emitter for loading a replicated float constant from
 3294   // the method's TOC.
 3295   // Enc_class needed as constanttablebase is not supported by postalloc
 3296   // expand.
 3297   enc_class postalloc_expand_load_replF_constant(iRegLdst dst, immF src, iRegLdst toc) %{
 3298     // Create new nodes.
 3299 
 3300     // Make an operand with the bit pattern to load as float.
 3301     immLOper *op_repl = new immLOper((jlong)replicate_immF(op_src->constantF()));
 3302 
 3303     loadConLNodesTuple loadConLNodes =
 3304       loadConLNodesTuple_create(ra_, n_toc, op_repl,
 3305                                 ra_->get_reg_second(this), ra_->get_reg_first(this));
 3306 
 3307     // Push new nodes.
 3308     if (loadConLNodes._large_hi) nodes->push(loadConLNodes._large_hi);
 3309     if (loadConLNodes._last)     nodes->push(loadConLNodes._last);
 3310 
 3311     assert(nodes->length() >= 1, "must have created at least 1 node");
 3312     assert(loadConLNodes._last->bottom_type()->isa_long(), "must be long");
 3313   %}
 3314 
 3315   enc_class postalloc_expand_load_replF_constant_vsx(vecX dst, immF src, iRegLdst toc, iRegLdst tmp) %{
 3316     // Create new nodes.
 3317 
 3318     // Make an operand with the bit pattern to load as float.
 3319     immLOper *op_repl = new  immLOper((jlong)replicate_immF(op_src->constantF()));
 3320     immI_0Oper *op_zero = new  immI_0Oper(0);
 3321 
 3322     loadConLReplicatedNodesTuple loadConLNodes =
 3323       loadConLReplicatedNodesTuple_create(C, ra_, n_toc, op_repl, op_dst, op_zero,
 3324                                 ra_->get_reg_second(n_tmp), ra_->get_reg_first(n_tmp),
 3325                                 ra_->get_reg_second(this), ra_->get_reg_first(this));
 3326 
 3327     // Push new nodes.
 3328     if (loadConLNodes._large_hi) { nodes->push(loadConLNodes._large_hi); }
 3329     if (loadConLNodes._large_lo) { nodes->push(loadConLNodes._large_lo); }
 3330     if (loadConLNodes._moved)    { nodes->push(loadConLNodes._moved); }
 3331     if (loadConLNodes._last)     { nodes->push(loadConLNodes._last); }
 3332 
 3333     assert(nodes->length() >= 1, "must have created at least 1 node");
 3334   %}
 3335 
 3336   // This enc_class is needed so that the scheduler gets a proper
 3337   // input mapping for latency computation.
 3338   enc_class enc_poll(immI dst, iRegLdst poll) %{
 3339     // Fake operand dst needed for PPC scheduler.
 3340     assert($dst$$constant == 0x0, "dst must be 0x0");
 3341 
 3342     C2_MacroAssembler _masm(&cbuf);
 3343     // Mark the code position where the load from the safepoint
 3344     // polling page was emitted as relocInfo::poll_type.
 3345     __ relocate(relocInfo::poll_type);
 3346     __ load_from_polling_page($poll$$Register);
 3347   %}
 3348 
 3349   // A Java static call or a runtime call.
 3350   //
 3351   // Branch-and-link relative to a trampoline.
 3352   // The trampoline loads the target address and does a long branch there.
 3353   // In case we call Java, the trampoline branches to an interpreter_stub
 3354   // which loads the inline cache and the real call target from the constant pool.
 3355   //
 3356   // This basically looks like this:
 3357   //
 3358   // >>>> consts      -+  -+
 3359   //                   |   |- offset1
 3360   // [call target1]    | <-+
 3361   // [IC cache]        |- offset2
 3362   // [call target2] <--+
 3363   //
 3364   // <<<< consts
 3365   // >>>> insts
 3366   //
 3367   // bl offset26               -+  -+     // 24-bit LI field, +/-32 MB reach
 3368   //                            |   |
 3369   // <<<< insts                 |   |
 3370   // >>>> stubs                 |   |
 3371   //                            |   |- trampoline_stub_Reloc
 3372   // trampoline stub:           | <-+
 3373   //   r2 = toc                 |
 3374   //   r2 = [r2 + offset1]      |       // Load call target1 from const section
 3375   //   mtctr r2                 |
 3376   //   bctr                     |- static_stub_Reloc
 3377   // comp_to_interp_stub:   <---+
 3378   //   r1 = toc
 3379   //   ICreg = [r1 + IC_offset]         // Load IC from const section
 3380   //   r1    = [r1 + offset2]           // Load call target2 from const section
 3381   //   mtctr r1
 3382   //   bctr
 3383   //
 3384   // <<<< stubs
 3385   //
 3386   // The call instruction in the code either
 3387   // - Branches directly to a compiled method if the offset is encodable in the instruction.
 3388   // - Branches to the trampoline stub if the offset to the compiled method is not encodable.
 3389   // - Branches to the compiled_to_interp stub if the target is interpreted.
 3390   //
 3391   // Furthermore, there are three relocations from the loads to the constants in
 3392   // the constant section.
 3393   //
 3394   // The use of r1 and r2 in the stubs makes it possible to distinguish them.
 3395   enc_class enc_java_static_call(method meth) %{
 3396 
 3397     C2_MacroAssembler _masm(&cbuf);
 3398     address entry_point = (address)$meth$$method;
 3399 
 3400     if (!_method) {
 3401       // A call to a runtime wrapper, e.g. new, new_typeArray_Java, uncommon_trap.
 3402       emit_call_with_trampoline_stub(_masm, entry_point, relocInfo::runtime_call_type);
 3403     } else {
 3404       // Remember the offset not the address.
 3405       const int start_offset = __ offset();
 3406 
 3407       // The trampoline stub.
 3408       // If no entry point is given, use the current pc so the branch
 3409       // below has an encodable target; it will be patched later.
 3410       if (entry_point == 0) entry_point = __ pc();
 3411 
 3412       // Put the entry point as a constant into the constant pool.
 3413       const address entry_point_toc_addr = __ address_constant(entry_point, RelocationHolder::none);
 3414       if (entry_point_toc_addr == NULL) {
 3415         ciEnv::current()->record_out_of_memory_failure();
 3416         return;
 3417       }
 3418       const int entry_point_toc_offset = __ offset_to_method_toc(entry_point_toc_addr);
 3419 
 3420       // Emit the trampoline stub which will be related to the branch-and-link below.
 3421       CallStubImpl::emit_trampoline_stub(_masm, entry_point_toc_offset, start_offset);
 3422       if (ciEnv::current()->failing()) { return; } // Code cache may be full.
 3423       int method_index = resolved_method_index(cbuf);
 3424       __ relocate(_optimized_virtual ? opt_virtual_call_Relocation::spec(method_index)
 3425                   : static_call_Relocation::spec(method_index));
 3426 
 3427       // The real call.
 3428       // Note: At this point we do not have the address of the trampoline
 3429       // stub, and the entry point might be too far away for bl, so __ pc()
 3430       // serves as a dummy and the bl will be patched later.
 3431       cbuf.set_insts_mark();
 3432       __ bl(__ pc());  // Emits a relocation.
 3433 
 3434       // The stub for call to interpreter.
 3435       address stub = CompiledStaticCall::emit_to_interp_stub(cbuf);
 3436       if (stub == NULL) {
 3437         ciEnv::current()->record_failure("CodeCache is full");
 3438         return;
 3439       }
 3440     }
 3441   %}
 3442 
 3443   // Second node of expanded dynamic call - the call.
 3444   enc_class enc_java_dynamic_call_sched(method meth) %{
 3445 
 3446     C2_MacroAssembler _masm(&cbuf);
 3447 
 3448     if (!ra_->C->output()->in_scratch_emit_size()) {
 3449       // Create a call trampoline stub for the given method.
 3450       const address entry_point = !($meth$$method) ? 0 : (address)$meth$$method;
 3451       const address entry_point_const = __ address_constant(entry_point, RelocationHolder::none);
 3452       if (entry_point_const == NULL) {
 3453         ciEnv::current()->record_out_of_memory_failure();
 3454         return;
 3455       }
 3456       const int entry_point_const_toc_offset = __ offset_to_method_toc(entry_point_const);
 3457       CallStubImpl::emit_trampoline_stub(_masm, entry_point_const_toc_offset, __ offset());
 3458       if (ra_->C->env()->failing()) { return; } // Code cache may be full.
 3459 
 3460       // Build relocation at call site with ic position as data.
 3461       assert((_load_ic_hi_node != NULL && _load_ic_node == NULL) ||
 3462              (_load_ic_hi_node == NULL && _load_ic_node != NULL),
 3463              "must have one, but can't have both");
 3464       assert((_load_ic_hi_node != NULL && _load_ic_hi_node->_cbuf_insts_offset != -1) ||
 3465              (_load_ic_node != NULL    && _load_ic_node->_cbuf_insts_offset != -1),
 3466              "must contain instruction offset");
 3467       const int virtual_call_oop_addr_offset = _load_ic_hi_node != NULL
 3468         ? _load_ic_hi_node->_cbuf_insts_offset
 3469         : _load_ic_node->_cbuf_insts_offset;
 3470       const address virtual_call_oop_addr = __ addr_at(virtual_call_oop_addr_offset);
 3471       assert(MacroAssembler::is_load_const_from_method_toc_at(virtual_call_oop_addr),
 3472              "should be load from TOC");
 3473       int method_index = resolved_method_index(cbuf);
 3474       __ relocate(virtual_call_Relocation::spec(virtual_call_oop_addr, method_index));
 3475     }
 3476 
 3477     // At this point we do not have the address of the trampoline stub,
 3478     // and the entry point might be too far away for bl. __ pc() serves
 3479     // as a dummy and the bl will be patched later.
 3480     __ bl((address) __ pc());
 3481   %}
 3482 
 3483   // postalloc expand emitter for virtual calls.
 3484   enc_class postalloc_expand_java_dynamic_call_sched(method meth, iRegLdst toc) %{
 3485 
 3486     // Create the nodes for loading the IC from the TOC.
 3487     loadConLNodesTuple loadConLNodes_IC =
 3488       loadConLNodesTuple_create(ra_, n_toc, new immLOper((jlong)Universe::non_oop_word()),
 3489                                 OptoReg::Name(R19_H_num), OptoReg::Name(R19_num));
 3490 
 3491     // Create the call node.
 3492     CallDynamicJavaDirectSchedNode *call = new CallDynamicJavaDirectSchedNode();
 3493     call->_method_handle_invoke = _method_handle_invoke;
 3494     call->_vtable_index      = _vtable_index;
 3495     call->_method            = _method;
 3496     call->_optimized_virtual = _optimized_virtual;
 3497     call->_tf                = _tf;
 3498     call->_entry_point       = _entry_point;
 3499     call->_cnt               = _cnt;
 3500     call->_guaranteed_safepoint = true;
 3501     call->_oop_map           = _oop_map;
 3502     call->_jvms              = _jvms;
 3503     call->_jvmadj            = _jvmadj;
 3504     call->_in_rms            = _in_rms;
 3505     call->_nesting           = _nesting;
 3506     call->_override_symbolic_info = _override_symbolic_info;
 3507 
 3508     // New call needs all inputs of old call.
 3509     // Req...
 3510     for (uint i = 0; i < req(); ++i) {
 3511       // The expanded node does not need the toc input any more.
 3512       // Add the inline cache constant here instead. This expresses that the
 3513       // register of the inline cache must be live at the call.
 3514       // Otherwise we would have to adapt the JVMState by -1.
 3515       if (i == mach_constant_base_node_input()) {
 3516         call->add_req(loadConLNodes_IC._last);
 3517       } else {
 3518         call->add_req(in(i));
 3519       }
 3520     }
 3521     // ...as well as prec
 3522     for (uint i = req(); i < len(); ++i) {
 3523       call->add_prec(in(i));
 3524     }
 3525 
 3526     // Remember nodes loading the inline cache into r19.
 3527     call->_load_ic_hi_node = loadConLNodes_IC._large_hi;
 3528     call->_load_ic_node    = loadConLNodes_IC._small;
 3529 
 3530     // Operands for new nodes.
 3531     call->_opnds[0] = _opnds[0];
 3532     call->_opnds[1] = _opnds[1];
 3533 
 3534     // Only the inline cache is associated with a register.
 3535     assert(Matcher::inline_cache_reg() == OptoReg::Name(R19_num), "ic reg should be R19");
 3536 
 3537     // Push new nodes.
 3538     if (loadConLNodes_IC._large_hi) nodes->push(loadConLNodes_IC._large_hi);
 3539     if (loadConLNodes_IC._last)     nodes->push(loadConLNodes_IC._last);
 3540     nodes->push(call);
 3541   %}
 3542 
 3543   // Compound version of the dynamic call.
 3544   // Toc is only passed so that it can be used in the ins_encode statement;
 3545   // in the code we have to use $constanttablebase.
 3546   enc_class enc_java_dynamic_call(method meth, iRegLdst toc) %{
 3547     C2_MacroAssembler _masm(&cbuf);
 3548     int start_offset = __ offset();
 3549 
 3550     Register Rtoc = (ra_) ? $constanttablebase : R2_TOC;
 3551 
 3552     int vtable_index = this->_vtable_index;
 3553     if (vtable_index < 0) {
 3554       // Must be invalid_vtable_index, not nonvirtual_vtable_index.
 3555       assert(vtable_index == Method::invalid_vtable_index, "correct sentinel value");
 3556       Register ic_reg = as_Register(Matcher::inline_cache_reg_encode());
 3557 
 3558       // Virtual call relocation will point to ic load.
 3559       address virtual_call_meta_addr = __ pc();
 3560       // Load a clear inline cache.
 3561       AddressLiteral empty_ic((address) Universe::non_oop_word());
 3562       bool success = __ load_const_from_method_toc(ic_reg, empty_ic, Rtoc, /*fixed_size*/ true);
 3563       if (!success) {
 3564         ciEnv::current()->record_out_of_memory_failure();
 3565         return;
 3566       }
 3567       // CALL to fixup routine.  Fixup routine uses ScopeDesc info
 3568       // to determine who we intended to call.
 3569       __ relocate(virtual_call_Relocation::spec(virtual_call_meta_addr));
 3570       emit_call_with_trampoline_stub(_masm, (address)$meth$$method, relocInfo::none);
 3571       assert(((MachCallDynamicJavaNode*)this)->ret_addr_offset() == __ offset() - start_offset,
 3572              "Fix constant in ret_addr_offset(), expected %d", __ offset() - start_offset);
 3573     } else {
 3574       assert(!UseInlineCaches, "expect vtable calls only if not using ICs");
 3575       // Go through the vtable. Get the receiver klass; the receiver has
 3576       // already been null-checked. If we go through a C2I adapter, the
 3577       // interpreter expects the method in R19_method.
 3578 
 3579       __ load_klass(R11_scratch1, R3);
 3580 
 3581       int entry_offset = in_bytes(Klass::vtable_start_offset()) + vtable_index * vtableEntry::size_in_bytes();
 3582       int v_off = entry_offset + vtableEntry::method_offset_in_bytes();
 3583       __ li(R19_method, v_off);
 3584       __ ldx(R19_method/*method*/, R19_method/*method offset*/, R11_scratch1/*class*/);
 3585       // NOTE: for vtable dispatches, the vtable entry will never be
 3586       // null. However it may very well end up in handle_wrong_method
 3587       // if the method is abstract for the particular class.
 3588       __ ld(R11_scratch1, in_bytes(Method::from_compiled_offset()), R19_method);
 3589       // Call target. Either compiled code or C2I adapter.
 3590       __ mtctr(R11_scratch1);
 3591       __ bctrl();
 3592       assert(((MachCallDynamicJavaNode*)this)->ret_addr_offset() == __ offset() - start_offset,
 3593              "Fix constant in ret_addr_offset(), expected %d", __ offset() - start_offset);
 3594     }
 3595   %}
 3596 
 3597   // a runtime call
 3598   enc_class enc_java_to_runtime_call (method meth) %{
 3599 
 3600     C2_MacroAssembler _masm(&cbuf);
 3601     const address start_pc = __ pc();
 3602 
 3603 #if defined(ABI_ELFv2)
 3604     address entry= !($meth$$method) ? NULL : (address)$meth$$method;
 3605     __ call_c(entry, relocInfo::runtime_call_type);
 3606 #else
 3607     // The function we're going to call.
 3608     FunctionDescriptor fdtemp;
 3609     const FunctionDescriptor* fd = !($meth$$method) ? &fdtemp : (FunctionDescriptor*)$meth$$method;
 3610 
 3611     Register Rtoc = R12_scratch2;
 3612     // Calculate the method's TOC.
 3613     __ calculate_address_from_global_toc(Rtoc, __ method_toc());
 3614     // Put entry, env and toc into the constant pool; this needs up to 3 constant
 3615     // pool entries. call_c_using_toc will optimize the call.
 3616     bool success = __ call_c_using_toc(fd, relocInfo::runtime_call_type, Rtoc);
 3617     if (!success) {
 3618       ciEnv::current()->record_out_of_memory_failure();
 3619       return;
 3620     }
 3621 #endif
 3622 
 3623     // Check the ret_addr_offset.
 3624     assert(((MachCallRuntimeNode*)this)->ret_addr_offset() ==  __ last_calls_return_pc() - start_pc,
 3625            "Fix constant in ret_addr_offset()");
 3626   %}
 3627 
 3628   // Move to ctr for leaf call.
 3629   // This enc_class is needed so that the scheduler gets a proper
 3630   // input mapping for latency computation.
 3631   enc_class enc_leaf_call_mtctr(iRegLsrc src) %{
 3632     C2_MacroAssembler _masm(&cbuf);
 3633     __ mtctr($src$$Register);
 3634   %}
 3635 
 3636   // Postalloc expand emitter for runtime leaf calls.
 3637   enc_class postalloc_expand_java_to_runtime_call(method meth, iRegLdst toc) %{
 3638     loadConLNodesTuple loadConLNodes_Entry;
 3639 #if defined(ABI_ELFv2)
 3640     jlong entry_address = (jlong) this->entry_point();
 3641     assert(entry_address, "need address here");
 3642     loadConLNodes_Entry = loadConLNodesTuple_create(ra_, n_toc, new immLOper(entry_address),
 3643                                                     OptoReg::Name(R12_H_num), OptoReg::Name(R12_num));
 3644 #else
 3645     // Get the struct that describes the function we are about to call.
 3646     FunctionDescriptor* fd = (FunctionDescriptor*) this->entry_point();
 3647     assert(fd, "need fd here");
 3648     jlong entry_address = (jlong) fd->entry();
 3649     // new nodes
 3650     loadConLNodesTuple loadConLNodes_Env;
 3651     loadConLNodesTuple loadConLNodes_Toc;
 3652 
 3653     // Create nodes and operands for loading the entry point.
 3654     loadConLNodes_Entry = loadConLNodesTuple_create(ra_, n_toc, new immLOper(entry_address),
 3655                                                     OptoReg::Name(R12_H_num), OptoReg::Name(R12_num));
 3656 
 3657 
 3658     // Create nodes and operands for loading the env pointer.
 3659     if (fd->env() != NULL) {
 3660       loadConLNodes_Env = loadConLNodesTuple_create(ra_, n_toc, new immLOper((jlong) fd->env()),
 3661                                                     OptoReg::Name(R11_H_num), OptoReg::Name(R11_num));
 3662     } else {
 3663       loadConLNodes_Env._large_hi = NULL;
 3664       loadConLNodes_Env._large_lo = NULL;
 3665       loadConLNodes_Env._small    = NULL;
 3666       loadConLNodes_Env._last = new loadConL16Node();
 3667       loadConLNodes_Env._last->_opnds[0] = new iRegLdstOper();
 3668       loadConLNodes_Env._last->_opnds[1] = new immL16Oper(0);
 3669       ra_->set_pair(loadConLNodes_Env._last->_idx, OptoReg::Name(R11_H_num), OptoReg::Name(R11_num));
 3670     }
 3671 
 3672     // Create nodes and operands for loading the TOC pointer.
 3673     loadConLNodes_Toc = loadConLNodesTuple_create(ra_, n_toc, new immLOper((jlong) fd->toc()),
 3674                                                   OptoReg::Name(R2_H_num), OptoReg::Name(R2_num));
 3675 #endif // ABI_ELFv2
 3676     // mtctr node
 3677     MachNode *mtctr = new CallLeafDirect_mtctrNode();
 3678 
 3679     assert(loadConLNodes_Entry._last != NULL, "entry must exist");
 3680     mtctr->add_req(0, loadConLNodes_Entry._last);
 3681 
 3682     mtctr->_opnds[0] = new iRegLdstOper();
 3683     mtctr->_opnds[1] = new iRegLdstOper();
 3684 
 3685     // call node
 3686     MachCallLeafNode *call = new CallLeafDirectNode();
 3687 
 3688     call->_opnds[0] = _opnds[0];
 3689     call->_opnds[1] = new methodOper((intptr_t) entry_address); // May get set later.
 3690 
 3691     // Make the new call node look like the old one.
 3692     call->_name        = _name;
 3693     call->_tf          = _tf;
 3694     call->_entry_point = _entry_point;
 3695     call->_cnt         = _cnt;
 3696     call->_guaranteed_safepoint = false;
 3697     call->_oop_map     = _oop_map;
 3698     guarantee(!_jvms, "You must clone the jvms and adapt the offsets by fix_jvms().");
 3699     call->_jvms        = NULL;
 3700     call->_jvmadj      = _jvmadj;
 3701     call->_in_rms      = _in_rms;
 3702     call->_nesting     = _nesting;
 3703 
 3704     // New call needs all inputs of old call.
 3705     // Req...
 3706     for (uint i = 0; i < req(); ++i) {
 3707       if (i != mach_constant_base_node_input()) {
 3708         call->add_req(in(i));
 3709       }
 3710     }
 3711 
 3712     // These must be required edges, as the registers are live up to
 3713     // the call. Otherwise the constants would be handled as kills.
 3714     call->add_req(mtctr);
 3715 #if !defined(ABI_ELFv2)
 3716     call->add_req(loadConLNodes_Env._last);
 3717     call->add_req(loadConLNodes_Toc._last);
 3718 #endif
 3719 
 3720     // ...as well as prec
 3721     for (uint i = req(); i < len(); ++i) {
 3722       call->add_prec(in(i));
 3723     }
 3724 
 3725     // registers
 3726     ra_->set1(mtctr->_idx, OptoReg::Name(SR_CTR_num));
 3727 
 3728     // Insert the new nodes.
 3729     if (loadConLNodes_Entry._large_hi) nodes->push(loadConLNodes_Entry._large_hi);
 3730     if (loadConLNodes_Entry._last)     nodes->push(loadConLNodes_Entry._last);
 3731 #if !defined(ABI_ELFv2)
 3732     if (loadConLNodes_Env._large_hi)   nodes->push(loadConLNodes_Env._large_hi);
 3733     if (loadConLNodes_Env._last)       nodes->push(loadConLNodes_Env._last);
 3734     if (loadConLNodes_Toc._large_hi)   nodes->push(loadConLNodes_Toc._large_hi);
 3735     if (loadConLNodes_Toc._last)       nodes->push(loadConLNodes_Toc._last);
 3736 #endif
 3737     nodes->push(mtctr);
 3738     nodes->push(call);
 3739   %}
 3740 %}
 3741 
 3742 //----------FRAME--------------------------------------------------------------
 3743 // Definition of frame structure and management information.
 3744 
 3745 frame %{
 3746   // These two registers define part of the calling convention between
 3747   // compiled code and the interpreter.
 3748 
 3749   // Inline Cache Register or method for I2C.
 3750   inline_cache_reg(R19); // R19_method
 3751 
 3752   // Optional: name the operand used by cisc-spilling to access
 3753   // [stack_pointer + offset].
 3754   cisc_spilling_operand_name(indOffset);
 3755 
 3756   // Number of stack slots consumed by a Monitor enter.
 3757   sync_stack_slots((frame::jit_monitor_size / VMRegImpl::stack_slot_size));
 3758 
 3759   // Compiled code's Frame Pointer.
 3760   frame_pointer(R1); // R1_SP
 3761 
 3762   // The interpreter stores its frame pointer in a register which is
 3763   // stored to the stack by I2C adapters. I2C adapters convert from
 3764   // interpreted Java to compiled Java.
 3765   //
 3766   // R14_state holds pointer to caller's cInterpreter.
 3767   interpreter_frame_pointer(R14); // R14_state
 3768 
 3769   stack_alignment(frame::alignment_in_bytes);
 3770 
 3771   // Number of outgoing stack slots killed above the
 3772   // out_preserve_stack_slots for calls to C. Supports the var-args
 3773   // backing area for register parms.
 3774   //
 3775   varargs_C_out_slots_killed(((frame::abi_reg_args_size - frame::jit_out_preserve_size) / VMRegImpl::stack_slot_size));
 3776 
 3777   // The after-PROLOG location of the return address. Location of
 3778   // return address specifies a type (REG or STACK) and a number
 3779   // representing the register number (i.e. - use a register name) or
 3780   // stack slot.
 3781   //
 3782   // A: Link register is stored in stack slot ...
 3783   // M:  ... but it's in the caller's frame according to PPC-64 ABI.
 3784   // J: Therefore, we make sure that the link register is also in R11_scratch1
 3785   //    at the end of the prolog.
 3786   // B: We use R20, now.
 3787   //return_addr(REG R20);
 3788 
 3789   // G: After reading the comments made by all the luminaries on their
 3790   //    failure to tell the compiler where the return address really is,
 3791   //    I hardly dare to try myself.  However, I'm convinced it's in slot
 3792   //    4, which apparently works and saves us some spills.
 3793   return_addr(STACK 4);
 3794 
 3795   // Location of native (C/C++) and interpreter return values. This
 3796   // is specified to be the same as for Java: integer, pointer and long
 3797   // values are returned in R3, and float and double values in F1 (see
 3798   // the tables below). The copying to and from the return registers is
 3799   // done by the appropriate call and epilog opcodes. This simplifies
 3800   // the register allocator.
 3801   c_return_value %{
 3802     assert((ideal_reg >= Op_RegI && ideal_reg <= Op_RegL) ||
 3803             (ideal_reg == Op_RegN && CompressedOops::base() == NULL && CompressedOops::shift() == 0),
 3804             "only return normal values");
 3805     // enum names from opcodes.hpp:    Op_Node Op_Set Op_RegN       Op_RegI       Op_RegP       Op_RegF       Op_RegD       Op_RegL
 3806     static int typeToRegLo[Op_RegL+1] = { 0,   0,     R3_num,   R3_num,   R3_num,   F1_num,   F1_num,   R3_num };
 3807     static int typeToRegHi[Op_RegL+1] = { 0,   0,     OptoReg::Bad, R3_H_num, R3_H_num, OptoReg::Bad, F1_H_num, R3_H_num };
 3808     return OptoRegPair(typeToRegHi[ideal_reg], typeToRegLo[ideal_reg]);
 3809   %}
 3810 
 3811   // Location of compiled Java return values.  Same as C
 3812   return_value %{
 3813     assert((ideal_reg >= Op_RegI && ideal_reg <= Op_RegL) ||
 3814             (ideal_reg == Op_RegN && CompressedOops::base() == NULL && CompressedOops::shift() == 0),
 3815             "only return normal values");
 3816     // enum names from opcodes.hpp:    Op_Node Op_Set Op_RegN       Op_RegI       Op_RegP       Op_RegF       Op_RegD       Op_RegL
 3817     static int typeToRegLo[Op_RegL+1] = { 0,   0,     R3_num,   R3_num,   R3_num,   F1_num,   F1_num,   R3_num };
 3818     static int typeToRegHi[Op_RegL+1] = { 0,   0,     OptoReg::Bad, R3_H_num, R3_H_num, OptoReg::Bad, F1_H_num, R3_H_num };
 3819     return OptoRegPair(typeToRegHi[ideal_reg], typeToRegLo[ideal_reg]);
 3820   %}
 3821 %}
 3822 
 3823 
 3824 //----------ATTRIBUTES---------------------------------------------------------
 3825 
 3826 //----------Operand Attributes-------------------------------------------------
 3827 op_attrib op_cost(1);          // Required cost attribute.
 3828 
 3829 //----------Instruction Attributes---------------------------------------------
 3830 
 3831 // Cost attribute (required).
 3832 ins_attrib ins_cost(DEFAULT_COST);
 3833 
 3834 // Is this instruction a non-matching short branch variant of some
 3835 // long branch? Not required.
 3836 ins_attrib ins_short_branch(0);
 3837 
 3838 ins_attrib ins_is_TrapBasedCheckNode(true);
 3839 
 3840 // Number of constants.
 3841 // This instruction uses the given number of constants
 3842 // (optional attribute).
 3843 // This is needed to determine, before postalloc_expand, whether the constant
 3844 // pool will exceed 4000 entries: the overall number of constants is computed
 3845 // beforehand. It's also used to compute the constant pool size
 3846 // in Output().
 3847 ins_attrib ins_num_consts(0);
 3848 
 3849 // Required alignment attribute (must be a power of 2) specifies the
 3850 // alignment that some part of the instruction (not necessarily the
 3851 // start) requires. If > 1, a compute_padding() function must be
 3852 // provided for the instruction.
 3853 ins_attrib ins_alignment(1);
 3854 
 3855 // Enforce/prohibit rematerializations.
 3856 // - If an instruction is attributed with 'ins_cannot_rematerialize(true)'
 3857 //   then rematerialization of that instruction is prohibited and the
 3858 //   instruction's value will be spilled if necessary.
 3859 //   Causes MachNode::rematerialize() to return false.
 3860 // - If an instruction is attributed with 'ins_should_rematerialize(true)'
 3861 //   then rematerialization should be enforced and a copy of the instruction
 3862 //   should be inserted if possible; rematerialization is not guaranteed.
 3863 //   Note: this may result in rematerializations in front of every use.
 3864 //   Causes MachNode::rematerialize() to possibly return true.
 3865 // (optional attribute)
 3866 ins_attrib ins_cannot_rematerialize(false);
 3867 ins_attrib ins_should_rematerialize(false);
 3868 
 3869 // Instruction has variable size depending on alignment.
 3870 ins_attrib ins_variable_size_depending_on_alignment(false);
 3871 
 3872 // Instruction is a nop.
 3873 ins_attrib ins_is_nop(false);
 3874 
 3875 // Instruction is mapped to a MachIfFastLock node (instead of MachFastLock).
 3876 ins_attrib ins_use_mach_if_fast_lock_node(false);
 3877 
 3878 // Field for the toc offset of a constant.
 3879 //
 3880 // This is needed if the toc offset is not encodable as an immediate in
 3881 // the PPC load instruction. If so, the upper (hi) bits of the offset are
 3882 // added to the toc, and from this a load with immediate is performed.
 3883 // With postalloc expand, we get two nodes that require the same offset
 3884 // but which don't know about each other. The offset is only known
 3885 // when the constant is added to the constant pool during emitting.
 3886 // It is generated in the 'hi'-node, which adds the upper bits, and saved
 3887 // in this node.  The 'lo'-node has a link to the 'hi'-node and reads
 3888 // the offset from there when it gets encoded.
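      // Roughly, for such a large offset the two nodes correspond to a sequence
      // like the following (illustrative sketch; register names are placeholders):
      //   addis Rtmp, Rtoc, offset@ha   // 'hi'-node: add upper bits to the TOC
      //   ld    Rdst, offset@l(Rtmp)    // 'lo'-node: load with the low 16-bit immediate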
 3889 ins_attrib ins_field_const_toc_offset(0);
 3890 ins_attrib ins_field_const_toc_offset_hi_node(0);
 3891 
 3892 // A field that can hold the instruction's offset in the code buffer.
 3893 // Set in the nodes emitter.
 3894 ins_attrib ins_field_cbuf_insts_offset(-1);
 3895 
 3896 // Fields for referencing a call's load-IC-node.
 3897 // If the toc offset cannot be encoded as an immediate in a load, we
 3898 // use two nodes.
 3899 ins_attrib ins_field_load_ic_hi_node(0);
 3900 ins_attrib ins_field_load_ic_node(0);
 3901 
 3902 //----------OPERANDS-----------------------------------------------------------
 3903 // Operand definitions must precede instruction definitions for correct
 3904 // parsing in the ADLC because operands constitute user-defined types
 3905 // which are used in instruction definitions.
 3906 //
 3907 // Formats are generated automatically for constants and base registers.
 3908 
 3909 operand vecX() %{
 3910   constraint(ALLOC_IN_RC(vs_reg));
 3911   match(VecX);
 3912 
 3913   format %{ %}
 3914   interface(REG_INTER);
 3915 %}
 3916 
 3917 //----------Simple Operands----------------------------------------------------
 3918 // Immediate Operands
 3919 
 3920 // Integer Immediate: 32-bit
 3921 operand immI() %{
 3922   match(ConI);
 3923   op_cost(40);
 3924   format %{ %}
 3925   interface(CONST_INTER);
 3926 %}
 3927 
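      // Integer Immediate: 8-bit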
 3928 operand immI8() %{
 3929   predicate(Assembler::is_simm(n->get_int(), 8));
 3930   op_cost(0);
 3931   match(ConI);
 3932   format %{ %}
 3933   interface(CONST_INTER);
 3934 %}
 3935 
 3936 // Integer Immediate: 16-bit
 3937 operand immI16() %{
 3938   predicate(Assembler::is_simm(n->get_int(), 16));
 3939   op_cost(0);
 3940   match(ConI);
 3941   format %{ %}
 3942   interface(CONST_INTER);
 3943 %}
 3944 
 3945 // Integer Immediate: 32-bit, where lowest 16 bits are 0x0000.
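      // (e.g. 0x12340000; such a value can typically be materialized with a single lis/addis)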
 3946 operand immIhi16() %{
 3947   predicate(((n->get_int() & 0xffff0000) != 0) && ((n->get_int() & 0xffff) == 0));
 3948   match(ConI);
 3949   op_cost(0);
 3950   format %{ %}
 3951   interface(CONST_INTER);
 3952 %}
 3953 
 3954 // Integer Immediate: 32-bit immediate for prefixed addi and load/store.
 3955 operand immI32() %{
 3956   predicate(PowerArchitecturePPC64 >= 10);
 3957   op_cost(0);
 3958   match(ConI);
 3959   format %{ %}
 3960   interface(CONST_INTER);
 3961 %}
 3962 
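      // Integer Immediate: the negative of a power of 2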
 3963 operand immInegpow2() %{
 3964   predicate(is_power_of_2(-(juint)(n->get_int())));
 3965   match(ConI);
 3966   op_cost(0);
 3967   format %{ %}
 3968   interface(CONST_INTER);
 3969 %}
 3970 
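      // Integer Immediate: a power of 2 minus 1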
 3971 operand immIpow2minus1() %{
 3972   predicate(is_power_of_2((juint)(n->get_int()) + 1u));
 3973   match(ConI);
 3974   op_cost(0);
 3975   format %{ %}
 3976   interface(CONST_INTER);
 3977 %}
 3978 
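      // Integer Immediate: a power of 2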
 3979 operand immIpowerOf2() %{
 3980   predicate(is_power_of_2((juint)(n->get_int())));
 3981   match(ConI);
 3982   op_cost(0);
 3983   format %{ %}
 3984   interface(CONST_INTER);
 3985 %}
 3986 
 3987 // Unsigned Integer Immediate: the values 0-31
 3988 operand uimmI5() %{
 3989   predicate(Assembler::is_uimm(n->get_int(), 5));
 3990   match(ConI);
 3991   op_cost(0);
 3992   format %{ %}
 3993   interface(CONST_INTER);
 3994 %}
 3995 
 3996 // Unsigned Integer Immediate: 6-bit
 3997 operand uimmI6() %{
 3998   predicate(Assembler::is_uimm(n->get_int(), 6));
 3999   match(ConI);
 4000   op_cost(0);
 4001   format %{ %}
 4002   interface(CONST_INTER);
 4003 %}
 4004 
 4005 // Unsigned Integer Immediate: 6-bit int, greater than or equal to 32
 4006 operand uimmI6_ge32() %{
 4007   predicate(Assembler::is_uimm(n->get_int(), 6) && n->get_int() >= 32);
 4008   match(ConI);
 4009   op_cost(0);
 4010   format %{ %}
 4011   interface(CONST_INTER);
 4012 %}
 4013 
 4014 // Unsigned Integer Immediate: 15-bit
 4015 operand uimmI15() %{
 4016   predicate(Assembler::is_uimm(n->get_int(), 15));
 4017   match(ConI);
 4018   op_cost(0);
 4019   format %{ %}
 4020   interface(CONST_INTER);
 4021 %}
 4022 
 4023 // Unsigned Integer Immediate: 16-bit
 4024 operand uimmI16() %{
 4025   predicate(Assembler::is_uimm(n->get_int(), 16));
 4026   match(ConI);
 4027   op_cost(0);
 4028   format %{ %}
 4029   interface(CONST_INTER);
 4030 %}
 4031 
 4032 // constant 'int 0'.
 4033 operand immI_0() %{
 4034   predicate(n->get_int() == 0);
 4035   match(ConI);
 4036   op_cost(0);
 4037   format %{ %}
 4038   interface(CONST_INTER);
 4039 %}
 4040 
 4041 // constant 'int 1'.
 4042 operand immI_1() %{
 4043   predicate(n->get_int() == 1);
 4044   match(ConI);
 4045   op_cost(0);
 4046   format %{ %}
 4047   interface(CONST_INTER);
 4048 %}
 4049 
 4050 // constant 'int -1'.
 4051 operand immI_minus1() %{
 4052   predicate(n->get_int() == -1);
 4053   match(ConI);
 4054   op_cost(0);
 4055   format %{ %}
 4056   interface(CONST_INTER);
 4057 %}
 4058 
 4059 // int value 16.
 4060 operand immI_16() %{
 4061   predicate(n->get_int() == 16);
 4062   match(ConI);
 4063   op_cost(0);
 4064   format %{ %}
 4065   interface(CONST_INTER);
 4066 %}
 4067 
 4068 // int value 24.
 4069 operand immI_24() %{
 4070   predicate(n->get_int() == 24);
 4071   match(ConI);
 4072   op_cost(0);
 4073   format %{ %}
 4074   interface(CONST_INTER);
 4075 %}
 4076 
 4077 // Compressed oops constants
 4078 // Pointer Immediate
 4079 operand immN() %{
 4080   match(ConN);
 4081 
 4082   op_cost(10);
 4083   format %{ %}
 4084   interface(CONST_INTER);
 4085 %}
 4086 
 4087 // NULL Pointer Immediate
 4088 operand immN_0() %{
 4089   predicate(n->get_narrowcon() == 0);
 4090   match(ConN);
 4091 
 4092   op_cost(0);
 4093   format %{ %}
 4094   interface(CONST_INTER);
 4095 %}
 4096 
 4097 // Compressed klass constants
 4098 operand immNKlass() %{
 4099   match(ConNKlass);
 4100 
 4101   op_cost(0);
 4102   format %{ %}
 4103   interface(CONST_INTER);
 4104 %}
 4105 
 4106 // This operand can be used to avoid matching of an instruct
 4107 // with a chain rule.
 4108 operand immNKlass_NM() %{
 4109   match(ConNKlass);
 4110   predicate(false);
 4111   op_cost(0);
 4112   format %{ %}
 4113   interface(CONST_INTER);
 4114 %}
 4115 
 4116 // Pointer Immediate: 64-bit
 4117 operand immP() %{
 4118   match(ConP);
 4119   op_cost(0);
 4120   format %{ %}
 4121   interface(CONST_INTER);
 4122 %}
 4123 
 4124 // Operand to avoid match of loadConP.
 4125 // This operand can be used to avoid matching of an instruct
 4126 // with a chain rule.
 4127 operand immP_NM() %{
 4128   match(ConP);
 4129   predicate(false);
 4130   op_cost(0);
 4131   format %{ %}
 4132   interface(CONST_INTER);
 4133 %}
 4134 
 4135 // constant 'pointer 0'.
 4136 operand immP_0() %{
 4137   predicate(n->get_ptr() == 0);
 4138   match(ConP);
 4139   op_cost(0);
 4140   format %{ %}
 4141   interface(CONST_INTER);
 4142 %}
 4143 
 4144 // pointer 0x0 or 0x1
 4145 operand immP_0or1() %{
 4146   predicate((n->get_ptr() == 0) || (n->get_ptr() == 1));
 4147   match(ConP);
 4148   op_cost(0);
 4149   format %{ %}
 4150   interface(CONST_INTER);
 4151 %}
 4152 
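      // Long Immediate: 64-bit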
 4153 operand immL() %{
 4154   match(ConL);
 4155   op_cost(40);
 4156   format %{ %}
 4157   interface(CONST_INTER);
 4158 %}
 4159 
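      // Long Immediate: at most 30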
 4160 operand immLmax30() %{
 4161   predicate((n->get_long() <= 30));
 4162   match(ConL);
 4163   op_cost(0);
 4164   format %{ %}
 4165   interface(CONST_INTER);
 4166 %}
 4167 
 4168 // Long Immediate: 16-bit
 4169 operand immL16() %{
 4170   predicate(Assembler::is_simm(n->get_long(), 16));
 4171   match(ConL);
 4172   op_cost(0);
 4173   format %{ %}
 4174   interface(CONST_INTER);
 4175 %}
 4176 
 4177 // Long Immediate: 16-bit, 4-aligned
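      // (as required by DS-form memory instructions such as ld/std,
      // whose displacement must be a multiple of 4)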
 4178 operand immL16Alg4() %{
 4179   predicate(Assembler::is_simm(n->get_long(), 16) && ((n->get_long() & 0x3) == 0));
 4180   match(ConL);
 4181   op_cost(0);
 4182   format %{ %}
 4183   interface(CONST_INTER);
 4184 %}
 4185 
 4186 // Long Immediate: 32-bit, where lowest 16 bits are 0x0000.
 4187 operand immL32hi16() %{
 4188   predicate(Assembler::is_simm(n->get_long(), 32) && ((n->get_long() & 0xffffL) == 0L));
 4189   match(ConL);
 4190   op_cost(0);
 4191   format %{ %}
 4192   interface(CONST_INTER);
 4193 %}
 4194 
 4195 // Long Immediate: 32-bit
 4196 operand immL32() %{
 4197   predicate(Assembler::is_simm(n->get_long(), 32));
 4198   match(ConL);
 4199   op_cost(0);
 4200   format %{ %}
 4201   interface(CONST_INTER);
 4202 %}
 4203 
 4204 // Long Immediate: 34-bit, immediate field in prefixed addi and load/store.
 4205 operand immL34() %{
 4206   predicate(PowerArchitecturePPC64 >= 10 && Assembler::is_simm(n->get_long(), 34));
 4207   match(ConL);
 4208   op_cost(0);
 4209   format %{ %}
 4210   interface(CONST_INTER);
 4211 %}
 4212 
 4213 // Long Immediate: 64-bit, where only the highest 16 bits are non-zero (e.g. 0x1234000000000000).
 4214 operand immLhighest16() %{
 4215   predicate((n->get_long() & 0xffff000000000000L) != 0L && (n->get_long() & 0x0000ffffffffffffL) == 0L);
 4216   match(ConL);
 4217   op_cost(0);
 4218   format %{ %}
 4219   interface(CONST_INTER);
 4220 %}
 4221 
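      // Long Immediate: the negative of a power of 2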
 4222 operand immLnegpow2() %{
 4223   predicate(is_power_of_2(-(julong)(n->get_long())));
 4224   match(ConL);
 4225   op_cost(0);
 4226   format %{ %}
 4227   interface(CONST_INTER);
 4228 %}
 4229 
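      // Long Immediate: a power of 2 minus 1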
 4230 operand immLpow2minus1() %{
 4231   predicate(is_power_of_2((julong)(n->get_long()) + 1ull));
 4232   match(ConL);
 4233   op_cost(0);
 4234   format %{ %}
 4235   interface(CONST_INTER);
 4236 %}
 4237 
 4238 // constant 'long 0'.
 4239 operand immL_0() %{
 4240   predicate(n->get_long() == 0L);
 4241   match(ConL);
 4242   op_cost(0);
 4243   format %{ %}
 4244   interface(CONST_INTER);
 4245 %}
 4246 
 4247 // constant 'long -1'.
 4248 operand immL_minus1() %{
 4249   predicate(n->get_long() == -1L);
 4250   match(ConL);
 4251   op_cost(0);
 4252   format %{ %}
 4253   interface(CONST_INTER);
 4254 %}
 4255 
 4256 // Long Immediate: low 32-bit mask
 4257 operand immL_32bits() %{
 4258   predicate(n->get_long() == 0xFFFFFFFFL);
 4259   match(ConL);
 4260   op_cost(0);
 4261   format %{ %}
 4262   interface(CONST_INTER);
 4263 %}
 4264 
 4265 // Unsigned Long Immediate: 16-bit
 4266 operand uimmL16() %{
 4267   predicate(Assembler::is_uimm(n->get_long(), 16));
 4268   match(ConL);
 4269   op_cost(0);
 4270   format %{ %}
 4271   interface(CONST_INTER);
 4272 %}
 4273 
 4274 // Float Immediate
 4275 operand immF() %{
 4276   match(ConF);
 4277   op_cost(40);
 4278   format %{ %}
 4279   interface(CONST_INTER);
 4280 %}
 4281 
 4282 // Float Immediate: +0.0f.
 4283 operand immF_0() %{
 4284   predicate(jint_cast(n->getf()) == 0);
 4285   match(ConF);
 4286 
 4287   op_cost(0);
 4288   format %{ %}
 4289   interface(CONST_INTER);
 4290 %}
 4291 
 4292 // Double Immediate
 4293 operand immD() %{
 4294   match(ConD);
 4295   op_cost(40);
 4296   format %{ %}
 4297   interface(CONST_INTER);
 4298 %}
 4299 
 4300 // Double Immediate: +0.0d.
 4301 operand immD_0() %{
 4302   predicate(jlong_cast(n->getd()) == 0);
 4303   match(ConD);
 4304 
 4305   op_cost(0);
 4306   format %{ %}
 4307   interface(CONST_INTER);
 4308 %}
 4309 
 4310 // Integer Register Operands
 4311 // Integer Destination Register
 4312 // See definition of reg_class bits32_reg_rw.
 4313 operand iRegIdst() %{
 4314   constraint(ALLOC_IN_RC(bits32_reg_rw));
 4315   match(RegI);
 4316   match(rscratch1RegI);
 4317   match(rscratch2RegI);
 4318   match(rarg1RegI);
 4319   match(rarg2RegI);
 4320   match(rarg3RegI);
 4321   match(rarg4RegI);
 4322   format %{ %}
 4323   interface(REG_INTER);
 4324 %}
 4325 
 4326 // Integer Source Register
 4327 // See definition of reg_class bits32_reg_ro.
 4328 operand iRegIsrc() %{
 4329   constraint(ALLOC_IN_RC(bits32_reg_ro));
 4330   match(RegI);
 4331   match(rscratch1RegI);
 4332   match(rscratch2RegI);
 4333   match(rarg1RegI);
 4334   match(rarg2RegI);
 4335   match(rarg3RegI);
 4336   match(rarg4RegI);
 4337   format %{ %}
 4338   interface(REG_INTER);
 4339 %}
 4340 
 4341 operand rscratch1RegI() %{
 4342   constraint(ALLOC_IN_RC(rscratch1_bits32_reg));
 4343   match(iRegIdst);
 4344   format %{ %}
 4345   interface(REG_INTER);
 4346 %}
 4347 
 4348 operand rscratch2RegI() %{
 4349   constraint(ALLOC_IN_RC(rscratch2_bits32_reg));
 4350   match(iRegIdst);
 4351   format %{ %}
 4352   interface(REG_INTER);
 4353 %}
 4354 
 4355 operand rarg1RegI() %{
 4356   constraint(ALLOC_IN_RC(rarg1_bits32_reg));
 4357   match(iRegIdst);
 4358   format %{ %}
 4359   interface(REG_INTER);
 4360 %}
 4361 
 4362 operand rarg2RegI() %{
 4363   constraint(ALLOC_IN_RC(rarg2_bits32_reg));
 4364   match(iRegIdst);
 4365   format %{ %}
 4366   interface(REG_INTER);
 4367 %}
 4368 
 4369 operand rarg3RegI() %{
 4370   constraint(ALLOC_IN_RC(rarg3_bits32_reg));
 4371   match(iRegIdst);
 4372   format %{ %}
 4373   interface(REG_INTER);
 4374 %}
 4375 
 4376 operand rarg4RegI() %{
 4377   constraint(ALLOC_IN_RC(rarg4_bits32_reg));
 4378   match(iRegIdst);
 4379   format %{ %}
 4380   interface(REG_INTER);
 4381 %}
 4382 
 4383 operand rarg1RegL() %{
 4384   constraint(ALLOC_IN_RC(rarg1_bits64_reg));
 4385   match(iRegLdst);
 4386   format %{ %}
 4387   interface(REG_INTER);
 4388 %}
 4389 
 4390 operand rarg2RegL() %{
 4391   constraint(ALLOC_IN_RC(rarg2_bits64_reg));
 4392   match(iRegLdst);
 4393   format %{ %}
 4394   interface(REG_INTER);
 4395 %}
 4396 
 4397 operand rarg3RegL() %{
 4398   constraint(ALLOC_IN_RC(rarg3_bits64_reg));
 4399   match(iRegLdst);
 4400   format %{ %}
 4401   interface(REG_INTER);
 4402 %}
 4403 
 4404 operand rarg4RegL() %{
 4405   constraint(ALLOC_IN_RC(rarg4_bits64_reg));
 4406   match(iRegLdst);
 4407   format %{ %}
 4408   interface(REG_INTER);
 4409 %}
 4410 
 4411 // Pointer Destination Register
 4412 // See definition of reg_class bits64_reg_rw.
 4413 operand iRegPdst() %{
 4414   constraint(ALLOC_IN_RC(bits64_reg_rw));
 4415   match(RegP);
 4416   match(rscratch1RegP);
 4417   match(rscratch2RegP);
 4418   match(rarg1RegP);
 4419   match(rarg2RegP);
 4420   match(rarg3RegP);
 4421   match(rarg4RegP);
 4422   format %{ %}
 4423   interface(REG_INTER);
 4424 %}
 4425 
 4426 // Pointer Destination Register
 4427 // Operand not using r11 and r12 (killed in epilog).
 4428 operand iRegPdstNoScratch() %{
 4429   constraint(ALLOC_IN_RC(bits64_reg_leaf_call));
 4430   match(RegP);
 4431   match(rarg1RegP);
 4432   match(rarg2RegP);
 4433   match(rarg3RegP);
 4434   match(rarg4RegP);
 4435   format %{ %}
 4436   interface(REG_INTER);
 4437 %}
 4438 
 4439 // Pointer Source Register
 4440 // See definition of reg_class bits64_reg_ro.
 4441 operand iRegPsrc() %{
 4442   constraint(ALLOC_IN_RC(bits64_reg_ro));
 4443   match(RegP);
 4444   match(iRegPdst);
 4445   match(rscratch1RegP);
 4446   match(rscratch2RegP);
 4447   match(rarg1RegP);
 4448   match(rarg2RegP);
 4449   match(rarg3RegP);
 4450   match(rarg4RegP);
 4451   match(threadRegP);
 4452   format %{ %}
 4453   interface(REG_INTER);
 4454 %}
 4455 
 4456 // Thread operand.
 4457 operand threadRegP() %{
 4458   constraint(ALLOC_IN_RC(thread_bits64_reg));
 4459   match(iRegPdst);
 4460   format %{ "R16" %}
 4461   interface(REG_INTER);
 4462 %}
 4463 
 4464 operand rscratch1RegP() %{
 4465   constraint(ALLOC_IN_RC(rscratch1_bits64_reg));
 4466   match(iRegPdst);
 4467   format %{ "R11" %}
 4468   interface(REG_INTER);
 4469 %}
 4470 
 4471 operand rscratch2RegP() %{
 4472   constraint(ALLOC_IN_RC(rscratch2_bits64_reg));
 4473   match(iRegPdst);
 4474   format %{ %}
 4475   interface(REG_INTER);
 4476 %}
 4477 
 4478 operand rarg1RegP() %{
 4479   constraint(ALLOC_IN_RC(rarg1_bits64_reg));
 4480   match(iRegPdst);
 4481   format %{ %}
 4482   interface(REG_INTER);
 4483 %}
 4484 
 4485 operand rarg2RegP() %{
 4486   constraint(ALLOC_IN_RC(rarg2_bits64_reg));
 4487   match(iRegPdst);
 4488   format %{ %}
 4489   interface(REG_INTER);
 4490 %}
 4491 
 4492 operand rarg3RegP() %{
 4493   constraint(ALLOC_IN_RC(rarg3_bits64_reg));
 4494   match(iRegPdst);
 4495   format %{ %}
 4496   interface(REG_INTER);
 4497 %}
 4498 
 4499 operand rarg4RegP() %{
 4500   constraint(ALLOC_IN_RC(rarg4_bits64_reg));
 4501   match(iRegPdst);
 4502   format %{ %}
 4503   interface(REG_INTER);
 4504 %}
 4505 
 4506 operand iRegNsrc() %{
 4507   constraint(ALLOC_IN_RC(bits32_reg_ro));
 4508   match(RegN);
 4509   match(iRegNdst);
 4510 
 4511   format %{ %}
 4512   interface(REG_INTER);
 4513 %}
 4514 
 4515 operand iRegNdst() %{
 4516   constraint(ALLOC_IN_RC(bits32_reg_rw));
 4517   match(RegN);
 4518 
 4519   format %{ %}
 4520   interface(REG_INTER);
 4521 %}
 4522 
 4523 // Long Destination Register
 4524 // See definition of reg_class bits64_reg_rw.
 4525 operand iRegLdst() %{
 4526   constraint(ALLOC_IN_RC(bits64_reg_rw));
 4527   match(RegL);
 4528   match(rscratch1RegL);
 4529   match(rscratch2RegL);
 4530   format %{ %}
 4531   interface(REG_INTER);
 4532 %}
 4533 
 4534 // Long Source Register
 4535 // See definition of reg_class bits64_reg_ro.
 4536 operand iRegLsrc() %{
 4537   constraint(ALLOC_IN_RC(bits64_reg_ro));
 4538   match(RegL);
 4539   match(iRegLdst);
 4540   match(rscratch1RegL);
 4541   match(rscratch2RegL);
 4542   format %{ %}
 4543   interface(REG_INTER);
 4544 %}
 4545 
 4546 // Special operand for ConvL2I.
 4547 operand iRegL2Isrc(iRegLsrc reg) %{
 4548   constraint(ALLOC_IN_RC(bits64_reg_ro));
 4549   match(ConvL2I reg);
 4550   format %{ "ConvL2I($reg)" %}
 4551   interface(REG_INTER)
 4552 %}
 4553 
 4554 operand rscratch1RegL() %{
 4555   constraint(ALLOC_IN_RC(rscratch1_bits64_reg));
 4556   match(RegL);
 4557   format %{ %}
 4558   interface(REG_INTER);
 4559 %}
 4560 
 4561 operand rscratch2RegL() %{
 4562   constraint(ALLOC_IN_RC(rscratch2_bits64_reg));
 4563   match(RegL);
 4564   format %{ %}
 4565   interface(REG_INTER);
 4566 %}
 4567 
 4568 // Condition Code Flag Registers
 4569 operand flagsReg() %{
 4570   constraint(ALLOC_IN_RC(int_flags));
 4571   match(RegFlags);
 4572   format %{ %}
 4573   interface(REG_INTER);
 4574 %}
 4575 
 4576 operand flagsRegSrc() %{
 4577   constraint(ALLOC_IN_RC(int_flags_ro));
 4578   match(RegFlags);
 4579   match(flagsReg);
 4580   match(flagsRegCR0);
 4581   format %{ %}
 4582   interface(REG_INTER);
 4583 %}
 4584 
 4585 // Condition Code Flag Register CR0
 4586 operand flagsRegCR0() %{
 4587   constraint(ALLOC_IN_RC(int_flags_CR0));
 4588   match(RegFlags);
 4589   format %{ "CR0" %}
 4590   interface(REG_INTER);
 4591 %}
 4592 
 4593 operand flagsRegCR1() %{
 4594   constraint(ALLOC_IN_RC(int_flags_CR1));
 4595   match(RegFlags);
 4596   format %{ "CR1" %}
 4597   interface(REG_INTER);
 4598 %}
 4599 
 4600 operand flagsRegCR6() %{
 4601   constraint(ALLOC_IN_RC(int_flags_CR6));
 4602   match(RegFlags);
 4603   format %{ "CR6" %}
 4604   interface(REG_INTER);
 4605 %}
 4606 
 4607 operand regCTR() %{
 4608   constraint(ALLOC_IN_RC(ctr_reg));
 4609   // RegFlags should work. Introducing a RegSpecial type would cause a
 4610   // lot of changes.
 4611   match(RegFlags);
 4612   format %{"SR_CTR" %}
 4613   interface(REG_INTER);
 4614 %}
 4615 
 4616 operand regD() %{
 4617   constraint(ALLOC_IN_RC(dbl_reg));
 4618   match(RegD);
 4619   format %{ %}
 4620   interface(REG_INTER);
 4621 %}
 4622 
 4623 operand regF() %{
 4624   constraint(ALLOC_IN_RC(flt_reg));
 4625   match(RegF);
 4626   format %{ %}
 4627   interface(REG_INTER);
 4628 %}
 4629 
 4630 // Special Registers
 4631 
 4632 // Method Register
 4633 operand inline_cache_regP(iRegPdst reg) %{
 4634   constraint(ALLOC_IN_RC(r19_bits64_reg)); // inline_cache_reg
 4635   match(reg);
 4636   format %{ %}
 4637   interface(REG_INTER);
 4638 %}
 4639 
 4640 // Operands to remove register moves in unscaled mode.
 4641 // Match read/write registers with an EncodeP node if neither shift nor add are required.
 4642 operand iRegP2N(iRegPsrc reg) %{
  predicate(false /* TODO: PPC port MatchDecodeNodes */ && CompressedOops::shift() == 0);
 4644   constraint(ALLOC_IN_RC(bits64_reg_ro));
 4645   match(EncodeP reg);
 4646   format %{ "$reg" %}
 4647   interface(REG_INTER)
 4648 %}
 4649 
 4650 operand iRegN2P(iRegNsrc reg) %{
 4651   predicate(false /* TODO: PPC port MatchDecodeNodes*/);
 4652   constraint(ALLOC_IN_RC(bits32_reg_ro));
 4653   match(DecodeN reg);
 4654   format %{ "$reg" %}
 4655   interface(REG_INTER)
 4656 %}
 4657 
 4658 operand iRegN2P_klass(iRegNsrc reg) %{
 4659   predicate(CompressedKlassPointers::base() == NULL && CompressedKlassPointers::shift() == 0);
 4660   constraint(ALLOC_IN_RC(bits32_reg_ro));
 4661   match(DecodeNKlass reg);
 4662   format %{ "$reg" %}
 4663   interface(REG_INTER)
 4664 %}
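
// Illustration: when CompressedOops::shift() == 0 (and, for the klass
// variant, the base is NULL), DecodeN/DecodeNKlass is the identity, so the
// narrow register can be used directly where a pointer is expected and no
// decode instruction has to be emitted.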
 4665 
 4666 //----------Complex Operands---------------------------------------------------
 4667 // Indirect Memory Reference
 4668 operand indirect(iRegPsrc reg) %{
 4669   constraint(ALLOC_IN_RC(bits64_reg_ro));
 4670   match(reg);
 4671   op_cost(100);
 4672   format %{ "[$reg]" %}
 4673   interface(MEMORY_INTER) %{
 4674     base($reg);
 4675     index(0x0);
 4676     scale(0x0);
 4677     disp(0x0);
 4678   %}
 4679 %}
 4680 
 4681 // Indirect with Offset
 4682 operand indOffset16(iRegPsrc reg, immL16 offset) %{
 4683   constraint(ALLOC_IN_RC(bits64_reg_ro));
 4684   match(AddP reg offset);
 4685   op_cost(100);
 4686   format %{ "[$reg + $offset]" %}
 4687   interface(MEMORY_INTER) %{
 4688     base($reg);
 4689     index(0x0);
 4690     scale(0x0);
 4691     disp($offset);
 4692   %}
 4693 %}
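
// Illustration: a memory access matched through indOffset16 turns into a
// single D-form access such as "LWZ dst, $offset($reg)"; the MEMORY_INTER
// fields above tell the matcher there is no index register and no scaling.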
 4694 
 4695 // Indirect with 4-aligned Offset
 4696 operand indOffset16Alg4(iRegPsrc reg, immL16Alg4 offset) %{
 4697   constraint(ALLOC_IN_RC(bits64_reg_ro));
 4698   match(AddP reg offset);
 4699   op_cost(100);
 4700   format %{ "[$reg + $offset]" %}
 4701   interface(MEMORY_INTER) %{
 4702     base($reg);
 4703     index(0x0);
 4704     scale(0x0);
 4705     disp($offset);
 4706   %}
 4707 %}
 4708 
 4709 //----------Complex Operands for Compressed OOPs-------------------------------
 4710 // Compressed OOPs with narrow_oop_shift == 0.
 4711 
 4712 // Indirect Memory Reference, compressed OOP
 4713 operand indirectNarrow(iRegNsrc reg) %{
 4714   predicate(false /* TODO: PPC port MatchDecodeNodes*/);
 4715   constraint(ALLOC_IN_RC(bits64_reg_ro));
 4716   match(DecodeN reg);
 4717   op_cost(100);
 4718   format %{ "[$reg]" %}
 4719   interface(MEMORY_INTER) %{
 4720     base($reg);
 4721     index(0x0);
 4722     scale(0x0);
 4723     disp(0x0);
 4724   %}
 4725 %}
 4726 
 4727 operand indirectNarrow_klass(iRegNsrc reg) %{
 4728   predicate(CompressedKlassPointers::base() == NULL && CompressedKlassPointers::shift() == 0);
 4729   constraint(ALLOC_IN_RC(bits64_reg_ro));
 4730   match(DecodeNKlass reg);
 4731   op_cost(100);
 4732   format %{ "[$reg]" %}
 4733   interface(MEMORY_INTER) %{
 4734     base($reg);
 4735     index(0x0);
 4736     scale(0x0);
 4737     disp(0x0);
 4738   %}
 4739 %}
 4740 
 4741 // Indirect with Offset, compressed OOP
 4742 operand indOffset16Narrow(iRegNsrc reg, immL16 offset) %{
 4743   predicate(false /* TODO: PPC port MatchDecodeNodes*/);
 4744   constraint(ALLOC_IN_RC(bits64_reg_ro));
 4745   match(AddP (DecodeN reg) offset);
 4746   op_cost(100);
 4747   format %{ "[$reg + $offset]" %}
 4748   interface(MEMORY_INTER) %{
 4749     base($reg);
 4750     index(0x0);
 4751     scale(0x0);
 4752     disp($offset);
 4753   %}
 4754 %}
 4755 
 4756 operand indOffset16Narrow_klass(iRegNsrc reg, immL16 offset) %{
 4757   predicate(CompressedKlassPointers::base() == NULL && CompressedKlassPointers::shift() == 0);
 4758   constraint(ALLOC_IN_RC(bits64_reg_ro));
 4759   match(AddP (DecodeNKlass reg) offset);
 4760   op_cost(100);
 4761   format %{ "[$reg + $offset]" %}
 4762   interface(MEMORY_INTER) %{
 4763     base($reg);
 4764     index(0x0);
 4765     scale(0x0);
 4766     disp($offset);
 4767   %}
 4768 %}
 4769 
 4770 // Indirect with 4-aligned Offset, compressed OOP
 4771 operand indOffset16NarrowAlg4(iRegNsrc reg, immL16Alg4 offset) %{
 4772   predicate(false /* TODO: PPC port MatchDecodeNodes*/);
 4773   constraint(ALLOC_IN_RC(bits64_reg_ro));
 4774   match(AddP (DecodeN reg) offset);
 4775   op_cost(100);
 4776   format %{ "[$reg + $offset]" %}
 4777   interface(MEMORY_INTER) %{
 4778     base($reg);
 4779     index(0x0);
 4780     scale(0x0);
 4781     disp($offset);
 4782   %}
 4783 %}
 4784 
 4785 operand indOffset16NarrowAlg4_klass(iRegNsrc reg, immL16Alg4 offset) %{
 4786   predicate(CompressedKlassPointers::base() == NULL && CompressedKlassPointers::shift() == 0);
 4787   constraint(ALLOC_IN_RC(bits64_reg_ro));
 4788   match(AddP (DecodeNKlass reg) offset);
 4789   op_cost(100);
 4790   format %{ "[$reg + $offset]" %}
 4791   interface(MEMORY_INTER) %{
 4792     base($reg);
 4793     index(0x0);
 4794     scale(0x0);
 4795     disp($offset);
 4796   %}
 4797 %}
 4798 
 4799 //----------Special Memory Operands--------------------------------------------
 4800 // Stack Slot Operand
 4801 //
 4802 // This operand is used for loading and storing temporary values on
 4803 // the stack where a match requires a value to flow through memory.
 4804 operand stackSlotI(sRegI reg) %{
 4805   constraint(ALLOC_IN_RC(stack_slots));
 4806   op_cost(100);
 4807   //match(RegI);
 4808   format %{ "[sp+$reg]" %}
 4809   interface(MEMORY_INTER) %{
 4810     base(0x1);   // R1_SP
 4811     index(0x0);
 4812     scale(0x0);
 4813     disp($reg);  // Stack Offset
 4814   %}
 4815 %}
 4816 
 4817 operand stackSlotL(sRegL reg) %{
 4818   constraint(ALLOC_IN_RC(stack_slots));
 4819   op_cost(100);
 4820   //match(RegL);
 4821   format %{ "[sp+$reg]" %}
 4822   interface(MEMORY_INTER) %{
 4823     base(0x1);   // R1_SP
 4824     index(0x0);
 4825     scale(0x0);
 4826     disp($reg);  // Stack Offset
 4827   %}
 4828 %}
 4829 
 4830 operand stackSlotP(sRegP reg) %{
 4831   constraint(ALLOC_IN_RC(stack_slots));
 4832   op_cost(100);
 4833   //match(RegP);
 4834   format %{ "[sp+$reg]" %}
 4835   interface(MEMORY_INTER) %{
 4836     base(0x1);   // R1_SP
 4837     index(0x0);
 4838     scale(0x0);
 4839     disp($reg);  // Stack Offset
 4840   %}
 4841 %}
 4842 
 4843 operand stackSlotF(sRegF reg) %{
 4844   constraint(ALLOC_IN_RC(stack_slots));
 4845   op_cost(100);
 4846   //match(RegF);
 4847   format %{ "[sp+$reg]" %}
 4848   interface(MEMORY_INTER) %{
 4849     base(0x1);   // R1_SP
 4850     index(0x0);
 4851     scale(0x0);
 4852     disp($reg);  // Stack Offset
 4853   %}
 4854 %}
 4855 
 4856 operand stackSlotD(sRegD reg) %{
 4857   constraint(ALLOC_IN_RC(stack_slots));
 4858   op_cost(100);
 4859   //match(RegD);
 4860   format %{ "[sp+$reg]" %}
 4861   interface(MEMORY_INTER) %{
 4862     base(0x1);   // R1_SP
 4863     index(0x0);
 4864     scale(0x0);
 4865     disp($reg);  // Stack Offset
 4866   %}
 4867 %}
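
// Illustration: base(0x1) encodes R1_SP in all stackSlot operands, so a
// spill access becomes e.g. "LD dst, <stack offset>(R1)", with the offset
// supplied by the register allocator through $reg.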
 4868 
 4869 // Operands for expressing Control Flow
 4870 // NOTE: Label is a predefined operand which should not be redefined in
 4871 //       the AD file. It is generically handled within the ADLC.
 4872 
 4873 //----------Conditional Branch Operands----------------------------------------
 4874 // Comparison Op
 4875 //
 4876 // This is the operation of the comparison, and is limited to the
 4877 // following set of codes: L (<), LE (<=), G (>), GE (>=), E (==), NE
 4878 // (!=).
 4879 //
 4880 // Other attributes of the comparison, such as unsignedness, are specified
 4881 // by the comparison instruction that sets a condition code flags register.
 4882 // That result is represented by a flags operand whose subtype is appropriate
 4883 // to the unsignedness (etc.) of the comparison.
 4884 //
 4885 // Later, the instruction which matches both the Comparison Op (a Bool) and
 4886 // the flags (produced by the Cmp) specifies the coding of the comparison op
 4887 // by matching a specific subtype of Bool operand below.
 4888 
 4889 // When used for floating point comparisons: unordered same as less.
 4890 operand cmpOp() %{
 4891   match(Bool);
 4892   format %{ "" %}
 4893   interface(COND_INTER) %{
 4894                            // BO only encodes bit 4 of bcondCRbiIsX, as bits 1-3 are always '100'.
 4895                            //           BO          &  BI
 4896     equal(0xA);            // 10 10:   bcondCRbiIs1 & Condition::equal
 4897     not_equal(0x2);        // 00 10:   bcondCRbiIs0 & Condition::equal
 4898     less(0x8);             // 10 00:   bcondCRbiIs1 & Condition::less
 4899     greater_equal(0x0);    // 00 00:   bcondCRbiIs0 & Condition::less
 4900     less_equal(0x1);       // 00 01:   bcondCRbiIs0 & Condition::greater
 4901     greater(0x9);          // 10 01:   bcondCRbiIs1 & Condition::greater
 4902     overflow(0xB);         // 10 11:   bcondCRbiIs1 & Condition::summary_overflow
 4903     no_overflow(0x3);      // 00 11:   bcondCRbiIs0 & Condition::summary_overflow
 4904   %}
 4905 %}
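
// Worked example of the encoding above: equal(0xA) = 0b1010, i.e. '10'
// (bcondCRbiIs1, branch if the CR bit is set) and '10' (Condition::equal,
// the EQ bit of the CR field), while greater_equal(0x0) = 0b0000 branches
// if the 'less' bit is clear.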
 4906 
 4907 //----------OPERAND CLASSES----------------------------------------------------
 4908 // Operand Classes are groups of operands that are used to simplify
 4909 // instruction definitions by not requiring the AD writer to specify
// separate instructions for every form of operand when the
 4911 // instruction accepts multiple operand types with the same basic
 4912 // encoding and format. The classic case of this is memory operands.
 4913 // Indirect is not included since its use is limited to Compare & Swap.
 4914 
 4915 opclass memory(indirect, indOffset16 /*, indIndex, tlsReference*/, indirectNarrow, indirectNarrow_klass, indOffset16Narrow, indOffset16Narrow_klass);
 4916 // Memory operand where offsets are 4-aligned. Required for ld, std.
 4917 opclass memoryAlg4(indirect, indOffset16Alg4, indirectNarrow, indOffset16NarrowAlg4, indOffset16NarrowAlg4_klass);
 4918 opclass indirectMemory(indirect, indirectNarrow);
 4919 
 4920 // Special opclass for I and ConvL2I.
 4921 opclass iRegIsrc_iRegL2Isrc(iRegIsrc, iRegL2Isrc);
 4922 
 4923 // Operand classes to match encode and decode. iRegN_P2N is only used
 4924 // for storeN. I have never seen an encode node elsewhere.
 4925 opclass iRegN_P2N(iRegNsrc, iRegP2N);
 4926 opclass iRegP_N2P(iRegPsrc, iRegN2P, iRegN2P_klass);
 4927 
 4928 //----------PIPELINE-----------------------------------------------------------
 4929 
 4930 pipeline %{
 4931 
 4932 // See J.M.Tendler et al. "Power4 system microarchitecture", IBM
 4933 // J. Res. & Dev., No. 1, Jan. 2002.
 4934 
 4935 //----------ATTRIBUTES---------------------------------------------------------
 4936 attributes %{
 4937 
 4938   // Power4 instructions are of fixed length.
 4939   fixed_size_instructions;
 4940 
  // TODO: if `bundle' means the number of instructions fetched
  // per cycle, this is 8. If `bundle' means a Power4 `group', i.e. the
  // maximum number of instructions issued per cycle, this is 5.
 4944   max_instructions_per_bundle = 8;
 4945 
 4946   // A Power4 instruction is 4 bytes long.
 4947   instruction_unit_size = 4;
 4948 
 4949   // The Power4 processor fetches 64 bytes...
 4950   instruction_fetch_unit_size = 64;
 4951 
 4952   // ...in one line
  instruction_fetch_units = 1;
 4954 
 4955   // Unused, list one so that array generated by adlc is not empty.
 4956   // Aix compiler chokes if _nop_count = 0.
 4957   nops(fxNop);
 4958 %}
 4959 
 4960 //----------RESOURCES----------------------------------------------------------
 4961 // Resources are the functional units available to the machine
 4962 resources(
 4963    PPC_BR,         // branch unit
 4964    PPC_CR,         // condition unit
 4965    PPC_FX1,        // integer arithmetic unit 1
 4966    PPC_FX2,        // integer arithmetic unit 2
 4967    PPC_LDST1,      // load/store unit 1
 4968    PPC_LDST2,      // load/store unit 2
 4969    PPC_FP1,        // float arithmetic unit 1
 4970    PPC_FP2,        // float arithmetic unit 2
 4971    PPC_LDST = PPC_LDST1 | PPC_LDST2,
 4972    PPC_FX = PPC_FX1 | PPC_FX2,
 4973    PPC_FP = PPC_FP1 | PPC_FP2
 4974  );
 4975 
 4976 //----------PIPELINE DESCRIPTION-----------------------------------------------
 4977 // Pipeline Description specifies the stages in the machine's pipeline
 4978 pipe_desc(
 4979    // Power4 longest pipeline path
 4980    PPC_IF,   // instruction fetch
 4981    PPC_IC,
 4982    //PPC_BP, // branch prediction
 4983    PPC_D0,   // decode
 4984    PPC_D1,   // decode
 4985    PPC_D2,   // decode
 4986    PPC_D3,   // decode
 4987    PPC_Xfer1,
 4988    PPC_GD,   // group definition
 4989    PPC_MP,   // map
 4990    PPC_ISS,  // issue
 4991    PPC_RF,   // resource fetch
 4992    PPC_EX1,  // execute (all units)
 4993    PPC_EX2,  // execute (FP, LDST)
 4994    PPC_EX3,  // execute (FP, LDST)
 4995    PPC_EX4,  // execute (FP)
 4996    PPC_EX5,  // execute (FP)
 4997    PPC_EX6,  // execute (FP)
 4998    PPC_WB,   // write back
 4999    PPC_Xfer2,
 5000    PPC_CP
 5001  );
 5002 
 5003 //----------PIPELINE CLASSES---------------------------------------------------
 5004 // Pipeline Classes describe the stages in which input and output are
 5005 // referenced by the hardware pipeline.
 5006 
 5007 // Simple pipeline classes.
 5008 
 5009 // Default pipeline class.
 5010 pipe_class pipe_class_default() %{
 5011   single_instruction;
 5012   fixed_latency(2);
 5013 %}
 5014 
 5015 // Pipeline class for empty instructions.
 5016 pipe_class pipe_class_empty() %{
 5017   single_instruction;
 5018   fixed_latency(0);
 5019 %}
 5020 
 5021 // Pipeline class for compares.
 5022 pipe_class pipe_class_compare() %{
 5023   single_instruction;
 5024   fixed_latency(16);
 5025 %}
 5026 
 5027 // Pipeline class for traps.
 5028 pipe_class pipe_class_trap() %{
 5029   single_instruction;
 5030   fixed_latency(100);
 5031 %}
 5032 
 5033 // Pipeline class for memory operations.
 5034 pipe_class pipe_class_memory() %{
 5035   single_instruction;
 5036   fixed_latency(16);
 5037 %}
 5038 
 5039 // Pipeline class for call.
 5040 pipe_class pipe_class_call() %{
 5041   single_instruction;
 5042   fixed_latency(100);
 5043 %}
 5044 
 5045 // Define the class for the Nop node.
 5046 define %{
 5047    MachNop = pipe_class_default;
 5048 %}
 5049 
 5050 %}
 5051 
 5052 //----------INSTRUCTIONS-------------------------------------------------------
 5053 
 5054 // Naming of instructions:
 5055 //   opA_operB / opA_operB_operC:
 5056 //     Operation 'op' with one or two source operands 'oper'. Result
 5057 //     type is A, source operand types are B and C.
 5058 //     Iff A == B == C, B and C are left out.
 5059 //
 5060 // The instructions are ordered according to the following scheme:
 5061 //  - loads
 5062 //  - load constants
 5063 //  - prefetch
 5064 //  - store
 5065 //  - encode/decode
 5066 //  - membar
 5067 //  - conditional moves
 5068 //  - compare & swap
 5069 //  - arithmetic and logic operations
 5070 //    * int: Add, Sub, Mul, Div, Mod
 5071 //    * int: lShift, arShift, urShift, rot
 5072 //    * float: Add, Sub, Mul, Div
 5073 //    * and, or, xor ...
 5074 //  - register moves: float <-> int, reg <-> stack, repl
//  - cast (high level type cast: XtoP, castPP, castII, not_null, etc.)
//  - conv (low level type cast requiring bit changes: sign extend, etc.)
 5077 //  - compares, range & zero checks.
 5078 //  - branches
 5079 //  - complex operations, intrinsics, min, max, replicate
 5080 //  - lock
 5081 //  - Calls
 5082 //
 5083 // If there are similar instructions with different types they are sorted:
 5084 // int before float
 5085 // small before big
 5086 // signed before unsigned
 5087 // e.g., loadS before loadUS before loadI before loadF.
 5088 
 5089 
 5090 //----------Load/Store Instructions--------------------------------------------
 5091 
 5092 //----------Load Instructions--------------------------------------------------
 5093 
 5094 // Converts byte to int.
 5095 // As convB2I_reg, but without match rule.  The match rule of convB2I_reg
 5096 // reuses the 'amount' operand, but adlc expects that operand specification
 5097 // and operands in match rule are equivalent.
 5098 instruct convB2I_reg_2(iRegIdst dst, iRegIsrc src) %{
 5099   effect(DEF dst, USE src);
 5100   format %{ "EXTSB   $dst, $src \t// byte->int" %}
 5101   size(4);
 5102   ins_encode %{
 5103     __ extsb($dst$$Register, $src$$Register);
 5104   %}
 5105   ins_pipe(pipe_class_default);
 5106 %}
 5107 
 5108 instruct loadUB_indirect(iRegIdst dst, indirectMemory mem) %{
 5109   // match-rule, false predicate
 5110   match(Set dst (LoadB mem));
 5111   predicate(false);
 5112 
 5113   format %{ "LBZ     $dst, $mem" %}
 5114   size(4);
 5115   ins_encode( enc_lbz(dst, mem) );
 5116   ins_pipe(pipe_class_memory);
 5117 %}
 5118 
 5119 instruct loadUB_indirect_ac(iRegIdst dst, indirectMemory mem) %{
 5120   // match-rule, false predicate
 5121   match(Set dst (LoadB mem));
 5122   predicate(false);
 5123 
 5124   format %{ "LBZ     $dst, $mem\n\t"
 5125             "TWI     $dst\n\t"
 5126             "ISYNC" %}
 5127   size(12);
 5128   ins_encode( enc_lbz_ac(dst, mem) );
 5129   ins_pipe(pipe_class_memory);
 5130 %}
 5131 
 5132 // Load Byte (8bit signed). LoadB = LoadUB + ConvUB2B.
 5133 instruct loadB_indirect_Ex(iRegIdst dst, indirectMemory mem) %{
 5134   match(Set dst (LoadB mem));
 5135   predicate(n->as_Load()->is_unordered() || followed_by_acquire(n));
 5136   ins_cost(MEMORY_REF_COST + DEFAULT_COST);
 5137   expand %{
 5138     iRegIdst tmp;
 5139     loadUB_indirect(tmp, mem);
 5140     convB2I_reg_2(dst, tmp);
 5141   %}
 5142 %}
 5143 
 5144 instruct loadB_indirect_ac_Ex(iRegIdst dst, indirectMemory mem) %{
 5145   match(Set dst (LoadB mem));
 5146   ins_cost(3*MEMORY_REF_COST + DEFAULT_COST);
 5147   expand %{
 5148     iRegIdst tmp;
 5149     loadUB_indirect_ac(tmp, mem);
 5150     convB2I_reg_2(dst, tmp);
 5151   %}
 5152 %}
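
// Background for the two expansions above: PPC has no sign-extending byte
// load, so a signed LoadB is implemented as LBZ (zero-extending load)
// followed by EXTSB, matching the "LoadB = LoadUB + ConvUB2B" scheme.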
 5153 
 5154 instruct loadUB_indOffset16(iRegIdst dst, indOffset16 mem) %{
 5155   // match-rule, false predicate
 5156   match(Set dst (LoadB mem));
 5157   predicate(false);
 5158 
 5159   format %{ "LBZ     $dst, $mem" %}
 5160   size(4);
 5161   ins_encode( enc_lbz(dst, mem) );
 5162   ins_pipe(pipe_class_memory);
 5163 %}
 5164 
 5165 instruct loadUB_indOffset16_ac(iRegIdst dst, indOffset16 mem) %{
 5166   // match-rule, false predicate
 5167   match(Set dst (LoadB mem));
 5168   predicate(false);
 5169 
 5170   format %{ "LBZ     $dst, $mem\n\t"
 5171             "TWI     $dst\n\t"
 5172             "ISYNC" %}
 5173   size(12);
 5174   ins_encode( enc_lbz_ac(dst, mem) );
 5175   ins_pipe(pipe_class_memory);
 5176 %}
 5177 
 5178 // Load Byte (8bit signed). LoadB = LoadUB + ConvUB2B.
 5179 instruct loadB_indOffset16_Ex(iRegIdst dst, indOffset16 mem) %{
 5180   match(Set dst (LoadB mem));
 5181   predicate(n->as_Load()->is_unordered() || followed_by_acquire(n));
 5182   ins_cost(MEMORY_REF_COST + DEFAULT_COST);
 5183 
 5184   expand %{
 5185     iRegIdst tmp;
 5186     loadUB_indOffset16(tmp, mem);
 5187     convB2I_reg_2(dst, tmp);
 5188   %}
 5189 %}
 5190 
 5191 instruct loadB_indOffset16_ac_Ex(iRegIdst dst, indOffset16 mem) %{
 5192   match(Set dst (LoadB mem));
 5193   ins_cost(3*MEMORY_REF_COST + DEFAULT_COST);
 5194 
 5195   expand %{
 5196     iRegIdst tmp;
 5197     loadUB_indOffset16_ac(tmp, mem);
 5198     convB2I_reg_2(dst, tmp);
 5199   %}
 5200 %}
 5201 
 5202 // Load Unsigned Byte (8bit UNsigned) into an int reg.
 5203 instruct loadUB(iRegIdst dst, memory mem) %{
 5204   predicate(n->as_Load()->is_unordered() || followed_by_acquire(n));
 5205   match(Set dst (LoadUB mem));
 5206   ins_cost(MEMORY_REF_COST);
 5207 
 5208   format %{ "LBZ     $dst, $mem \t// byte, zero-extend to int" %}
 5209   size(4);
 5210   ins_encode( enc_lbz(dst, mem) );
 5211   ins_pipe(pipe_class_memory);
 5212 %}
 5213 
 5214 // Load  Unsigned Byte (8bit UNsigned) acquire.
 5215 instruct loadUB_ac(iRegIdst dst, memory mem) %{
 5216   match(Set dst (LoadUB mem));
 5217   ins_cost(3*MEMORY_REF_COST);
 5218 
 5219   format %{ "LBZ     $dst, $mem \t// byte, zero-extend to int, acquire\n\t"
 5220             "TWI     $dst\n\t"
 5221             "ISYNC" %}
 5222   size(12);
 5223   ins_encode( enc_lbz_ac(dst, mem) );
 5224   ins_pipe(pipe_class_memory);
 5225 %}
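
// Sketch of the acquire idiom used by the *_ac variants: the loaded value
// feeds a never-taken conditional trap (TWI), which creates a dependency,
// and the following ISYNC keeps subsequent loads from being performed
// before this load completes; together this gives load-acquire semantics
// without a heavier sync/lwsync.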
 5226 
 5227 // Load Unsigned Byte (8bit UNsigned) into a Long Register.
 5228 instruct loadUB2L(iRegLdst dst, memory mem) %{
 5229   match(Set dst (ConvI2L (LoadUB mem)));
 5230   predicate(_kids[0]->_leaf->as_Load()->is_unordered() || followed_by_acquire(_kids[0]->_leaf));
 5231   ins_cost(MEMORY_REF_COST);
 5232 
 5233   format %{ "LBZ     $dst, $mem \t// byte, zero-extend to long" %}
 5234   size(4);
 5235   ins_encode( enc_lbz(dst, mem) );
 5236   ins_pipe(pipe_class_memory);
 5237 %}
 5238 
 5239 instruct loadUB2L_ac(iRegLdst dst, memory mem) %{
 5240   match(Set dst (ConvI2L (LoadUB mem)));
 5241   ins_cost(3*MEMORY_REF_COST);
 5242 
 5243   format %{ "LBZ     $dst, $mem \t// byte, zero-extend to long, acquire\n\t"
 5244             "TWI     $dst\n\t"
 5245             "ISYNC" %}
 5246   size(12);
 5247   ins_encode( enc_lbz_ac(dst, mem) );
 5248   ins_pipe(pipe_class_memory);
 5249 %}
 5250 
 5251 // Load Short (16bit signed)
 5252 instruct loadS(iRegIdst dst, memory mem) %{
 5253   match(Set dst (LoadS mem));
 5254   predicate(n->as_Load()->is_unordered() || followed_by_acquire(n));
 5255   ins_cost(MEMORY_REF_COST);
 5256 
 5257   format %{ "LHA     $dst, $mem" %}
 5258   size(4);
 5259   ins_encode %{
 5260     int Idisp = $mem$$disp + frame_slots_bias($mem$$base, ra_);
 5261     __ lha($dst$$Register, Idisp, $mem$$base$$Register);
 5262   %}
 5263   ins_pipe(pipe_class_memory);
 5264 %}
 5265 
 5266 // Load Short (16bit signed) acquire.
 5267 instruct loadS_ac(iRegIdst dst, memory mem) %{
 5268   match(Set dst (LoadS mem));
 5269   ins_cost(3*MEMORY_REF_COST);
 5270 
  format %{ "LHA     $dst, $mem \t// acquire\n\t"
 5272             "TWI     $dst\n\t"
 5273             "ISYNC" %}
 5274   size(12);
 5275   ins_encode %{
 5276     int Idisp = $mem$$disp + frame_slots_bias($mem$$base, ra_);
 5277     __ lha($dst$$Register, Idisp, $mem$$base$$Register);
 5278     __ twi_0($dst$$Register);
 5279     __ isync();
 5280   %}
 5281   ins_pipe(pipe_class_memory);
 5282 %}
 5283 
 5284 // Load Char (16bit unsigned)
 5285 instruct loadUS(iRegIdst dst, memory mem) %{
 5286   match(Set dst (LoadUS mem));
 5287   predicate(n->as_Load()->is_unordered() || followed_by_acquire(n));
 5288   ins_cost(MEMORY_REF_COST);
 5289 
 5290   format %{ "LHZ     $dst, $mem" %}
 5291   size(4);
 5292   ins_encode( enc_lhz(dst, mem) );
 5293   ins_pipe(pipe_class_memory);
 5294 %}
 5295 
 5296 // Load Char (16bit unsigned) acquire.
 5297 instruct loadUS_ac(iRegIdst dst, memory mem) %{
 5298   match(Set dst (LoadUS mem));
 5299   ins_cost(3*MEMORY_REF_COST);
 5300 
 5301   format %{ "LHZ     $dst, $mem \t// acquire\n\t"
 5302             "TWI     $dst\n\t"
 5303             "ISYNC" %}
 5304   size(12);
 5305   ins_encode( enc_lhz_ac(dst, mem) );
 5306   ins_pipe(pipe_class_memory);
 5307 %}
 5308 
 5309 // Load Unsigned Short/Char (16bit UNsigned) into a Long Register.
 5310 instruct loadUS2L(iRegLdst dst, memory mem) %{
 5311   match(Set dst (ConvI2L (LoadUS mem)));
 5312   predicate(_kids[0]->_leaf->as_Load()->is_unordered() || followed_by_acquire(_kids[0]->_leaf));
 5313   ins_cost(MEMORY_REF_COST);
 5314 
 5315   format %{ "LHZ     $dst, $mem \t// short, zero-extend to long" %}
 5316   size(4);
 5317   ins_encode( enc_lhz(dst, mem) );
 5318   ins_pipe(pipe_class_memory);
 5319 %}
 5320 
 5321 // Load Unsigned Short/Char (16bit UNsigned) into a Long Register acquire.
 5322 instruct loadUS2L_ac(iRegLdst dst, memory mem) %{
 5323   match(Set dst (ConvI2L (LoadUS mem)));
 5324   ins_cost(3*MEMORY_REF_COST);
 5325 
 5326   format %{ "LHZ     $dst, $mem \t// short, zero-extend to long, acquire\n\t"
 5327             "TWI     $dst\n\t"
 5328             "ISYNC" %}
 5329   size(12);
 5330   ins_encode( enc_lhz_ac(dst, mem) );
 5331   ins_pipe(pipe_class_memory);
 5332 %}
 5333 
 5334 // Load Integer.
 5335 instruct loadI(iRegIdst dst, memory mem) %{
 5336   match(Set dst (LoadI mem));
 5337   predicate(n->as_Load()->is_unordered() || followed_by_acquire(n));
 5338   ins_cost(MEMORY_REF_COST);
 5339 
 5340   format %{ "LWZ     $dst, $mem" %}
 5341   size(4);
 5342   ins_encode( enc_lwz(dst, mem) );
 5343   ins_pipe(pipe_class_memory);
 5344 %}
 5345 
 5346 // Load Integer acquire.
 5347 instruct loadI_ac(iRegIdst dst, memory mem) %{
 5348   match(Set dst (LoadI mem));
 5349   ins_cost(3*MEMORY_REF_COST);
 5350 
 5351   format %{ "LWZ     $dst, $mem \t// load acquire\n\t"
 5352             "TWI     $dst\n\t"
 5353             "ISYNC" %}
 5354   size(12);
 5355   ins_encode( enc_lwz_ac(dst, mem) );
 5356   ins_pipe(pipe_class_memory);
 5357 %}
 5358 
// Match loading an integer and zero-extending it into a long
// register, i.e. the ideal pattern
// LoadI + ConvI2L + AndL 0xffffffff.
 5362 instruct loadUI2L(iRegLdst dst, memory mem, immL_32bits mask) %{
 5363   match(Set dst (AndL (ConvI2L (LoadI mem)) mask));
 5364   predicate(_kids[0]->_kids[0]->_leaf->as_Load()->is_unordered());
 5365   ins_cost(MEMORY_REF_COST);
 5366 
 5367   format %{ "LWZ     $dst, $mem \t// zero-extend to long" %}
 5368   size(4);
 5369   ins_encode( enc_lwz(dst, mem) );
 5370   ins_pipe(pipe_class_memory);
 5371 %}
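
// Illustrative source pattern for the rule above: "long v = intField &
// 0xFFFFFFFFL" with intField loaded from memory; LWZ already zero-extends
// to 64 bits, so the whole AndL/ConvI2L/LoadI tree collapses into one LWZ.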
 5372 
 5373 // Match loading integer and casting it to long.
 5374 instruct loadI2L(iRegLdst dst, memoryAlg4 mem) %{
 5375   match(Set dst (ConvI2L (LoadI mem)));
 5376   predicate(_kids[0]->_leaf->as_Load()->is_unordered());
 5377   ins_cost(MEMORY_REF_COST);
 5378 
 5379   format %{ "LWA     $dst, $mem \t// loadI2L" %}
 5380   size(4);
 5381   ins_encode %{
 5382     int Idisp = $mem$$disp + frame_slots_bias($mem$$base, ra_);
 5383     __ lwa($dst$$Register, Idisp, $mem$$base$$Register);
 5384   %}
 5385   ins_pipe(pipe_class_memory);
 5386 %}
 5387 
 5388 // Match loading integer and casting it to long - acquire.
 5389 instruct loadI2L_ac(iRegLdst dst, memoryAlg4 mem) %{
 5390   match(Set dst (ConvI2L (LoadI mem)));
 5391   ins_cost(3*MEMORY_REF_COST);
 5392 
  format %{ "LWA     $dst, $mem \t// loadI2L acquire\n\t"
 5394             "TWI     $dst\n\t"
 5395             "ISYNC" %}
 5396   size(12);
 5397   ins_encode %{
 5398     int Idisp = $mem$$disp + frame_slots_bias($mem$$base, ra_);
 5399     __ lwa($dst$$Register, Idisp, $mem$$base$$Register);
 5400     __ twi_0($dst$$Register);
 5401     __ isync();
 5402   %}
 5403   ins_pipe(pipe_class_memory);
 5404 %}
 5405 
 5406 // Load Long - aligned
 5407 instruct loadL(iRegLdst dst, memoryAlg4 mem) %{
 5408   match(Set dst (LoadL mem));
 5409   predicate(n->as_Load()->is_unordered() || followed_by_acquire(n));
 5410   ins_cost(MEMORY_REF_COST);
 5411 
 5412   format %{ "LD      $dst, $mem \t// long" %}
 5413   size(4);
 5414   ins_encode( enc_ld(dst, mem) );
 5415   ins_pipe(pipe_class_memory);
 5416 %}
 5417 
 5418 // Load Long - aligned acquire.
 5419 instruct loadL_ac(iRegLdst dst, memoryAlg4 mem) %{
 5420   match(Set dst (LoadL mem));
 5421   ins_cost(3*MEMORY_REF_COST);
 5422 
 5423   format %{ "LD      $dst, $mem \t// long acquire\n\t"
 5424             "TWI     $dst\n\t"
 5425             "ISYNC" %}
 5426   size(12);
 5427   ins_encode( enc_ld_ac(dst, mem) );
 5428   ins_pipe(pipe_class_memory);
 5429 %}
 5430 
 5431 // Load Long - UNaligned
 5432 instruct loadL_unaligned(iRegLdst dst, memoryAlg4 mem) %{
 5433   match(Set dst (LoadL_unaligned mem));
 5434   // predicate(...) // Unaligned_ac is not needed (and wouldn't make sense).
 5435   ins_cost(MEMORY_REF_COST);
 5436 
 5437   format %{ "LD      $dst, $mem \t// unaligned long" %}
 5438   size(4);
 5439   ins_encode( enc_ld(dst, mem) );
 5440   ins_pipe(pipe_class_memory);
 5441 %}
 5442 
 5443 // Load nodes for superwords
 5444 
 5445 // Load Aligned Packed Byte
 5446 instruct loadV8(iRegLdst dst, memoryAlg4 mem) %{
 5447   predicate(n->as_LoadVector()->memory_size() == 8);
 5448   match(Set dst (LoadVector mem));
 5449   ins_cost(MEMORY_REF_COST);
 5450 
 5451   format %{ "LD      $dst, $mem \t// load 8-byte Vector" %}
 5452   size(4);
 5453   ins_encode( enc_ld(dst, mem) );
 5454   ins_pipe(pipe_class_memory);
 5455 %}
 5456 
// Load Aligned Packed Byte (16-byte vector)
 5458 instruct loadV16(vecX dst, indirect mem) %{
 5459   predicate(n->as_LoadVector()->memory_size() == 16);
 5460   match(Set dst (LoadVector mem));
 5461   ins_cost(MEMORY_REF_COST);
 5462 
 5463   format %{ "LXVD2X      $dst, $mem \t// load 16-byte Vector" %}
 5464   size(4);
 5465   ins_encode %{
 5466     __ lxvd2x($dst$$VectorSRegister, $mem$$Register);
 5467   %}
 5468   ins_pipe(pipe_class_default);
 5469 %}
 5470 
 5471 // Load Range, range = array length (=jint)
 5472 instruct loadRange(iRegIdst dst, memory mem) %{
 5473   match(Set dst (LoadRange mem));
 5474   ins_cost(MEMORY_REF_COST);
 5475 
 5476   format %{ "LWZ     $dst, $mem \t// range" %}
 5477   size(4);
 5478   ins_encode( enc_lwz(dst, mem) );
 5479   ins_pipe(pipe_class_memory);
 5480 %}
 5481 
 5482 // Load Compressed Pointer
 5483 instruct loadN(iRegNdst dst, memory mem) %{
 5484   match(Set dst (LoadN mem));
 5485   predicate(n->as_Load()->is_unordered() || followed_by_acquire(n));
 5486   ins_cost(MEMORY_REF_COST);
 5487 
 5488   format %{ "LWZ     $dst, $mem \t// load compressed ptr" %}
 5489   size(4);
 5490   ins_encode( enc_lwz(dst, mem) );
 5491   ins_pipe(pipe_class_memory);
 5492 %}
 5493 
 5494 // Load Compressed Pointer acquire.
 5495 instruct loadN_ac(iRegNdst dst, memory mem) %{
 5496   match(Set dst (LoadN mem));
 5497   ins_cost(3*MEMORY_REF_COST);
 5498 
 5499   format %{ "LWZ     $dst, $mem \t// load acquire compressed ptr\n\t"
 5500             "TWI     $dst\n\t"
 5501             "ISYNC" %}
 5502   size(12);
 5503   ins_encode( enc_lwz_ac(dst, mem) );
 5504   ins_pipe(pipe_class_memory);
 5505 %}
 5506 
 5507 // Load Compressed Pointer and decode it if narrow_oop_shift == 0.
 5508 instruct loadN2P_unscaled(iRegPdst dst, memory mem) %{
 5509   match(Set dst (DecodeN (LoadN mem)));
 5510   predicate(_kids[0]->_leaf->as_Load()->is_unordered() && CompressedOops::shift() == 0);
 5511   ins_cost(MEMORY_REF_COST);
 5512 
 5513   format %{ "LWZ     $dst, $mem \t// DecodeN (unscaled)" %}
 5514   size(4);
 5515   ins_encode( enc_lwz(dst, mem) );
 5516   ins_pipe(pipe_class_memory);
 5517 %}
 5518 
 5519 instruct loadN2P_klass_unscaled(iRegPdst dst, memory mem) %{
 5520   match(Set dst (DecodeNKlass (LoadNKlass mem)));
 5521   predicate(CompressedKlassPointers::base() == NULL && CompressedKlassPointers::shift() == 0 &&
 5522             _kids[0]->_leaf->as_Load()->is_unordered());
 5523   ins_cost(MEMORY_REF_COST);
 5524 
  format %{ "LWZ     $dst, $mem \t// DecodeNKlass (unscaled)" %}
 5526   size(4);
 5527   ins_encode( enc_lwz(dst, mem) );
 5528   ins_pipe(pipe_class_memory);
 5529 %}
 5530 
 5531 // Load Pointer
 5532 instruct loadP(iRegPdst dst, memoryAlg4 mem) %{
 5533   match(Set dst (LoadP mem));
 5534   predicate(n->as_Load()->is_unordered() || followed_by_acquire(n));
 5535   ins_cost(MEMORY_REF_COST);
 5536 
 5537   format %{ "LD      $dst, $mem \t// ptr" %}
 5538   size(4);
 5539   ins_encode( enc_ld(dst, mem) );
 5540   ins_pipe(pipe_class_memory);
 5541 %}
 5542 
 5543 // Load Pointer acquire.
 5544 instruct loadP_ac(iRegPdst dst, memoryAlg4 mem) %{
 5545   match(Set dst (LoadP mem));
 5546   ins_cost(3*MEMORY_REF_COST);
 5547 
 5548   format %{ "LD      $dst, $mem \t// ptr acquire\n\t"
 5549             "TWI     $dst\n\t"
 5550             "ISYNC" %}
 5551   size(12);
 5552   ins_encode( enc_ld_ac(dst, mem) );
 5553   ins_pipe(pipe_class_memory);
 5554 %}
 5555 
 5556 // LoadP + CastP2L
 5557 instruct loadP2X(iRegLdst dst, memoryAlg4 mem) %{
 5558   match(Set dst (CastP2X (LoadP mem)));
 5559   predicate(_kids[0]->_leaf->as_Load()->is_unordered());
 5560   ins_cost(MEMORY_REF_COST);
 5561 
 5562   format %{ "LD      $dst, $mem \t// ptr + p2x" %}
 5563   size(4);
 5564   ins_encode( enc_ld(dst, mem) );
 5565   ins_pipe(pipe_class_memory);
 5566 %}
 5567 
 5568 // Load compressed klass pointer.
 5569 instruct loadNKlass(iRegNdst dst, memory mem) %{
 5570   match(Set dst (LoadNKlass mem));
 5571   ins_cost(MEMORY_REF_COST);
 5572 
 5573   format %{ "LWZ     $dst, $mem \t// compressed klass ptr" %}
 5574   size(4);
 5575   ins_encode( enc_lwz(dst, mem) );
 5576   ins_pipe(pipe_class_memory);
 5577 %}
 5578 
 5579 // Load Klass Pointer
 5580 instruct loadKlass(iRegPdst dst, memoryAlg4 mem) %{
 5581   match(Set dst (LoadKlass mem));
 5582   ins_cost(MEMORY_REF_COST);
 5583 
 5584   format %{ "LD      $dst, $mem \t// klass ptr" %}
 5585   size(4);
 5586   ins_encode( enc_ld(dst, mem) );
 5587   ins_pipe(pipe_class_memory);
 5588 %}
 5589 
 5590 // Load Float
 5591 instruct loadF(regF dst, memory mem) %{
 5592   match(Set dst (LoadF mem));
 5593   predicate(n->as_Load()->is_unordered() || followed_by_acquire(n));
 5594   ins_cost(MEMORY_REF_COST);
 5595 
 5596   format %{ "LFS     $dst, $mem" %}
 5597   size(4);
 5598   ins_encode %{
 5599     int Idisp = $mem$$disp + frame_slots_bias($mem$$base, ra_);
 5600     __ lfs($dst$$FloatRegister, Idisp, $mem$$base$$Register);
 5601   %}
 5602   ins_pipe(pipe_class_memory);
 5603 %}
 5604 
 5605 // Load Float acquire.
 5606 instruct loadF_ac(regF dst, memory mem, flagsRegCR0 cr0) %{
 5607   match(Set dst (LoadF mem));
 5608   effect(TEMP cr0);
 5609   ins_cost(3*MEMORY_REF_COST);
 5610 
 5611   format %{ "LFS     $dst, $mem \t// acquire\n\t"
 5612             "FCMPU   cr0, $dst, $dst\n\t"
 5613             "BNE     cr0, next\n"
 5614             "next:\n\t"
 5615             "ISYNC" %}
 5616   size(16);
 5617   ins_encode %{
 5618     int Idisp = $mem$$disp + frame_slots_bias($mem$$base, ra_);
 5619     Label next;
 5620     __ lfs($dst$$FloatRegister, Idisp, $mem$$base$$Register);
 5621     __ fcmpu(CCR0, $dst$$FloatRegister, $dst$$FloatRegister);
 5622     __ bne(CCR0, next);
 5623     __ bind(next);
 5624     __ isync();
 5625   %}
 5626   ins_pipe(pipe_class_memory);
 5627 %}
 5628 
 5629 // Load Double - aligned
 5630 instruct loadD(regD dst, memory mem) %{
 5631   match(Set dst (LoadD mem));
 5632   predicate(n->as_Load()->is_unordered() || followed_by_acquire(n));
 5633   ins_cost(MEMORY_REF_COST);
 5634 
 5635   format %{ "LFD     $dst, $mem" %}
 5636   size(4);
 5637   ins_encode( enc_lfd(dst, mem) );
 5638   ins_pipe(pipe_class_memory);
 5639 %}
 5640 
 5641 // Load Double - aligned acquire.
 5642 instruct loadD_ac(regD dst, memory mem, flagsRegCR0 cr0) %{
 5643   match(Set dst (LoadD mem));
 5644   effect(TEMP cr0);
 5645   ins_cost(3*MEMORY_REF_COST);
 5646 
 5647   format %{ "LFD     $dst, $mem \t// acquire\n\t"
 5648             "FCMPU   cr0, $dst, $dst\n\t"
 5649             "BNE     cr0, next\n"
 5650             "next:\n\t"
 5651             "ISYNC" %}
 5652   size(16);
 5653   ins_encode %{
 5654     int Idisp = $mem$$disp + frame_slots_bias($mem$$base, ra_);
 5655     Label next;
 5656     __ lfd($dst$$FloatRegister, Idisp, $mem$$base$$Register);
 5657     __ fcmpu(CCR0, $dst$$FloatRegister, $dst$$FloatRegister);
 5658     __ bne(CCR0, next);
 5659     __ bind(next);
 5660     __ isync();
 5661   %}
 5662   ins_pipe(pipe_class_memory);
 5663 %}
 5664 
 5665 // Load Double - UNaligned
 5666 instruct loadD_unaligned(regD dst, memory mem) %{
 5667   match(Set dst (LoadD_unaligned mem));
 5668   // predicate(...) // Unaligned_ac is not needed (and wouldn't make sense).
 5669   ins_cost(MEMORY_REF_COST);
 5670 
 5671   format %{ "LFD     $dst, $mem" %}
 5672   size(4);
 5673   ins_encode( enc_lfd(dst, mem) );
 5674   ins_pipe(pipe_class_memory);
 5675 %}
 5676 
 5677 //----------Constants--------------------------------------------------------
 5678 
 5679 // Load MachConstantTableBase: add hi offset to global toc.
 5680 // TODO: Handle hidden register r29 in bundler!
 5681 instruct loadToc_hi(iRegLdst dst) %{
 5682   effect(DEF dst);
 5683   ins_cost(DEFAULT_COST);
 5684 
 5685   format %{ "ADDIS   $dst, R29, DISP.hi \t// load TOC hi" %}
 5686   size(4);
 5687   ins_encode %{
 5688     __ calculate_address_from_global_toc_hi16only($dst$$Register, __ method_toc());
 5689   %}
 5690   ins_pipe(pipe_class_default);
 5691 %}
 5692 
 5693 // Load MachConstantTableBase: add lo offset to global toc.
 5694 instruct loadToc_lo(iRegLdst dst, iRegLdst src) %{
 5695   effect(DEF dst, USE src);
 5696   ins_cost(DEFAULT_COST);
 5697 
 5698   format %{ "ADDI    $dst, $src, DISP.lo \t// load TOC lo" %}
 5699   size(4);
 5700   ins_encode %{
 5701     __ calculate_address_from_global_toc_lo16only($dst$$Register, __ method_toc());
 5702   %}
 5703   ins_pipe(pipe_class_default);
 5704 %}
 5705 
 5706 // Load 16-bit integer constant 0xssss????
 5707 instruct loadConI16(iRegIdst dst, immI16 src) %{
 5708   match(Set dst src);
 5709 
 5710   format %{ "LI      $dst, $src" %}
 5711   size(4);
 5712   ins_encode %{
 5713     __ li($dst$$Register, (int)((short)($src$$constant & 0xFFFF)));
 5714   %}
 5715   ins_pipe(pipe_class_default);
 5716 %}
 5717 
 5718 // Load integer constant 0x????0000
 5719 instruct loadConIhi16(iRegIdst dst, immIhi16 src) %{
 5720   match(Set dst src);
 5721   ins_cost(DEFAULT_COST);
 5722 
 5723   format %{ "LIS     $dst, $src.hi" %}
 5724   size(4);
 5725   ins_encode %{
    // Lis sign-extends the 16-bit src and shifts it 16 bits to the left.
 5727     __ lis($dst$$Register, (int)((short)(($src$$constant & 0xFFFF0000) >> 16)));
 5728   %}
 5729   ins_pipe(pipe_class_default);
 5730 %}
 5731 
// Part 2 of loading a 32 bit constant: hi16 is in src1 (properly shifted
// and sign extended); this adds the low 16 bits.
 5734 instruct loadConI32_lo16(iRegIdst dst, iRegIsrc src1, immI16 src2) %{
 5735   // no match-rule, false predicate
 5736   effect(DEF dst, USE src1, USE src2);
 5737   predicate(false);
 5738 
 5739   format %{ "ORI     $dst, $src1.hi, $src2.lo" %}
 5740   size(4);
 5741   ins_encode %{
 5742     __ ori($dst$$Register, $src1$$Register, ($src2$$constant) & 0xFFFF);
 5743   %}
 5744   ins_pipe(pipe_class_default);
 5745 %}
 5746 
 5747 instruct loadConI32(iRegIdst dst, immI32 src) %{
 5748   match(Set dst src);
 5749   // This macro is valid only in Power 10 and up, but adding the following predicate here
 5750   // caused a build error, so we comment it out for now.
 5751   // predicate(PowerArchitecturePPC64 >= 10);
 5752   ins_cost(DEFAULT_COST+1);
 5753 
 5754   format %{ "PLI     $dst, $src" %}
 5755   size(8);
 5756   ins_encode %{
 5757     assert( ((intptr_t)(__ pc()) & 0x3c) != 0x3c, "Bad alignment for prefixed instruction at " INTPTR_FORMAT, (intptr_t)(__ pc()));
 5758     __ pli($dst$$Register, $src$$constant);
 5759   %}
 5760   ins_pipe(pipe_class_default);
 5761   ins_alignment(2);
 5762 %}
 5763 
 5764 instruct loadConI_Ex(iRegIdst dst, immI src) %{
 5765   match(Set dst src);
 5766   ins_cost(DEFAULT_COST*2);
 5767 
 5768   expand %{
 5769     // Would like to use $src$$constant.
 5770     immI16 srcLo %{ _opnds[1]->constant() %}
 5771     // srcHi can be 0000 if srcLo sign-extends to a negative number.
 5772     immIhi16 srcHi %{ _opnds[1]->constant() %}
 5773     iRegIdst tmpI;
 5774     loadConIhi16(tmpI, srcHi);
 5775     loadConI32_lo16(dst, tmpI, srcLo);
 5776   %}
 5777 %}
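
// Worked example for the expansion above: loading 0x12345678 becomes
// "LIS tmp, 0x1234" (tmp = 0x12340000) followed by "ORI dst, tmp, 0x5678";
// ORI does not sign-extend its immediate, so only the LIS half is affected
// by sign extension.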
 5778 
 5779 // No constant pool entries required.
 5780 instruct loadConL16(iRegLdst dst, immL16 src) %{
 5781   match(Set dst src);
 5782 
 5783   format %{ "LI      $dst, $src \t// long" %}
 5784   size(4);
 5785   ins_encode %{
 5786     __ li($dst$$Register, (int)((short) ($src$$constant & 0xFFFF)));
 5787   %}
 5788   ins_pipe(pipe_class_default);
 5789 %}
 5790 
 5791 // Load long constant 0xssssssss????0000
 5792 instruct loadConL32hi16(iRegLdst dst, immL32hi16 src) %{
 5793   match(Set dst src);
 5794   ins_cost(DEFAULT_COST);
 5795 
 5796   format %{ "LIS     $dst, $src.hi \t// long" %}
 5797   size(4);
 5798   ins_encode %{
 5799     __ lis($dst$$Register, (int)((short)(($src$$constant & 0xFFFF0000) >> 16)));
 5800   %}
 5801   ins_pipe(pipe_class_default);
 5802 %}
 5803 
 5804 // To load a 32 bit constant: merge lower 16 bits into already loaded
 5805 // high 16 bits.
 5806 instruct loadConL32_lo16(iRegLdst dst, iRegLsrc src1, immL16 src2) %{
 5807   // no match-rule, false predicate
 5808   effect(DEF dst, USE src1, USE src2);
 5809   predicate(false);
 5810 
 5811   format %{ "ORI     $dst, $src1, $src2.lo" %}
 5812   size(4);
 5813   ins_encode %{
 5814     __ ori($dst$$Register, $src1$$Register, ($src2$$constant) & 0xFFFF);
 5815   %}
 5816   ins_pipe(pipe_class_default);
 5817 %}
 5818 
 5819 // Load 32-bit long constant
 5820 instruct loadConL32_Ex(iRegLdst dst, immL32 src) %{
 5821   match(Set dst src);
 5822   ins_cost(DEFAULT_COST*2);
 5823 
 5824   expand %{
 5825     // Would like to use $src$$constant.
 5826     immL16     srcLo %{ _opnds[1]->constant() /*& 0x0000FFFFL */%}
 5827     // srcHi can be 0000 if srcLo sign-extends to a negative number.
 5828     immL32hi16 srcHi %{ _opnds[1]->constant() /*& 0xFFFF0000L */%}
 5829     iRegLdst tmpL;
 5830     loadConL32hi16(tmpL, srcHi);
 5831     loadConL32_lo16(dst, tmpL, srcLo);
 5832   %}
 5833 %}
 5834 
 5835 // Load 34-bit long constant using prefixed addi. No constant pool entries required.
 5836 instruct loadConL34(iRegLdst dst, immL34 src) %{
 5837   match(Set dst src);
 5838   // This macro is valid only in Power 10 and up, but adding the following predicate here
 5839   // caused a build error, so we comment it out for now.
 5840   // predicate(PowerArchitecturePPC64 >= 10);
 5841   ins_cost(DEFAULT_COST+1);
 5842 
 5843   format %{ "PLI     $dst, $src \t// long" %}
 5844   size(8);
 5845   ins_encode %{
 5846     assert( ((intptr_t)(__ pc()) & 0x3c) != 0x3c, "Bad alignment for prefixed instruction at " INTPTR_FORMAT, (intptr_t)(__ pc()));
 5847     __ pli($dst$$Register, $src$$constant);
 5848   %}
 5849   ins_pipe(pipe_class_default);
 5850   ins_alignment(2);
 5851 %}
 5852 
 5853 // Load long constant 0x????000000000000.
 5854 instruct loadConLhighest16_Ex(iRegLdst dst, immLhighest16 src) %{
 5855   match(Set dst src);
 5856   ins_cost(DEFAULT_COST);
 5857 
 5858   expand %{
 5859     immL32hi16 srcHi %{ _opnds[1]->constant() >> 32 /*& 0xFFFF0000L */%}
 5860     immI shift32 %{ 32 %}
 5861     iRegLdst tmpL;
 5862     loadConL32hi16(tmpL, srcHi);
 5863     lshiftL_regL_immI(dst, tmpL, shift32);
 5864   %}
 5865 %}
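
// Illustrative expansion: for 0x1234000000000000 the hi16 value 0x1234 is
// loaded with LIS (giving 0x12340000, sign-extended) and then shifted left
// by 32 bits; any sign-extension bits are shifted out, leaving only the
// highest 16 bits of the long set.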
 5866 
 5867 // Expand node for constant pool load: small offset.
 5868 instruct loadConL(iRegLdst dst, immL src, iRegLdst toc) %{
 5869   effect(DEF dst, USE src, USE toc);
 5870   ins_cost(MEMORY_REF_COST);
 5871 
 5872   ins_num_consts(1);
 5873   // Needed so that CallDynamicJavaDirect can compute the address of this
 5874   // instruction for relocation.
 5875   ins_field_cbuf_insts_offset(int);
 5876 
 5877   format %{ "LD      $dst, offset, $toc \t// load long $src from TOC" %}
 5878   size(4);
 5879   ins_encode( enc_load_long_constL(dst, src, toc) );
 5880   ins_pipe(pipe_class_memory);
 5881 %}
 5882 
 5883 // Expand node for constant pool load: large offset.
 5884 instruct loadConL_hi(iRegLdst dst, immL src, iRegLdst toc) %{
 5885   effect(DEF dst, USE src, USE toc);
 5886   predicate(false);
 5887 
 5888   ins_num_consts(1);
 5889   ins_field_const_toc_offset(int);
 5890   // Needed so that CallDynamicJavaDirect can compute the address of this
 5891   // instruction for relocation.
 5892   ins_field_cbuf_insts_offset(int);
 5893 
 5894   format %{ "ADDIS   $dst, $toc, offset \t// load long $src from TOC (hi)" %}
 5895   size(4);
 5896   ins_encode( enc_load_long_constL_hi(dst, toc, src) );
 5897   ins_pipe(pipe_class_default);
 5898 %}
 5899 
 5900 // Expand node for constant pool load: large offset.
 5901 // No constant pool entries required.
 5902 instruct loadConL_lo(iRegLdst dst, immL src, iRegLdst base) %{
 5903   effect(DEF dst, USE src, USE base);
 5904   predicate(false);
 5905 
 5906   ins_field_const_toc_offset_hi_node(loadConL_hiNode*);
 5907 
 5908   format %{ "LD      $dst, offset, $base \t// load long $src from TOC (lo)" %}
 5909   size(4);
 5910   ins_encode %{
 5911     int offset = ra_->C->output()->in_scratch_emit_size() ? 0 : _const_toc_offset_hi_node->_const_toc_offset;
 5912     __ ld($dst$$Register, MacroAssembler::largeoffset_si16_si16_lo(offset), $base$$Register);
 5913   %}
 5914   ins_pipe(pipe_class_memory);
 5915 %}
 5916 
// Load long constant from constant table. Expands to two instructions
// in case an offset > 16 bits is needed.
 5919 // Adlc adds toc node MachConstantTableBase.
 5920 instruct loadConL_Ex(iRegLdst dst, immL src) %{
 5921   match(Set dst src);
 5922   ins_cost(MEMORY_REF_COST);
 5923 
 5924   format %{ "LD      $dst, offset, $constanttablebase\t// load long $src from table, postalloc expanded" %}
  // We cannot inline the enc_class for the expand as that does not support constanttablebase.
 5926   postalloc_expand( postalloc_expand_load_long_constant(dst, src, constanttablebase) );
 5927 %}
 5928 
 5929 // Load NULL as compressed oop.
 5930 instruct loadConN0(iRegNdst dst, immN_0 src) %{
 5931   match(Set dst src);
 5932   ins_cost(DEFAULT_COST);
 5933 
 5934   format %{ "LI      $dst, $src \t// compressed ptr" %}
 5935   size(4);
 5936   ins_encode %{
 5937     __ li($dst$$Register, 0);
 5938   %}
 5939   ins_pipe(pipe_class_default);
 5940 %}
 5941 
 5942 // Load hi part of compressed oop constant.
 5943 instruct loadConN_hi(iRegNdst dst, immN src) %{
 5944   effect(DEF dst, USE src);
 5945   ins_cost(DEFAULT_COST);
 5946 
 5947   format %{ "LIS     $dst, $src \t// narrow oop hi" %}
 5948   size(4);
 5949   ins_encode %{
 5950     __ lis($dst$$Register, (int)(short)(($src$$constant >> 16) & 0xffff));
 5951   %}
 5952   ins_pipe(pipe_class_default);
 5953 %}
 5954 
 5955 // Add lo part of compressed oop constant to already loaded hi part.
 5956 instruct loadConN_lo(iRegNdst dst, iRegNsrc src1, immN src2) %{
 5957   effect(DEF dst, USE src1, USE src2);
 5958   ins_cost(DEFAULT_COST);
 5959 
 5960   format %{ "ORI     $dst, $src1, $src2 \t// narrow oop lo" %}
 5961   size(4);
 5962   ins_encode %{
 5963     assert(__ oop_recorder() != NULL, "this assembler needs an OopRecorder");
 5964     int oop_index = __ oop_recorder()->find_index((jobject)$src2$$constant);
 5965     RelocationHolder rspec = oop_Relocation::spec(oop_index);
 5966     __ relocate(rspec, 1);
 5967     __ ori($dst$$Register, $src1$$Register, $src2$$constant & 0xffff);
 5968   %}
 5969   ins_pipe(pipe_class_default);
 5970 %}
 5971 
 5972 instruct rldicl(iRegLdst dst, iRegLsrc src, immI16 shift, immI16 mask_begin) %{
 5973   effect(DEF dst, USE src, USE shift, USE mask_begin);
 5974 
 5975   size(4);
 5976   ins_encode %{
 5977     __ rldicl($dst$$Register, $src$$Register, $shift$$constant, $mask_begin$$constant);
 5978   %}
 5979   ins_pipe(pipe_class_default);
 5980 %}
 5981 
 5982 // Needed to postalloc expand loadConN: ConN is loaded as ConI
 5983 // leaving the upper 32 bits with sign-extension bits.
 5984 // This clears these bits: dst = src & 0xFFFFFFFF.
 5985 // TODO: Eventually call this maskN_regN_FFFFFFFF.
 5986 instruct clearMs32b(iRegNdst dst, iRegNsrc src) %{
 5987   effect(DEF dst, USE src);
 5988   predicate(false);
 5989 
 5990   format %{ "MASK    $dst, $src, 0xFFFFFFFF" %} // mask
 5991   size(4);
 5992   ins_encode %{
 5993     __ clrldi($dst$$Register, $src$$Register, 0x20);
 5994   %}
 5995   ins_pipe(pipe_class_default);
 5996 %}
 5997 
 5998 // Optimize DecodeN for disjoint base.
 5999 // Load base of compressed oops into a register
 6000 instruct loadBase(iRegLdst dst) %{
 6001   effect(DEF dst);
 6002 
 6003   format %{ "LoadConst $dst, heapbase" %}
 6004   ins_encode %{
 6005     __ load_const_optimized($dst$$Register, CompressedOops::base(), R0);
 6006   %}
 6007   ins_pipe(pipe_class_default);
 6008 %}
 6009 
 6010 // Loading ConN must be postalloc expanded so that edges between
 6011 // the nodes are safe. They may not interfere with a safepoint.
 6012 // GL TODO: This needs three instructions: better put this into the constant pool.
 6013 instruct loadConN_Ex(iRegNdst dst, immN src) %{
 6014   match(Set dst src);
 6015   ins_cost(DEFAULT_COST*2);
 6016 
 6017   format %{ "LoadN   $dst, $src \t// postalloc expanded" %} // mask
 6018   postalloc_expand %{
 6019     MachNode *m1 = new loadConN_hiNode();
 6020     MachNode *m2 = new loadConN_loNode();
 6021     MachNode *m3 = new clearMs32bNode();
 6022     m1->add_req(NULL);
 6023     m2->add_req(NULL, m1);
 6024     m3->add_req(NULL, m2);
 6025     m1->_opnds[0] = op_dst;
 6026     m1->_opnds[1] = op_src;
 6027     m2->_opnds[0] = op_dst;
 6028     m2->_opnds[1] = op_dst;
 6029     m2->_opnds[2] = op_src;
 6030     m3->_opnds[0] = op_dst;
 6031     m3->_opnds[1] = op_dst;
 6032     ra_->set_pair(m1->_idx, ra_->get_reg_second(this), ra_->get_reg_first(this));
 6033     ra_->set_pair(m2->_idx, ra_->get_reg_second(this), ra_->get_reg_first(this));
 6034     ra_->set_pair(m3->_idx, ra_->get_reg_second(this), ra_->get_reg_first(this));
 6035     nodes->push(m1);
 6036     nodes->push(m2);
 6037     nodes->push(m3);
 6038   %}
 6039 %}
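
// Resulting sequence (sketch): LIS loads the sign-extended hi16 of the
// narrow oop, ORI merges the lo16 under an oop relocation, and the final
// MASK (CLRLDI by 32) removes the sign-extension bits, as described for
// clearMs32b above.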
 6040 
 6041 // We have seen a safepoint between the hi and lo parts, and this node was handled
 6042 // as an oop. Therefore this needs a match rule so that build_oop_map knows this is
 6043 // not a narrow oop.
 6044 instruct loadConNKlass_hi(iRegNdst dst, immNKlass_NM src) %{
 6045   match(Set dst src);
 6046   effect(DEF dst, USE src);
 6047   ins_cost(DEFAULT_COST);
 6048 
 6049   format %{ "LIS     $dst, $src \t// narrow klass hi" %}
 6050   size(4);
 6051   ins_encode %{
 6052     intptr_t Csrc = CompressedKlassPointers::encode((Klass *)$src$$constant);
 6053     __ lis($dst$$Register, (int)(short)((Csrc >> 16) & 0xffff));
 6054   %}
 6055   ins_pipe(pipe_class_default);
 6056 %}
 6057 
// Like loadConNKlass_hi, this must be recognized as a narrow klass, not an oop!
 6059 instruct loadConNKlass_mask(iRegNdst dst, immNKlass_NM src1, iRegNsrc src2) %{
 6060   match(Set dst src1);
 6061   effect(TEMP src2);
 6062   ins_cost(DEFAULT_COST);
 6063 
 6064   format %{ "MASK    $dst, $src2, 0xFFFFFFFF" %} // mask
 6065   size(4);
 6066   ins_encode %{
 6067     __ clrldi($dst$$Register, $src2$$Register, 0x20);
 6068   %}
 6069   ins_pipe(pipe_class_default);
 6070 %}
 6071 
 6072 // This needs a match rule so that build_oop_map knows this is
 6073 // not a narrow oop.
 6074 instruct loadConNKlass_lo(iRegNdst dst, immNKlass_NM src1, iRegNsrc src2) %{
 6075   match(Set dst src1);
 6076   effect(TEMP src2);
 6077   ins_cost(DEFAULT_COST);
 6078 
 6079   format %{ "ORI     $dst, $src1, $src2 \t// narrow klass lo" %}
 6080   size(4);
 6081   ins_encode %{
 6082     intptr_t Csrc = CompressedKlassPointers::encode((Klass *)$src1$$constant);
 6083     assert(__ oop_recorder() != NULL, "this assembler needs an OopRecorder");
 6084     int klass_index = __ oop_recorder()->find_index((Klass *)$src1$$constant);
 6085     RelocationHolder rspec = metadata_Relocation::spec(klass_index);
 6086 
 6087     __ relocate(rspec, 1);
 6088     __ ori($dst$$Register, $src2$$Register, Csrc & 0xffff);
 6089   %}
 6090   ins_pipe(pipe_class_default);
 6091 %}
 6092 
// Loading ConNKlass must be postalloc expanded so that edges between
// the nodes are safe. They must not interfere with a safepoint.
 6095 instruct loadConNKlass_Ex(iRegNdst dst, immNKlass src) %{
 6096   match(Set dst src);
 6097   ins_cost(DEFAULT_COST*2);
 6098 
  format %{ "LoadN   $dst, $src \t// postalloc expanded" %}
 6100   postalloc_expand %{
 6101     // Load high bits into register. Sign extended.
 6102     MachNode *m1 = new loadConNKlass_hiNode();
 6103     m1->add_req(NULL);
 6104     m1->_opnds[0] = op_dst;
 6105     m1->_opnds[1] = op_src;
 6106     ra_->set_pair(m1->_idx, ra_->get_reg_second(this), ra_->get_reg_first(this));
 6107     nodes->push(m1);
 6108 
 6109     MachNode *m2 = m1;
 6110     if (!Assembler::is_uimm((jlong)CompressedKlassPointers::encode((Klass *)op_src->constant()), 31)) {
      // The value might have been sign-extended with ones into the upper 32 bits. Mask them out.
 6112       m2 = new loadConNKlass_maskNode();
 6113       m2->add_req(NULL, m1);
 6114       m2->_opnds[0] = op_dst;
 6115       m2->_opnds[1] = op_src;
 6116       m2->_opnds[2] = op_dst;
 6117       ra_->set_pair(m2->_idx, ra_->get_reg_second(this), ra_->get_reg_first(this));
 6118       nodes->push(m2);
 6119     }
 6120 
 6121     MachNode *m3 = new loadConNKlass_loNode();
 6122     m3->add_req(NULL, m2);
 6123     m3->_opnds[0] = op_dst;
 6124     m3->_opnds[1] = op_src;
 6125     m3->_opnds[2] = op_dst;
 6126     ra_->set_pair(m3->_idx, ra_->get_reg_second(this), ra_->get_reg_first(this));
 6127     nodes->push(m3);
 6128   %}
 6129 %}
 6130 
 6131 // 0x1 is used in object initialization (initial object header).
 6132 // No constant pool entries required.
 6133 instruct loadConP0or1(iRegPdst dst, immP_0or1 src) %{
 6134   match(Set dst src);
 6135 
 6136   format %{ "LI      $dst, $src \t// ptr" %}
 6137   size(4);
 6138   ins_encode %{
 6139     __ li($dst$$Register, (int)((short)($src$$constant & 0xFFFF)));
 6140   %}
 6141   ins_pipe(pipe_class_default);
 6142 %}
 6143 
 6144 // Expand node for constant pool load: small offset.
 6145 // The match rule is needed to generate the correct bottom_type(),
 6146 // however this node should never match. The use of predicate is not
 6147 // possible since ADLC forbids predicates for chain rules. The higher
 6148 // costs do not prevent matching in this case. For that reason the
 6149 // operand immP_NM with predicate(false) is used.
 6150 instruct loadConP(iRegPdst dst, immP_NM src, iRegLdst toc) %{
 6151   match(Set dst src);
 6152   effect(TEMP toc);
 6153 
 6154   ins_num_consts(1);
 6155 
 6156   format %{ "LD      $dst, offset, $toc \t// load ptr $src from TOC" %}
 6157   size(4);
 6158   ins_encode( enc_load_long_constP(dst, src, toc) );
 6159   ins_pipe(pipe_class_memory);
 6160 %}
 6161 
 6162 // Expand node for constant pool load: large offset.
 6163 instruct loadConP_hi(iRegPdst dst, immP_NM src, iRegLdst toc) %{
 6164   effect(DEF dst, USE src, USE toc);
 6165   predicate(false);
 6166 
 6167   ins_num_consts(1);
 6168   ins_field_const_toc_offset(int);
 6169 
 6170   format %{ "ADDIS   $dst, $toc, offset \t// load ptr $src from TOC (hi)" %}
 6171   size(4);
 6172   ins_encode( enc_load_long_constP_hi(dst, src, toc) );
 6173   ins_pipe(pipe_class_default);
 6174 %}
 6175 
 6176 // Expand node for constant pool load: large offset.
 6177 instruct loadConP_lo(iRegPdst dst, immP_NM src, iRegLdst base) %{
 6178   match(Set dst src);
 6179   effect(TEMP base);
 6180 
 6181   ins_field_const_toc_offset_hi_node(loadConP_hiNode*);
 6182 
 6183   format %{ "LD      $dst, offset, $base \t// load ptr $src from TOC (lo)" %}
 6184   size(4);
 6185   ins_encode %{
 6186     int offset = ra_->C->output()->in_scratch_emit_size() ? 0 : _const_toc_offset_hi_node->_const_toc_offset;
 6187     __ ld($dst$$Register, MacroAssembler::largeoffset_si16_si16_lo(offset), $base$$Register);
 6188   %}
 6189   ins_pipe(pipe_class_memory);
 6190 %}
 6191 
 6192 // Load pointer constant from constant table. Expand in case an
 6193 // offset > 16 bit is needed.
 6194 // Adlc adds toc node MachConstantTableBase.
 6195 instruct loadConP_Ex(iRegPdst dst, immP src) %{
 6196   match(Set dst src);
 6197   ins_cost(MEMORY_REF_COST);
 6198 
 6199   // This rule does not use "expand" because then
 6200   // the result type is not known to be an Oop.  An ADLC
 6201   // enhancement will be needed to make that work - not worth it!
 6202 
 6203   // If this instruction rematerializes, it prolongs the live range
 6204   // of the toc node, causing illegal graphs.
 6205   // assert(edge_from_to(_reg_node[reg_lo],def)) fails in verify_good_schedule().
 6206   ins_cannot_rematerialize(true);
 6207 
 6208   format %{ "LD    $dst, offset, $constanttablebase \t//  load ptr $src from table, postalloc expanded" %}
 6209   postalloc_expand( postalloc_expand_load_ptr_constant(dst, src, constanttablebase) );
 6210 %}
 6211 
 6212 // Expand node for constant pool load: small offset.
 6213 instruct loadConF(regF dst, immF src, iRegLdst toc) %{
 6214   effect(DEF dst, USE src, USE toc);
 6215   ins_cost(MEMORY_REF_COST);
 6216 
 6217   ins_num_consts(1);
 6218 
 6219   format %{ "LFS     $dst, offset, $toc \t// load float $src from TOC" %}
 6220   size(4);
 6221   ins_encode %{
 6222     address float_address = __ float_constant($src$$constant);
 6223     if (float_address == NULL) {
 6224       ciEnv::current()->record_out_of_memory_failure();
 6225       return;
 6226     }
 6227     __ lfs($dst$$FloatRegister, __ offset_to_method_toc(float_address), $toc$$Register);
 6228   %}
 6229   ins_pipe(pipe_class_memory);
 6230 %}
 6231 
 6232 // Expand node for constant pool load: large offset.
 6233 instruct loadConFComp(regF dst, immF src, iRegLdst toc) %{
 6234   effect(DEF dst, USE src, USE toc);
 6235   ins_cost(MEMORY_REF_COST);
 6236 
 6237   ins_num_consts(1);
 6238 
 6239   format %{ "ADDIS   $toc, $toc, offset_hi\n\t"
 6240             "LFS     $dst, offset_lo, $toc \t// load float $src from TOC (hi/lo)\n\t"
 6241             "ADDIS   $toc, $toc, -offset_hi"%}
 6242   size(12);
 6243   ins_encode %{
 6244     FloatRegister Rdst    = $dst$$FloatRegister;
 6245     Register Rtoc         = $toc$$Register;
 6246     address float_address = __ float_constant($src$$constant);
 6247     if (float_address == NULL) {
 6248       ciEnv::current()->record_out_of_memory_failure();
 6249       return;
 6250     }
 6251     int offset            = __ offset_to_method_toc(float_address);
 6252     int hi = (offset + (1<<15))>>16;
 6253     int lo = offset - hi * (1<<16);
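    // For illustration: offset = 0x1ABCD gives hi = 0x2 and lo = -0x5433;
    // (hi << 16) + lo == 0x1ABCD, and both halves fit into the signed 16-bit
    // immediates of addis and lfs.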
 6254 
 6255     __ addis(Rtoc, Rtoc, hi);
 6256     __ lfs(Rdst, lo, Rtoc);
 6257     __ addis(Rtoc, Rtoc, -hi);
 6258   %}
 6259   ins_pipe(pipe_class_memory);
 6260 %}
 6261 
 6262 // Adlc adds toc node MachConstantTableBase.
 6263 instruct loadConF_Ex(regF dst, immF src) %{
 6264   match(Set dst src);
 6265   ins_cost(MEMORY_REF_COST);
 6266 
 6267   // See loadConP.
 6268   ins_cannot_rematerialize(true);
 6269 
 6270   format %{ "LFS     $dst, offset, $constanttablebase \t// load $src from table, postalloc expanded" %}
 6271   postalloc_expand( postalloc_expand_load_float_constant(dst, src, constanttablebase) );
 6272 %}
 6273 
 6274 // Expand node for constant pool load: small offset.
 6275 instruct loadConD(regD dst, immD src, iRegLdst toc) %{
 6276   effect(DEF dst, USE src, USE toc);
 6277   ins_cost(MEMORY_REF_COST);
 6278 
 6279   ins_num_consts(1);
 6280 
 6281   format %{ "LFD     $dst, offset, $toc \t// load double $src from TOC" %}
 6282   size(4);
 6283   ins_encode %{
 6284     address float_address = __ double_constant($src$$constant);
 6285     if (float_address == NULL) {
 6286       ciEnv::current()->record_out_of_memory_failure();
 6287       return;
 6288     }
 6289     int offset =  __ offset_to_method_toc(float_address);
 6290     __ lfd($dst$$FloatRegister, offset, $toc$$Register);
 6291   %}
 6292   ins_pipe(pipe_class_memory);
 6293 %}
 6294 
 6295 // Expand node for constant pool load: large offset.
 6296 instruct loadConDComp(regD dst, immD src, iRegLdst toc) %{
 6297   effect(DEF dst, USE src, USE toc);
 6298   ins_cost(MEMORY_REF_COST);
 6299 
 6300   ins_num_consts(1);
 6301 
 6302   format %{ "ADDIS   $toc, $toc, offset_hi\n\t"
 6303             "LFD     $dst, offset_lo, $toc \t// load double $src from TOC (hi/lo)\n\t"
 6304             "ADDIS   $toc, $toc, -offset_hi" %}
 6305   size(12);
 6306   ins_encode %{
 6307     FloatRegister Rdst    = $dst$$FloatRegister;
 6308     Register      Rtoc    = $toc$$Register;
 6309     address float_address = __ double_constant($src$$constant);
 6310     if (float_address == NULL) {
 6311       ciEnv::current()->record_out_of_memory_failure();
 6312       return;
 6313     }
 6314     int offset = __ offset_to_method_toc(float_address);
 6315     int hi = (offset + (1<<15))>>16;
 6316     int lo = offset - hi * (1<<16);
 6317 
 6318     __ addis(Rtoc, Rtoc, hi);
 6319     __ lfd(Rdst, lo, Rtoc);
 6320     __ addis(Rtoc, Rtoc, -hi);
 6321   %}
 6322   ins_pipe(pipe_class_memory);
 6323 %}
 6324 
 6325 // Adlc adds toc node MachConstantTableBase.
 6326 instruct loadConD_Ex(regD dst, immD src) %{
 6327   match(Set dst src);
 6328   ins_cost(MEMORY_REF_COST);
 6329 
 6330   // See loadConP.
 6331   ins_cannot_rematerialize(true);
 6332 
 6333   format %{ "ConD    $dst, offset, $constanttablebase \t// load $src from table, postalloc expanded" %}
 6334   postalloc_expand( postalloc_expand_load_double_constant(dst, src, constanttablebase) );
 6335 %}
 6336 
 6337 // Prefetch instructions.
 6338 // Must be safe to execute with invalid address (cannot fault).
 6339 
 6340 // Special prefetch versions which use the dcbz instruction.
 6341 instruct prefetch_alloc_zero(indirectMemory mem, iRegLsrc src) %{
 6342   match(PrefetchAllocation (AddP mem src));
 6343   predicate(AllocatePrefetchStyle == 3);
 6344   ins_cost(MEMORY_REF_COST);
 6345 
 6346   format %{ "PREFETCH $mem, 2, $src \t// Prefetch write-many with zero" %}
 6347   size(4);
 6348   ins_encode %{
 6349     __ dcbz($src$$Register, $mem$$base$$Register);
 6350   %}
 6351   ins_pipe(pipe_class_memory);
 6352 %}
 6353 
 6354 instruct prefetch_alloc_zero_no_offset(indirectMemory mem) %{
 6355   match(PrefetchAllocation mem);
 6356   predicate(AllocatePrefetchStyle == 3);
 6357   ins_cost(MEMORY_REF_COST);
 6358 
 6359   format %{ "PREFETCH $mem, 2 \t// Prefetch write-many with zero" %}
 6360   size(4);
 6361   ins_encode %{
 6362     __ dcbz($mem$$base$$Register);
 6363   %}
 6364   ins_pipe(pipe_class_memory);
 6365 %}
 6366 
 6367 instruct prefetch_alloc(indirectMemory mem, iRegLsrc src) %{
 6368   match(PrefetchAllocation (AddP mem src));
 6369   predicate(AllocatePrefetchStyle != 3);
 6370   ins_cost(MEMORY_REF_COST);
 6371 
 6372   format %{ "PREFETCH $mem, 2, $src \t// Prefetch write-many" %}
 6373   size(4);
 6374   ins_encode %{
 6375     __ dcbtst($src$$Register, $mem$$base$$Register);
 6376   %}
 6377   ins_pipe(pipe_class_memory);
 6378 %}
 6379 
 6380 instruct prefetch_alloc_no_offset(indirectMemory mem) %{
 6381   match(PrefetchAllocation mem);
 6382   predicate(AllocatePrefetchStyle != 3);
 6383   ins_cost(MEMORY_REF_COST);
 6384 
 6385   format %{ "PREFETCH $mem, 2 \t// Prefetch write-many" %}
 6386   size(4);
 6387   ins_encode %{
 6388     __ dcbtst($mem$$base$$Register);
 6389   %}
 6390   ins_pipe(pipe_class_memory);
 6391 %}
 6392 
 6393 //----------Store Instructions-------------------------------------------------
 6394 
 6395 // Store Byte
 6396 instruct storeB(memory mem, iRegIsrc src) %{
 6397   match(Set mem (StoreB mem src));
 6398   ins_cost(MEMORY_REF_COST);
 6399 
 6400   format %{ "STB     $src, $mem \t// byte" %}
 6401   size(4);
 6402   ins_encode %{
 6403     int Idisp = $mem$$disp + frame_slots_bias($mem$$base, ra_);
 6404     __ stb($src$$Register, Idisp, $mem$$base$$Register);
 6405   %}
 6406   ins_pipe(pipe_class_memory);
 6407 %}
 6408 
 6409 // Store Char/Short
 6410 instruct storeC(memory mem, iRegIsrc src) %{
 6411   match(Set mem (StoreC mem src));
 6412   ins_cost(MEMORY_REF_COST);
 6413 
 6414   format %{ "STH     $src, $mem \t// short" %}
 6415   size(4);
 6416   ins_encode %{
 6417     int Idisp = $mem$$disp + frame_slots_bias($mem$$base, ra_);
 6418     __ sth($src$$Register, Idisp, $mem$$base$$Register);
 6419   %}
 6420   ins_pipe(pipe_class_memory);
 6421 %}
 6422 
 6423 // Store Integer
 6424 instruct storeI(memory mem, iRegIsrc src) %{
 6425   match(Set mem (StoreI mem src));
 6426   ins_cost(MEMORY_REF_COST);
 6427 
 6428   format %{ "STW     $src, $mem" %}
 6429   size(4);
 6430   ins_encode( enc_stw(src, mem) );
 6431   ins_pipe(pipe_class_memory);
 6432 %}
 6433 
 6434 // ConvL2I + StoreI.
 6435 instruct storeI_convL2I(memory mem, iRegLsrc src) %{
 6436   match(Set mem (StoreI mem (ConvL2I src)));
 6437   ins_cost(MEMORY_REF_COST);
 6438 
 6439   format %{ "STW     l2i($src), $mem" %}
 6440   size(4);
 6441   ins_encode( enc_stw(src, mem) );
 6442   ins_pipe(pipe_class_memory);
 6443 %}
 6444 
 6445 // Store Long
 6446 instruct storeL(memoryAlg4 mem, iRegLsrc src) %{
 6447   match(Set mem (StoreL mem src));
 6448   ins_cost(MEMORY_REF_COST);
 6449 
 6450   format %{ "STD     $src, $mem \t// long" %}
 6451   size(4);
 6452   ins_encode( enc_std(src, mem) );
 6453   ins_pipe(pipe_class_memory);
 6454 %}
 6455 
 6456 // Store super word nodes.
 6457 
 6458 // Store Aligned Packed Byte long register to memory
 6459 instruct storeA8B(memoryAlg4 mem, iRegLsrc src) %{
 6460   predicate(n->as_StoreVector()->memory_size() == 8);
 6461   match(Set mem (StoreVector mem src));
 6462   ins_cost(MEMORY_REF_COST);
 6463 
 6464   format %{ "STD     $mem, $src \t// packed8B" %}
 6465   size(4);
 6466   ins_encode( enc_std(src, mem) );
 6467   ins_pipe(pipe_class_memory);
 6468 %}
 6469 
// Store Packed 16-byte vector register to memory
 6471 instruct storeV16(indirect mem, vecX src) %{
 6472   predicate(n->as_StoreVector()->memory_size() == 16);
 6473   match(Set mem (StoreVector mem src));
 6474   ins_cost(MEMORY_REF_COST);
 6475 
 6476   format %{ "STXVD2X     $mem, $src \t// store 16-byte Vector" %}
 6477   size(4);
 6478   ins_encode %{
 6479     __ stxvd2x($src$$VectorSRegister, $mem$$Register);
 6480   %}
 6481   ins_pipe(pipe_class_default);
 6482 %}
 6483 
 6484 // Reinterpret: only one vector size used: either L or X
 6485 instruct reinterpretL(iRegLdst dst) %{
 6486   match(Set dst (VectorReinterpret dst));
 6487   ins_cost(0);
 6488   format %{ "reinterpret $dst" %}
 6489   ins_encode( /*empty*/ );
 6490   ins_pipe(pipe_class_empty);
 6491 %}
 6492 
 6493 instruct reinterpretX(vecX dst) %{
 6494   match(Set dst (VectorReinterpret dst));
 6495   ins_cost(0);
 6496   format %{ "reinterpret $dst" %}
 6497   ins_encode( /*empty*/ );
 6498   ins_pipe(pipe_class_empty);
 6499 %}
 6500 
 6501 // Store Compressed Oop
 6502 instruct storeN(memory dst, iRegN_P2N src) %{
 6503   match(Set dst (StoreN dst src));
 6504   ins_cost(MEMORY_REF_COST);
 6505 
 6506   format %{ "STW     $src, $dst \t// compressed oop" %}
 6507   size(4);
 6508   ins_encode( enc_stw(src, dst) );
 6509   ins_pipe(pipe_class_memory);
 6510 %}
 6511 
// Store Compressed Klass
 6513 instruct storeNKlass(memory dst, iRegN_P2N src) %{
 6514   match(Set dst (StoreNKlass dst src));
 6515   ins_cost(MEMORY_REF_COST);
 6516 
 6517   format %{ "STW     $src, $dst \t// compressed klass" %}
 6518   size(4);
 6519   ins_encode( enc_stw(src, dst) );
 6520   ins_pipe(pipe_class_memory);
 6521 %}
 6522 
 6523 // Store Pointer
 6524 instruct storeP(memoryAlg4 dst, iRegPsrc src) %{
 6525   match(Set dst (StoreP dst src));
 6526   ins_cost(MEMORY_REF_COST);
 6527 
 6528   format %{ "STD     $src, $dst \t// ptr" %}
 6529   size(4);
 6530   ins_encode( enc_std(src, dst) );
 6531   ins_pipe(pipe_class_memory);
 6532 %}
 6533 
 6534 // Store Float
 6535 instruct storeF(memory mem, regF src) %{
 6536   match(Set mem (StoreF mem src));
 6537   ins_cost(MEMORY_REF_COST);
 6538 
 6539   format %{ "STFS    $src, $mem" %}
 6540   size(4);
 6541   ins_encode( enc_stfs(src, mem) );
 6542   ins_pipe(pipe_class_memory);
 6543 %}
 6544 
 6545 // Store Double
 6546 instruct storeD(memory mem, regD src) %{
 6547   match(Set mem (StoreD mem src));
 6548   ins_cost(MEMORY_REF_COST);
 6549 
 6550   format %{ "STFD    $src, $mem" %}
 6551   size(4);
 6552   ins_encode( enc_stfd(src, mem) );
 6553   ins_pipe(pipe_class_memory);
 6554 %}
 6555 
 6556 //----------Store Instructions With Zeros--------------------------------------
 6557 
 6558 instruct storeCM(memory mem, immI_0 zero) %{
 6559   match(Set mem (StoreCM mem zero));
 6560   ins_cost(MEMORY_REF_COST);
 6561 
 6562   format %{ "STB     #0, $mem \t// CMS card-mark byte store" %}
 6563   size(8);
 6564   ins_encode %{
 6565     __ li(R0, 0);
 6566     // No release barrier: Oops are allowed to get visible after marking.
 6567     guarantee($mem$$base$$Register != R1_SP, "use frame_slots_bias");
 6568     __ stb(R0, $mem$$disp, $mem$$base$$Register);
 6569   %}
 6570   ins_pipe(pipe_class_memory);
 6571 %}
 6572 
 6573 // Convert oop pointer into compressed form.
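// In short (a sketch; the special cases are handled by the nodes below):
//   narrow = (oop == NULL) ? 0
//                          : (oop - CompressedOops::base()) >> CompressedOops::shift();
// Variants exist for base == 0, shift == 0, a disjoint base, and values
// known to be not null.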
 6574 
 6575 // Nodes for postalloc expand.
 6576 
 6577 // Shift node for expand.
 6578 instruct encodeP_shift(iRegNdst dst, iRegNsrc src) %{
 6579   // The match rule is needed to make it a 'MachTypeNode'!
 6580   match(Set dst (EncodeP src));
 6581   predicate(false);
 6582 
 6583   format %{ "SRDI    $dst, $src, 3 \t// encode" %}
 6584   size(4);
 6585   ins_encode %{
 6586     __ srdi($dst$$Register, $src$$Register, CompressedOops::shift() & 0x3f);
 6587   %}
 6588   ins_pipe(pipe_class_default);
 6589 %}
 6590 
// Subtract-base node for expand.
 6592 instruct encodeP_sub(iRegPdst dst, iRegPdst src) %{
 6593   // The match rule is needed to make it a 'MachTypeNode'!
 6594   match(Set dst (EncodeP src));
 6595   predicate(false);
 6596 
 6597   format %{ "SUB     $dst, $src, oop_base \t// encode" %}
 6598   ins_encode %{
 6599     __ sub_const_optimized($dst$$Register, $src$$Register, CompressedOops::base(), R0);
 6600   %}
 6601   ins_pipe(pipe_class_default);
 6602 %}
 6603 
 6604 // Conditional sub base.
 6605 instruct cond_sub_base(iRegNdst dst, flagsRegSrc crx, iRegPsrc src1) %{
 6606   // The match rule is needed to make it a 'MachTypeNode'!
 6607   match(Set dst (EncodeP (Binary crx src1)));
 6608   predicate(false);
 6609 
 6610   format %{ "BEQ     $crx, done\n\t"
 6611             "SUB     $dst, $src1, heapbase \t// encode: subtract base if != NULL\n"
 6612             "done:" %}
 6613   ins_encode %{
 6614     Label done;
 6615     __ beq($crx$$CondRegister, done);
 6616     __ sub_const_optimized($dst$$Register, $src1$$Register, CompressedOops::base(), R0);
 6617     __ bind(done);
 6618   %}
 6619   ins_pipe(pipe_class_default);
 6620 %}
 6621 
// Power7 can use the isel instruction.
 6623 instruct cond_set_0_oop(iRegNdst dst, flagsRegSrc crx, iRegPsrc src1) %{
 6624   // The match rule is needed to make it a 'MachTypeNode'!
 6625   match(Set dst (EncodeP (Binary crx src1)));
 6626   predicate(false);
 6627 
 6628   format %{ "CMOVE   $dst, $crx eq, 0, $src1 \t// encode: preserve 0" %}
 6629   size(4);
 6630   ins_encode %{
 6631     // This is a Power7 instruction for which no machine description exists.
 6632     __ isel_0($dst$$Register, $crx$$CondRegister, Assembler::equal, $src1$$Register);
 6633   %}
 6634   ins_pipe(pipe_class_default);
 6635 %}
 6636 
 6637 // Disjoint narrow oop base.
 6638 instruct encodeP_Disjoint(iRegNdst dst, iRegPsrc src) %{
 6639   match(Set dst (EncodeP src));
 6640   predicate(CompressedOops::base_disjoint());
 6641 
 6642   format %{ "EXTRDI  $dst, $src, #32, #3 \t// encode with disjoint base" %}
 6643   size(4);
 6644   ins_encode %{
 6645     __ rldicl($dst$$Register, $src$$Register, 64-CompressedOops::shift(), 32);
 6646   %}
 6647   ins_pipe(pipe_class_default);
 6648 %}
 6649 
 6650 // shift != 0, base != 0
 6651 instruct encodeP_Ex(iRegNdst dst, flagsReg crx, iRegPsrc src) %{
 6652   match(Set dst (EncodeP src));
 6653   effect(TEMP crx);
 6654   predicate(n->bottom_type()->make_ptr()->ptr() != TypePtr::NotNull &&
 6655             CompressedOops::shift() != 0 &&
 6656             CompressedOops::base_overlaps());
 6657 
 6658   format %{ "EncodeP $dst, $crx, $src \t// postalloc expanded" %}
 6659   postalloc_expand( postalloc_expand_encode_oop(dst, src, crx));
 6660 %}
 6661 
 6662 // shift != 0, base != 0
 6663 instruct encodeP_not_null_Ex(iRegNdst dst, iRegPsrc src) %{
 6664   match(Set dst (EncodeP src));
 6665   predicate(n->bottom_type()->make_ptr()->ptr() == TypePtr::NotNull &&
 6666             CompressedOops::shift() != 0 &&
 6667             CompressedOops::base_overlaps());
 6668 
 6669   format %{ "EncodeP $dst, $src\t// $src != Null, postalloc expanded" %}
 6670   postalloc_expand( postalloc_expand_encode_oop_not_null(dst, src) );
 6671 %}
 6672 
 6673 // shift != 0, base == 0
 6674 // TODO: This is the same as encodeP_shift. Merge!
 6675 instruct encodeP_not_null_base_null(iRegNdst dst, iRegPsrc src) %{
 6676   match(Set dst (EncodeP src));
 6677   predicate(CompressedOops::shift() != 0 &&
            CompressedOops::base() == 0);
 6679 
 6680   format %{ "SRDI    $dst, $src, #3 \t// encodeP, $src != NULL" %}
 6681   size(4);
 6682   ins_encode %{
 6683     __ srdi($dst$$Register, $src$$Register, CompressedOops::shift() & 0x3f);
 6684   %}
 6685   ins_pipe(pipe_class_default);
 6686 %}
 6687 
 6688 // Compressed OOPs with narrow_oop_shift == 0.
 6689 // shift == 0, base == 0
 6690 instruct encodeP_narrow_oop_shift_0(iRegNdst dst, iRegPsrc src) %{
 6691   match(Set dst (EncodeP src));
 6692   predicate(CompressedOops::shift() == 0);
 6693 
 6694   format %{ "MR      $dst, $src \t// Ptr->Narrow" %}
 6695   // variable size, 0 or 4.
 6696   ins_encode %{
 6697     __ mr_if_needed($dst$$Register, $src$$Register);
 6698   %}
 6699   ins_pipe(pipe_class_default);
 6700 %}
 6701 
 6702 // Decode nodes.
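// Decoding is the inverse (a sketch; special cases below):
//   oop = (narrow == 0) ? NULL
//                       : CompressedOops::base() + ((uintptr_t)narrow << CompressedOops::shift());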
 6703 
 6704 // Shift node for expand.
 6705 instruct decodeN_shift(iRegPdst dst, iRegPsrc src) %{
 6706   // The match rule is needed to make it a 'MachTypeNode'!
 6707   match(Set dst (DecodeN src));
 6708   predicate(false);
 6709 
 6710   format %{ "SLDI    $dst, $src, #3 \t// DecodeN" %}
 6711   size(4);
 6712   ins_encode %{
 6713     __ sldi($dst$$Register, $src$$Register, CompressedOops::shift());
 6714   %}
 6715   ins_pipe(pipe_class_default);
 6716 %}
 6717 
 6718 // Add node for expand.
 6719 instruct decodeN_add(iRegPdst dst, iRegPdst src) %{
 6720   // The match rule is needed to make it a 'MachTypeNode'!
 6721   match(Set dst (DecodeN src));
 6722   predicate(false);
 6723 
 6724   format %{ "ADD     $dst, $src, heapbase \t// DecodeN, add oop base" %}
 6725   ins_encode %{
 6726     __ add_const_optimized($dst$$Register, $src$$Register, CompressedOops::base(), R0);
 6727   %}
 6728   ins_pipe(pipe_class_default);
 6729 %}
 6730 
// Conditional add base for expand.
 6732 instruct cond_add_base(iRegPdst dst, flagsRegSrc crx, iRegPsrc src) %{
 6733   // The match rule is needed to make it a 'MachTypeNode'!
 6734   // NOTICE that the rule is nonsense - we just have to make sure that:
 6735   //  - _matrule->_rChild->_opType == "DecodeN" (see InstructForm::captures_bottom_type() in formssel.cpp)
 6736   //  - we have to match 'crx' to avoid an "illegal USE of non-input: flagsReg crx" error in ADLC.
 6737   match(Set dst (DecodeN (Binary crx src)));
 6738   predicate(false);
 6739 
 6740   format %{ "BEQ     $crx, done\n\t"
 6741             "ADD     $dst, $src, heapbase \t// DecodeN: add oop base if $src != NULL\n"
 6742             "done:" %}
 6743   ins_encode %{
 6744     Label done;
 6745     __ beq($crx$$CondRegister, done);
 6746     __ add_const_optimized($dst$$Register, $src$$Register, CompressedOops::base(), R0);
 6747     __ bind(done);
 6748   %}
 6749   ins_pipe(pipe_class_default);
 6750 %}
 6751 
 6752 instruct cond_set_0_ptr(iRegPdst dst, flagsRegSrc crx, iRegPsrc src1) %{
 6753   // The match rule is needed to make it a 'MachTypeNode'!
 6754   // NOTICE that the rule is nonsense - we just have to make sure that:
 6755   //  - _matrule->_rChild->_opType == "DecodeN" (see InstructForm::captures_bottom_type() in formssel.cpp)
 6756   //  - we have to match 'crx' to avoid an "illegal USE of non-input: flagsReg crx" error in ADLC.
 6757   match(Set dst (DecodeN (Binary crx src1)));
 6758   predicate(false);
 6759 
 6760   format %{ "CMOVE   $dst, $crx eq, 0, $src1 \t// decode: preserve 0" %}
 6761   size(4);
 6762   ins_encode %{
 6763     // This is a Power7 instruction for which no machine description exists.
 6764     __ isel_0($dst$$Register, $crx$$CondRegister, Assembler::equal, $src1$$Register);
 6765   %}
 6766   ins_pipe(pipe_class_default);
 6767 %}
 6768 
// shift != 0, base != 0
 6770 instruct decodeN_Ex(iRegPdst dst, iRegNsrc src, flagsReg crx) %{
 6771   match(Set dst (DecodeN src));
 6772   predicate((n->bottom_type()->is_oopptr()->ptr() != TypePtr::NotNull &&
 6773              n->bottom_type()->is_oopptr()->ptr() != TypePtr::Constant) &&
 6774             CompressedOops::shift() != 0 &&
 6775             CompressedOops::base() != 0);
 6776   ins_cost(4 * DEFAULT_COST); // Should be more expensive than decodeN_Disjoint_isel_Ex.
 6777   effect(TEMP crx);
 6778 
 6779   format %{ "DecodeN $dst, $src \t// Kills $crx, postalloc expanded" %}
 6780   postalloc_expand( postalloc_expand_decode_oop(dst, src, crx) );
 6781 %}
 6782 
 6783 // shift != 0, base == 0
 6784 instruct decodeN_nullBase(iRegPdst dst, iRegNsrc src) %{
 6785   match(Set dst (DecodeN src));
 6786   predicate(CompressedOops::shift() != 0 &&
 6787             CompressedOops::base() == 0);
 6788 
 6789   format %{ "SLDI    $dst, $src, #3 \t// DecodeN (zerobased)" %}
 6790   size(4);
 6791   ins_encode %{
 6792     __ sldi($dst$$Register, $src$$Register, CompressedOops::shift());
 6793   %}
 6794   ins_pipe(pipe_class_default);
 6795 %}
 6796 
 6797 // Optimize DecodeN for disjoint base.
// Shift the narrow oop and OR it into a register that already contains the heap base.
// Base == dst must hold; this is assured by construction in postalloc_expand.
 6800 instruct decodeN_mergeDisjoint(iRegPdst dst, iRegNsrc src, iRegLsrc base) %{
 6801   match(Set dst (DecodeN src));
 6802   effect(TEMP base);
 6803   predicate(false);
 6804 
 6805   format %{ "RLDIMI  $dst, $src, shift, 32-shift \t// DecodeN (disjoint base)" %}
 6806   size(4);
 6807   ins_encode %{
 6808     __ rldimi($dst$$Register, $src$$Register, CompressedOops::shift(), 32-CompressedOops::shift());
 6809   %}
 6810   ins_pipe(pipe_class_default);
 6811 %}
 6812 
 6813 // Optimize DecodeN for disjoint base.
 6814 // This node requires only one cycle on the critical path.
// We must postalloc_expand because we cannot express use_def effects where
// the used register is of type L and the def'ed register is of type P.
 6817 instruct decodeN_Disjoint_notNull_Ex(iRegPdst dst, iRegNsrc src) %{
 6818   match(Set dst (DecodeN src));
 6819   effect(TEMP_DEF dst);
 6820   predicate((n->bottom_type()->is_oopptr()->ptr() == TypePtr::NotNull ||
 6821              n->bottom_type()->is_oopptr()->ptr() == TypePtr::Constant) &&
 6822             CompressedOops::base_disjoint());
 6823   ins_cost(DEFAULT_COST);
 6824 
  format %{ "MOV     $dst, heapbase \n\t"
 6826             "RLDIMI  $dst, $src, shift, 32-shift \t// decode with disjoint base" %}
 6827   postalloc_expand %{
 6828     loadBaseNode *n1 = new loadBaseNode();
 6829     n1->add_req(NULL);
 6830     n1->_opnds[0] = op_dst;
 6831 
 6832     decodeN_mergeDisjointNode *n2 = new decodeN_mergeDisjointNode();
 6833     n2->add_req(n_region, n_src, n1);
 6834     n2->_opnds[0] = op_dst;
 6835     n2->_opnds[1] = op_src;
 6836     n2->_opnds[2] = op_dst;
 6837     n2->_bottom_type = _bottom_type;
 6838 
 6839     ra_->set_pair(n1->_idx, ra_->get_reg_second(this), ra_->get_reg_first(this));
 6840     ra_->set_pair(n2->_idx, ra_->get_reg_second(this), ra_->get_reg_first(this));
 6841 
 6842     nodes->push(n1);
 6843     nodes->push(n2);
 6844   %}
 6845 %}
 6846 
 6847 instruct decodeN_Disjoint_isel_Ex(iRegPdst dst, iRegNsrc src, flagsReg crx) %{
 6848   match(Set dst (DecodeN src));
 6849   effect(TEMP_DEF dst, TEMP crx);
 6850   predicate((n->bottom_type()->is_oopptr()->ptr() != TypePtr::NotNull &&
 6851              n->bottom_type()->is_oopptr()->ptr() != TypePtr::Constant) &&
 6852             CompressedOops::base_disjoint() && VM_Version::has_isel());
 6853   ins_cost(3 * DEFAULT_COST);
 6854 
 6855   format %{ "DecodeN  $dst, $src \t// decode with disjoint base using isel" %}
 6856   postalloc_expand %{
 6857     loadBaseNode *n1 = new loadBaseNode();
 6858     n1->add_req(NULL);
 6859     n1->_opnds[0] = op_dst;
 6860 
 6861     cmpN_reg_imm0Node *n_compare  = new cmpN_reg_imm0Node();
 6862     n_compare->add_req(n_region, n_src);
 6863     n_compare->_opnds[0] = op_crx;
 6864     n_compare->_opnds[1] = op_src;
 6865     n_compare->_opnds[2] = new immN_0Oper(TypeNarrowOop::NULL_PTR);
 6866 
 6867     decodeN_mergeDisjointNode *n2 = new decodeN_mergeDisjointNode();
 6868     n2->add_req(n_region, n_src, n1);
 6869     n2->_opnds[0] = op_dst;
 6870     n2->_opnds[1] = op_src;
 6871     n2->_opnds[2] = op_dst;
 6872     n2->_bottom_type = _bottom_type;
 6873 
 6874     cond_set_0_ptrNode *n_cond_set = new cond_set_0_ptrNode();
 6875     n_cond_set->add_req(n_region, n_compare, n2);
 6876     n_cond_set->_opnds[0] = op_dst;
 6877     n_cond_set->_opnds[1] = op_crx;
 6878     n_cond_set->_opnds[2] = op_dst;
 6879     n_cond_set->_bottom_type = _bottom_type;
 6880 
    assert(ra_->is_oop(this), "A decodeN node must produce an oop!");
 6882     ra_->set_oop(n_cond_set, true);
 6883 
 6884     ra_->set_pair(n1->_idx, ra_->get_reg_second(this), ra_->get_reg_first(this));
 6885     ra_->set_pair(n_compare->_idx, ra_->get_reg_second(n_crx), ra_->get_reg_first(n_crx));
 6886     ra_->set_pair(n2->_idx, ra_->get_reg_second(this), ra_->get_reg_first(this));
 6887     ra_->set_pair(n_cond_set->_idx, ra_->get_reg_second(this), ra_->get_reg_first(this));
 6888 
 6889     nodes->push(n1);
 6890     nodes->push(n_compare);
 6891     nodes->push(n2);
 6892     nodes->push(n_cond_set);
 6893   %}
 6894 %}
 6895 
 6896 // src != 0, shift != 0, base != 0
 6897 instruct decodeN_notNull_addBase_Ex(iRegPdst dst, iRegNsrc src) %{
 6898   match(Set dst (DecodeN src));
 6899   predicate((n->bottom_type()->is_oopptr()->ptr() == TypePtr::NotNull ||
 6900              n->bottom_type()->is_oopptr()->ptr() == TypePtr::Constant) &&
 6901             CompressedOops::shift() != 0 &&
 6902             CompressedOops::base() != 0);
 6903   ins_cost(2 * DEFAULT_COST);
 6904 
 6905   format %{ "DecodeN $dst, $src \t// $src != NULL, postalloc expanded" %}
 6906   postalloc_expand( postalloc_expand_decode_oop_not_null(dst, src));
 6907 %}
 6908 
 6909 // Compressed OOPs with narrow_oop_shift == 0.
 6910 instruct decodeN_unscaled(iRegPdst dst, iRegNsrc src) %{
 6911   match(Set dst (DecodeN src));
 6912   predicate(CompressedOops::shift() == 0);
 6913   ins_cost(DEFAULT_COST);
 6914 
 6915   format %{ "MR      $dst, $src \t// DecodeN (unscaled)" %}
 6916   // variable size, 0 or 4.
 6917   ins_encode %{
 6918     __ mr_if_needed($dst$$Register, $src$$Register);
 6919   %}
 6920   ins_pipe(pipe_class_default);
 6921 %}
 6922 
// Convert compressed oop into int for vector alignment masking.
 6924 instruct decodeN2I_unscaled(iRegIdst dst, iRegNsrc src) %{
 6925   match(Set dst (ConvL2I (CastP2X (DecodeN src))));
 6926   predicate(CompressedOops::shift() == 0);
 6927   ins_cost(DEFAULT_COST);
 6928 
 6929   format %{ "MR      $dst, $src \t// (int)DecodeN (unscaled)" %}
 6930   // variable size, 0 or 4.
 6931   ins_encode %{
 6932     __ mr_if_needed($dst$$Register, $src$$Register);
 6933   %}
 6934   ins_pipe(pipe_class_default);
 6935 %}
 6936 
 6937 // Convert klass pointer into compressed form.
 6938 
 6939 // Nodes for postalloc expand.
 6940 
 6941 // Shift node for expand.
 6942 instruct encodePKlass_shift(iRegNdst dst, iRegNsrc src) %{
 6943   // The match rule is needed to make it a 'MachTypeNode'!
 6944   match(Set dst (EncodePKlass src));
 6945   predicate(false);
 6946 
 6947   format %{ "SRDI    $dst, $src, 3 \t// encode" %}
 6948   size(4);
 6949   ins_encode %{
 6950     __ srdi($dst$$Register, $src$$Register, CompressedKlassPointers::shift());
 6951   %}
 6952   ins_pipe(pipe_class_default);
 6953 %}
 6954 
// Subtract-base node for expand.
 6956 instruct encodePKlass_sub_base(iRegPdst dst, iRegLsrc base, iRegPdst src) %{
 6957   // The match rule is needed to make it a 'MachTypeNode'!
 6958   match(Set dst (EncodePKlass (Binary base src)));
 6959   predicate(false);
 6960 
 6961   format %{ "SUB     $dst, $base, $src \t// encode" %}
 6962   size(4);
 6963   ins_encode %{
 6964     __ subf($dst$$Register, $base$$Register, $src$$Register);
 6965   %}
 6966   ins_pipe(pipe_class_default);
 6967 %}
 6968 
// Disjoint narrow klass base.
 6970 instruct encodePKlass_Disjoint(iRegNdst dst, iRegPsrc src) %{
 6971   match(Set dst (EncodePKlass src));
 6972   predicate(false /* TODO: PPC port CompressedKlassPointers::base_disjoint()*/);
 6973 
 6974   format %{ "EXTRDI  $dst, $src, #32, #3 \t// encode with disjoint base" %}
 6975   size(4);
 6976   ins_encode %{
 6977     __ rldicl($dst$$Register, $src$$Register, 64-CompressedKlassPointers::shift(), 32);
 6978   %}
 6979   ins_pipe(pipe_class_default);
 6980 %}
 6981 
 6982 // shift != 0, base != 0
 6983 instruct encodePKlass_not_null_Ex(iRegNdst dst, iRegLsrc base, iRegPsrc src) %{
 6984   match(Set dst (EncodePKlass (Binary base src)));
 6985   predicate(false);
 6986 
 6987   format %{ "EncodePKlass $dst, $src\t// $src != Null, postalloc expanded" %}
 6988   postalloc_expand %{
 6989     encodePKlass_sub_baseNode *n1 = new encodePKlass_sub_baseNode();
 6990     n1->add_req(n_region, n_base, n_src);
 6991     n1->_opnds[0] = op_dst;
 6992     n1->_opnds[1] = op_base;
 6993     n1->_opnds[2] = op_src;
 6994     n1->_bottom_type = _bottom_type;
 6995 
 6996     encodePKlass_shiftNode *n2 = new encodePKlass_shiftNode();
 6997     n2->add_req(n_region, n1);
 6998     n2->_opnds[0] = op_dst;
 6999     n2->_opnds[1] = op_dst;
 7000     n2->_bottom_type = _bottom_type;
 7001     ra_->set_pair(n1->_idx, ra_->get_reg_second(this), ra_->get_reg_first(this));
 7002     ra_->set_pair(n2->_idx, ra_->get_reg_second(this), ra_->get_reg_first(this));
 7003 
 7004     nodes->push(n1);
 7005     nodes->push(n2);
 7006   %}
 7007 %}
 7008 
 7009 // shift != 0, base != 0
 7010 instruct encodePKlass_not_null_ExEx(iRegNdst dst, iRegPsrc src) %{
 7011   match(Set dst (EncodePKlass src));
 7012   //predicate(CompressedKlassPointers::shift() != 0 &&
 7013   //          true /* TODO: PPC port CompressedKlassPointers::base_overlaps()*/);
 7014 
 7015   //format %{ "EncodePKlass $dst, $src\t// $src != Null, postalloc expanded" %}
 7016   ins_cost(DEFAULT_COST*2);  // Don't count constant.
 7017   expand %{
 7018     immL baseImm %{ (jlong)(intptr_t)CompressedKlassPointers::base() %}
 7019     iRegLdst base;
 7020     loadConL_Ex(base, baseImm);
 7021     encodePKlass_not_null_Ex(dst, base, src);
 7022   %}
 7023 %}
 7024 
 7025 // Decode nodes.
 7026 
 7027 // Shift node for expand.
 7028 instruct decodeNKlass_shift(iRegPdst dst, iRegPsrc src) %{
 7029   // The match rule is needed to make it a 'MachTypeNode'!
 7030   match(Set dst (DecodeNKlass src));
 7031   predicate(false);
 7032 
 7033   format %{ "SLDI    $dst, $src, #3 \t// DecodeNKlass" %}
 7034   size(4);
 7035   ins_encode %{
 7036     __ sldi($dst$$Register, $src$$Register, CompressedKlassPointers::shift());
 7037   %}
 7038   ins_pipe(pipe_class_default);
 7039 %}
 7040 
 7041 // Add node for expand.
 7042 
 7043 instruct decodeNKlass_add_base(iRegPdst dst, iRegLsrc base, iRegPdst src) %{
 7044   // The match rule is needed to make it a 'MachTypeNode'!
 7045   match(Set dst (DecodeNKlass (Binary base src)));
 7046   predicate(false);
 7047 
 7048   format %{ "ADD     $dst, $base, $src \t// DecodeNKlass, add klass base" %}
 7049   size(4);
 7050   ins_encode %{
 7051     __ add($dst$$Register, $base$$Register, $src$$Register);
 7052   %}
 7053   ins_pipe(pipe_class_default);
 7054 %}
 7055 
 7056 // src != 0, shift != 0, base != 0
 7057 instruct decodeNKlass_notNull_addBase_Ex(iRegPdst dst, iRegLsrc base, iRegNsrc src) %{
 7058   match(Set dst (DecodeNKlass (Binary base src)));
 7059   //effect(kill src); // We need a register for the immediate result after shifting.
 7060   predicate(false);
 7061 
 7062   format %{ "DecodeNKlass $dst =  $base + ($src << 3) \t// $src != NULL, postalloc expanded" %}
 7063   postalloc_expand %{
 7064     decodeNKlass_add_baseNode *n1 = new decodeNKlass_add_baseNode();
 7065     n1->add_req(n_region, n_base, n_src);
 7066     n1->_opnds[0] = op_dst;
 7067     n1->_opnds[1] = op_base;
 7068     n1->_opnds[2] = op_src;
 7069     n1->_bottom_type = _bottom_type;
 7070 
 7071     decodeNKlass_shiftNode *n2 = new decodeNKlass_shiftNode();
 7072     n2->add_req(n_region, n1);
 7073     n2->_opnds[0] = op_dst;
 7074     n2->_opnds[1] = op_dst;
 7075     n2->_bottom_type = _bottom_type;
 7076 
 7077     ra_->set_pair(n1->_idx, ra_->get_reg_second(this), ra_->get_reg_first(this));
 7078     ra_->set_pair(n2->_idx, ra_->get_reg_second(this), ra_->get_reg_first(this));
 7079 
 7080     nodes->push(n1);
 7081     nodes->push(n2);
 7082   %}
 7083 %}
 7084 
 7085 // src != 0, shift != 0, base != 0
 7086 instruct decodeNKlass_notNull_addBase_ExEx(iRegPdst dst, iRegNsrc src) %{
 7087   match(Set dst (DecodeNKlass src));
 7088   // predicate(CompressedKlassPointers::shift() != 0 &&
 7089   //           CompressedKlassPointers::base() != 0);
 7090 
 7091   //format %{ "DecodeNKlass $dst, $src \t// $src != NULL, expanded" %}
 7092 
 7093   ins_cost(DEFAULT_COST*2);  // Don't count constant.
 7094   expand %{
    // We add first, then we shift. This way we get along with one register less,
    // but we have to load the base pre-shifted.
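    // I.e. dst = ((base >> shift) + narrow) << shift == base + (narrow << shift),
    // which holds as long as the base is aligned to 1 << shift.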
 7097     immL baseImm %{ (jlong)((intptr_t)CompressedKlassPointers::base() >> CompressedKlassPointers::shift()) %}
 7098     iRegLdst base;
 7099     loadConL_Ex(base, baseImm);
 7100     decodeNKlass_notNull_addBase_Ex(dst, base, src);
 7101   %}
 7102 %}
 7103 
 7104 //----------MemBar Instructions-----------------------------------------------
 7105 // Memory barrier flavors
 7106 
 7107 instruct membar_acquire() %{
 7108   match(LoadFence);
 7109   ins_cost(4*MEMORY_REF_COST);
 7110 
 7111   format %{ "MEMBAR-acquire" %}
 7112   size(4);
 7113   ins_encode %{
 7114     __ acquire();
 7115   %}
 7116   ins_pipe(pipe_class_default);
 7117 %}
 7118 
 7119 instruct unnecessary_membar_acquire() %{
 7120   match(MemBarAcquire);
 7121   ins_cost(0);
 7122 
 7123   format %{ " -- \t// redundant MEMBAR-acquire - empty" %}
 7124   size(0);
 7125   ins_encode( /*empty*/ );
 7126   ins_pipe(pipe_class_default);
 7127 %}
 7128 
 7129 instruct membar_acquire_lock() %{
 7130   match(MemBarAcquireLock);
 7131   ins_cost(0);
 7132 
 7133   format %{ " -- \t// redundant MEMBAR-acquire - empty (acquire as part of CAS in prior FastLock)" %}
 7134   size(0);
 7135   ins_encode( /*empty*/ );
 7136   ins_pipe(pipe_class_default);
 7137 %}
 7138 
 7139 instruct membar_release() %{
 7140   match(MemBarRelease);
 7141   match(StoreFence);
 7142   ins_cost(4*MEMORY_REF_COST);
 7143 
 7144   format %{ "MEMBAR-release" %}
 7145   size(4);
 7146   ins_encode %{
 7147     __ release();
 7148   %}
 7149   ins_pipe(pipe_class_default);
 7150 %}
 7151 
 7152 instruct membar_storestore() %{
 7153   match(MemBarStoreStore);
 7154   ins_cost(4*MEMORY_REF_COST);
 7155 
 7156   format %{ "MEMBAR-store-store" %}
 7157   size(4);
 7158   ins_encode %{
 7159     __ membar(Assembler::StoreStore);
 7160   %}
 7161   ins_pipe(pipe_class_default);
 7162 %}
 7163 
 7164 instruct membar_release_lock() %{
 7165   match(MemBarReleaseLock);
 7166   ins_cost(0);
 7167 
 7168   format %{ " -- \t// redundant MEMBAR-release - empty (release in FastUnlock)" %}
 7169   size(0);
 7170   ins_encode( /*empty*/ );
 7171   ins_pipe(pipe_class_default);
 7172 %}
 7173 
 7174 instruct membar_volatile() %{
 7175   match(MemBarVolatile);
 7176   ins_cost(4*MEMORY_REF_COST);
 7177 
 7178   format %{ "MEMBAR-volatile" %}
 7179   size(4);
 7180   ins_encode %{
 7181     __ fence();
 7182   %}
 7183   ins_pipe(pipe_class_default);
 7184 %}
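// Note: on this port, acquire() and release() are expected to emit a
// lightweight sync (lwsync), while fence() emits a full sync; the latter is
// the more expensive barrier at run time.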
 7185 
 7186 // This optimization is wrong on PPC. The following pattern is not supported:
 7187 //  MemBarVolatile
 7188 //   ^        ^
 7189 //   |        |
 7190 //  CtrlProj MemProj
 7191 //   ^        ^
 7192 //   |        |
 7193 //   |       Load
 7194 //   |
 7195 //  MemBarVolatile
 7196 //
//  The first MemBarVolatile could get optimized out! According to
//  Vladimir, this pattern cannot occur on Oracle platforms.
 7199 //  However, it does occur on PPC64 (because of membars in
 7200 //  inline_unsafe_load_store).
 7201 //
// Add this node again if we find a good solution for inline_unsafe_load_store().
// Don't forget to look at the implementation of post_store_load_barrier again;
// we made other fixes in that method.
 7205 //instruct unnecessary_membar_volatile() %{
 7206 //  match(MemBarVolatile);
 7207 //  predicate(Matcher::post_store_load_barrier(n));
 7208 //  ins_cost(0);
 7209 //
 7210 //  format %{ " -- \t// redundant MEMBAR-volatile - empty" %}
 7211 //  size(0);
 7212 //  ins_encode( /*empty*/ );
 7213 //  ins_pipe(pipe_class_default);
 7214 //%}
 7215 
 7216 instruct membar_CPUOrder() %{
 7217   match(MemBarCPUOrder);
 7218   ins_cost(0);
 7219 
 7220   format %{ " -- \t// MEMBAR-CPUOrder - empty: PPC64 processors are self-consistent." %}
 7221   size(0);
 7222   ins_encode( /*empty*/ );
 7223   ins_pipe(pipe_class_default);
 7224 %}
 7225 
 7226 //----------Conditional Move---------------------------------------------------
 7227 
 7228 // Cmove using isel.
 7229 instruct cmovI_reg_isel(cmpOp cmp, flagsRegSrc crx, iRegIdst dst, iRegIsrc src) %{
 7230   match(Set dst (CMoveI (Binary cmp crx) (Binary dst src)));
 7231   predicate(VM_Version::has_isel());
 7232   ins_cost(DEFAULT_COST);
 7233 
 7234   format %{ "CMOVE   $cmp, $crx, $dst, $src\n\t" %}
 7235   size(4);
 7236   ins_encode %{
 7237     // This is a Power7 instruction for which no machine description
    // exists. Anyway, the scheduler should be off on Power7.
 7239     int cc        = $cmp$$cmpcode;
 7240     __ isel($dst$$Register, $crx$$CondRegister,
 7241             (Assembler::Condition)(cc & 3), /*invert*/((~cc) & 8), $src$$Register);
 7242   %}
 7243   ins_pipe(pipe_class_default);
 7244 %}
 7245 
 7246 instruct cmovI_reg(cmpOp cmp, flagsRegSrc crx, iRegIdst dst, iRegIsrc src) %{
 7247   match(Set dst (CMoveI (Binary cmp crx) (Binary dst src)));
 7248   predicate(!VM_Version::has_isel());
 7249   ins_cost(DEFAULT_COST+BRANCH_COST);
 7250 
 7251   ins_variable_size_depending_on_alignment(true);
 7252 
 7253   format %{ "CMOVE   $cmp, $crx, $dst, $src\n\t" %}
 7254   // Worst case is branch + move + stop, no stop without scheduler
 7255   size(8);
 7256   ins_encode( enc_cmove_reg(dst, crx, src, cmp) );
 7257   ins_pipe(pipe_class_default);
 7258 %}
 7259 
 7260 instruct cmovI_imm(cmpOp cmp, flagsRegSrc crx, iRegIdst dst, immI16 src) %{
 7261   match(Set dst (CMoveI (Binary cmp crx) (Binary dst src)));
 7262   ins_cost(DEFAULT_COST+BRANCH_COST);
 7263 
 7264   ins_variable_size_depending_on_alignment(true);
 7265 
 7266   format %{ "CMOVE   $cmp, $crx, $dst, $src\n\t" %}
 7267   // Worst case is branch + move + stop, no stop without scheduler
 7268   size(8);
 7269   ins_encode( enc_cmove_imm(dst, crx, src, cmp) );
 7270   ins_pipe(pipe_class_default);
 7271 %}
 7272 
 7273 // Cmove using isel.
 7274 instruct cmovL_reg_isel(cmpOp cmp, flagsRegSrc crx, iRegLdst dst, iRegLsrc src) %{
 7275   match(Set dst (CMoveL (Binary cmp crx) (Binary dst src)));
 7276   predicate(VM_Version::has_isel());
 7277   ins_cost(DEFAULT_COST);
 7278 
 7279   format %{ "CMOVE   $cmp, $crx, $dst, $src\n\t" %}
 7280   size(4);
 7281   ins_encode %{
 7282     // This is a Power7 instruction for which no machine description
    // exists. Anyway, the scheduler should be off on Power7.
 7284     int cc        = $cmp$$cmpcode;
 7285     __ isel($dst$$Register, $crx$$CondRegister,
 7286             (Assembler::Condition)(cc & 3), /*invert*/((~cc) & 8), $src$$Register);
 7287   %}
 7288   ins_pipe(pipe_class_default);
 7289 %}
 7290 
 7291 instruct cmovL_reg(cmpOp cmp, flagsRegSrc crx, iRegLdst dst, iRegLsrc src) %{
 7292   match(Set dst (CMoveL (Binary cmp crx) (Binary dst src)));
 7293   predicate(!VM_Version::has_isel());
 7294   ins_cost(DEFAULT_COST+BRANCH_COST);
 7295 
 7296   ins_variable_size_depending_on_alignment(true);
 7297 
 7298   format %{ "CMOVE   $cmp, $crx, $dst, $src\n\t" %}
 7299   // Worst case is branch + move + stop, no stop without scheduler.
 7300   size(8);
 7301   ins_encode( enc_cmove_reg(dst, crx, src, cmp) );
 7302   ins_pipe(pipe_class_default);
 7303 %}
 7304 
 7305 instruct cmovL_imm(cmpOp cmp, flagsRegSrc crx, iRegLdst dst, immL16 src) %{
 7306   match(Set dst (CMoveL (Binary cmp crx) (Binary dst src)));
 7307   ins_cost(DEFAULT_COST+BRANCH_COST);
 7308 
 7309   ins_variable_size_depending_on_alignment(true);
 7310 
 7311   format %{ "CMOVE   $cmp, $crx, $dst, $src\n\t" %}
 7312   // Worst case is branch + move + stop, no stop without scheduler.
 7313   size(8);
 7314   ins_encode( enc_cmove_imm(dst, crx, src, cmp) );
 7315   ins_pipe(pipe_class_default);
 7316 %}
 7317 
 7318 // Cmove using isel.
 7319 instruct cmovN_reg_isel(cmpOp cmp, flagsRegSrc crx, iRegNdst dst, iRegNsrc src) %{
 7320   match(Set dst (CMoveN (Binary cmp crx) (Binary dst src)));
 7321   predicate(VM_Version::has_isel());
 7322   ins_cost(DEFAULT_COST);
 7323 
 7324   format %{ "CMOVE   $cmp, $crx, $dst, $src\n\t" %}
 7325   size(4);
 7326   ins_encode %{
 7327     // This is a Power7 instruction for which no machine description
    // exists. Anyway, the scheduler should be off on Power7.
 7329     int cc        = $cmp$$cmpcode;
 7330     __ isel($dst$$Register, $crx$$CondRegister,
 7331             (Assembler::Condition)(cc & 3), /*invert*/((~cc) & 8), $src$$Register);
 7332   %}
 7333   ins_pipe(pipe_class_default);
 7334 %}
 7335 
 7336 // Conditional move for RegN. Only cmov(reg, reg).
 7337 instruct cmovN_reg(cmpOp cmp, flagsRegSrc crx, iRegNdst dst, iRegNsrc src) %{
 7338   match(Set dst (CMoveN (Binary cmp crx) (Binary dst src)));
 7339   predicate(!VM_Version::has_isel());
 7340   ins_cost(DEFAULT_COST+BRANCH_COST);
 7341 
 7342   ins_variable_size_depending_on_alignment(true);
 7343 
 7344   format %{ "CMOVE   $cmp, $crx, $dst, $src\n\t" %}
 7345   // Worst case is branch + move + stop, no stop without scheduler.
 7346   size(8);
 7347   ins_encode( enc_cmove_reg(dst, crx, src, cmp) );
 7348   ins_pipe(pipe_class_default);
 7349 %}
 7350 
 7351 instruct cmovN_imm(cmpOp cmp, flagsRegSrc crx, iRegNdst dst, immN_0 src) %{
 7352   match(Set dst (CMoveN (Binary cmp crx) (Binary dst src)));
 7353   ins_cost(DEFAULT_COST+BRANCH_COST);
 7354 
 7355   ins_variable_size_depending_on_alignment(true);
 7356 
 7357   format %{ "CMOVE   $cmp, $crx, $dst, $src\n\t" %}
 7358   // Worst case is branch + move + stop, no stop without scheduler.
 7359   size(8);
 7360   ins_encode( enc_cmove_imm(dst, crx, src, cmp) );
 7361   ins_pipe(pipe_class_default);
 7362 %}
 7363 
 7364 // Cmove using isel.
 7365 instruct cmovP_reg_isel(cmpOp cmp, flagsRegSrc crx, iRegPdst dst, iRegPsrc src) %{
 7366   match(Set dst (CMoveP (Binary cmp crx) (Binary dst src)));
 7367   predicate(VM_Version::has_isel());
 7368   ins_cost(DEFAULT_COST);
 7369 
 7370   format %{ "CMOVE   $cmp, $crx, $dst, $src\n\t" %}
 7371   size(4);
 7372   ins_encode %{
 7373     // This is a Power7 instruction for which no machine description
    // exists. Anyway, the scheduler should be off on Power7.
 7375     int cc        = $cmp$$cmpcode;
 7376     __ isel($dst$$Register, $crx$$CondRegister,
 7377             (Assembler::Condition)(cc & 3), /*invert*/((~cc) & 8), $src$$Register);
 7378   %}
 7379   ins_pipe(pipe_class_default);
 7380 %}
 7381 
 7382 instruct cmovP_reg(cmpOp cmp, flagsRegSrc crx, iRegPdst dst, iRegP_N2P src) %{
 7383   match(Set dst (CMoveP (Binary cmp crx) (Binary dst src)));
 7384   predicate(!VM_Version::has_isel());
 7385   ins_cost(DEFAULT_COST+BRANCH_COST);
 7386 
 7387   ins_variable_size_depending_on_alignment(true);
 7388 
 7389   format %{ "CMOVE   $cmp, $crx, $dst, $src\n\t" %}
 7390   // Worst case is branch + move + stop, no stop without scheduler.
 7391   size(8);
 7392   ins_encode( enc_cmove_reg(dst, crx, src, cmp) );
 7393   ins_pipe(pipe_class_default);
 7394 %}
 7395 
 7396 instruct cmovP_imm(cmpOp cmp, flagsRegSrc crx, iRegPdst dst, immP_0 src) %{
 7397   match(Set dst (CMoveP (Binary cmp crx) (Binary dst src)));
 7398   ins_cost(DEFAULT_COST+BRANCH_COST);
 7399 
 7400   ins_variable_size_depending_on_alignment(true);
 7401 
 7402   format %{ "CMOVE   $cmp, $crx, $dst, $src\n\t" %}
 7403   // Worst case is branch + move + stop, no stop without scheduler.
 7404   size(8);
 7405   ins_encode( enc_cmove_imm(dst, crx, src, cmp) );
 7406   ins_pipe(pipe_class_default);
 7407 %}
 7408 
 7409 instruct cmovF_reg(cmpOp cmp, flagsRegSrc crx, regF dst, regF src) %{
 7410   match(Set dst (CMoveF (Binary cmp crx) (Binary dst src)));
 7411   ins_cost(DEFAULT_COST+BRANCH_COST);
 7412 
 7413   ins_variable_size_depending_on_alignment(true);
 7414 
 7415   format %{ "CMOVEF  $cmp, $crx, $dst, $src\n\t" %}
 7416   // Worst case is branch + move + stop, no stop without scheduler.
 7417   size(8);
 7418   ins_encode %{
 7419     Label done;
 7420     assert((Assembler::bcondCRbiIs1 & ~Assembler::bcondCRbiIs0) == 8, "check encoding");
 7421     // Branch if not (cmp crx).
 7422     __ bc(cc_to_inverse_boint($cmp$$cmpcode), cc_to_biint($cmp$$cmpcode, $crx$$reg), done);
 7423     __ fmr($dst$$FloatRegister, $src$$FloatRegister);
 7424     __ bind(done);
 7425   %}
 7426   ins_pipe(pipe_class_default);
 7427 %}
 7428 
 7429 instruct cmovD_reg(cmpOp cmp, flagsRegSrc crx, regD dst, regD src) %{
 7430   match(Set dst (CMoveD (Binary cmp crx) (Binary dst src)));
 7431   ins_cost(DEFAULT_COST+BRANCH_COST);
 7432 
 7433   ins_variable_size_depending_on_alignment(true);
 7434 
 7435   format %{ "CMOVEF  $cmp, $crx, $dst, $src\n\t" %}
 7436   // Worst case is branch + move + stop, no stop without scheduler.
 7437   size(8);
 7438   ins_encode %{
 7439     Label done;
 7440     assert((Assembler::bcondCRbiIs1 & ~Assembler::bcondCRbiIs0) == 8, "check encoding");
 7441     // Branch if not (cmp crx).
 7442     __ bc(cc_to_inverse_boint($cmp$$cmpcode), cc_to_biint($cmp$$cmpcode, $crx$$reg), done);
 7443     __ fmr($dst$$FloatRegister, $src$$FloatRegister);
 7444     __ bind(done);
 7445   %}
 7446   ins_pipe(pipe_class_default);
 7447 %}
 7448 
 7449 //----------Conditional_store--------------------------------------------------
 7450 // Conditional-store of the updated heap-top.
 7451 // Used during allocation of the shared heap.
 7452 // Sets flags (EQ) on success. Implemented with a CASA on Sparc.
 7453 
 7454 // As compareAndSwapL, but return flag register instead of boolean value in
 7455 // int register.
 7456 // Used by sun/misc/AtomicLongCSImpl.java.
// Mem_ptr must be a memory operand, otherwise this node does not get
// Flag_needs_anti_dependence_check set by adlc. If this flag is not set, this node
// can be rematerialized, which leads to errors.
 7460 instruct storeLConditional_regP_regL_regL(flagsReg crx, indirect mem_ptr, iRegLsrc oldVal, iRegLsrc newVal, flagsRegCR0 cr0) %{
 7461   match(Set crx (StoreLConditional mem_ptr (Binary oldVal newVal)));
 7462   effect(TEMP cr0);
 7463   format %{ "CMPXCHGD if ($crx = ($oldVal == *$mem_ptr)) *mem_ptr = $newVal; as bool" %}
 7464   ins_encode %{
 7465     __ cmpxchgd($crx$$CondRegister, R0, $oldVal$$Register, $newVal$$Register, $mem_ptr$$Register,
 7466                 MacroAssembler::MemBarAcq, MacroAssembler::cmpxchgx_hint_atomic_update(),
 7467                 noreg, NULL, true);
 7468   %}
 7469   ins_pipe(pipe_class_default);
 7470 %}
 7471 
 7472 // As compareAndSwapP, but return flag register instead of boolean value in
 7473 // int register.
 7474 // This instruction is matched if UseTLAB is off.
// Mem_ptr must be a memory operand, otherwise this node does not get
// Flag_needs_anti_dependence_check set by adlc. If this flag is not set, this node
// can be rematerialized, which leads to errors.
 7478 instruct storePConditional_regP_regP_regP(flagsRegCR0 cr0, indirect mem_ptr, iRegPsrc oldVal, iRegPsrc newVal) %{
 7479   match(Set cr0 (StorePConditional mem_ptr (Binary oldVal newVal)));
 7480   ins_cost(2*MEMORY_REF_COST);
 7481 
 7482   format %{ "STDCX_  if ($cr0 = ($oldVal == *$mem_ptr)) *mem_ptr = $newVal; as bool" %}
 7483   ins_encode %{
 7484     __ stdcx_($newVal$$Register, $mem_ptr$$Register);
 7485   %}
 7486   ins_pipe(pipe_class_memory);
 7487 %}
 7488 
 7489 // Implement LoadPLocked. Must be ordered against changes of the memory location
 7490 // by storePConditional.
 7491 // Don't know whether this is ever used.
 7492 instruct loadPLocked(iRegPdst dst, memory mem) %{
 7493   match(Set dst (LoadPLocked mem));
 7494   ins_cost(2*MEMORY_REF_COST);
 7495 
 7496   format %{ "LDARX   $dst, $mem \t// loadPLocked\n\t" %}
 7497   size(4);
 7498   ins_encode %{
 7499     __ ldarx($dst$$Register, $mem$$Register, MacroAssembler::cmpxchgx_hint_atomic_update());
 7500   %}
 7501   ins_pipe(pipe_class_memory);
 7502 %}
 7503 
 7504 //----------Compare-And-Swap---------------------------------------------------
 7505 
 7506 // CompareAndSwap{P,I,L} have more than one output, therefore "CmpI
 7507 // (CompareAndSwap ...)" or "If (CmpI (CompareAndSwap ..))"  cannot be
 7508 // matched.
 7509 
 7510 // Strong versions:
 7511 
 7512 instruct compareAndSwapB_regP_regI_regI(iRegIdst res, iRegPdst mem_ptr, iRegIsrc src1, iRegIsrc src2, flagsRegCR0 cr0) %{
 7513   match(Set res (CompareAndSwapB mem_ptr (Binary src1 src2)));
 7514   predicate(VM_Version::has_lqarx());
 7515   effect(TEMP_DEF res, TEMP cr0); // TEMP_DEF to avoid jump
 7516   format %{ "CMPXCHGB $res, $mem_ptr, $src1, $src2; as bool" %}
 7517   ins_encode %{
 7518     // CmpxchgX sets CCR0 to cmpX(src1, src2) and Rres to 'true'/'false'.
 7519     __ cmpxchgb(CCR0, R0, $src1$$Register, $src2$$Register, $mem_ptr$$Register, noreg, noreg,
 7520                 MacroAssembler::MemBarNone, MacroAssembler::cmpxchgx_hint_atomic_update(),
 7521                 $res$$Register, true);
 7522     if (support_IRIW_for_not_multiple_copy_atomic_cpu) {
 7523       __ isync();
 7524     } else {
 7525       __ sync();
 7526     }
 7527   %}
 7528   ins_pipe(pipe_class_default);
 7529 %}
 7530 
 7531 instruct compareAndSwapB4_regP_regI_regI(iRegIdst res, rarg3RegP mem_ptr, iRegIsrc src1, rarg4RegI src2, iRegIdst tmp1, iRegIdst tmp2, flagsRegCR0 cr0) %{
 7532   match(Set res (CompareAndSwapB mem_ptr (Binary src1 src2)));
 7533   predicate(!VM_Version::has_lqarx());
 7534   effect(TEMP_DEF res, USE_KILL src2, USE_KILL mem_ptr, TEMP tmp1, TEMP tmp2, TEMP cr0); // TEMP_DEF to avoid jump
 7535   format %{ "CMPXCHGB $res, $mem_ptr, $src1, $src2; as bool" %}
 7536   ins_encode %{
 7537     // CmpxchgX sets CCR0 to cmpX(src1, src2) and Rres to 'true'/'false'.
 7538     __ cmpxchgb(CCR0, R0, $src1$$Register, $src2$$Register, $mem_ptr$$Register, $tmp1$$Register, $tmp2$$Register,
 7539                 MacroAssembler::MemBarNone, MacroAssembler::cmpxchgx_hint_atomic_update(),
 7540                 $res$$Register, true);
 7541     if (support_IRIW_for_not_multiple_copy_atomic_cpu) {
 7542       __ isync();
 7543     } else {
 7544       __ sync();
 7545     }
 7546   %}
 7547   ins_pipe(pipe_class_default);
 7548 %}
 7549 
 7550 instruct compareAndSwapS_regP_regI_regI(iRegIdst res, iRegPdst mem_ptr, iRegIsrc src1, iRegIsrc src2, flagsRegCR0 cr0) %{
 7551   match(Set res (CompareAndSwapS mem_ptr (Binary src1 src2)));
 7552   predicate(VM_Version::has_lqarx());
 7553   effect(TEMP_DEF res, TEMP cr0); // TEMP_DEF to avoid jump
 7554   format %{ "CMPXCHGH $res, $mem_ptr, $src1, $src2; as bool" %}
 7555   ins_encode %{
 7556     // CmpxchgX sets CCR0 to cmpX(src1, src2) and Rres to 'true'/'false'.
 7557     __ cmpxchgh(CCR0, R0, $src1$$Register, $src2$$Register, $mem_ptr$$Register, noreg, noreg,
 7558                 MacroAssembler::MemBarNone, MacroAssembler::cmpxchgx_hint_atomic_update(),
 7559                 $res$$Register, true);
 7560     if (support_IRIW_for_not_multiple_copy_atomic_cpu) {
 7561       __ isync();
 7562     } else {
 7563       __ sync();
 7564     }
 7565   %}
 7566   ins_pipe(pipe_class_default);
 7567 %}
 7568 
 7569 instruct compareAndSwapS4_regP_regI_regI(iRegIdst res, rarg3RegP mem_ptr, iRegIsrc src1, rarg4RegI src2, iRegIdst tmp1, iRegIdst tmp2, flagsRegCR0 cr0) %{
 7570   match(Set res (CompareAndSwapS mem_ptr (Binary src1 src2)));
 7571   predicate(!VM_Version::has_lqarx());
 7572   effect(TEMP_DEF res, USE_KILL src2, USE_KILL mem_ptr, TEMP tmp1, TEMP tmp2, TEMP cr0); // TEMP_DEF to avoid jump
 7573   format %{ "CMPXCHGH $res, $mem_ptr, $src1, $src2; as bool" %}
 7574   ins_encode %{
 7575     // CmpxchgX sets CCR0 to cmpX(src1, src2) and Rres to 'true'/'false'.
 7576     __ cmpxchgh(CCR0, R0, $src1$$Register, $src2$$Register, $mem_ptr$$Register, $tmp1$$Register, $tmp2$$Register,
 7577                 MacroAssembler::MemBarNone, MacroAssembler::cmpxchgx_hint_atomic_update(),
 7578                 $res$$Register, true);
 7579     if (support_IRIW_for_not_multiple_copy_atomic_cpu) {
 7580       __ isync();
 7581     } else {
 7582       __ sync();
 7583     }
 7584   %}
 7585   ins_pipe(pipe_class_default);
 7586 %}
 7587 
 7588 instruct compareAndSwapI_regP_regI_regI(iRegIdst res, iRegPdst mem_ptr, iRegIsrc src1, iRegIsrc src2, flagsRegCR0 cr0) %{
 7589   match(Set res (CompareAndSwapI mem_ptr (Binary src1 src2)));
 7590   effect(TEMP_DEF res, TEMP cr0); // TEMP_DEF to avoid jump
 7591   format %{ "CMPXCHGW $res, $mem_ptr, $src1, $src2; as bool" %}
 7592   ins_encode %{
 7593     // CmpxchgX sets CCR0 to cmpX(src1, src2) and Rres to 'true'/'false'.
 7594     __ cmpxchgw(CCR0, R0, $src1$$Register, $src2$$Register, $mem_ptr$$Register,
 7595                 MacroAssembler::MemBarNone, MacroAssembler::cmpxchgx_hint_atomic_update(),
 7596                 $res$$Register, true);
 7597     if (support_IRIW_for_not_multiple_copy_atomic_cpu) {
 7598       __ isync();
 7599     } else {
 7600       __ sync();
 7601     }
 7602   %}
 7603   ins_pipe(pipe_class_default);
 7604 %}
 7605 
 7606 instruct compareAndSwapN_regP_regN_regN(iRegIdst res, iRegPdst mem_ptr, iRegNsrc src1, iRegNsrc src2, flagsRegCR0 cr0) %{
 7607   match(Set res (CompareAndSwapN mem_ptr (Binary src1 src2)));
 7608   effect(TEMP_DEF res, TEMP cr0); // TEMP_DEF to avoid jump
 7609   format %{ "CMPXCHGW $res, $mem_ptr, $src1, $src2; as bool" %}
 7610   ins_encode %{
 7611     // CmpxchgX sets CCR0 to cmpX(src1, src2) and Rres to 'true'/'false'.
 7612     __ cmpxchgw(CCR0, R0, $src1$$Register, $src2$$Register, $mem_ptr$$Register,
 7613                 MacroAssembler::MemBarNone, MacroAssembler::cmpxchgx_hint_atomic_update(),
 7614                 $res$$Register, true);
 7615     if (support_IRIW_for_not_multiple_copy_atomic_cpu) {
 7616       __ isync();
 7617     } else {
 7618       __ sync();
 7619     }
 7620   %}
 7621   ins_pipe(pipe_class_default);
 7622 %}
 7623 
 7624 instruct compareAndSwapL_regP_regL_regL(iRegIdst res, iRegPdst mem_ptr, iRegLsrc src1, iRegLsrc src2, flagsRegCR0 cr0) %{
 7625   match(Set res (CompareAndSwapL mem_ptr (Binary src1 src2)));
 7626   effect(TEMP_DEF res, TEMP cr0); // TEMP_DEF to avoid jump
 7627   format %{ "CMPXCHGD $res, $mem_ptr, $src1, $src2; as bool" %}
 7628   ins_encode %{
 7629     // CmpxchgX sets CCR0 to cmpX(src1, src2) and Rres to 'true'/'false'.
 7630     __ cmpxchgd(CCR0, R0, $src1$$Register, $src2$$Register, $mem_ptr$$Register,
 7631                 MacroAssembler::MemBarNone, MacroAssembler::cmpxchgx_hint_atomic_update(),
 7632                 $res$$Register, NULL, true);
 7633     if (support_IRIW_for_not_multiple_copy_atomic_cpu) {
 7634       __ isync();
 7635     } else {
 7636       __ sync();
 7637     }
 7638   %}
 7639   ins_pipe(pipe_class_default);
 7640 %}
 7641 
 7642 instruct compareAndSwapP_regP_regP_regP(iRegIdst res, iRegPdst mem_ptr, iRegPsrc src1, iRegPsrc src2, flagsRegCR0 cr0) %{
 7643   match(Set res (CompareAndSwapP mem_ptr (Binary src1 src2)));
 7644   effect(TEMP_DEF res, TEMP cr0); // TEMP_DEF to avoid jump
 7645   format %{ "CMPXCHGD $res, $mem_ptr, $src1, $src2; as bool; ptr" %}
 7646   ins_encode %{
 7647     // CmpxchgX sets CCR0 to cmpX(src1, src2) and Rres to 'true'/'false'.
 7648     __ cmpxchgd(CCR0, R0, $src1$$Register, $src2$$Register, $mem_ptr$$Register,
 7649                 MacroAssembler::MemBarNone, MacroAssembler::cmpxchgx_hint_atomic_update(),
 7650                 $res$$Register, NULL, true);
 7651     if (support_IRIW_for_not_multiple_copy_atomic_cpu) {
 7652       __ isync();
 7653     } else {
 7654       __ sync();
 7655     }
 7656   %}
 7657   ins_pipe(pipe_class_default);
 7658 %}
 7659 
 7660 // Weak versions:
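//
// The weak versions omit the unconditional trailing barrier of the strong
// versions. When the node requests acquire or seq_cst ordering, the barrier is
// handed to the cmpxchg helper instead. Illustrative sketch of the selection
// used below (the parameter is shown as a plain int here):
//
//   int semantics = support_IRIW_for_not_multiple_copy_atomic_cpu
//                   ? MacroAssembler::MemBarAcq         // acquire on success
//                   : MacroAssembler::MemBarFenceAfter; // full fence after the access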
 7661 
 7662 instruct weakCompareAndSwapB_regP_regI_regI(iRegIdst res, iRegPdst mem_ptr, iRegIsrc src1, iRegIsrc src2, flagsRegCR0 cr0) %{
 7663   match(Set res (WeakCompareAndSwapB mem_ptr (Binary src1 src2)));
 7664   predicate(((CompareAndSwapNode*)n)->order() != MemNode::acquire && ((CompareAndSwapNode*)n)->order() != MemNode::seqcst && VM_Version::has_lqarx());
 7665   effect(TEMP_DEF res, TEMP cr0); // TEMP_DEF to avoid jump
 7666   format %{ "weak CMPXCHGB $res, $mem_ptr, $src1, $src2; as bool" %}
 7667   ins_encode %{
 7668     // CmpxchgX sets CCR0 to cmpX(src1, src2) and Rres to 'true'/'false'.
 7669     __ cmpxchgb(CCR0, R0, $src1$$Register, $src2$$Register, $mem_ptr$$Register, noreg, noreg,
 7670                 MacroAssembler::MemBarNone,
 7671                 MacroAssembler::cmpxchgx_hint_atomic_update(), $res$$Register, true, /*weak*/ true);
 7672   %}
 7673   ins_pipe(pipe_class_default);
 7674 %}
 7675 
 7676 instruct weakCompareAndSwapB4_regP_regI_regI(iRegIdst res, rarg3RegP mem_ptr, iRegIsrc src1, rarg4RegI src2, iRegIdst tmp1, iRegIdst tmp2, flagsRegCR0 cr0) %{
 7677   match(Set res (WeakCompareAndSwapB mem_ptr (Binary src1 src2)));
 7678   predicate(((CompareAndSwapNode*)n)->order() != MemNode::acquire && ((CompareAndSwapNode*)n)->order() != MemNode::seqcst && !VM_Version::has_lqarx());
 7679   effect(TEMP_DEF res, USE_KILL src2, USE_KILL mem_ptr, TEMP tmp1, TEMP tmp2, TEMP cr0); // TEMP_DEF to avoid jump
 7680   format %{ "weak CMPXCHGB $res, $mem_ptr, $src1, $src2; as bool" %}
 7681   ins_encode %{
 7682     // CmpxchgX sets CCR0 to cmpX(src1, src2) and Rres to 'true'/'false'.
 7683     __ cmpxchgb(CCR0, R0, $src1$$Register, $src2$$Register, $mem_ptr$$Register, $tmp1$$Register, $tmp2$$Register,
 7684                 MacroAssembler::MemBarNone,
 7685                 MacroAssembler::cmpxchgx_hint_atomic_update(), $res$$Register, true, /*weak*/ true);
 7686   %}
 7687   ins_pipe(pipe_class_default);
 7688 %}
 7689 
 7690 instruct weakCompareAndSwapB_acq_regP_regI_regI(iRegIdst res, iRegPdst mem_ptr, iRegIsrc src1, iRegIsrc src2, flagsRegCR0 cr0) %{
 7691   match(Set res (WeakCompareAndSwapB mem_ptr (Binary src1 src2)));
 7692   predicate((((CompareAndSwapNode*)n)->order() == MemNode::acquire || ((CompareAndSwapNode*)n)->order() == MemNode::seqcst) && VM_Version::has_lqarx());
 7693   effect(TEMP_DEF res, TEMP cr0); // TEMP_DEF to avoid jump
 7694   format %{ "weak CMPXCHGB acq $res, $mem_ptr, $src1, $src2; as bool" %}
 7695   ins_encode %{
 7696     // CmpxchgX sets CCR0 to cmpX(src1, src2) and Rres to 'true'/'false'.
 7697     __ cmpxchgb(CCR0, R0, $src1$$Register, $src2$$Register, $mem_ptr$$Register, noreg, noreg,
 7698                 support_IRIW_for_not_multiple_copy_atomic_cpu ? MacroAssembler::MemBarAcq : MacroAssembler::MemBarFenceAfter,
 7699                 MacroAssembler::cmpxchgx_hint_atomic_update(), $res$$Register, true, /*weak*/ true);
 7700   %}
 7701   ins_pipe(pipe_class_default);
 7702 %}
 7703 
 7704 instruct weakCompareAndSwapB4_acq_regP_regI_regI(iRegIdst res, rarg3RegP mem_ptr, iRegIsrc src1, rarg4RegI src2, iRegIdst tmp1, iRegIdst tmp2, flagsRegCR0 cr0) %{
 7705   match(Set res (WeakCompareAndSwapB mem_ptr (Binary src1 src2)));
 7706   predicate((((CompareAndSwapNode*)n)->order() == MemNode::acquire || ((CompareAndSwapNode*)n)->order() == MemNode::seqcst) && !VM_Version::has_lqarx());
 7707   effect(TEMP_DEF res, USE_KILL src2, USE_KILL mem_ptr, TEMP tmp1, TEMP tmp2, TEMP cr0); // TEMP_DEF to avoid jump
 7708   format %{ "weak CMPXCHGB acq $res, $mem_ptr, $src1, $src2; as bool" %}
 7709   ins_encode %{
 7710     // CmpxchgX sets CCR0 to cmpX(src1, src2) and Rres to 'true'/'false'.
 7711     __ cmpxchgb(CCR0, R0, $src1$$Register, $src2$$Register, $mem_ptr$$Register, $tmp1$$Register, $tmp2$$Register,
 7712                 support_IRIW_for_not_multiple_copy_atomic_cpu ? MacroAssembler::MemBarAcq : MacroAssembler::MemBarFenceAfter,
 7713                 MacroAssembler::cmpxchgx_hint_atomic_update(), $res$$Register, true, /*weak*/ true);
 7714   %}
 7715   ins_pipe(pipe_class_default);
 7716 %}
 7717 
 7718 instruct weakCompareAndSwapS_regP_regI_regI(iRegIdst res, iRegPdst mem_ptr, iRegIsrc src1, iRegIsrc src2, flagsRegCR0 cr0) %{
 7719   match(Set res (WeakCompareAndSwapS mem_ptr (Binary src1 src2)));
 7720   predicate(((CompareAndSwapNode*)n)->order() != MemNode::acquire && ((CompareAndSwapNode*)n)->order() != MemNode::seqcst && VM_Version::has_lqarx());
 7721   effect(TEMP_DEF res, TEMP cr0); // TEMP_DEF to avoid jump
 7722   format %{ "weak CMPXCHGH $res, $mem_ptr, $src1, $src2; as bool" %}
 7723   ins_encode %{
 7724     // CmpxchgX sets CCR0 to cmpX(src1, src2) and Rres to 'true'/'false'.
 7725     __ cmpxchgh(CCR0, R0, $src1$$Register, $src2$$Register, $mem_ptr$$Register, noreg, noreg,
 7726                 MacroAssembler::MemBarNone,
 7727                 MacroAssembler::cmpxchgx_hint_atomic_update(), $res$$Register, true, /*weak*/ true);
 7728   %}
 7729   ins_pipe(pipe_class_default);
 7730 %}
 7731 
 7732 instruct weakCompareAndSwapS4_regP_regI_regI(iRegIdst res, rarg3RegP mem_ptr, iRegIsrc src1, rarg4RegI src2, iRegIdst tmp1, iRegIdst tmp2, flagsRegCR0 cr0) %{
 7733   match(Set res (WeakCompareAndSwapS mem_ptr (Binary src1 src2)));
 7734   predicate(((CompareAndSwapNode*)n)->order() != MemNode::acquire && ((CompareAndSwapNode*)n)->order() != MemNode::seqcst && !VM_Version::has_lqarx());
 7735   effect(TEMP_DEF res, USE_KILL src2, USE_KILL mem_ptr, TEMP tmp1, TEMP tmp2, TEMP cr0); // TEMP_DEF to avoid jump
 7736   format %{ "weak CMPXCHGH $res, $mem_ptr, $src1, $src2; as bool" %}
 7737   ins_encode %{
 7738     // CmpxchgX sets CCR0 to cmpX(src1, src2) and Rres to 'true'/'false'.
 7739     __ cmpxchgh(CCR0, R0, $src1$$Register, $src2$$Register, $mem_ptr$$Register, $tmp1$$Register, $tmp2$$Register,
 7740                 MacroAssembler::MemBarNone,
 7741                 MacroAssembler::cmpxchgx_hint_atomic_update(), $res$$Register, true, /*weak*/ true);
 7742   %}
 7743   ins_pipe(pipe_class_default);
 7744 %}
 7745 
 7746 instruct weakCompareAndSwapS_acq_regP_regI_regI(iRegIdst res, iRegPdst mem_ptr, iRegIsrc src1, iRegIsrc src2, flagsRegCR0 cr0) %{
 7747   match(Set res (WeakCompareAndSwapS mem_ptr (Binary src1 src2)));
 7748   predicate((((CompareAndSwapNode*)n)->order() == MemNode::acquire || ((CompareAndSwapNode*)n)->order() == MemNode::seqcst) && VM_Version::has_lqarx());
 7749   effect(TEMP_DEF res, TEMP cr0); // TEMP_DEF to avoid jump
 7750   format %{ "weak CMPXCHGH acq $res, $mem_ptr, $src1, $src2; as bool" %}
 7751   ins_encode %{
 7752     // CmpxchgX sets CCR0 to cmpX(src1, src2) and Rres to 'true'/'false'.
 7753     __ cmpxchgh(CCR0, R0, $src1$$Register, $src2$$Register, $mem_ptr$$Register, noreg, noreg,
 7754                 support_IRIW_for_not_multiple_copy_atomic_cpu ? MacroAssembler::MemBarAcq : MacroAssembler::MemBarFenceAfter,
 7755                 MacroAssembler::cmpxchgx_hint_atomic_update(), $res$$Register, true, /*weak*/ true);
 7756   %}
 7757   ins_pipe(pipe_class_default);
 7758 %}
 7759 
 7760 instruct weakCompareAndSwapS4_acq_regP_regI_regI(iRegIdst res, rarg3RegP mem_ptr, iRegIsrc src1, rarg4RegI src2, iRegIdst tmp1, iRegIdst tmp2, flagsRegCR0 cr0) %{
 7761   match(Set res (WeakCompareAndSwapS mem_ptr (Binary src1 src2)));
 7762   predicate((((CompareAndSwapNode*)n)->order() == MemNode::acquire || ((CompareAndSwapNode*)n)->order() == MemNode::seqcst) && !VM_Version::has_lqarx());
 7763   effect(TEMP_DEF res, USE_KILL src2, USE_KILL mem_ptr, TEMP tmp1, TEMP tmp2, TEMP cr0); // TEMP_DEF to avoid jump
 7764   format %{ "weak CMPXCHGH acq $res, $mem_ptr, $src1, $src2; as bool" %}
 7765   ins_encode %{
 7766     // CmpxchgX sets CCR0 to cmpX(src1, src2) and Rres to 'true'/'false'.
 7767     __ cmpxchgh(CCR0, R0, $src1$$Register, $src2$$Register, $mem_ptr$$Register, $tmp1$$Register, $tmp2$$Register,
 7768                 support_IRIW_for_not_multiple_copy_atomic_cpu ? MacroAssembler::MemBarAcq : MacroAssembler::MemBarFenceAfter,
 7769                 MacroAssembler::cmpxchgx_hint_atomic_update(), $res$$Register, true, /*weak*/ true);
 7770   %}
 7771   ins_pipe(pipe_class_default);
 7772 %}
 7773 
 7774 instruct weakCompareAndSwapI_regP_regI_regI(iRegIdst res, iRegPdst mem_ptr, iRegIsrc src1, iRegIsrc src2, flagsRegCR0 cr0) %{
 7775   match(Set res (WeakCompareAndSwapI mem_ptr (Binary src1 src2)));
 7776   predicate(((CompareAndSwapNode*)n)->order() != MemNode::acquire && ((CompareAndSwapNode*)n)->order() != MemNode::seqcst);
 7777   effect(TEMP_DEF res, TEMP cr0); // TEMP_DEF to avoid jump
 7778   format %{ "weak CMPXCHGW $res, $mem_ptr, $src1, $src2; as bool" %}
 7779   ins_encode %{
 7780     // CmpxchgX sets CCR0 to cmpX(src1, src2) and Rres to 'true'/'false'.
 7781     __ cmpxchgw(CCR0, R0, $src1$$Register, $src2$$Register, $mem_ptr$$Register,
 7782                 MacroAssembler::MemBarNone,
 7783                 MacroAssembler::cmpxchgx_hint_atomic_update(), $res$$Register, true, /*weak*/ true);
 7784   %}
 7785   ins_pipe(pipe_class_default);
 7786 %}
 7787 
 7788 instruct weakCompareAndSwapI_acq_regP_regI_regI(iRegIdst res, iRegPdst mem_ptr, iRegIsrc src1, iRegIsrc src2, flagsRegCR0 cr0) %{
 7789   match(Set res (WeakCompareAndSwapI mem_ptr (Binary src1 src2)));
 7790   predicate(((CompareAndSwapNode*)n)->order() == MemNode::acquire || ((CompareAndSwapNode*)n)->order() == MemNode::seqcst);
 7791   effect(TEMP_DEF res, TEMP cr0); // TEMP_DEF to avoid jump
 7792   format %{ "weak CMPXCHGW acq $res, $mem_ptr, $src1, $src2; as bool" %}
 7793   ins_encode %{
 7794     // CmpxchgX sets CCR0 to cmpX(src1, src2) and Rres to 'true'/'false'.
    // Acquire is only needed in the successful case. A weak node may spuriously
    // report failure, and the old value is never passed to the caller.
 7797     __ cmpxchgw(CCR0, R0, $src1$$Register, $src2$$Register, $mem_ptr$$Register,
 7798                 support_IRIW_for_not_multiple_copy_atomic_cpu ? MacroAssembler::MemBarAcq : MacroAssembler::MemBarFenceAfter,
 7799                 MacroAssembler::cmpxchgx_hint_atomic_update(), $res$$Register, true, /*weak*/ true);
 7800   %}
 7801   ins_pipe(pipe_class_default);
 7802 %}
 7803 
 7804 instruct weakCompareAndSwapN_regP_regN_regN(iRegIdst res, iRegPdst mem_ptr, iRegNsrc src1, iRegNsrc src2, flagsRegCR0 cr0) %{
 7805   match(Set res (WeakCompareAndSwapN mem_ptr (Binary src1 src2)));
 7806   predicate(((CompareAndSwapNode*)n)->order() != MemNode::acquire && ((CompareAndSwapNode*)n)->order() != MemNode::seqcst);
 7807   effect(TEMP_DEF res, TEMP cr0); // TEMP_DEF to avoid jump
 7808   format %{ "weak CMPXCHGW $res, $mem_ptr, $src1, $src2; as bool" %}
 7809   ins_encode %{
 7810     // CmpxchgX sets CCR0 to cmpX(src1, src2) and Rres to 'true'/'false'.
 7811     __ cmpxchgw(CCR0, R0, $src1$$Register, $src2$$Register, $mem_ptr$$Register,
 7812                 MacroAssembler::MemBarNone,
 7813                 MacroAssembler::cmpxchgx_hint_atomic_update(), $res$$Register, true, /*weak*/ true);
 7814   %}
 7815   ins_pipe(pipe_class_default);
 7816 %}
 7817 
 7818 instruct weakCompareAndSwapN_acq_regP_regN_regN(iRegIdst res, iRegPdst mem_ptr, iRegNsrc src1, iRegNsrc src2, flagsRegCR0 cr0) %{
 7819   match(Set res (WeakCompareAndSwapN mem_ptr (Binary src1 src2)));
 7820   predicate(((CompareAndSwapNode*)n)->order() == MemNode::acquire || ((CompareAndSwapNode*)n)->order() == MemNode::seqcst);
 7821   effect(TEMP_DEF res, TEMP cr0); // TEMP_DEF to avoid jump
 7822   format %{ "weak CMPXCHGW acq $res, $mem_ptr, $src1, $src2; as bool" %}
 7823   ins_encode %{
 7824     // CmpxchgX sets CCR0 to cmpX(src1, src2) and Rres to 'true'/'false'.
    // Acquire is only needed in the successful case. A weak node may spuriously
    // report failure, and the old value is never passed to the caller.
 7827     __ cmpxchgw(CCR0, R0, $src1$$Register, $src2$$Register, $mem_ptr$$Register,
 7828                 support_IRIW_for_not_multiple_copy_atomic_cpu ? MacroAssembler::MemBarAcq : MacroAssembler::MemBarFenceAfter,
 7829                 MacroAssembler::cmpxchgx_hint_atomic_update(), $res$$Register, true, /*weak*/ true);
 7830   %}
 7831   ins_pipe(pipe_class_default);
 7832 %}
 7833 
 7834 instruct weakCompareAndSwapL_regP_regL_regL(iRegIdst res, iRegPdst mem_ptr, iRegLsrc src1, iRegLsrc src2, flagsRegCR0 cr0) %{
 7835   match(Set res (WeakCompareAndSwapL mem_ptr (Binary src1 src2)));
 7836   predicate(((CompareAndSwapNode*)n)->order() != MemNode::acquire && ((CompareAndSwapNode*)n)->order() != MemNode::seqcst);
 7837   effect(TEMP_DEF res, TEMP cr0); // TEMP_DEF to avoid jump
 7838   format %{ "weak CMPXCHGD $res, $mem_ptr, $src1, $src2; as bool" %}
 7839   ins_encode %{
 7840     // CmpxchgX sets CCR0 to cmpX(src1, src2) and Rres to 'true'/'false'.
 7842     __ cmpxchgd(CCR0, R0, $src1$$Register, $src2$$Register, $mem_ptr$$Register,
 7843                 MacroAssembler::MemBarNone,
 7844                 MacroAssembler::cmpxchgx_hint_atomic_update(), $res$$Register, NULL, true, /*weak*/ true);
 7845   %}
 7846   ins_pipe(pipe_class_default);
 7847 %}
 7848 
 7849 instruct weakCompareAndSwapL_acq_regP_regL_regL(iRegIdst res, iRegPdst mem_ptr, iRegLsrc src1, iRegLsrc src2, flagsRegCR0 cr0) %{
 7850   match(Set res (WeakCompareAndSwapL mem_ptr (Binary src1 src2)));
 7851   predicate(((CompareAndSwapNode*)n)->order() == MemNode::acquire || ((CompareAndSwapNode*)n)->order() == MemNode::seqcst);
 7852   effect(TEMP_DEF res, TEMP cr0); // TEMP_DEF to avoid jump
 7853   format %{ "weak CMPXCHGD acq $res, $mem_ptr, $src1, $src2; as bool" %}
 7854   ins_encode %{
 7855     // CmpxchgX sets CCR0 to cmpX(src1, src2) and Rres to 'true'/'false'.
    // Acquire is only needed in the successful case. A weak node may spuriously
    // report failure, and the old value is never passed to the caller.
 7858     __ cmpxchgd(CCR0, R0, $src1$$Register, $src2$$Register, $mem_ptr$$Register,
 7859                 support_IRIW_for_not_multiple_copy_atomic_cpu ? MacroAssembler::MemBarAcq : MacroAssembler::MemBarFenceAfter,
 7860                 MacroAssembler::cmpxchgx_hint_atomic_update(), $res$$Register, NULL, true, /*weak*/ true);
 7861   %}
 7862   ins_pipe(pipe_class_default);
 7863 %}
 7864 
 7865 instruct weakCompareAndSwapP_regP_regP_regP(iRegIdst res, iRegPdst mem_ptr, iRegPsrc src1, iRegPsrc src2, flagsRegCR0 cr0) %{
 7866   match(Set res (WeakCompareAndSwapP mem_ptr (Binary src1 src2)));
 7867   predicate(((CompareAndSwapNode*)n)->order() != MemNode::acquire && ((CompareAndSwapNode*)n)->order() != MemNode::seqcst);
 7868   effect(TEMP_DEF res, TEMP cr0); // TEMP_DEF to avoid jump
 7869   format %{ "weak CMPXCHGD $res, $mem_ptr, $src1, $src2; as bool; ptr" %}
 7870   ins_encode %{
 7871     // CmpxchgX sets CCR0 to cmpX(src1, src2) and Rres to 'true'/'false'.
 7872     __ cmpxchgd(CCR0, R0, $src1$$Register, $src2$$Register, $mem_ptr$$Register,
 7873                 MacroAssembler::MemBarNone,
 7874                 MacroAssembler::cmpxchgx_hint_atomic_update(), $res$$Register, NULL, true, /*weak*/ true);
 7875   %}
 7876   ins_pipe(pipe_class_default);
 7877 %}
 7878 
 7879 instruct weakCompareAndSwapP_acq_regP_regP_regP(iRegIdst res, iRegPdst mem_ptr, iRegPsrc src1, iRegPsrc src2, flagsRegCR0 cr0) %{
 7880   match(Set res (WeakCompareAndSwapP mem_ptr (Binary src1 src2)));
 7881   predicate(((CompareAndSwapNode*)n)->order() == MemNode::acquire || ((CompareAndSwapNode*)n)->order() == MemNode::seqcst);
 7882   effect(TEMP_DEF res, TEMP cr0); // TEMP_DEF to avoid jump
 7883   format %{ "weak CMPXCHGD acq $res, $mem_ptr, $src1, $src2; as bool; ptr" %}
 7884   ins_encode %{
 7885     // CmpxchgX sets CCR0 to cmpX(src1, src2) and Rres to 'true'/'false'.
    // Acquire is only needed in the successful case. A weak node may spuriously
    // report failure, and the old value is never passed to the caller.
 7888     __ cmpxchgd(CCR0, R0, $src1$$Register, $src2$$Register, $mem_ptr$$Register,
 7889                 support_IRIW_for_not_multiple_copy_atomic_cpu ? MacroAssembler::MemBarAcq : MacroAssembler::MemBarFenceAfter,
 7890                 MacroAssembler::cmpxchgx_hint_atomic_update(), $res$$Register, NULL, true, /*weak*/ true);
 7891   %}
 7892   ins_pipe(pipe_class_default);
 7893 %}
 7894 
 7895 // CompareAndExchange
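//
// Unlike the CompareAndSwap nodes above, these return the value found in memory
// instead of a success flag. This shows up in the cmpxchg argument lists below
// (illustrative excerpt; compare with the encodings above):
//
//   CompareAndSwapI:     cmpxchgw(CCR0, R0,   src1, src2, ptr, ..., $res, ...)   // $res := success as bool
//   CompareAndExchangeI: cmpxchgw(CCR0, $res, src1, src2, ptr, ..., noreg, ...)  // $res := old value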
 7896 
 7897 instruct compareAndExchangeB_regP_regI_regI(iRegIdst res, iRegPdst mem_ptr, iRegIsrc src1, iRegIsrc src2, flagsRegCR0 cr0) %{
 7898   match(Set res (CompareAndExchangeB mem_ptr (Binary src1 src2)));
 7899   predicate(((CompareAndSwapNode*)n)->order() != MemNode::acquire && ((CompareAndSwapNode*)n)->order() != MemNode::seqcst && VM_Version::has_lqarx());
 7900   effect(TEMP_DEF res, TEMP cr0);
 7901   format %{ "CMPXCHGB $res, $mem_ptr, $src1, $src2; as int" %}
 7902   ins_encode %{
 7903     // CmpxchgX sets CCR0 to cmpX(src1, src2) and Rres to 'true'/'false'.
 7904     __ cmpxchgb(CCR0, $res$$Register, $src1$$Register, $src2$$Register, $mem_ptr$$Register, noreg, noreg,
 7905                 MacroAssembler::MemBarNone, MacroAssembler::cmpxchgx_hint_atomic_update(),
 7906                 noreg, true);
 7907   %}
 7908   ins_pipe(pipe_class_default);
 7909 %}
 7910 
 7911 instruct compareAndExchangeB4_regP_regI_regI(iRegIdst res, rarg3RegP mem_ptr, iRegIsrc src1, rarg4RegI src2, iRegIdst tmp1, flagsRegCR0 cr0) %{
 7912   match(Set res (CompareAndExchangeB mem_ptr (Binary src1 src2)));
 7913   predicate(((CompareAndSwapNode*)n)->order() != MemNode::acquire && ((CompareAndSwapNode*)n)->order() != MemNode::seqcst && !VM_Version::has_lqarx());
 7914   effect(TEMP_DEF res, USE_KILL src2, USE_KILL mem_ptr, TEMP tmp1, TEMP cr0);
 7915   format %{ "CMPXCHGB $res, $mem_ptr, $src1, $src2; as int" %}
 7916   ins_encode %{
 7917     // CmpxchgX sets CCR0 to cmpX(src1, src2) and Rres to 'true'/'false'.
 7918     __ cmpxchgb(CCR0, $res$$Register, $src1$$Register, $src2$$Register, $mem_ptr$$Register, $tmp1$$Register, R0,
 7919                 MacroAssembler::MemBarNone, MacroAssembler::cmpxchgx_hint_atomic_update(),
 7920                 noreg, true);
 7921   %}
 7922   ins_pipe(pipe_class_default);
 7923 %}
 7924 
 7925 instruct compareAndExchangeB_acq_regP_regI_regI(iRegIdst res, iRegPdst mem_ptr, iRegIsrc src1, iRegIsrc src2, flagsRegCR0 cr0) %{
 7926   match(Set res (CompareAndExchangeB mem_ptr (Binary src1 src2)));
 7927   predicate((((CompareAndSwapNode*)n)->order() == MemNode::acquire || ((CompareAndSwapNode*)n)->order() == MemNode::seqcst) && VM_Version::has_lqarx());
 7928   effect(TEMP_DEF res, TEMP cr0);
 7929   format %{ "CMPXCHGB acq $res, $mem_ptr, $src1, $src2; as int" %}
 7930   ins_encode %{
 7931     // CmpxchgX sets CCR0 to cmpX(src1, src2) and Rres to 'true'/'false'.
 7932     __ cmpxchgb(CCR0, $res$$Register, $src1$$Register, $src2$$Register, $mem_ptr$$Register, noreg, noreg,
 7933                 MacroAssembler::MemBarNone, MacroAssembler::cmpxchgx_hint_atomic_update(),
 7934                 noreg, true);
 7935     if (support_IRIW_for_not_multiple_copy_atomic_cpu) {
 7936       __ isync();
 7937     } else {
 7938       // isync would be sufficient in case of CompareAndExchangeAcquire, but we currently don't optimize for that.
 7939       __ sync();
 7940     }
 7941   %}
 7942   ins_pipe(pipe_class_default);
 7943 %}
 7944 
 7945 instruct compareAndExchangeB4_acq_regP_regI_regI(iRegIdst res, rarg3RegP mem_ptr, iRegIsrc src1, rarg4RegI src2, iRegIdst tmp1, flagsRegCR0 cr0) %{
 7946   match(Set res (CompareAndExchangeB mem_ptr (Binary src1 src2)));
 7947   predicate((((CompareAndSwapNode*)n)->order() == MemNode::acquire || ((CompareAndSwapNode*)n)->order() == MemNode::seqcst) && !VM_Version::has_lqarx());
 7948   effect(TEMP_DEF res, USE_KILL src2, USE_KILL mem_ptr, TEMP tmp1, TEMP cr0);
 7949   format %{ "CMPXCHGB acq $res, $mem_ptr, $src1, $src2; as int" %}
 7950   ins_encode %{
 7951     // CmpxchgX sets CCR0 to cmpX(src1, src2) and Rres to 'true'/'false'.
 7952     __ cmpxchgb(CCR0, $res$$Register, $src1$$Register, $src2$$Register, $mem_ptr$$Register, $tmp1$$Register, R0,
 7953                 MacroAssembler::MemBarNone, MacroAssembler::cmpxchgx_hint_atomic_update(),
 7954                 noreg, true);
 7955     if (support_IRIW_for_not_multiple_copy_atomic_cpu) {
 7956       __ isync();
 7957     } else {
 7958       // isync would be sufficient in case of CompareAndExchangeAcquire, but we currently don't optimize for that.
 7959       __ sync();
 7960     }
 7961   %}
 7962   ins_pipe(pipe_class_default);
 7963 %}
 7964 
 7965 instruct compareAndExchangeS_regP_regI_regI(iRegIdst res, iRegPdst mem_ptr, iRegIsrc src1, iRegIsrc src2, flagsRegCR0 cr0) %{
 7966   match(Set res (CompareAndExchangeS mem_ptr (Binary src1 src2)));
 7967   predicate(((CompareAndSwapNode*)n)->order() != MemNode::acquire && ((CompareAndSwapNode*)n)->order() != MemNode::seqcst && VM_Version::has_lqarx());
 7968   effect(TEMP_DEF res, TEMP cr0);
 7969   format %{ "CMPXCHGH $res, $mem_ptr, $src1, $src2; as int" %}
 7970   ins_encode %{
 7971     // CmpxchgX sets CCR0 to cmpX(src1, src2) and Rres to 'true'/'false'.
 7972     __ cmpxchgh(CCR0, $res$$Register, $src1$$Register, $src2$$Register, $mem_ptr$$Register, noreg, noreg,
 7973                 MacroAssembler::MemBarNone, MacroAssembler::cmpxchgx_hint_atomic_update(),
 7974                 noreg, true);
 7975   %}
 7976   ins_pipe(pipe_class_default);
 7977 %}
 7978 
 7979 instruct compareAndExchangeS4_regP_regI_regI(iRegIdst res, rarg3RegP mem_ptr, iRegIsrc src1, rarg4RegI src2, iRegIdst tmp1, flagsRegCR0 cr0) %{
 7980   match(Set res (CompareAndExchangeS mem_ptr (Binary src1 src2)));
 7981   predicate(((CompareAndSwapNode*)n)->order() != MemNode::acquire && ((CompareAndSwapNode*)n)->order() != MemNode::seqcst && !VM_Version::has_lqarx());
 7982   effect(TEMP_DEF res, USE_KILL src2, USE_KILL mem_ptr, TEMP tmp1, TEMP cr0);
 7983   format %{ "CMPXCHGH $res, $mem_ptr, $src1, $src2; as int" %}
 7984   ins_encode %{
 7985     // CmpxchgX sets CCR0 to cmpX(src1, src2) and Rres to 'true'/'false'.
 7986     __ cmpxchgh(CCR0, $res$$Register, $src1$$Register, $src2$$Register, $mem_ptr$$Register, $tmp1$$Register, R0,
 7987                 MacroAssembler::MemBarNone, MacroAssembler::cmpxchgx_hint_atomic_update(),
 7988                 noreg, true);
 7989   %}
 7990   ins_pipe(pipe_class_default);
 7991 %}
 7992 
 7993 instruct compareAndExchangeS_acq_regP_regI_regI(iRegIdst res, iRegPdst mem_ptr, iRegIsrc src1, iRegIsrc src2, flagsRegCR0 cr0) %{
 7994   match(Set res (CompareAndExchangeS mem_ptr (Binary src1 src2)));
 7995   predicate((((CompareAndSwapNode*)n)->order() == MemNode::acquire || ((CompareAndSwapNode*)n)->order() == MemNode::seqcst) && VM_Version::has_lqarx());
 7996   effect(TEMP_DEF res, TEMP cr0);
 7997   format %{ "CMPXCHGH acq $res, $mem_ptr, $src1, $src2; as int" %}
 7998   ins_encode %{
 7999     // CmpxchgX sets CCR0 to cmpX(src1, src2) and Rres to 'true'/'false'.
 8000     __ cmpxchgh(CCR0, $res$$Register, $src1$$Register, $src2$$Register, $mem_ptr$$Register, noreg, noreg,
 8001                 MacroAssembler::MemBarNone, MacroAssembler::cmpxchgx_hint_atomic_update(),
 8002                 noreg, true);
 8003     if (support_IRIW_for_not_multiple_copy_atomic_cpu) {
 8004       __ isync();
 8005     } else {
 8006       // isync would be sufficient in case of CompareAndExchangeAcquire, but we currently don't optimize for that.
 8007       __ sync();
 8008     }
 8009   %}
 8010   ins_pipe(pipe_class_default);
 8011 %}
 8012 
 8013 instruct compareAndExchangeS4_acq_regP_regI_regI(iRegIdst res, rarg3RegP mem_ptr, iRegIsrc src1, rarg4RegI src2, iRegIdst tmp1, flagsRegCR0 cr0) %{
 8014   match(Set res (CompareAndExchangeS mem_ptr (Binary src1 src2)));
 8015   predicate((((CompareAndSwapNode*)n)->order() == MemNode::acquire || ((CompareAndSwapNode*)n)->order() == MemNode::seqcst) && !VM_Version::has_lqarx());
 8016   effect(TEMP_DEF res, USE_KILL src2, USE_KILL mem_ptr, TEMP tmp1, TEMP cr0);
 8017   format %{ "CMPXCHGH acq $res, $mem_ptr, $src1, $src2; as int" %}
 8018   ins_encode %{
 8019     // CmpxchgX sets CCR0 to cmpX(src1, src2) and Rres to 'true'/'false'.
 8020     __ cmpxchgh(CCR0, $res$$Register, $src1$$Register, $src2$$Register, $mem_ptr$$Register, $tmp1$$Register, R0,
 8021                 MacroAssembler::MemBarNone, MacroAssembler::cmpxchgx_hint_atomic_update(),
 8022                 noreg, true);
 8023     if (support_IRIW_for_not_multiple_copy_atomic_cpu) {
 8024       __ isync();
 8025     } else {
 8026       // isync would be sufficient in case of CompareAndExchangeAcquire, but we currently don't optimize for that.
 8027       __ sync();
 8028     }
 8029   %}
 8030   ins_pipe(pipe_class_default);
 8031 %}
 8032 
 8033 instruct compareAndExchangeI_regP_regI_regI(iRegIdst res, iRegPdst mem_ptr, iRegIsrc src1, iRegIsrc src2, flagsRegCR0 cr0) %{
 8034   match(Set res (CompareAndExchangeI mem_ptr (Binary src1 src2)));
 8035   predicate(((CompareAndSwapNode*)n)->order() != MemNode::acquire && ((CompareAndSwapNode*)n)->order() != MemNode::seqcst);
 8036   effect(TEMP_DEF res, TEMP cr0);
 8037   format %{ "CMPXCHGW $res, $mem_ptr, $src1, $src2; as int" %}
 8038   ins_encode %{
 8039     // CmpxchgX sets CCR0 to cmpX(src1, src2) and Rres to 'true'/'false'.
 8040     __ cmpxchgw(CCR0, $res$$Register, $src1$$Register, $src2$$Register, $mem_ptr$$Register,
 8041                 MacroAssembler::MemBarNone, MacroAssembler::cmpxchgx_hint_atomic_update(),
 8042                 noreg, true);
 8043   %}
 8044   ins_pipe(pipe_class_default);
 8045 %}
 8046 
 8047 instruct compareAndExchangeI_acq_regP_regI_regI(iRegIdst res, iRegPdst mem_ptr, iRegIsrc src1, iRegIsrc src2, flagsRegCR0 cr0) %{
 8048   match(Set res (CompareAndExchangeI mem_ptr (Binary src1 src2)));
 8049   predicate(((CompareAndSwapNode*)n)->order() == MemNode::acquire || ((CompareAndSwapNode*)n)->order() == MemNode::seqcst);
 8050   effect(TEMP_DEF res, TEMP cr0);
 8051   format %{ "CMPXCHGW acq $res, $mem_ptr, $src1, $src2; as int" %}
 8052   ins_encode %{
 8053     // CmpxchgX sets CCR0 to cmpX(src1, src2) and Rres to 'true'/'false'.
 8054     __ cmpxchgw(CCR0, $res$$Register, $src1$$Register, $src2$$Register, $mem_ptr$$Register,
 8055                 MacroAssembler::MemBarNone, MacroAssembler::cmpxchgx_hint_atomic_update(),
 8056                 noreg, true);
 8057     if (support_IRIW_for_not_multiple_copy_atomic_cpu) {
 8058       __ isync();
 8059     } else {
 8060       // isync would be sufficient in case of CompareAndExchangeAcquire, but we currently don't optimize for that.
 8061       __ sync();
 8062     }
 8063   %}
 8064   ins_pipe(pipe_class_default);
 8065 %}
 8066 
 8067 instruct compareAndExchangeN_regP_regN_regN(iRegNdst res, iRegPdst mem_ptr, iRegNsrc src1, iRegNsrc src2, flagsRegCR0 cr0) %{
 8068   match(Set res (CompareAndExchangeN mem_ptr (Binary src1 src2)));
 8069   predicate(((CompareAndSwapNode*)n)->order() != MemNode::acquire && ((CompareAndSwapNode*)n)->order() != MemNode::seqcst);
 8070   effect(TEMP_DEF res, TEMP cr0);
 8071   format %{ "CMPXCHGW $res, $mem_ptr, $src1, $src2; as narrow oop" %}
 8072   ins_encode %{
 8073     // CmpxchgX sets CCR0 to cmpX(src1, src2) and Rres to 'true'/'false'.
 8074     __ cmpxchgw(CCR0, $res$$Register, $src1$$Register, $src2$$Register, $mem_ptr$$Register,
 8075                 MacroAssembler::MemBarNone, MacroAssembler::cmpxchgx_hint_atomic_update(),
 8076                 noreg, true);
 8077   %}
 8078   ins_pipe(pipe_class_default);
 8079 %}
 8080 
 8081 instruct compareAndExchangeN_acq_regP_regN_regN(iRegNdst res, iRegPdst mem_ptr, iRegNsrc src1, iRegNsrc src2, flagsRegCR0 cr0) %{
 8082   match(Set res (CompareAndExchangeN mem_ptr (Binary src1 src2)));
 8083   predicate(((CompareAndSwapNode*)n)->order() == MemNode::acquire || ((CompareAndSwapNode*)n)->order() == MemNode::seqcst);
 8084   effect(TEMP_DEF res, TEMP cr0);
 8085   format %{ "CMPXCHGW acq $res, $mem_ptr, $src1, $src2; as narrow oop" %}
 8086   ins_encode %{
 8087     // CmpxchgX sets CCR0 to cmpX(src1, src2) and Rres to 'true'/'false'.
 8088     __ cmpxchgw(CCR0, $res$$Register, $src1$$Register, $src2$$Register, $mem_ptr$$Register,
 8089                 MacroAssembler::MemBarNone, MacroAssembler::cmpxchgx_hint_atomic_update(),
 8090                 noreg, true);
 8091     if (support_IRIW_for_not_multiple_copy_atomic_cpu) {
 8092       __ isync();
 8093     } else {
 8094       // isync would be sufficient in case of CompareAndExchangeAcquire, but we currently don't optimize for that.
 8095       __ sync();
 8096     }
 8097   %}
 8098   ins_pipe(pipe_class_default);
 8099 %}
 8100 
 8101 instruct compareAndExchangeL_regP_regL_regL(iRegLdst res, iRegPdst mem_ptr, iRegLsrc src1, iRegLsrc src2, flagsRegCR0 cr0) %{
 8102   match(Set res (CompareAndExchangeL mem_ptr (Binary src1 src2)));
 8103   predicate(((CompareAndSwapNode*)n)->order() != MemNode::acquire && ((CompareAndSwapNode*)n)->order() != MemNode::seqcst);
 8104   effect(TEMP_DEF res, TEMP cr0);
 8105   format %{ "CMPXCHGD $res, $mem_ptr, $src1, $src2; as long" %}
 8106   ins_encode %{
 8107     // CmpxchgX sets CCR0 to cmpX(src1, src2) and Rres to 'true'/'false'.
 8108     __ cmpxchgd(CCR0, $res$$Register, $src1$$Register, $src2$$Register, $mem_ptr$$Register,
 8109                 MacroAssembler::MemBarNone, MacroAssembler::cmpxchgx_hint_atomic_update(),
 8110                 noreg, NULL, true);
 8111   %}
 8112   ins_pipe(pipe_class_default);
 8113 %}
 8114 
 8115 instruct compareAndExchangeL_acq_regP_regL_regL(iRegLdst res, iRegPdst mem_ptr, iRegLsrc src1, iRegLsrc src2, flagsRegCR0 cr0) %{
 8116   match(Set res (CompareAndExchangeL mem_ptr (Binary src1 src2)));
 8117   predicate(((CompareAndSwapNode*)n)->order() == MemNode::acquire || ((CompareAndSwapNode*)n)->order() == MemNode::seqcst);
 8118   effect(TEMP_DEF res, TEMP cr0);
 8119   format %{ "CMPXCHGD acq $res, $mem_ptr, $src1, $src2; as long" %}
 8120   ins_encode %{
 8121     // CmpxchgX sets CCR0 to cmpX(src1, src2) and Rres to 'true'/'false'.
 8122     __ cmpxchgd(CCR0, $res$$Register, $src1$$Register, $src2$$Register, $mem_ptr$$Register,
 8123                 MacroAssembler::MemBarNone, MacroAssembler::cmpxchgx_hint_atomic_update(),
 8124                 noreg, NULL, true);
 8125     if (support_IRIW_for_not_multiple_copy_atomic_cpu) {
 8126       __ isync();
 8127     } else {
 8128       // isync would be sufficient in case of CompareAndExchangeAcquire, but we currently don't optimize for that.
 8129       __ sync();
 8130     }
 8131   %}
 8132   ins_pipe(pipe_class_default);
 8133 %}
 8134 
 8135 instruct compareAndExchangeP_regP_regP_regP(iRegPdst res, iRegPdst mem_ptr, iRegPsrc src1, iRegPsrc src2, flagsRegCR0 cr0) %{
 8136   match(Set res (CompareAndExchangeP mem_ptr (Binary src1 src2)));
 8137   predicate(((CompareAndSwapNode*)n)->order() != MemNode::acquire && ((CompareAndSwapNode*)n)->order() != MemNode::seqcst);
 8138   effect(TEMP_DEF res, TEMP cr0);
 8139   format %{ "CMPXCHGD $res, $mem_ptr, $src1, $src2; as ptr; ptr" %}
 8140   ins_encode %{
 8141     // CmpxchgX sets CCR0 to cmpX(src1, src2) and Rres to 'true'/'false'.
 8142     __ cmpxchgd(CCR0, $res$$Register, $src1$$Register, $src2$$Register, $mem_ptr$$Register,
 8143                 MacroAssembler::MemBarNone, MacroAssembler::cmpxchgx_hint_atomic_update(),
 8144                 noreg, NULL, true);
 8145   %}
 8146   ins_pipe(pipe_class_default);
 8147 %}
 8148 
 8149 instruct compareAndExchangeP_acq_regP_regP_regP(iRegPdst res, iRegPdst mem_ptr, iRegPsrc src1, iRegPsrc src2, flagsRegCR0 cr0) %{
 8150   match(Set res (CompareAndExchangeP mem_ptr (Binary src1 src2)));
 8151   predicate(((CompareAndSwapNode*)n)->order() == MemNode::acquire || ((CompareAndSwapNode*)n)->order() == MemNode::seqcst);
 8152   effect(TEMP_DEF res, TEMP cr0);
 8153   format %{ "CMPXCHGD acq $res, $mem_ptr, $src1, $src2; as ptr; ptr" %}
 8154   ins_encode %{
 8155     // CmpxchgX sets CCR0 to cmpX(src1, src2) and Rres to 'true'/'false'.
 8156     __ cmpxchgd(CCR0, $res$$Register, $src1$$Register, $src2$$Register, $mem_ptr$$Register,
 8157                 MacroAssembler::MemBarNone, MacroAssembler::cmpxchgx_hint_atomic_update(),
 8158                 noreg, NULL, true);
 8159     if (support_IRIW_for_not_multiple_copy_atomic_cpu) {
 8160       __ isync();
 8161     } else {
 8162       // isync would be sufficient in case of CompareAndExchangeAcquire, but we currently don't optimize for that.
 8163       __ sync();
 8164     }
 8165   %}
 8166   ins_pipe(pipe_class_default);
 8167 %}
 8168 
 8169 // Special RMW
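//
// The MacroAssembler helpers used below (getandadd*/getandset*) implement the
// usual fetch-and-op semantics, returning the old value in res. Roughly
// (illustrative pseudo-code only):
//
//   getandaddX(res, src, ptr):  atomically { res = *ptr; *ptr = res + src; }
//   getandsetX(res, src, ptr):  atomically { res = *ptr; *ptr = src; }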
 8170 
 8171 instruct getAndAddB(iRegIdst res, iRegPdst mem_ptr, iRegIsrc src, flagsRegCR0 cr0) %{
 8172   match(Set res (GetAndAddB mem_ptr src));
 8173   predicate(VM_Version::has_lqarx());
 8174   effect(TEMP_DEF res, TEMP cr0);
 8175   format %{ "GetAndAddB $res, $mem_ptr, $src" %}
 8176   ins_encode %{
 8177     __ getandaddb($res$$Register, $src$$Register, $mem_ptr$$Register,
 8178                   R0, noreg, noreg, MacroAssembler::cmpxchgx_hint_atomic_update());
 8179     if (support_IRIW_for_not_multiple_copy_atomic_cpu) {
 8180       __ isync();
 8181     } else {
 8182       __ sync();
 8183     }
 8184   %}
 8185   ins_pipe(pipe_class_default);
 8186 %}
 8187 
 8188 instruct getAndAddB4(iRegIdst res, rarg3RegP mem_ptr, iRegIsrc src, iRegIsrc tmp1, iRegIsrc tmp2, flagsRegCR0 cr0) %{
 8189   match(Set res (GetAndAddB mem_ptr src));
 8190   predicate(!VM_Version::has_lqarx());
 8191   effect(TEMP_DEF res, USE_KILL mem_ptr, TEMP tmp1, TEMP tmp2, TEMP cr0);
 8192   format %{ "GetAndAddB $res, $mem_ptr, $src" %}
 8193   ins_encode %{
 8194     __ getandaddb($res$$Register, $src$$Register, $mem_ptr$$Register,
 8195                   R0, $tmp1$$Register, $tmp2$$Register, MacroAssembler::cmpxchgx_hint_atomic_update());
 8196     if (support_IRIW_for_not_multiple_copy_atomic_cpu) {
 8197       __ isync();
 8198     } else {
 8199       __ sync();
 8200     }
 8201   %}
 8202   ins_pipe(pipe_class_default);
 8203 %}
 8204 
 8205 instruct getAndAddS(iRegIdst res, iRegPdst mem_ptr, iRegIsrc src, flagsRegCR0 cr0) %{
 8206   match(Set res (GetAndAddS mem_ptr src));
 8207   predicate(VM_Version::has_lqarx());
 8208   effect(TEMP_DEF res, TEMP cr0);
 8209   format %{ "GetAndAddS $res, $mem_ptr, $src" %}
 8210   ins_encode %{
 8211     __ getandaddh($res$$Register, $src$$Register, $mem_ptr$$Register,
 8212                   R0, noreg, noreg, MacroAssembler::cmpxchgx_hint_atomic_update());
 8213     if (support_IRIW_for_not_multiple_copy_atomic_cpu) {
 8214       __ isync();
 8215     } else {
 8216       __ sync();
 8217     }
 8218   %}
 8219   ins_pipe(pipe_class_default);
 8220 %}
 8221 
 8222 instruct getAndAddS4(iRegIdst res, rarg3RegP mem_ptr, iRegIsrc src, iRegIsrc tmp1, iRegIsrc tmp2, flagsRegCR0 cr0) %{
 8223   match(Set res (GetAndAddS mem_ptr src));
 8224   predicate(!VM_Version::has_lqarx());
 8225   effect(TEMP_DEF res, USE_KILL mem_ptr, TEMP tmp1, TEMP tmp2, TEMP cr0);
 8226   format %{ "GetAndAddS $res, $mem_ptr, $src" %}
 8227   ins_encode %{
 8228     __ getandaddh($res$$Register, $src$$Register, $mem_ptr$$Register,
 8229                   R0, $tmp1$$Register, $tmp2$$Register, MacroAssembler::cmpxchgx_hint_atomic_update());
 8230     if (support_IRIW_for_not_multiple_copy_atomic_cpu) {
 8231       __ isync();
 8232     } else {
 8233       __ sync();
 8234     }
 8235   %}
 8236   ins_pipe(pipe_class_default);
 8237 %}
 8238 
 8239 instruct getAndAddI(iRegIdst res, iRegPdst mem_ptr, iRegIsrc src, flagsRegCR0 cr0) %{
 8240   match(Set res (GetAndAddI mem_ptr src));
 8241   effect(TEMP_DEF res, TEMP cr0);
 8242   format %{ "GetAndAddI $res, $mem_ptr, $src" %}
 8243   ins_encode %{
 8244     __ getandaddw($res$$Register, $src$$Register, $mem_ptr$$Register,
 8245                   R0, MacroAssembler::cmpxchgx_hint_atomic_update());
 8246     if (support_IRIW_for_not_multiple_copy_atomic_cpu) {
 8247       __ isync();
 8248     } else {
 8249       __ sync();
 8250     }
 8251   %}
 8252   ins_pipe(pipe_class_default);
 8253 %}
 8254 
 8255 instruct getAndAddL(iRegLdst res, iRegPdst mem_ptr, iRegLsrc src, flagsRegCR0 cr0) %{
 8256   match(Set res (GetAndAddL mem_ptr src));
 8257   effect(TEMP_DEF res, TEMP cr0);
 8258   format %{ "GetAndAddL $res, $mem_ptr, $src" %}
 8259   ins_encode %{
 8260     __ getandaddd($res$$Register, $src$$Register, $mem_ptr$$Register,
 8261                   R0, MacroAssembler::cmpxchgx_hint_atomic_update());
 8262     if (support_IRIW_for_not_multiple_copy_atomic_cpu) {
 8263       __ isync();
 8264     } else {
 8265       __ sync();
 8266     }
 8267   %}
 8268   ins_pipe(pipe_class_default);
 8269 %}
 8270 
 8271 instruct getAndSetB(iRegIdst res, iRegPdst mem_ptr, iRegIsrc src, flagsRegCR0 cr0) %{
 8272   match(Set res (GetAndSetB mem_ptr src));
 8273   predicate(VM_Version::has_lqarx());
 8274   effect(TEMP_DEF res, TEMP cr0);
 8275   format %{ "GetAndSetB $res, $mem_ptr, $src" %}
 8276   ins_encode %{
 8277     __ getandsetb($res$$Register, $src$$Register, $mem_ptr$$Register,
 8278                   noreg, noreg, noreg, MacroAssembler::cmpxchgx_hint_atomic_update());
 8279     if (support_IRIW_for_not_multiple_copy_atomic_cpu) {
 8280       __ isync();
 8281     } else {
 8282       __ sync();
 8283     }
 8284   %}
 8285   ins_pipe(pipe_class_default);
 8286 %}
 8287 
 8288 instruct getAndSetB4(iRegIdst res, rarg3RegP mem_ptr, iRegIsrc src, iRegIsrc tmp1, iRegIsrc tmp2, flagsRegCR0 cr0) %{
 8289   match(Set res (GetAndSetB mem_ptr src));
 8290   predicate(!VM_Version::has_lqarx());
 8291   effect(TEMP_DEF res, USE_KILL mem_ptr, TEMP tmp1, TEMP tmp2, TEMP cr0);
 8292   format %{ "GetAndSetB $res, $mem_ptr, $src" %}
 8293   ins_encode %{
 8294     __ getandsetb($res$$Register, $src$$Register, $mem_ptr$$Register,
 8295                   R0, $tmp1$$Register, $tmp2$$Register, MacroAssembler::cmpxchgx_hint_atomic_update());
 8296     if (support_IRIW_for_not_multiple_copy_atomic_cpu) {
 8297       __ isync();
 8298     } else {
 8299       __ sync();
 8300     }
 8301   %}
 8302   ins_pipe(pipe_class_default);
 8303 %}
 8304 
 8305 instruct getAndSetS(iRegIdst res, iRegPdst mem_ptr, iRegIsrc src, flagsRegCR0 cr0) %{
 8306   match(Set res (GetAndSetS mem_ptr src));
 8307   predicate(VM_Version::has_lqarx());
 8308   effect(TEMP_DEF res, TEMP cr0);
 8309   format %{ "GetAndSetS $res, $mem_ptr, $src" %}
 8310   ins_encode %{
 8311     __ getandseth($res$$Register, $src$$Register, $mem_ptr$$Register,
 8312                   noreg, noreg, noreg, MacroAssembler::cmpxchgx_hint_atomic_update());
 8313     if (support_IRIW_for_not_multiple_copy_atomic_cpu) {
 8314       __ isync();
 8315     } else {
 8316       __ sync();
 8317     }
 8318   %}
 8319   ins_pipe(pipe_class_default);
 8320 %}
 8321 
 8322 instruct getAndSetS4(iRegIdst res, rarg3RegP mem_ptr, iRegIsrc src, iRegIsrc tmp1, iRegIsrc tmp2, flagsRegCR0 cr0) %{
 8323   match(Set res (GetAndSetS mem_ptr src));
 8324   predicate(!VM_Version::has_lqarx());
 8325   effect(TEMP_DEF res, USE_KILL mem_ptr, TEMP tmp1, TEMP tmp2, TEMP cr0);
 8326   format %{ "GetAndSetS $res, $mem_ptr, $src" %}
 8327   ins_encode %{
 8328     __ getandseth($res$$Register, $src$$Register, $mem_ptr$$Register,
 8329                   R0, $tmp1$$Register, $tmp2$$Register, MacroAssembler::cmpxchgx_hint_atomic_update());
 8330     if (support_IRIW_for_not_multiple_copy_atomic_cpu) {
 8331       __ isync();
 8332     } else {
 8333       __ sync();
 8334     }
 8335   %}
 8336   ins_pipe(pipe_class_default);
 8337 %}
 8338 
 8339 instruct getAndSetI(iRegIdst res, iRegPdst mem_ptr, iRegIsrc src, flagsRegCR0 cr0) %{
 8340   match(Set res (GetAndSetI mem_ptr src));
 8341   effect(TEMP_DEF res, TEMP cr0);
 8342   format %{ "GetAndSetI $res, $mem_ptr, $src" %}
 8343   ins_encode %{
 8344     __ getandsetw($res$$Register, $src$$Register, $mem_ptr$$Register,
 8345                   MacroAssembler::cmpxchgx_hint_atomic_update());
 8346     if (support_IRIW_for_not_multiple_copy_atomic_cpu) {
 8347       __ isync();
 8348     } else {
 8349       __ sync();
 8350     }
 8351   %}
 8352   ins_pipe(pipe_class_default);
 8353 %}
 8354 
 8355 instruct getAndSetL(iRegLdst res, iRegPdst mem_ptr, iRegLsrc src, flagsRegCR0 cr0) %{
 8356   match(Set res (GetAndSetL mem_ptr src));
 8357   effect(TEMP_DEF res, TEMP cr0);
 8358   format %{ "GetAndSetL $res, $mem_ptr, $src" %}
 8359   ins_encode %{
 8360     __ getandsetd($res$$Register, $src$$Register, $mem_ptr$$Register,
 8361                   MacroAssembler::cmpxchgx_hint_atomic_update());
 8362     if (support_IRIW_for_not_multiple_copy_atomic_cpu) {
 8363       __ isync();
 8364     } else {
 8365       __ sync();
 8366     }
 8367   %}
 8368   ins_pipe(pipe_class_default);
 8369 %}
 8370 
 8371 instruct getAndSetP(iRegPdst res, iRegPdst mem_ptr, iRegPsrc src, flagsRegCR0 cr0) %{
 8372   match(Set res (GetAndSetP mem_ptr src));
 8373   effect(TEMP_DEF res, TEMP cr0);
 8374   format %{ "GetAndSetP $res, $mem_ptr, $src" %}
 8375   ins_encode %{
 8376     __ getandsetd($res$$Register, $src$$Register, $mem_ptr$$Register,
 8377                   MacroAssembler::cmpxchgx_hint_atomic_update());
 8378     if (support_IRIW_for_not_multiple_copy_atomic_cpu) {
 8379       __ isync();
 8380     } else {
 8381       __ sync();
 8382     }
 8383   %}
 8384   ins_pipe(pipe_class_default);
 8385 %}
 8386 
 8387 instruct getAndSetN(iRegNdst res, iRegPdst mem_ptr, iRegNsrc src, flagsRegCR0 cr0) %{
 8388   match(Set res (GetAndSetN mem_ptr src));
 8389   effect(TEMP_DEF res, TEMP cr0);
 8390   format %{ "GetAndSetN $res, $mem_ptr, $src" %}
 8391   ins_encode %{
 8392     __ getandsetw($res$$Register, $src$$Register, $mem_ptr$$Register,
 8393                   MacroAssembler::cmpxchgx_hint_atomic_update());
 8394     if (support_IRIW_for_not_multiple_copy_atomic_cpu) {
 8395       __ isync();
 8396     } else {
 8397       __ sync();
 8398     }
 8399   %}
 8400   ins_pipe(pipe_class_default);
 8401 %}
 8402 
 8403 //----------Arithmetic Instructions--------------------------------------------
 8404 // Addition Instructions
 8405 
 8406 // Register Addition
 8407 instruct addI_reg_reg(iRegIdst dst, iRegIsrc_iRegL2Isrc src1, iRegIsrc_iRegL2Isrc src2) %{
 8408   match(Set dst (AddI src1 src2));
 8409   format %{ "ADD     $dst, $src1, $src2" %}
 8410   size(4);
 8411   ins_encode %{
 8412     __ add($dst$$Register, $src1$$Register, $src2$$Register);
 8413   %}
 8414   ins_pipe(pipe_class_default);
 8415 %}
 8416 
 8417 // Expand does not work with above instruct. (??)
 8418 instruct addI_reg_reg_2(iRegIdst dst, iRegIsrc src1, iRegIsrc src2) %{
 8419   // no match-rule
 8420   effect(DEF dst, USE src1, USE src2);
 8421   format %{ "ADD     $dst, $src1, $src2" %}
 8422   size(4);
 8423   ins_encode %{
 8424     __ add($dst$$Register, $src1$$Register, $src2$$Register);
 8425   %}
 8426   ins_pipe(pipe_class_default);
 8427 %}
 8428 
 8429 instruct tree_addI_addI_addI_reg_reg_Ex(iRegIdst dst, iRegIsrc src1, iRegIsrc src2, iRegIsrc src3, iRegIsrc src4) %{
 8430   match(Set dst (AddI (AddI (AddI src1 src2) src3) src4));
 8431   ins_cost(DEFAULT_COST*3);
 8432 
 8433   expand %{
 8434     // FIXME: we should do this in the ideal world.
 8435     iRegIdst tmp1;
 8436     iRegIdst tmp2;
 8437     addI_reg_reg(tmp1, src1, src2);
 8438     addI_reg_reg_2(tmp2, src3, src4); // Adlc complains about addI_reg_reg.
 8439     addI_reg_reg(dst, tmp1, tmp2);
 8440   %}
 8441 %}
 8442 
 8443 // Immediate Addition
 8444 instruct addI_reg_imm16(iRegIdst dst, iRegIsrc src1, immI16 src2) %{
 8445   match(Set dst (AddI src1 src2));
 8446   format %{ "ADDI    $dst, $src1, $src2" %}
 8447   size(4);
 8448   ins_encode %{
 8449     __ addi($dst$$Register, $src1$$Register, $src2$$constant);
 8450   %}
 8451   ins_pipe(pipe_class_default);
 8452 %}
 8453 
 8454 // Immediate Addition with 16-bit shifted operand
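// Worked example (illustrative): immIhi16 constants carry their payload in the
// upper 16 bits, so for $src2 == 0x12340000 the encoding below emits
// "ADDIS $dst, $src1, 0x1234", i.e. it adds 0x1234 << 16 == 0x12340000.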
 8455 instruct addI_reg_immhi16(iRegIdst dst, iRegIsrc src1, immIhi16 src2) %{
 8456   match(Set dst (AddI src1 src2));
 8457   format %{ "ADDIS   $dst, $src1, $src2" %}
 8458   size(4);
 8459   ins_encode %{
 8460     __ addis($dst$$Register, $src1$$Register, ($src2$$constant)>>16);
 8461   %}
 8462   ins_pipe(pipe_class_default);
 8463 %}
 8464 
 8465 // Immediate Addition using prefixed addi
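// The assert and ins_alignment(2) below keep the 8-byte prefixed instruction
// from crossing a 64-byte boundary, which prefixed instructions must not do:
// (pc & 0x3c) == 0x3c means pc % 64 == 60, so an 8-byte paddi starting there
// would straddle the boundary at byte 64.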
 8466 instruct addI_reg_imm32(iRegIdst dst, iRegIsrc src1, immI32 src2) %{
 8467   match(Set dst (AddI src1 src2));
 8468   predicate(PowerArchitecturePPC64 >= 10);
 8469   ins_cost(DEFAULT_COST+1);
 8470   format %{ "PADDI   $dst, $src1, $src2" %}
 8471   size(8);
 8472   ins_encode %{
 8473     assert( ((intptr_t)(__ pc()) & 0x3c) != 0x3c, "Bad alignment for prefixed instruction at " INTPTR_FORMAT, (intptr_t)(__ pc()));
 8474     __ paddi($dst$$Register, $src1$$Register, $src2$$constant);
 8475   %}
 8476   ins_pipe(pipe_class_default);
 8477   ins_alignment(2);
 8478 %}
 8479 
 8480 // Long Addition
 8481 instruct addL_reg_reg(iRegLdst dst, iRegLsrc src1, iRegLsrc src2) %{
 8482   match(Set dst (AddL src1 src2));
 8483   format %{ "ADD     $dst, $src1, $src2 \t// long" %}
 8484   size(4);
 8485   ins_encode %{
 8486     __ add($dst$$Register, $src1$$Register, $src2$$Register);
 8487   %}
 8488   ins_pipe(pipe_class_default);
 8489 %}
 8490 
 8491 // Expand does not work with above instruct. (??)
 8492 instruct addL_reg_reg_2(iRegLdst dst, iRegLsrc src1, iRegLsrc src2) %{
 8493   // no match-rule
 8494   effect(DEF dst, USE src1, USE src2);
 8495   format %{ "ADD     $dst, $src1, $src2 \t// long" %}
 8496   size(4);
 8497   ins_encode %{
 8498     __ add($dst$$Register, $src1$$Register, $src2$$Register);
 8499   %}
 8500   ins_pipe(pipe_class_default);
 8501 %}
 8502 
 8503 instruct tree_addL_addL_addL_reg_reg_Ex(iRegLdst dst, iRegLsrc src1, iRegLsrc src2, iRegLsrc src3, iRegLsrc src4) %{
 8504   match(Set dst (AddL (AddL (AddL src1 src2) src3) src4));
 8505   ins_cost(DEFAULT_COST*3);
 8506 
 8507   expand %{
 8508     // FIXME: we should do this in the ideal world.
 8509     iRegLdst tmp1;
 8510     iRegLdst tmp2;
 8511     addL_reg_reg(tmp1, src1, src2);
    addL_reg_reg_2(tmp2, src3, src4); // Adlc complains about addL_reg_reg.
 8513     addL_reg_reg(dst, tmp1, tmp2);
 8514   %}
 8515 %}
 8516 
 8517 // AddL + ConvL2I.
 8518 instruct addI_regL_regL(iRegIdst dst, iRegLsrc src1, iRegLsrc src2) %{
 8519   match(Set dst (ConvL2I (AddL src1 src2)));
 8520 
 8521   format %{ "ADD     $dst, $src1, $src2 \t// long + l2i" %}
 8522   size(4);
 8523   ins_encode %{
 8524     __ add($dst$$Register, $src1$$Register, $src2$$Register);
 8525   %}
 8526   ins_pipe(pipe_class_default);
 8527 %}
 8528 
 8529 // No constant pool entries required.
 8530 instruct addL_reg_imm16(iRegLdst dst, iRegLsrc src1, immL16 src2) %{
 8531   match(Set dst (AddL src1 src2));
 8532 
 8533   format %{ "ADDI    $dst, $src1, $src2" %}
 8534   size(4);
 8535   ins_encode %{
 8536     __ addi($dst$$Register, $src1$$Register, $src2$$constant);
 8537   %}
 8538   ins_pipe(pipe_class_default);
 8539 %}
 8540 
 8541 // Long Immediate Addition with 16-bit shifted operand.
 8542 // No constant pool entries required.
 8543 instruct addL_reg_immhi16(iRegLdst dst, iRegLsrc src1, immL32hi16 src2) %{
 8544   match(Set dst (AddL src1 src2));
 8545 
 8546   format %{ "ADDIS   $dst, $src1, $src2" %}
 8547   size(4);
 8548   ins_encode %{
 8549     __ addis($dst$$Register, $src1$$Register, ($src2$$constant)>>16);
 8550   %}
 8551   ins_pipe(pipe_class_default);
 8552 %}
 8553 
 8554 // Long Immediate Addition using prefixed addi
 8555 // No constant pool entries required.
 8556 instruct addL_reg_imm34(iRegLdst dst, iRegLsrc src1, immL34 src2) %{
 8557   match(Set dst (AddL src1 src2));
 8558   predicate(PowerArchitecturePPC64 >= 10);
 8559   ins_cost(DEFAULT_COST+1);
 8560 
 8561   format %{ "PADDI   $dst, $src1, $src2" %}
 8562   size(8);
 8563   ins_encode %{
 8564     assert( ((intptr_t)(__ pc()) & 0x3c) != 0x3c, "Bad alignment for prefixed instruction at " INTPTR_FORMAT, (intptr_t)(__ pc()));
 8565     __ paddi($dst$$Register, $src1$$Register, $src2$$constant);
 8566   %}
 8567   ins_pipe(pipe_class_default);
 8568   ins_alignment(2);
 8569 %}
 8570 
 8571 // Pointer Register Addition
 8572 instruct addP_reg_reg(iRegPdst dst, iRegP_N2P src1, iRegLsrc src2) %{
 8573   match(Set dst (AddP src1 src2));
 8574   format %{ "ADD     $dst, $src1, $src2" %}
 8575   size(4);
 8576   ins_encode %{
 8577     __ add($dst$$Register, $src1$$Register, $src2$$Register);
 8578   %}
 8579   ins_pipe(pipe_class_default);
 8580 %}
 8581 
 8582 // Pointer Immediate Addition
 8583 // No constant pool entries required.
 8584 instruct addP_reg_imm16(iRegPdst dst, iRegP_N2P src1, immL16 src2) %{
 8585   match(Set dst (AddP src1 src2));
 8586 
 8587   format %{ "ADDI    $dst, $src1, $src2" %}
 8588   size(4);
 8589   ins_encode %{
 8590     __ addi($dst$$Register, $src1$$Register, $src2$$constant);
 8591   %}
 8592   ins_pipe(pipe_class_default);
 8593 %}
 8594 
 8595 // Pointer Immediate Addition with 16-bit shifted operand.
 8596 // No constant pool entries required.
 8597 instruct addP_reg_immhi16(iRegPdst dst, iRegP_N2P src1, immL32hi16 src2) %{
 8598   match(Set dst (AddP src1 src2));
 8599 
 8600   format %{ "ADDIS   $dst, $src1, $src2" %}
 8601   size(4);
 8602   ins_encode %{
 8603     __ addis($dst$$Register, $src1$$Register, ($src2$$constant)>>16);
 8604   %}
 8605   ins_pipe(pipe_class_default);
 8606 %}
 8607 
 8608 // Pointer Immediate Addition using prefixed addi
 8609 // No constant pool entries required.
 8610 instruct addP_reg_imm34(iRegPdst dst, iRegP_N2P src1, immL34 src2) %{
 8611   match(Set dst (AddP src1 src2));
 8612   predicate(PowerArchitecturePPC64 >= 10);
 8613   ins_cost(DEFAULT_COST+1);
 8614 
 8615   format %{ "PADDI    $dst, $src1, $src2" %}
 8616   size(8);
 8617   ins_encode %{
 8618     assert( ((intptr_t)(__ pc()) & 0x3c) != 0x3c, "Bad alignment for prefixed instruction at " INTPTR_FORMAT, (intptr_t)(__ pc()));
 8619     __ paddi($dst$$Register, $src1$$Register, $src2$$constant);
 8620   %}
 8621   ins_pipe(pipe_class_default);
 8622   ins_alignment(2);
 8623 %}
 8624 
 8625 //---------------------
 8626 // Subtraction Instructions
 8627 
 8628 // Register Subtraction
 8629 instruct subI_reg_reg(iRegIdst dst, iRegIsrc src1, iRegIsrc src2) %{
 8630   match(Set dst (SubI src1 src2));
 8631   format %{ "SUBF    $dst, $src2, $src1" %}
 8632   size(4);
 8633   ins_encode %{
 8634     __ subf($dst$$Register, $src2$$Register, $src1$$Register);
 8635   %}
 8636   ins_pipe(pipe_class_default);
 8637 %}
 8638 
// Immediate Subtraction
// The compiler converts "x-c0" into "x+ -c0" (see SubLNode::Ideal).
// Don't try to use addi with -$src2$$constant since it can overflow when $src2$$constant == minI16.
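// Example: for $src2$$constant == -32768 (minI16) the negated value +32768 does
// not fit into a signed 16-bit immediate, so an addi encoding would be invalid.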
 8642 
 8643 // SubI from constant (using subfic).
 8644 instruct subI_imm16_reg(iRegIdst dst, immI16 src1, iRegIsrc src2) %{
 8645   match(Set dst (SubI src1 src2));
 8646   format %{ "SUBI    $dst, $src1, $src2" %}
 8647 
 8648   size(4);
 8649   ins_encode %{
 8650     __ subfic($dst$$Register, $src2$$Register, $src1$$constant);
 8651   %}
 8652   ins_pipe(pipe_class_default);
 8653 %}
 8654 
 8655 // Turn the sign-bit of an integer into a 32-bit mask, 0x0...0 for
 8656 // positive integers and 0xF...F for negative ones.
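// The mask m allows a branch-free abs: (x ^ m) - m with m = x >> 31 (arithmetic).
// E.g. x = -5: m = -1, x ^ m = 4, 4 - (-1) = 5; for x >= 0, m = 0 and x is unchanged.
// See absI_reg_Ex below.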
 8657 instruct signmask32I_regI(iRegIdst dst, iRegIsrc src) %{
 8658   // no match-rule, false predicate
 8659   effect(DEF dst, USE src);
 8660   predicate(false);
 8661 
 8662   format %{ "SRAWI   $dst, $src, #31" %}
 8663   size(4);
 8664   ins_encode %{
 8665     __ srawi($dst$$Register, $src$$Register, 0x1f);
 8666   %}
 8667   ins_pipe(pipe_class_default);
 8668 %}
 8669 
 8670 instruct absI_reg_Ex(iRegIdst dst, iRegIsrc src) %{
 8671   match(Set dst (AbsI src));
 8672   ins_cost(DEFAULT_COST*3);
 8673 
 8674   expand %{
 8675     iRegIdst tmp1;
 8676     iRegIdst tmp2;
 8677     signmask32I_regI(tmp1, src);
 8678     xorI_reg_reg(tmp2, tmp1, src);
 8679     subI_reg_reg(dst, tmp2, tmp1);
 8680   %}
 8681 %}
 8682 
 8683 instruct negI_regI(iRegIdst dst, immI_0 zero, iRegIsrc src2) %{
 8684   match(Set dst (SubI zero src2));
 8685   format %{ "NEG     $dst, $src2" %}
 8686   size(4);
 8687   ins_encode %{
 8688     __ neg($dst$$Register, $src2$$Register);
 8689   %}
 8690   ins_pipe(pipe_class_default);
 8691 %}
 8692 
 8693 // Long subtraction
 8694 instruct subL_reg_reg(iRegLdst dst, iRegLsrc src1, iRegLsrc src2) %{
 8695   match(Set dst (SubL src1 src2));
 8696   format %{ "SUBF    $dst, $src2, $src1 \t// long" %}
 8697   size(4);
 8698   ins_encode %{
 8699     __ subf($dst$$Register, $src2$$Register, $src1$$Register);
 8700   %}
 8701   ins_pipe(pipe_class_default);
 8702 %}
 8703 
 8704 // SubL + convL2I.
 8705 instruct subI_regL_regL(iRegIdst dst, iRegLsrc src1, iRegLsrc src2) %{
 8706   match(Set dst (ConvL2I (SubL src1 src2)));
 8707 
 8708   format %{ "SUBF    $dst, $src2, $src1 \t// long + l2i" %}
 8709   size(4);
 8710   ins_encode %{
 8711     __ subf($dst$$Register, $src2$$Register, $src1$$Register);
 8712   %}
 8713   ins_pipe(pipe_class_default);
 8714 %}
 8715 
 8716 // Turn the sign-bit of a long into a 64-bit mask, 0x0...0 for
 8717 // positive longs and 0xF...F for negative ones.
 8718 instruct signmask64I_regL(iRegIdst dst, iRegLsrc src) %{
 8719   // no match-rule, false predicate
 8720   effect(DEF dst, USE src);
 8721   predicate(false);
 8722 
 8723   format %{ "SRADI   $dst, $src, #63" %}
 8724   size(4);
 8725   ins_encode %{
 8726     __ sradi($dst$$Register, $src$$Register, 0x3f);
 8727   %}
 8728   ins_pipe(pipe_class_default);
 8729 %}
 8730 
 8731 // Turn the sign-bit of a long into a 64-bit mask, 0x0...0 for
 8732 // positive longs and 0xF...F for negative ones.
 8733 instruct signmask64L_regL(iRegLdst dst, iRegLsrc src) %{
 8734   // no match-rule, false predicate
 8735   effect(DEF dst, USE src);
 8736   predicate(false);
 8737 
 8738   format %{ "SRADI   $dst, $src, #63" %}
 8739   size(4);
 8740   ins_encode %{
 8741     __ sradi($dst$$Register, $src$$Register, 0x3f);
 8742   %}
 8743   ins_pipe(pipe_class_default);
 8744 %}
 8745 
 8746 instruct absL_reg_Ex(iRegLdst dst, iRegLsrc src) %{
 8747   match(Set dst (AbsL src));
 8748   ins_cost(DEFAULT_COST*3);
 8749 
 8750   expand %{
 8751     iRegLdst tmp1;
 8752     iRegLdst tmp2;
 8753     signmask64L_regL(tmp1, src);
 8754     xorL_reg_reg(tmp2, tmp1, src);
 8755     subL_reg_reg(dst, tmp2, tmp1);
 8756   %}
 8757 %}
 8758 
 8759 // Long negation
 8760 instruct negL_reg_reg(iRegLdst dst, immL_0 zero, iRegLsrc src2) %{
 8761   match(Set dst (SubL zero src2));
 8762   format %{ "NEG     $dst, $src2 \t// long" %}
 8763   size(4);
 8764   ins_encode %{
 8765     __ neg($dst$$Register, $src2$$Register);
 8766   %}
 8767   ins_pipe(pipe_class_default);
 8768 %}
 8769 
 8770 // NegL + ConvL2I.
 8771 instruct negI_con0_regL(iRegIdst dst, immL_0 zero, iRegLsrc src2) %{
 8772   match(Set dst (ConvL2I (SubL zero src2)));
 8773 
 8774   format %{ "NEG     $dst, $src2 \t// long + l2i" %}
 8775   size(4);
 8776   ins_encode %{
 8777     __ neg($dst$$Register, $src2$$Register);
 8778   %}
 8779   ins_pipe(pipe_class_default);
 8780 %}
 8781 
 8782 // Multiplication Instructions
 8783 // Integer Multiplication
 8784 
 8785 // Register Multiplication
 8786 instruct mulI_reg_reg(iRegIdst dst, iRegIsrc src1, iRegIsrc src2) %{
 8787   match(Set dst (MulI src1 src2));
 8788   ins_cost(DEFAULT_COST);
 8789 
 8790   format %{ "MULLW   $dst, $src1, $src2" %}
 8791   size(4);
 8792   ins_encode %{
 8793     __ mullw($dst$$Register, $src1$$Register, $src2$$Register);
 8794   %}
 8795   ins_pipe(pipe_class_default);
 8796 %}
 8797 
 8798 // Immediate Multiplication
 8799 instruct mulI_reg_imm16(iRegIdst dst, iRegIsrc src1, immI16 src2) %{
 8800   match(Set dst (MulI src1 src2));
 8801   ins_cost(DEFAULT_COST);
 8802 
 8803   format %{ "MULLI   $dst, $src1, $src2" %}
 8804   size(4);
 8805   ins_encode %{
 8806     __ mulli($dst$$Register, $src1$$Register, $src2$$constant);
 8807   %}
 8808   ins_pipe(pipe_class_default);
 8809 %}
 8810 
 8811 instruct mulL_reg_reg(iRegLdst dst, iRegLsrc src1, iRegLsrc src2) %{
 8812   match(Set dst (MulL src1 src2));
 8813   ins_cost(DEFAULT_COST);
 8814 
  format %{ "MULLD   $dst, $src1, $src2 \t// long" %}
 8816   size(4);
 8817   ins_encode %{
 8818     __ mulld($dst$$Register, $src1$$Register, $src2$$Register);
 8819   %}
 8820   ins_pipe(pipe_class_default);
 8821 %}
 8822 
 8823 // Multiply high for optimized long division by constant.
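// C2 rewrites long division by a constant into a multiply by a precomputed "magic"
// reciprocal plus shift/sign corrections (Granlund/Montgomery); the multiply-high
// part maps to mulhd. E.g. x/10 becomes roughly mulhd(x, 0x6666666666666667)
// followed by sradi and a sign correction.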
 8824 instruct mulHighL_reg_reg(iRegLdst dst, iRegLsrc src1, iRegLsrc src2) %{
 8825   match(Set dst (MulHiL src1 src2));
 8826   ins_cost(DEFAULT_COST);
 8827 
  format %{ "MULHD   $dst, $src1, $src2 \t// long" %}
 8829   size(4);
 8830   ins_encode %{
 8831     __ mulhd($dst$$Register, $src1$$Register, $src2$$Register);
 8832   %}
 8833   ins_pipe(pipe_class_default);
 8834 %}
 8835 
 8836 // Immediate Multiplication
 8837 instruct mulL_reg_imm16(iRegLdst dst, iRegLsrc src1, immL16 src2) %{
 8838   match(Set dst (MulL src1 src2));
 8839   ins_cost(DEFAULT_COST);
 8840 
 8841   format %{ "MULLI   $dst, $src1, $src2" %}
 8842   size(4);
 8843   ins_encode %{
 8844     __ mulli($dst$$Register, $src1$$Register, $src2$$constant);
 8845   %}
 8846   ins_pipe(pipe_class_default);
 8847 %}
 8848 
 8849 // Integer Division with Immediate -1: Negate.
 8850 instruct divI_reg_immIvalueMinus1(iRegIdst dst, iRegIsrc src1, immI_minus1 src2) %{
 8851   match(Set dst (DivI src1 src2));
 8852   ins_cost(DEFAULT_COST);
 8853 
 8854   format %{ "NEG     $dst, $src1 \t// /-1" %}
 8855   size(4);
 8856   ins_encode %{
 8857     __ neg($dst$$Register, $src1$$Register);
 8858   %}
 8859   ins_pipe(pipe_class_default);
 8860 %}
 8861 
 8862 // Integer Division with constant, but not -1.
 8863 // We should be able to improve this by checking the type of src2.
 8864 // It might well be that src2 is known to be positive.
 8865 instruct divI_reg_regnotMinus1(iRegIdst dst, iRegIsrc src1, iRegIsrc src2) %{
 8866   match(Set dst (DivI src1 src2));
 8867   predicate(n->in(2)->find_int_con(-1) != -1); // src2 is a constant, but not -1
 8868   ins_cost(2*DEFAULT_COST);
 8869 
 8870   format %{ "DIVW    $dst, $src1, $src2 \t// /not-1" %}
 8871   size(4);
 8872   ins_encode %{
 8873     __ divw($dst$$Register, $src1$$Register, $src2$$Register);
 8874   %}
 8875   ins_pipe(pipe_class_default);
 8876 %}
 8877 
 8878 instruct cmovI_bne_negI_reg(iRegIdst dst, flagsRegSrc crx, iRegIsrc src1) %{
 8879   effect(USE_DEF dst, USE src1, USE crx);
 8880   predicate(false);
 8881 
 8882   ins_variable_size_depending_on_alignment(true);
 8883 
 8884   format %{ "CMOVE   $dst, neg($src1), $crx" %}
 8885   // Worst case is branch + move + stop, no stop without scheduler.
 8886   size(8);
 8887   ins_encode %{
 8888     Label done;
 8889     __ bne($crx$$CondRegister, done);
 8890     __ neg($dst$$Register, $src1$$Register);
 8891     __ bind(done);
 8892   %}
 8893   ins_pipe(pipe_class_default);
 8894 %}
 8895 
 8896 // Integer Division with Registers not containing constants.
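// The -1 divisor is special-cased because divw leaves the result undefined on
// overflow (minInt / -1), while Java requires minInt; since neg(minInt) == minInt,
// the conditional negate below produces the correct value. The long variant further
// down handles Long.MIN_VALUE / -1 the same way.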
 8897 instruct divI_reg_reg_Ex(iRegIdst dst, iRegIsrc src1, iRegIsrc src2) %{
 8898   match(Set dst (DivI src1 src2));
 8899   ins_cost(10*DEFAULT_COST);
 8900 
 8901   expand %{
 8902     immI16 imm %{ (int)-1 %}
 8903     flagsReg tmp1;
 8904     cmpI_reg_imm16(tmp1, src2, imm);          // check src2 == -1
 8905     divI_reg_regnotMinus1(dst, src1, src2);   // dst = src1 / src2
 8906     cmovI_bne_negI_reg(dst, tmp1, src1);      // cmove dst = neg(src1) if src2 == -1
 8907   %}
 8908 %}
 8909 
 8910 // Long Division with Immediate -1: Negate.
 8911 instruct divL_reg_immLvalueMinus1(iRegLdst dst, iRegLsrc src1, immL_minus1 src2) %{
 8912   match(Set dst (DivL src1 src2));
 8913   ins_cost(DEFAULT_COST);
 8914 
 8915   format %{ "NEG     $dst, $src1 \t// /-1, long" %}
 8916   size(4);
 8917   ins_encode %{
 8918     __ neg($dst$$Register, $src1$$Register);
 8919   %}
 8920   ins_pipe(pipe_class_default);
 8921 %}
 8922 
 8923 // Long Division with constant, but not -1.
 8924 instruct divL_reg_regnotMinus1(iRegLdst dst, iRegLsrc src1, iRegLsrc src2) %{
 8925   match(Set dst (DivL src1 src2));
 8926   predicate(n->in(2)->find_long_con(-1L) != -1L); // Src2 is a constant, but not -1.
 8927   ins_cost(2*DEFAULT_COST);
 8928 
 8929   format %{ "DIVD    $dst, $src1, $src2 \t// /not-1, long" %}
 8930   size(4);
 8931   ins_encode %{
 8932     __ divd($dst$$Register, $src1$$Register, $src2$$Register);
 8933   %}
 8934   ins_pipe(pipe_class_default);
 8935 %}
 8936 
 8937 instruct cmovL_bne_negL_reg(iRegLdst dst, flagsRegSrc crx, iRegLsrc src1) %{
 8938   effect(USE_DEF dst, USE src1, USE crx);
 8939   predicate(false);
 8940 
 8941   ins_variable_size_depending_on_alignment(true);
 8942 
 8943   format %{ "CMOVE   $dst, neg($src1), $crx" %}
 8944   // Worst case is branch + move + stop, no stop without scheduler.
 8945   size(8);
 8946   ins_encode %{
 8947     Label done;
 8948     __ bne($crx$$CondRegister, done);
 8949     __ neg($dst$$Register, $src1$$Register);
 8950     __ bind(done);
 8951   %}
 8952   ins_pipe(pipe_class_default);
 8953 %}
 8954 
 8955 // Long Division with Registers not containing constants.
 8956 instruct divL_reg_reg_Ex(iRegLdst dst, iRegLsrc src1, iRegLsrc src2) %{
 8957   match(Set dst (DivL src1 src2));
 8958   ins_cost(10*DEFAULT_COST);
 8959 
 8960   expand %{
 8961     immL16 imm %{ (int)-1 %}
 8962     flagsReg tmp1;
 8963     cmpL_reg_imm16(tmp1, src2, imm);          // check src2 == -1
 8964     divL_reg_regnotMinus1(dst, src1, src2);   // dst = src1 / src2
 8965     cmovL_bne_negL_reg(dst, tmp1, src1);      // cmove dst = neg(src1) if src2 == -1
 8966   %}
 8967 %}
 8968 
 8969 // Integer Remainder with registers.
 8970 instruct modI_reg_reg_Ex(iRegIdst dst, iRegIsrc src1, iRegIsrc src2) %{
 8971   match(Set dst (ModI src1 src2));
 8972   ins_cost(10*DEFAULT_COST);
 8973 
 8974   expand %{
 8975     immI16 imm %{ (int)-1 %}
 8976     flagsReg tmp1;
 8977     iRegIdst tmp2;
 8978     iRegIdst tmp3;
 8979     cmpI_reg_imm16(tmp1, src2, imm);           // check src2 == -1
 8980     divI_reg_regnotMinus1(tmp2, src1, src2);   // tmp2 = src1 / src2
 8981     cmovI_bne_negI_reg(tmp2, tmp1, src1);      // cmove tmp2 = neg(src1) if src2 == -1
 8982     mulI_reg_reg(tmp3, src2, tmp2);            // tmp3 = src2 * tmp2
 8983     subI_reg_reg(dst, src1, tmp3);             // dst = src1 - tmp3
 8984   %}
 8985 %}
 8986 
 8987 // Long Remainder with registers
 8988 instruct modL_reg_reg_Ex(iRegLdst dst, iRegLsrc src1, iRegLsrc src2) %{
 8989   match(Set dst (ModL src1 src2));
 8990   ins_cost(10*DEFAULT_COST);
 8991 
 8992   expand %{
 8993     immL16 imm %{ (int)-1 %}
 8994     flagsReg tmp1;
 8995     iRegLdst tmp2;
 8996     iRegLdst tmp3;
 8997     cmpL_reg_imm16(tmp1, src2, imm);             // check src2 == -1
 8998     divL_reg_regnotMinus1(tmp2, src1, src2);     // tmp2 = src1 / src2
 8999     cmovL_bne_negL_reg(tmp2, tmp1, src1);        // cmove tmp2 = neg(src1) if src2 == -1
 9000     mulL_reg_reg(tmp3, src2, tmp2);              // tmp3 = src2 * tmp2
 9001     subL_reg_reg(dst, src1, tmp3);               // dst = src1 - tmp3
 9002   %}
 9003 %}
 9004 
 9005 // Integer Shift Instructions
 9006 
 9007 // Register Shift Left
 9008 
// Clear the uppermost #mask bits (keep only the lowest 64-#mask bits).
 9010 // Used to normalize shift amounts in registers.
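// Example: mask == 0x3b (59) yields src & 0x1f, the 5-bit shift amount Java
// mandates for int shifts; mask == 0x3a (58) keeps the 6 bits needed for long shifts.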
 9011 instruct maskI_reg_imm(iRegIdst dst, iRegIsrc src, uimmI6 mask) %{
 9012   // no match-rule, false predicate
 9013   effect(DEF dst, USE src, USE mask);
 9014   predicate(false);
 9015 
 9016   format %{ "MASK    $dst, $src, $mask \t// clear $mask upper bits" %}
 9017   size(4);
 9018   ins_encode %{
 9019     __ clrldi($dst$$Register, $src$$Register, $mask$$constant);
 9020   %}
 9021   ins_pipe(pipe_class_default);
 9022 %}
 9023 
 9024 instruct lShiftI_reg_reg(iRegIdst dst, iRegIsrc src1, iRegIsrc src2) %{
 9025   // no match-rule, false predicate
 9026   effect(DEF dst, USE src1, USE src2);
 9027   predicate(false);
 9028 
 9029   format %{ "SLW     $dst, $src1, $src2" %}
 9030   size(4);
 9031   ins_encode %{
 9032     __ slw($dst$$Register, $src1$$Register, $src2$$Register);
 9033   %}
 9034   ins_pipe(pipe_class_default);
 9035 %}
 9036 
 9037 instruct lShiftI_reg_reg_Ex(iRegIdst dst, iRegIsrc src1, iRegIsrc src2) %{
 9038   match(Set dst (LShiftI src1 src2));
 9039   ins_cost(DEFAULT_COST*2);
 9040   expand %{
 9041     uimmI6 mask %{ 0x3b /* clear 59 bits, keep 5 */ %}
 9042     iRegIdst tmpI;
 9043     maskI_reg_imm(tmpI, src2, mask);
 9044     lShiftI_reg_reg(dst, src1, tmpI);
 9045   %}
 9046 %}
 9047 
 9048 // Register Shift Left Immediate
 9049 instruct lShiftI_reg_imm(iRegIdst dst, iRegIsrc src1, immI src2) %{
 9050   match(Set dst (LShiftI src1 src2));
 9051 
 9052   format %{ "SLWI    $dst, $src1, ($src2 & 0x1f)" %}
 9053   size(4);
 9054   ins_encode %{
 9055     __ slwi($dst$$Register, $src1$$Register, ($src2$$constant) & 0x1f);
 9056   %}
 9057   ins_pipe(pipe_class_default);
 9058 %}
 9059 
 9060 // AndI with negpow2-constant + LShiftI
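// Example: src2 == -8, src3 == 2: log2i_exact(8) == 3, so maskbits == 5 and
// rlwinm rotates left by 2 keeping bits 0..26, i.e. (x & -8) << 2 with the low
// 5 result bits cleared.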
 9061 instruct lShiftI_andI_immInegpow2_imm5(iRegIdst dst, iRegIsrc src1, immInegpow2 src2, uimmI5 src3) %{
 9062   match(Set dst (LShiftI (AndI src1 src2) src3));
 9063   predicate(UseRotateAndMaskInstructionsPPC64);
 9064 
 9065   format %{ "RLWINM  $dst, lShiftI(AndI($src1, $src2), $src3)" %}
 9066   size(4);
 9067   ins_encode %{
 9068     long src3      = $src3$$constant;
 9069     long maskbits  = src3 + log2i_exact(-(juint)$src2$$constant);
 9070     if (maskbits >= 32) {
 9071       __ li($dst$$Register, 0); // addi
 9072     } else {
 9073       __ rlwinm($dst$$Register, $src1$$Register, src3 & 0x1f, 0, (31-maskbits) & 0x1f);
 9074     }
 9075   %}
 9076   ins_pipe(pipe_class_default);
 9077 %}
 9078 
 9079 // RShiftI + AndI with negpow2-constant + LShiftI
 9080 instruct lShiftI_andI_immInegpow2_rShiftI_imm5(iRegIdst dst, iRegIsrc src1, immInegpow2 src2, uimmI5 src3) %{
 9081   match(Set dst (LShiftI (AndI (RShiftI src1 src3) src2) src3));
 9082   predicate(UseRotateAndMaskInstructionsPPC64);
 9083 
 9084   format %{ "RLWINM  $dst, lShiftI(AndI(RShiftI($src1, $src3), $src2), $src3)" %}
 9085   size(4);
 9086   ins_encode %{
 9087     long src3      = $src3$$constant;
 9088     long maskbits  = src3 + log2i_exact(-(juint)$src2$$constant);
 9089     if (maskbits >= 32) {
 9090       __ li($dst$$Register, 0); // addi
 9091     } else {
 9092       __ rlwinm($dst$$Register, $src1$$Register, 0, 0, (31-maskbits) & 0x1f);
 9093     }
 9094   %}
 9095   ins_pipe(pipe_class_default);
 9096 %}
 9097 
 9098 instruct lShiftL_regL_regI(iRegLdst dst, iRegLsrc src1, iRegIsrc src2) %{
 9099   // no match-rule, false predicate
 9100   effect(DEF dst, USE src1, USE src2);
 9101   predicate(false);
 9102 
 9103   format %{ "SLD     $dst, $src1, $src2" %}
 9104   size(4);
 9105   ins_encode %{
 9106     __ sld($dst$$Register, $src1$$Register, $src2$$Register);
 9107   %}
 9108   ins_pipe(pipe_class_default);
 9109 %}
 9110 
 9111 // Register Shift Left
 9112 instruct lShiftL_regL_regI_Ex(iRegLdst dst, iRegLsrc src1, iRegIsrc src2) %{
 9113   match(Set dst (LShiftL src1 src2));
 9114   ins_cost(DEFAULT_COST*2);
 9115   expand %{
 9116     uimmI6 mask %{ 0x3a /* clear 58 bits, keep 6 */ %}
 9117     iRegIdst tmpI;
 9118     maskI_reg_imm(tmpI, src2, mask);
 9119     lShiftL_regL_regI(dst, src1, tmpI);
 9120   %}
 9121 %}
 9122 
 9123 // Register Shift Left Immediate
 9124 instruct lshiftL_regL_immI(iRegLdst dst, iRegLsrc src1, immI src2) %{
 9125   match(Set dst (LShiftL src1 src2));
 9126   format %{ "SLDI    $dst, $src1, ($src2 & 0x3f)" %}
 9127   size(4);
 9128   ins_encode %{
 9129     __ sldi($dst$$Register, $src1$$Register, ($src2$$constant) & 0x3f);
 9130   %}
 9131   ins_pipe(pipe_class_default);
 9132 %}
 9133 
// If we shift by 32 bits or more, the bits produced by the I2L sign extension are
// shifted out, so the conversion can be omitted.
 9135 instruct lShiftL_regI_immGE32(iRegLdst dst, iRegIsrc src1, uimmI6_ge32 src2) %{
 9136   match(Set dst (LShiftL (ConvI2L src1) src2));
 9137   ins_cost(DEFAULT_COST);
 9138 
 9139   size(4);
 9140   format %{ "SLDI    $dst, i2l($src1), $src2" %}
 9141   ins_encode %{
 9142     __ sldi($dst$$Register, $src1$$Register, ($src2$$constant) & 0x3f);
 9143   %}
 9144   ins_pipe(pipe_class_default);
 9145 %}
 9146 
// Shift a positive int to the left.
 9148 // Clrlsldi clears the upper 32 bits and shifts.
 9149 instruct scaledPositiveI2L_lShiftL_convI2L_reg_imm6(iRegLdst dst, iRegIsrc src1, uimmI6 src2) %{
 9150   match(Set dst (LShiftL (ConvI2L src1) src2));
 9151   predicate(((ConvI2LNode*)(_kids[0]->_leaf))->type()->is_long()->is_positive_int());
 9152 
 9153   format %{ "SLDI    $dst, i2l(positive_int($src1)), $src2" %}
 9154   size(4);
 9155   ins_encode %{
 9156     __ clrlsldi($dst$$Register, $src1$$Register, 0x20, $src2$$constant);
 9157   %}
 9158   ins_pipe(pipe_class_default);
 9159 %}
 9160 
 9161 instruct arShiftI_reg_reg(iRegIdst dst, iRegIsrc src1, iRegIsrc src2) %{
 9162   // no match-rule, false predicate
 9163   effect(DEF dst, USE src1, USE src2);
 9164   predicate(false);
 9165 
 9166   format %{ "SRAW    $dst, $src1, $src2" %}
 9167   size(4);
 9168   ins_encode %{
 9169     __ sraw($dst$$Register, $src1$$Register, $src2$$Register);
 9170   %}
 9171   ins_pipe(pipe_class_default);
 9172 %}
 9173 
 9174 // Register Arithmetic Shift Right
 9175 instruct arShiftI_reg_reg_Ex(iRegIdst dst, iRegIsrc src1, iRegIsrc src2) %{
 9176   match(Set dst (RShiftI src1 src2));
 9177   ins_cost(DEFAULT_COST*2);
 9178   expand %{
 9179     uimmI6 mask %{ 0x3b /* clear 59 bits, keep 5 */ %}
 9180     iRegIdst tmpI;
 9181     maskI_reg_imm(tmpI, src2, mask);
 9182     arShiftI_reg_reg(dst, src1, tmpI);
 9183   %}
 9184 %}
 9185 
 9186 // Register Arithmetic Shift Right Immediate
 9187 instruct arShiftI_reg_imm(iRegIdst dst, iRegIsrc src1, immI src2) %{
 9188   match(Set dst (RShiftI src1 src2));
 9189 
 9190   format %{ "SRAWI   $dst, $src1, ($src2 & 0x1f)" %}
 9191   size(4);
 9192   ins_encode %{
 9193     __ srawi($dst$$Register, $src1$$Register, ($src2$$constant) & 0x1f);
 9194   %}
 9195   ins_pipe(pipe_class_default);
 9196 %}
 9197 
 9198 instruct arShiftL_regL_regI(iRegLdst dst, iRegLsrc src1, iRegIsrc src2) %{
 9199   // no match-rule, false predicate
 9200   effect(DEF dst, USE src1, USE src2);
 9201   predicate(false);
 9202 
 9203   format %{ "SRAD    $dst, $src1, $src2" %}
 9204   size(4);
 9205   ins_encode %{
 9206     __ srad($dst$$Register, $src1$$Register, $src2$$Register);
 9207   %}
 9208   ins_pipe(pipe_class_default);
 9209 %}
 9210 
 9211 // Register Shift Right Arithmetic Long
 9212 instruct arShiftL_regL_regI_Ex(iRegLdst dst, iRegLsrc src1, iRegIsrc src2) %{
 9213   match(Set dst (RShiftL src1 src2));
 9214   ins_cost(DEFAULT_COST*2);
 9215 
 9216   expand %{
 9217     uimmI6 mask %{ 0x3a /* clear 58 bits, keep 6 */ %}
 9218     iRegIdst tmpI;
 9219     maskI_reg_imm(tmpI, src2, mask);
 9220     arShiftL_regL_regI(dst, src1, tmpI);
 9221   %}
 9222 %}
 9223 
// Register Arithmetic Shift Right Immediate
 9225 instruct arShiftL_regL_immI(iRegLdst dst, iRegLsrc src1, immI src2) %{
 9226   match(Set dst (RShiftL src1 src2));
 9227 
 9228   format %{ "SRADI   $dst, $src1, ($src2 & 0x3f)" %}
 9229   size(4);
 9230   ins_encode %{
 9231     __ sradi($dst$$Register, $src1$$Register, ($src2$$constant) & 0x3f);
 9232   %}
 9233   ins_pipe(pipe_class_default);
 9234 %}
 9235 
 9236 // RShiftL + ConvL2I
 9237 instruct convL2I_arShiftL_regL_immI(iRegIdst dst, iRegLsrc src1, immI src2) %{
 9238   match(Set dst (ConvL2I (RShiftL src1 src2)));
 9239 
 9240   format %{ "SRADI   $dst, $src1, ($src2 & 0x3f) \t// long + l2i" %}
 9241   size(4);
 9242   ins_encode %{
 9243     __ sradi($dst$$Register, $src1$$Register, ($src2$$constant) & 0x3f);
 9244   %}
 9245   ins_pipe(pipe_class_default);
 9246 %}
 9247 
 9248 instruct urShiftI_reg_reg(iRegIdst dst, iRegIsrc src1, iRegIsrc src2) %{
 9249   // no match-rule, false predicate
 9250   effect(DEF dst, USE src1, USE src2);
 9251   predicate(false);
 9252 
 9253   format %{ "SRW     $dst, $src1, $src2" %}
 9254   size(4);
 9255   ins_encode %{
 9256     __ srw($dst$$Register, $src1$$Register, $src2$$Register);
 9257   %}
 9258   ins_pipe(pipe_class_default);
 9259 %}
 9260 
 9261 // Register Shift Right
 9262 instruct urShiftI_reg_reg_Ex(iRegIdst dst, iRegIsrc src1, iRegIsrc src2) %{
 9263   match(Set dst (URShiftI src1 src2));
 9264   ins_cost(DEFAULT_COST*2);
 9265 
 9266   expand %{
 9267     uimmI6 mask %{ 0x3b /* clear 59 bits, keep 5 */ %}
 9268     iRegIdst tmpI;
 9269     maskI_reg_imm(tmpI, src2, mask);
 9270     urShiftI_reg_reg(dst, src1, tmpI);
 9271   %}
 9272 %}
 9273 
 9274 // Register Shift Right Immediate
 9275 instruct urShiftI_reg_imm(iRegIdst dst, iRegIsrc src1, immI src2) %{
 9276   match(Set dst (URShiftI src1 src2));
 9277 
 9278   format %{ "SRWI    $dst, $src1, ($src2 & 0x1f)" %}
 9279   size(4);
 9280   ins_encode %{
 9281     __ srwi($dst$$Register, $src1$$Register, ($src2$$constant) & 0x1f);
 9282   %}
 9283   ins_pipe(pipe_class_default);
 9284 %}
 9285 
 9286 instruct urShiftL_regL_regI(iRegLdst dst, iRegLsrc src1, iRegIsrc src2) %{
 9287   // no match-rule, false predicate
 9288   effect(DEF dst, USE src1, USE src2);
 9289   predicate(false);
 9290 
 9291   format %{ "SRD     $dst, $src1, $src2" %}
 9292   size(4);
 9293   ins_encode %{
 9294     __ srd($dst$$Register, $src1$$Register, $src2$$Register);
 9295   %}
 9296   ins_pipe(pipe_class_default);
 9297 %}
 9298 
 9299 // Register Shift Right
 9300 instruct urShiftL_regL_regI_Ex(iRegLdst dst, iRegLsrc src1, iRegIsrc src2) %{
 9301   match(Set dst (URShiftL src1 src2));
 9302   ins_cost(DEFAULT_COST*2);
 9303 
 9304   expand %{
 9305     uimmI6 mask %{ 0x3a /* clear 58 bits, keep 6 */ %}
 9306     iRegIdst tmpI;
 9307     maskI_reg_imm(tmpI, src2, mask);
 9308     urShiftL_regL_regI(dst, src1, tmpI);
 9309   %}
 9310 %}
 9311 
 9312 // Register Shift Right Immediate
 9313 instruct urShiftL_regL_immI(iRegLdst dst, iRegLsrc src1, immI src2) %{
 9314   match(Set dst (URShiftL src1 src2));
 9315 
 9316   format %{ "SRDI    $dst, $src1, ($src2 & 0x3f)" %}
 9317   size(4);
 9318   ins_encode %{
 9319     __ srdi($dst$$Register, $src1$$Register, ($src2$$constant) & 0x3f);
 9320   %}
 9321   ins_pipe(pipe_class_default);
 9322 %}
 9323 
 9324 // URShiftL + ConvL2I.
 9325 instruct convL2I_urShiftL_regL_immI(iRegIdst dst, iRegLsrc src1, immI src2) %{
 9326   match(Set dst (ConvL2I (URShiftL src1 src2)));
 9327 
 9328   format %{ "SRDI    $dst, $src1, ($src2 & 0x3f) \t// long + l2i" %}
 9329   size(4);
 9330   ins_encode %{
 9331     __ srdi($dst$$Register, $src1$$Register, ($src2$$constant) & 0x3f);
 9332   %}
 9333   ins_pipe(pipe_class_default);
 9334 %}
 9335 
 9336 // Register Shift Right Immediate with a CastP2X
 9337 instruct shrP_convP2X_reg_imm6(iRegLdst dst, iRegP_N2P src1, uimmI6 src2) %{
 9338   match(Set dst (URShiftL (CastP2X src1) src2));
 9339 
 9340   format %{ "SRDI    $dst, $src1, $src2 \t// Cast ptr $src1 to long and shift" %}
 9341   size(4);
 9342   ins_encode %{
 9343     __ srdi($dst$$Register, $src1$$Register, ($src2$$constant) & 0x3f);
 9344   %}
 9345   ins_pipe(pipe_class_default);
 9346 %}
 9347 
 9348 // Bitfield Extract: URShiftI + AndI
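// Example: src2 == 8, src3 == 0xFF gives rshift == 8, length == 8 and
// extrdi(dst, src1, 8, 48), i.e. (src1 >>> 8) & 0xFF.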
 9349 instruct andI_urShiftI_regI_immI_immIpow2minus1(iRegIdst dst, iRegIsrc src1, immI src2, immIpow2minus1 src3) %{
 9350   match(Set dst (AndI (URShiftI src1 src2) src3));
 9351 
 9352   format %{ "EXTRDI  $dst, $src1, shift=$src2, mask=$src3 \t// int bitfield extract" %}
 9353   size(4);
 9354   ins_encode %{
 9355     int rshift = ($src2$$constant) & 0x1f;
 9356     int length = log2i_exact((juint)$src3$$constant + 1u);
 9357     if (rshift + length > 32) {
 9358       // if necessary, adjust mask to omit rotated bits.
 9359       length = 32 - rshift;
 9360     }
 9361     __ extrdi($dst$$Register, $src1$$Register, length, 64 - (rshift + length));
 9362   %}
 9363   ins_pipe(pipe_class_default);
 9364 %}
 9365 
 9366 // Bitfield Extract: URShiftL + AndL
 9367 instruct andL_urShiftL_regL_immI_immLpow2minus1(iRegLdst dst, iRegLsrc src1, immI src2, immLpow2minus1 src3) %{
 9368   match(Set dst (AndL (URShiftL src1 src2) src3));
 9369 
 9370   format %{ "EXTRDI  $dst, $src1, shift=$src2, mask=$src3 \t// long bitfield extract" %}
 9371   size(4);
 9372   ins_encode %{
 9373     int rshift  = ($src2$$constant) & 0x3f;
 9374     int length = log2i_exact((julong)$src3$$constant + 1ull);
 9375     if (rshift + length > 64) {
 9376       // if necessary, adjust mask to omit rotated bits.
 9377       length = 64 - rshift;
 9378     }
 9379     __ extrdi($dst$$Register, $src1$$Register, length, 64 - (rshift + length));
 9380   %}
 9381   ins_pipe(pipe_class_default);
 9382 %}
 9383 
 9384 instruct sxtI_reg(iRegIdst dst, iRegIsrc src) %{
 9385   match(Set dst (ConvL2I (ConvI2L src)));
 9386 
 9387   format %{ "EXTSW   $dst, $src \t// int->int" %}
 9388   size(4);
 9389   ins_encode %{
 9390     __ extsw($dst$$Register, $src$$Register);
 9391   %}
 9392   ins_pipe(pipe_class_default);
 9393 %}
 9394 
 9395 //----------Rotate Instructions------------------------------------------------
 9396 
 9397 // Rotate Left by 8-bit immediate
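// Example: (x << 24) | (x >>> 8) matches with lshift == 24, rshift == 8
// (24 + 8 == 32) and emits rotlwi(dst, src, 24).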
 9398 instruct rotlI_reg_immi8(iRegIdst dst, iRegIsrc src, immI8 lshift, immI8 rshift) %{
 9399   match(Set dst (OrI (LShiftI src lshift) (URShiftI src rshift)));
 9400   predicate(0 == ((n->in(1)->in(2)->get_int() + n->in(2)->in(2)->get_int()) & 0x1f));
 9401 
 9402   format %{ "ROTLWI  $dst, $src, $lshift" %}
 9403   size(4);
 9404   ins_encode %{
 9405     __ rotlwi($dst$$Register, $src$$Register, $lshift$$constant);
 9406   %}
 9407   ins_pipe(pipe_class_default);
 9408 %}
 9409 
 9410 // Rotate Right by 8-bit immediate
 9411 instruct rotrI_reg_immi8(iRegIdst dst, iRegIsrc src, immI8 rshift, immI8 lshift) %{
 9412   match(Set dst (OrI (URShiftI src rshift) (LShiftI src lshift)));
 9413   predicate(0 == ((n->in(1)->in(2)->get_int() + n->in(2)->in(2)->get_int()) & 0x1f));
 9414 
  format %{ "ROTRWI  $dst, $src, $rshift" %}
 9416   size(4);
 9417   ins_encode %{
 9418     __ rotrwi($dst$$Register, $src$$Register, $rshift$$constant);
 9419   %}
 9420   ins_pipe(pipe_class_default);
 9421 %}
 9422 
 9423 //----------Floating Point Arithmetic Instructions-----------------------------
 9424 
 9425 // Add float single precision
 9426 instruct addF_reg_reg(regF dst, regF src1, regF src2) %{
 9427   match(Set dst (AddF src1 src2));
 9428 
 9429   format %{ "FADDS   $dst, $src1, $src2" %}
 9430   size(4);
 9431   ins_encode %{
 9432     __ fadds($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister);
 9433   %}
 9434   ins_pipe(pipe_class_default);
 9435 %}
 9436 
 9437 // Add float double precision
 9438 instruct addD_reg_reg(regD dst, regD src1, regD src2) %{
 9439   match(Set dst (AddD src1 src2));
 9440 
 9441   format %{ "FADD    $dst, $src1, $src2" %}
 9442   size(4);
 9443   ins_encode %{
 9444     __ fadd($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister);
 9445   %}
 9446   ins_pipe(pipe_class_default);
 9447 %}
 9448 
 9449 // Sub float single precision
 9450 instruct subF_reg_reg(regF dst, regF src1, regF src2) %{
 9451   match(Set dst (SubF src1 src2));
 9452 
 9453   format %{ "FSUBS   $dst, $src1, $src2" %}
 9454   size(4);
 9455   ins_encode %{
 9456     __ fsubs($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister);
 9457   %}
 9458   ins_pipe(pipe_class_default);
 9459 %}
 9460 
 9461 // Sub float double precision
 9462 instruct subD_reg_reg(regD dst, regD src1, regD src2) %{
 9463   match(Set dst (SubD src1 src2));
 9464   format %{ "FSUB    $dst, $src1, $src2" %}
 9465   size(4);
 9466   ins_encode %{
 9467     __ fsub($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister);
 9468   %}
 9469   ins_pipe(pipe_class_default);
 9470 %}
 9471 
 9472 // Mul float single precision
 9473 instruct mulF_reg_reg(regF dst, regF src1, regF src2) %{
 9474   match(Set dst (MulF src1 src2));
 9475   format %{ "FMULS   $dst, $src1, $src2" %}
 9476   size(4);
 9477   ins_encode %{
 9478     __ fmuls($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister);
 9479   %}
 9480   ins_pipe(pipe_class_default);
 9481 %}
 9482 
 9483 // Mul float double precision
 9484 instruct mulD_reg_reg(regD dst, regD src1, regD src2) %{
 9485   match(Set dst (MulD src1 src2));
 9486   format %{ "FMUL    $dst, $src1, $src2" %}
 9487   size(4);
 9488   ins_encode %{
 9489     __ fmul($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister);
 9490   %}
 9491   ins_pipe(pipe_class_default);
 9492 %}
 9493 
 9494 // Div float single precision
 9495 instruct divF_reg_reg(regF dst, regF src1, regF src2) %{
 9496   match(Set dst (DivF src1 src2));
 9497   format %{ "FDIVS   $dst, $src1, $src2" %}
 9498   size(4);
 9499   ins_encode %{
 9500     __ fdivs($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister);
 9501   %}
 9502   ins_pipe(pipe_class_default);
 9503 %}
 9504 
 9505 // Div float double precision
 9506 instruct divD_reg_reg(regD dst, regD src1, regD src2) %{
 9507   match(Set dst (DivD src1 src2));
 9508   format %{ "FDIV    $dst, $src1, $src2" %}
 9509   size(4);
 9510   ins_encode %{
 9511     __ fdiv($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister);
 9512   %}
 9513   ins_pipe(pipe_class_default);
 9514 %}
 9515 
 9516 // Absolute float single precision
 9517 instruct absF_reg(regF dst, regF src) %{
 9518   match(Set dst (AbsF src));
 9519   format %{ "FABS    $dst, $src \t// float" %}
 9520   size(4);
 9521   ins_encode %{
 9522     __ fabs($dst$$FloatRegister, $src$$FloatRegister);
 9523   %}
 9524   ins_pipe(pipe_class_default);
 9525 %}
 9526 
 9527 // Absolute float double precision
 9528 instruct absD_reg(regD dst, regD src) %{
 9529   match(Set dst (AbsD src));
 9530   format %{ "FABS    $dst, $src \t// double" %}
 9531   size(4);
 9532   ins_encode %{
 9533     __ fabs($dst$$FloatRegister, $src$$FloatRegister);
 9534   %}
 9535   ins_pipe(pipe_class_default);
 9536 %}
 9537 
 9538 instruct negF_reg(regF dst, regF src) %{
 9539   match(Set dst (NegF src));
 9540   format %{ "FNEG    $dst, $src \t// float" %}
 9541   size(4);
 9542   ins_encode %{
 9543     __ fneg($dst$$FloatRegister, $src$$FloatRegister);
 9544   %}
 9545   ins_pipe(pipe_class_default);
 9546 %}
 9547 
 9548 instruct negD_reg(regD dst, regD src) %{
 9549   match(Set dst (NegD src));
 9550   format %{ "FNEG    $dst, $src \t// double" %}
 9551   size(4);
 9552   ins_encode %{
 9553     __ fneg($dst$$FloatRegister, $src$$FloatRegister);
 9554   %}
 9555   ins_pipe(pipe_class_default);
 9556 %}
 9557 
 9558 // AbsF + NegF.
 9559 instruct negF_absF_reg(regF dst, regF src) %{
 9560   match(Set dst (NegF (AbsF src)));
 9561   format %{ "FNABS   $dst, $src \t// float" %}
 9562   size(4);
 9563   ins_encode %{
 9564     __ fnabs($dst$$FloatRegister, $src$$FloatRegister);
 9565   %}
 9566   ins_pipe(pipe_class_default);
 9567 %}
 9568 
 9569 // AbsD + NegD.
 9570 instruct negD_absD_reg(regD dst, regD src) %{
 9571   match(Set dst (NegD (AbsD src)));
 9572   format %{ "FNABS   $dst, $src \t// double" %}
 9573   size(4);
 9574   ins_encode %{
 9575     __ fnabs($dst$$FloatRegister, $src$$FloatRegister);
 9576   %}
 9577   ins_pipe(pipe_class_default);
 9578 %}
 9579 
 9580 // VM_Version::has_fsqrt() decides if this node will be used.
 9581 // Sqrt float double precision
 9582 instruct sqrtD_reg(regD dst, regD src) %{
 9583   match(Set dst (SqrtD src));
 9584   format %{ "FSQRT   $dst, $src" %}
 9585   size(4);
 9586   ins_encode %{
 9587     __ fsqrt($dst$$FloatRegister, $src$$FloatRegister);
 9588   %}
 9589   ins_pipe(pipe_class_default);
 9590 %}
 9591 
 9592 // Single-precision sqrt.
 9593 instruct sqrtF_reg(regF dst, regF src) %{
 9594   match(Set dst (SqrtF src));
 9595   predicate(VM_Version::has_fsqrts());
 9596   ins_cost(DEFAULT_COST);
 9597 
 9598   format %{ "FSQRTS  $dst, $src" %}
 9599   size(4);
 9600   ins_encode %{
 9601     __ fsqrts($dst$$FloatRegister, $src$$FloatRegister);
 9602   %}
 9603   ins_pipe(pipe_class_default);
 9604 %}
 9605 
 9606 instruct roundDouble_nop(regD dst) %{
 9607   match(Set dst (RoundDouble dst));
 9608   ins_cost(0);
 9609 
 9610   format %{ " -- \t// RoundDouble not needed - empty" %}
 9611   size(0);
 9612   // PPC results are already "rounded" (i.e., normal-format IEEE).
 9613   ins_encode( /*empty*/ );
 9614   ins_pipe(pipe_class_default);
 9615 %}
 9616 
 9617 instruct roundFloat_nop(regF dst) %{
 9618   match(Set dst (RoundFloat dst));
 9619   ins_cost(0);
 9620 
 9621   format %{ " -- \t// RoundFloat not needed - empty" %}
 9622   size(0);
 9623   // PPC results are already "rounded" (i.e., normal-format IEEE).
 9624   ins_encode( /*empty*/ );
 9625   ins_pipe(pipe_class_default);
 9626 %}
 9627 
 9628 
 9629 // Multiply-Accumulate
 9630 // src1 * src2 + src3
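// These nodes originate from the Math.fma intrinsics; fmadds/fmadd compute
// src1 * src2 + src3 fused, with a single rounding of the final result.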
 9631 instruct maddF_reg_reg(regF dst, regF src1, regF src2, regF src3) %{
 9632   match(Set dst (FmaF src3 (Binary src1 src2)));
 9633 
 9634   format %{ "FMADDS  $dst, $src1, $src2, $src3" %}
 9635   size(4);
 9636   ins_encode %{
 9637     __ fmadds($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister, $src3$$FloatRegister);
 9638   %}
 9639   ins_pipe(pipe_class_default);
 9640 %}
 9641 
 9642 // src1 * src2 + src3
 9643 instruct maddD_reg_reg(regD dst, regD src1, regD src2, regD src3) %{
 9644   match(Set dst (FmaD src3 (Binary src1 src2)));
 9645 
 9646   format %{ "FMADD   $dst, $src1, $src2, $src3" %}
 9647   size(4);
 9648   ins_encode %{
 9649     __ fmadd($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister, $src3$$FloatRegister);
 9650   %}
 9651   ins_pipe(pipe_class_default);
 9652 %}
 9653 
 9654 // -src1 * src2 + src3 = -(src1*src2-src3)
 9655 instruct mnsubF_reg_reg(regF dst, regF src1, regF src2, regF src3) %{
 9656   match(Set dst (FmaF src3 (Binary (NegF src1) src2)));
 9657   match(Set dst (FmaF src3 (Binary src1 (NegF src2))));
 9658 
 9659   format %{ "FNMSUBS $dst, $src1, $src2, $src3" %}
 9660   size(4);
 9661   ins_encode %{
 9662     __ fnmsubs($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister, $src3$$FloatRegister);
 9663   %}
 9664   ins_pipe(pipe_class_default);
 9665 %}
 9666 
 9667 // -src1 * src2 + src3 = -(src1*src2-src3)
 9668 instruct mnsubD_reg_reg(regD dst, regD src1, regD src2, regD src3) %{
 9669   match(Set dst (FmaD src3 (Binary (NegD src1) src2)));
 9670   match(Set dst (FmaD src3 (Binary src1 (NegD src2))));
 9671 
 9672   format %{ "FNMSUB  $dst, $src1, $src2, $src3" %}
 9673   size(4);
 9674   ins_encode %{
 9675     __ fnmsub($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister, $src3$$FloatRegister);
 9676   %}
 9677   ins_pipe(pipe_class_default);
 9678 %}
 9679 
 9680 // -src1 * src2 - src3 = -(src1*src2+src3)
 9681 instruct mnaddF_reg_reg(regF dst, regF src1, regF src2, regF src3) %{
 9682   match(Set dst (FmaF (NegF src3) (Binary (NegF src1) src2)));
 9683   match(Set dst (FmaF (NegF src3) (Binary src1 (NegF src2))));
 9684 
 9685   format %{ "FNMADDS $dst, $src1, $src2, $src3" %}
 9686   size(4);
 9687   ins_encode %{
 9688     __ fnmadds($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister, $src3$$FloatRegister);
 9689   %}
 9690   ins_pipe(pipe_class_default);
 9691 %}
 9692 
 9693 // -src1 * src2 - src3 = -(src1*src2+src3)
 9694 instruct mnaddD_reg_reg(regD dst, regD src1, regD src2, regD src3) %{
 9695   match(Set dst (FmaD (NegD src3) (Binary (NegD src1) src2)));
 9696   match(Set dst (FmaD (NegD src3) (Binary src1 (NegD src2))));
 9697 
 9698   format %{ "FNMADD  $dst, $src1, $src2, $src3" %}
 9699   size(4);
 9700   ins_encode %{
 9701     __ fnmadd($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister, $src3$$FloatRegister);
 9702   %}
 9703   ins_pipe(pipe_class_default);
 9704 %}
 9705 
 9706 // src1 * src2 - src3
 9707 instruct msubF_reg_reg(regF dst, regF src1, regF src2, regF src3) %{
 9708   match(Set dst (FmaF (NegF src3) (Binary src1 src2)));
 9709 
 9710   format %{ "FMSUBS  $dst, $src1, $src2, $src3" %}
 9711   size(4);
 9712   ins_encode %{
 9713     __ fmsubs($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister, $src3$$FloatRegister);
 9714   %}
 9715   ins_pipe(pipe_class_default);
 9716 %}
 9717 
 9718 // src1 * src2 - src3
 9719 instruct msubD_reg_reg(regD dst, regD src1, regD src2, regD src3) %{
 9720   match(Set dst (FmaD (NegD src3) (Binary src1 src2)));
 9721 
 9722   format %{ "FMSUB   $dst, $src1, $src2, $src3" %}
 9723   size(4);
 9724   ins_encode %{
 9725     __ fmsub($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister, $src3$$FloatRegister);
 9726   %}
 9727   ins_pipe(pipe_class_default);
 9728 %}
 9729 
 9730 
 9731 //----------Logical Instructions-----------------------------------------------
 9732 
 9733 // And Instructions
 9734 
 9735 // Register And
 9736 instruct andI_reg_reg(iRegIdst dst, iRegIsrc src1, iRegIsrc src2) %{
 9737   match(Set dst (AndI src1 src2));
 9738   format %{ "AND     $dst, $src1, $src2" %}
 9739   size(4);
 9740   ins_encode %{
 9741     __ andr($dst$$Register, $src1$$Register, $src2$$Register);
 9742   %}
 9743   ins_pipe(pipe_class_default);
 9744 %}
 9745 
 9746 // Left shifted Immediate And
 9747 instruct andI_reg_immIhi16(iRegIdst dst, iRegIsrc src1, immIhi16  src2, flagsRegCR0 cr0) %{
 9748   match(Set dst (AndI src1 src2));
 9749   effect(KILL cr0);
 9750   format %{ "ANDIS   $dst, $src1, $src2.hi" %}
 9751   size(4);
 9752   ins_encode %{
 9753     __ andis_($dst$$Register, $src1$$Register, (int)((unsigned short)(($src2$$constant & 0xFFFF0000) >> 16)));
 9754   %}
 9755   ins_pipe(pipe_class_default);
 9756 %}
 9757 
 9758 // Immediate And
 9759 instruct andI_reg_uimm16(iRegIdst dst, iRegIsrc src1, uimmI16 src2, flagsRegCR0 cr0) %{
 9760   match(Set dst (AndI src1 src2));
 9761   effect(KILL cr0);
 9762 
 9763   format %{ "ANDI    $dst, $src1, $src2" %}
 9764   size(4);
 9765   ins_encode %{
 9766     // FIXME: avoid andi_ ?
 9767     __ andi_($dst$$Register, $src1$$Register, $src2$$constant);
 9768   %}
 9769   ins_pipe(pipe_class_default);
 9770 %}
 9771 
 9772 // Immediate And where the immediate is a negative power of 2.
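// Example: src2 == -8 (0xFFFFFFF8) gives log2i_exact(8) == 3 and clrrdi(dst, src1, 3),
// which clears the low 3 bits just like the AND would.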
 9773 instruct andI_reg_immInegpow2(iRegIdst dst, iRegIsrc src1, immInegpow2 src2) %{
 9774   match(Set dst (AndI src1 src2));
 9775   format %{ "ANDWI   $dst, $src1, $src2" %}
 9776   size(4);
 9777   ins_encode %{
 9778     __ clrrdi($dst$$Register, $src1$$Register, log2i_exact(-(juint)$src2$$constant));
 9779   %}
 9780   ins_pipe(pipe_class_default);
 9781 %}
 9782 
 9783 instruct andI_reg_immIpow2minus1(iRegIdst dst, iRegIsrc src1, immIpow2minus1 src2) %{
 9784   match(Set dst (AndI src1 src2));
 9785   format %{ "ANDWI   $dst, $src1, $src2" %}
 9786   size(4);
 9787   ins_encode %{
 9788     __ clrldi($dst$$Register, $src1$$Register, 64 - log2i_exact((juint)$src2$$constant + 1u));
 9789   %}
 9790   ins_pipe(pipe_class_default);
 9791 %}
 9792 
 9793 instruct andI_reg_immIpowerOf2(iRegIdst dst, iRegIsrc src1, immIpowerOf2 src2) %{
 9794   match(Set dst (AndI src1 src2));
 9795   predicate(UseRotateAndMaskInstructionsPPC64);
 9796   format %{ "ANDWI   $dst, $src1, $src2" %}
 9797   size(4);
 9798   ins_encode %{
 9799     int bitpos = 31 - log2i_exact((juint)$src2$$constant);
 9800     __ rlwinm($dst$$Register, $src1$$Register, 0, bitpos, bitpos);
 9801   %}
 9802   ins_pipe(pipe_class_default);
 9803 %}
 9804 
 9805 // Register And Long
 9806 instruct andL_reg_reg(iRegLdst dst, iRegLsrc src1, iRegLsrc src2) %{
 9807   match(Set dst (AndL src1 src2));
 9808   ins_cost(DEFAULT_COST);
 9809 
 9810   format %{ "AND     $dst, $src1, $src2 \t// long" %}
 9811   size(4);
 9812   ins_encode %{
 9813     __ andr($dst$$Register, $src1$$Register, $src2$$Register);
 9814   %}
 9815   ins_pipe(pipe_class_default);
 9816 %}
 9817 
 9818 // Immediate And long
 9819 instruct andL_reg_uimm16(iRegLdst dst, iRegLsrc src1, uimmL16 src2, flagsRegCR0 cr0) %{
 9820   match(Set dst (AndL src1 src2));
 9821   effect(KILL cr0);
 9822 
 9823   format %{ "ANDI    $dst, $src1, $src2 \t// long" %}
 9824   size(4);
 9825   ins_encode %{
 9826     // FIXME: avoid andi_ ?
 9827     __ andi_($dst$$Register, $src1$$Register, $src2$$constant);
 9828   %}
 9829   ins_pipe(pipe_class_default);
 9830 %}
 9831 
 9832 // Immediate And Long where the immediate is a negative power of 2.
 9833 instruct andL_reg_immLnegpow2(iRegLdst dst, iRegLsrc src1, immLnegpow2 src2) %{
 9834   match(Set dst (AndL src1 src2));
 9835   format %{ "ANDDI   $dst, $src1, $src2" %}
 9836   size(4);
 9837   ins_encode %{
 9838     __ clrrdi($dst$$Register, $src1$$Register, log2i_exact(-(julong)$src2$$constant));
 9839   %}
 9840   ins_pipe(pipe_class_default);
 9841 %}
 9842 
 9843 instruct andL_reg_immLpow2minus1(iRegLdst dst, iRegLsrc src1, immLpow2minus1 src2) %{
 9844   match(Set dst (AndL src1 src2));
 9845   format %{ "ANDDI   $dst, $src1, $src2" %}
 9846   size(4);
 9847   ins_encode %{
 9848     __ clrldi($dst$$Register, $src1$$Register, 64 - log2i_exact((julong)$src2$$constant + 1ull));
 9849   %}
 9850   ins_pipe(pipe_class_default);
 9851 %}
 9852 
 9853 // AndL + ConvL2I.
 9854 instruct convL2I_andL_reg_immLpow2minus1(iRegIdst dst, iRegLsrc src1, immLpow2minus1 src2) %{
 9855   match(Set dst (ConvL2I (AndL src1 src2)));
 9856   ins_cost(DEFAULT_COST);
 9857 
 9858   format %{ "ANDDI   $dst, $src1, $src2 \t// long + l2i" %}
 9859   size(4);
 9860   ins_encode %{
 9861     __ clrldi($dst$$Register, $src1$$Register, 64 - log2i_exact((julong)$src2$$constant + 1ull));
 9862   %}
 9863   ins_pipe(pipe_class_default);
 9864 %}
 9865 
 9866 // Or Instructions
 9867 
 9868 // Register Or
 9869 instruct orI_reg_reg(iRegIdst dst, iRegIsrc src1, iRegIsrc src2) %{
 9870   match(Set dst (OrI src1 src2));
 9871   format %{ "OR      $dst, $src1, $src2" %}
 9872   size(4);
 9873   ins_encode %{
 9874     __ or_unchecked($dst$$Register, $src1$$Register, $src2$$Register);
 9875   %}
 9876   ins_pipe(pipe_class_default);
 9877 %}
 9878 
// Variant without a match rule: ADLC does not accept orI_reg_reg inside the
// expand of tree_orI_orI_orI_reg_reg_Ex below.
 9880 instruct orI_reg_reg_2(iRegIdst dst, iRegIsrc src1, iRegIsrc src2) %{
 9881   // no match-rule
 9882   effect(DEF dst, USE src1, USE src2);
 9883   format %{ "OR      $dst, $src1, $src2" %}
 9884   size(4);
 9885   ins_encode %{
 9886     __ or_unchecked($dst$$Register, $src1$$Register, $src2$$Register);
 9887   %}
 9888   ins_pipe(pipe_class_default);
 9889 %}
 9890 
 9891 instruct tree_orI_orI_orI_reg_reg_Ex(iRegIdst dst, iRegIsrc src1, iRegIsrc src2, iRegIsrc src3, iRegIsrc src4) %{
 9892   match(Set dst (OrI (OrI (OrI src1 src2) src3) src4));
 9893   ins_cost(DEFAULT_COST*3);
 9894 
 9895   expand %{
 9896     // FIXME: we should do this in the ideal world.
 9897     iRegIdst tmp1;
 9898     iRegIdst tmp2;
 9899     orI_reg_reg(tmp1, src1, src2);
 9900     orI_reg_reg_2(tmp2, src3, src4); // Adlc complains about orI_reg_reg.
 9901     orI_reg_reg(dst, tmp1, tmp2);
 9902   %}
 9903 %}
 9904 
 9905 // Immediate Or
 9906 instruct orI_reg_uimm16(iRegIdst dst, iRegIsrc src1, uimmI16 src2) %{
 9907   match(Set dst (OrI src1 src2));
 9908   format %{ "ORI     $dst, $src1, $src2" %}
 9909   size(4);
 9910   ins_encode %{
 9911     __ ori($dst$$Register, $src1$$Register, ($src2$$constant) & 0xFFFF);
 9912   %}
 9913   ins_pipe(pipe_class_default);
 9914 %}
 9915 
 9916 // Register Or Long
 9917 instruct orL_reg_reg(iRegLdst dst, iRegLsrc src1, iRegLsrc src2) %{
 9918   match(Set dst (OrL src1 src2));
 9919   ins_cost(DEFAULT_COST);
 9920 
 9921   size(4);
 9922   format %{ "OR      $dst, $src1, $src2 \t// long" %}
 9923   ins_encode %{
 9924     __ or_unchecked($dst$$Register, $src1$$Register, $src2$$Register);
 9925   %}
 9926   ins_pipe(pipe_class_default);
 9927 %}
 9928 
 9929 // OrL + ConvL2I.
 9930 instruct orI_regL_regL(iRegIdst dst, iRegLsrc src1, iRegLsrc src2) %{
 9931   match(Set dst (ConvL2I (OrL src1 src2)));
 9932   ins_cost(DEFAULT_COST);
 9933 
 9934   format %{ "OR      $dst, $src1, $src2 \t// long + l2i" %}
 9935   size(4);
 9936   ins_encode %{
 9937     __ or_unchecked($dst$$Register, $src1$$Register, $src2$$Register);
 9938   %}
 9939   ins_pipe(pipe_class_default);
 9940 %}
 9941 
 9942 // Immediate Or long
 9943 instruct orL_reg_uimm16(iRegLdst dst, iRegLsrc src1, uimmL16 con) %{
 9944   match(Set dst (OrL src1 con));
 9945   ins_cost(DEFAULT_COST);
 9946 
 9947   format %{ "ORI     $dst, $src1, $con \t// long" %}
 9948   size(4);
 9949   ins_encode %{
 9950     __ ori($dst$$Register, $src1$$Register, ($con$$constant) & 0xFFFF);
 9951   %}
 9952   ins_pipe(pipe_class_default);
 9953 %}
 9954 
 9955 // Xor Instructions
 9956 
 9957 // Register Xor
 9958 instruct xorI_reg_reg(iRegIdst dst, iRegIsrc src1, iRegIsrc src2) %{
 9959   match(Set dst (XorI src1 src2));
 9960   format %{ "XOR     $dst, $src1, $src2" %}
 9961   size(4);
 9962   ins_encode %{
 9963     __ xorr($dst$$Register, $src1$$Register, $src2$$Register);
 9964   %}
 9965   ins_pipe(pipe_class_default);
 9966 %}
 9967 
// Variant without a match rule: ADLC does not accept xorI_reg_reg inside the
// expand of tree_xorI_xorI_xorI_reg_reg_Ex below.
 9969 instruct xorI_reg_reg_2(iRegIdst dst, iRegIsrc src1, iRegIsrc src2) %{
 9970   // no match-rule
 9971   effect(DEF dst, USE src1, USE src2);
 9972   format %{ "XOR     $dst, $src1, $src2" %}
 9973   size(4);
 9974   ins_encode %{
 9975     __ xorr($dst$$Register, $src1$$Register, $src2$$Register);
 9976   %}
 9977   ins_pipe(pipe_class_default);
 9978 %}
 9979 
 9980 instruct tree_xorI_xorI_xorI_reg_reg_Ex(iRegIdst dst, iRegIsrc src1, iRegIsrc src2, iRegIsrc src3, iRegIsrc src4) %{
 9981   match(Set dst (XorI (XorI (XorI src1 src2) src3) src4));
 9982   ins_cost(DEFAULT_COST*3);
 9983 
 9984   expand %{
 9985     // FIXME: we should do this in the ideal world.
 9986     iRegIdst tmp1;
 9987     iRegIdst tmp2;
 9988     xorI_reg_reg(tmp1, src1, src2);
 9989     xorI_reg_reg_2(tmp2, src3, src4); // Adlc complains about xorI_reg_reg.
 9990     xorI_reg_reg(dst, tmp1, tmp2);
 9991   %}
 9992 %}
 9993 
 9994 // Immediate Xor
 9995 instruct xorI_reg_uimm16(iRegIdst dst, iRegIsrc src1, uimmI16 src2) %{
 9996   match(Set dst (XorI src1 src2));
 9997   format %{ "XORI    $dst, $src1, $src2" %}
 9998   size(4);
 9999   ins_encode %{
10000     __ xori($dst$$Register, $src1$$Register, $src2$$constant);
10001   %}
10002   ins_pipe(pipe_class_default);
10003 %}
10004 
10005 // Register Xor Long
10006 instruct xorL_reg_reg(iRegLdst dst, iRegLsrc src1, iRegLsrc src2) %{
10007   match(Set dst (XorL src1 src2));
10008   ins_cost(DEFAULT_COST);
10009 
10010   format %{ "XOR     $dst, $src1, $src2 \t// long" %}
10011   size(4);
10012   ins_encode %{
10013     __ xorr($dst$$Register, $src1$$Register, $src2$$Register);
10014   %}
10015   ins_pipe(pipe_class_default);
10016 %}
10017 
10018 // XorL + ConvL2I.
10019 instruct xorI_regL_regL(iRegIdst dst, iRegLsrc src1, iRegLsrc src2) %{
10020   match(Set dst (ConvL2I (XorL src1 src2)));
10021   ins_cost(DEFAULT_COST);
10022 
10023   format %{ "XOR     $dst, $src1, $src2 \t// long + l2i" %}
10024   size(4);
10025   ins_encode %{
10026     __ xorr($dst$$Register, $src1$$Register, $src2$$Register);
10027   %}
10028   ins_pipe(pipe_class_default);
10029 %}
10030 
10031 // Immediate Xor Long
10032 instruct xorL_reg_uimm16(iRegLdst dst, iRegLsrc src1, uimmL16 src2) %{
10033   match(Set dst (XorL src1 src2));
10034   ins_cost(DEFAULT_COST);
10035 
10036   format %{ "XORI    $dst, $src1, $src2 \t// long" %}
10037   size(4);
10038   ins_encode %{
10039     __ xori($dst$$Register, $src1$$Register, $src2$$constant);
10040   %}
10041   ins_pipe(pipe_class_default);
10042 %}
10043 
10044 instruct notI_reg(iRegIdst dst, iRegIsrc src1, immI_minus1 src2) %{
10045   match(Set dst (XorI src1 src2));
10046   ins_cost(DEFAULT_COST);
10047 
10048   format %{ "NOT     $dst, $src1 ($src2)" %}
10049   size(4);
10050   ins_encode %{
10051     __ nor($dst$$Register, $src1$$Register, $src1$$Register);
10052   %}
10053   ins_pipe(pipe_class_default);
10054 %}
10055 
10056 instruct notL_reg(iRegLdst dst, iRegLsrc src1, immL_minus1 src2) %{
10057   match(Set dst (XorL src1 src2));
10058   ins_cost(DEFAULT_COST);
10059 
10060   format %{ "NOT     $dst, $src1 ($src2) \t// long" %}
10061   size(4);
10062   ins_encode %{
10063     __ nor($dst$$Register, $src1$$Register, $src1$$Register);
10064   %}
10065   ins_pipe(pipe_class_default);
10066 %}
10067 
10068 // And-complement
10069 instruct andcI_reg_reg(iRegIdst dst, iRegIsrc src1, immI_minus1 src2, iRegIsrc src3) %{
10070   match(Set dst (AndI (XorI src1 src2) src3));
10071   ins_cost(DEFAULT_COST);
10072 
10073   format %{ "ANDW    $dst, xori($src1, $src2), $src3" %}
10074   size(4);
10075   ins_encode( enc_andc(dst, src3, src1) );
10076   ins_pipe(pipe_class_default);
10077 %}
10078 
10079 // And-complement
10080 instruct andcL_reg_reg(iRegLdst dst, iRegLsrc src1, iRegLsrc src2) %{
10081   // no match-rule, false predicate
10082   effect(DEF dst, USE src1, USE src2);
10083   predicate(false);
10084 
10085   format %{ "ANDC    $dst, $src1, $src2" %}
10086   size(4);
10087   ins_encode %{
10088     __ andc($dst$$Register, $src1$$Register, $src2$$Register);
10089   %}
10090   ins_pipe(pipe_class_default);
10091 %}
10092 
10093 //----------Moves between int/long and float/double----------------------------
10094 //
10095 // The following rules move values from int/long registers/stack-locations
10096 // to float/double registers/stack-locations and vice versa, without doing any
10097 // conversions. These rules are used to implement the bit-conversion methods
10098 // of java.lang.Float etc., e.g.
10099 //   int   floatToIntBits(float value)
10100 //   float intBitsToFloat(int bits)
10101 //
10102 // Notes on the implementation on ppc64:
10103 // For Power7 and earlier, the rules are limited to those which move between a
10104 // register and a stack-location, because we always have to go through memory
10105 // when moving between a float register and an integer register.
10106 // This restriction is removed in Power8 with the introduction of the mtfprd
10107 // and mffprd instructions.
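// Example: without mtfprd/mffprd, Float.floatToRawIntBits goes through a stack
// slot (STFS in moveF2I_reg_stack, then LWZ in moveF2I_stack_reg below); with
// them the transfer can stay in registers.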
10108 
10109 instruct moveL2D_reg(regD dst, iRegLsrc src) %{
10110   match(Set dst (MoveL2D src));
10111   predicate(VM_Version::has_mtfprd());
10112 
10113   format %{ "MTFPRD  $dst, $src" %}
10114   size(4);
10115   ins_encode %{
10116     __ mtfprd($dst$$FloatRegister, $src$$Register);
10117   %}
10118   ins_pipe(pipe_class_default);
10119 %}
10120 
10121 instruct moveI2D_reg(regD dst, iRegIsrc src) %{
10122   // no match-rule, false predicate
10123   effect(DEF dst, USE src);
10124   predicate(false);
10125 
10126   format %{ "MTFPRWA $dst, $src" %}
10127   size(4);
10128   ins_encode %{
10129     __ mtfprwa($dst$$FloatRegister, $src$$Register);
10130   %}
10131   ins_pipe(pipe_class_default);
10132 %}
10133 
10134 //---------- Chain stack slots between similar types --------
10135 
10136 // These are needed so that the rules below can match.
10137 
10138 // Load integer from stack slot
10139 instruct stkI_to_regI(iRegIdst dst, stackSlotI src) %{
10140   match(Set dst src);
10141   ins_cost(MEMORY_REF_COST);
10142 
10143   format %{ "LWZ     $dst, $src" %}
10144   size(4);
10145   ins_encode( enc_lwz(dst, src) );
10146   ins_pipe(pipe_class_memory);
10147 %}
10148 
10149 // Store integer to stack slot
10150 instruct regI_to_stkI(stackSlotI dst, iRegIsrc src) %{
10151   match(Set dst src);
10152   ins_cost(MEMORY_REF_COST);
10153 
10154   format %{ "STW     $src, $dst \t// stk" %}
10155   size(4);
10156   ins_encode( enc_stw(src, dst) ); // rs=rt
10157   ins_pipe(pipe_class_memory);
10158 %}
10159 
10160 // Load long from stack slot
10161 instruct stkL_to_regL(iRegLdst dst, stackSlotL src) %{
10162   match(Set dst src);
10163   ins_cost(MEMORY_REF_COST);
10164 
10165   format %{ "LD      $dst, $src \t// long" %}
10166   size(4);
10167   ins_encode( enc_ld(dst, src) );
10168   ins_pipe(pipe_class_memory);
10169 %}
10170 
10171 // Store long to stack slot
10172 instruct regL_to_stkL(stackSlotL dst, iRegLsrc src) %{
10173   match(Set dst src);
10174   ins_cost(MEMORY_REF_COST);
10175 
10176   format %{ "STD     $src, $dst \t// long" %}
10177   size(4);
10178   ins_encode( enc_std(src, dst) ); // rs=rt
10179   ins_pipe(pipe_class_memory);
10180 %}
10181 
10182 //----------Moves between int and float
10183 
10184 // Move float value from float stack-location to integer register.
10185 instruct moveF2I_stack_reg(iRegIdst dst, stackSlotF src) %{
10186   match(Set dst (MoveF2I src));
10187   ins_cost(MEMORY_REF_COST);
10188 
10189   format %{ "LWZ     $dst, $src \t// MoveF2I" %}
10190   size(4);
10191   ins_encode( enc_lwz(dst, src) );
10192   ins_pipe(pipe_class_memory);
10193 %}
10194 
10195 // Move float value from float register to integer stack-location.
10196 instruct moveF2I_reg_stack(stackSlotI dst, regF src) %{
10197   match(Set dst (MoveF2I src));
10198   ins_cost(MEMORY_REF_COST);
10199 
10200   format %{ "STFS    $src, $dst \t// MoveF2I" %}
10201   size(4);
10202   ins_encode( enc_stfs(src, dst) );
10203   ins_pipe(pipe_class_memory);
10204 %}
10205 
10206 // Move integer value from integer stack-location to float register.
10207 instruct moveI2F_stack_reg(regF dst, stackSlotI src) %{
10208   match(Set dst (MoveI2F src));
10209   ins_cost(MEMORY_REF_COST);
10210 
10211   format %{ "LFS     $dst, $src \t// MoveI2F" %}
10212   size(4);
10213   ins_encode %{
10214     int Idisp = $src$$disp + frame_slots_bias($src$$base, ra_);
10215     __ lfs($dst$$FloatRegister, Idisp, $src$$base$$Register);
10216   %}
10217   ins_pipe(pipe_class_memory);
10218 %}
10219 
10220 // Move integer value from integer register to float stack-location.
10221 instruct moveI2F_reg_stack(stackSlotF dst, iRegIsrc src) %{
10222   match(Set dst (MoveI2F src));
10223   ins_cost(MEMORY_REF_COST);
10224 
10225   format %{ "STW     $src, $dst \t// MoveI2F" %}
10226   size(4);
10227   ins_encode( enc_stw(src, dst) );
10228   ins_pipe(pipe_class_memory);
10229 %}
10230 
10231 //----------Moves between long and float
10232 
10233 instruct moveF2L_reg_stack(stackSlotL dst, regF src) %{
10234   // no match-rule, false predicate
10235   effect(DEF dst, USE src);
10236   predicate(false);
10237 
  format %{ "STFD    $src, $dst \t// STACK" %}
10239   size(4);
10240   ins_encode( enc_stfd(src, dst) );
10241   ins_pipe(pipe_class_default);
10242 %}
10243 
10244 //----------Moves between long and double
10245 
10246 // Move double value from double stack-location to long register.
10247 instruct moveD2L_stack_reg(iRegLdst dst, stackSlotD src) %{
10248   match(Set dst (MoveD2L src));
10249   ins_cost(MEMORY_REF_COST);
10250   size(4);
10251   format %{ "LD      $dst, $src \t// MoveD2L" %}
10252   ins_encode( enc_ld(dst, src) );
10253   ins_pipe(pipe_class_memory);
10254 %}
10255 
10256 // Move double value from double register to long stack-location.
10257 instruct moveD2L_reg_stack(stackSlotL dst, regD src) %{
10258   match(Set dst (MoveD2L src));
10259   effect(DEF dst, USE src);
10260   ins_cost(MEMORY_REF_COST);
10261 
10262   format %{ "STFD    $src, $dst \t// MoveD2L" %}
10263   size(4);
10264   ins_encode( enc_stfd(src, dst) );
10265   ins_pipe(pipe_class_memory);
10266 %}
10267 
10268 // Move long value from long stack-location to double register.
10269 instruct moveL2D_stack_reg(regD dst, stackSlotL src) %{
10270   match(Set dst (MoveL2D src));
10271   ins_cost(MEMORY_REF_COST);
10272 
10273   format %{ "LFD     $dst, $src \t// MoveL2D" %}
10274   size(4);
10275   ins_encode( enc_lfd(dst, src) );
10276   ins_pipe(pipe_class_memory);
10277 %}
10278 
10279 // Move long value from long register to double stack-location.
10280 instruct moveL2D_reg_stack(stackSlotD dst, iRegLsrc src) %{
10281   match(Set dst (MoveL2D src));
10282   ins_cost(MEMORY_REF_COST);
10283 
10284   format %{ "STD     $src, $dst \t// MoveL2D" %}
10285   size(4);
10286   ins_encode( enc_std(src, dst) );
10287   ins_pipe(pipe_class_memory);
10288 %}
10289 
10290 //----------Register Move Instructions-----------------------------------------
10291 
10292 // Replicate for Superword
10293 
10294 instruct moveReg(iRegLdst dst, iRegIsrc src) %{
10295   predicate(false);
10296   effect(DEF dst, USE src);
10297 
10298   format %{ "MR      $dst, $src \t// replicate " %}
10299   // variable size, 0 or 4.
10300   ins_encode %{
10301     __ mr_if_needed($dst$$Register, $src$$Register);
10302   %}
10303   ins_pipe(pipe_class_default);
10304 %}
10305 
10306 //----------Cast instructions (Java-level type cast)---------------------------
10307 
10308 // Cast Long to Pointer for unsafe natives.
10309 instruct castX2P(iRegPdst dst, iRegLsrc src) %{
10310   match(Set dst (CastX2P src));
10311 
10312   format %{ "MR      $dst, $src \t// Long->Ptr" %}
10313   // variable size, 0 or 4.
10314   ins_encode %{
10315     __ mr_if_needed($dst$$Register, $src$$Register);
10316   %}
  ins_pipe(pipe_class_default);
10318 %}
10319 
10320 // Cast Pointer to Long for unsafe natives.
10321 instruct castP2X(iRegLdst dst, iRegP_N2P src) %{
10322   match(Set dst (CastP2X src));
10323 
10324   format %{ "MR      $dst, $src \t// Ptr->Long" %}
10325   // variable size, 0 or 4.
10326   ins_encode %{
10327     __ mr_if_needed($dst$$Register, $src$$Register);
10328   %}
10329   ins_pipe(pipe_class_default);
10330 %}
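
// The casts below only adjust the ideal type of a value that stays in the same
// register (they match on $dst itself), so they emit no code (size 0).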
10331 
10332 instruct castPP(iRegPdst dst) %{
10333   match(Set dst (CastPP dst));
10334   format %{ " -- \t// castPP of $dst" %}
10335   size(0);
10336   ins_encode( /*empty*/ );
10337   ins_pipe(pipe_class_default);
10338 %}
10339 
10340 instruct castII(iRegIdst dst) %{
10341   match(Set dst (CastII dst));
10342   format %{ " -- \t// castII of $dst" %}
10343   size(0);
10344   ins_encode( /*empty*/ );
10345   ins_pipe(pipe_class_default);
10346 %}
10347 
10348 instruct castLL(iRegLdst dst) %{
10349   match(Set dst (CastLL dst));
10350   format %{ " -- \t// castLL of $dst" %}
10351   size(0);
10352   ins_encode( /*empty*/ );
10353   ins_pipe(pipe_class_default);
10354 %}
10355 
10356 instruct castFF(regF dst) %{
10357   match(Set dst (CastFF dst));
10358   format %{ " -- \t// castFF of $dst" %}
10359   size(0);
10360   ins_encode( /*empty*/ );
10361   ins_pipe(pipe_class_default);
10362 %}
10363 
10364 instruct castDD(regD dst) %{
10365   match(Set dst (CastDD dst));
10366   format %{ " -- \t// castDD of $dst" %}
10367   size(0);
10368   ins_encode( /*empty*/ );
10369   ins_pipe(pipe_class_default);
10370 %}
10371 
10372 instruct castVV8(iRegLdst dst) %{
10373   match(Set dst (CastVV dst));
10374   format %{ " -- \t// castVV of $dst" %}
10375   size(0);
10376   ins_encode( /*empty*/ );
10377   ins_pipe(pipe_class_default);
10378 %}
10379 
10380 instruct castVV16(vecX dst) %{
10381   match(Set dst (CastVV dst));
10382   format %{ " -- \t// castVV of $dst" %}
10383   size(0);
10384   ins_encode( /*empty*/ );
10385   ins_pipe(pipe_class_default);
10386 %}
10387 
10388 instruct checkCastPP(iRegPdst dst) %{
10389   match(Set dst (CheckCastPP dst));
10390   format %{ " -- \t// checkcastPP of $dst" %}
10391   size(0);
10392   ins_encode( /*empty*/ );
10393   ins_pipe(pipe_class_default);
10394 %}
10395 
10396 //----------Convert instructions-----------------------------------------------
10397 
10398 // Convert to boolean.
10399 
10400 // int_to_bool(src) : { 1   if src != 0
10401 //                    { 0   else
10402 //
10403 // strategy:
10404 // 1) Count leading zeros of 32 bit-value src,
10405 //    this returns 32 (0b10.0000) iff src == 0 and <32 otherwise.
10406 // 2) Shift 5 bits to the right, result is 0b1 iff src == 0, 0b0 otherwise.
10407 // 3) Xori the result to get 0b1 if src != 0 and 0b0 if src == 0.
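//
// For illustration: src = 6 (0b110) -> cntlzw = 29, 29 >> 5 = 0, 0 ^ 1 = 1;
//                   src = 0         -> cntlzw = 32, 32 >> 5 = 1, 1 ^ 1 = 0.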
10408 
10409 // convI2Bool
10410 instruct convI2Bool_reg__cntlz_Ex(iRegIdst dst, iRegIsrc src) %{
10411   match(Set dst (Conv2B src));
10412   predicate(UseCountLeadingZerosInstructionsPPC64);
10413   ins_cost(DEFAULT_COST);
10414 
10415   expand %{
10416     immI shiftAmount %{ 0x5 %}
10417     uimmI16 mask %{ 0x1 %}
10418     iRegIdst tmp1;
10419     iRegIdst tmp2;
10420     countLeadingZerosI(tmp1, src);
10421     urShiftI_reg_imm(tmp2, tmp1, shiftAmount);
10422     xorI_reg_uimm16(dst, tmp2, mask);
10423   %}
10424 %}
10425 
10426 instruct convI2Bool_reg__cmove(iRegIdst dst, iRegIsrc src, flagsReg crx) %{
10427   match(Set dst (Conv2B src));
10428   effect(TEMP crx);
10429   predicate(!UseCountLeadingZerosInstructionsPPC64);
10430   ins_cost(DEFAULT_COST);
10431 
  format %{ "CMPWI   $crx, $src, #0 \t// convI2B\n\t"
10433             "LI      $dst, #0\n\t"
10434             "BEQ     $crx, done\n\t"
10435             "LI      $dst, #1\n"
10436             "done:" %}
10437   size(16);
10438   ins_encode( enc_convI2B_regI__cmove(dst, src, crx, 0x0, 0x1) );
10439   ins_pipe(pipe_class_compare);
10440 %}
10441 
10442 // ConvI2B + XorI
10443 instruct xorI_convI2Bool_reg_immIvalue1__cntlz_Ex(iRegIdst dst, iRegIsrc src, immI_1 mask) %{
10444   match(Set dst (XorI (Conv2B src) mask));
10445   predicate(UseCountLeadingZerosInstructionsPPC64);
10446   ins_cost(DEFAULT_COST);
10447 
10448   expand %{
10449     immI shiftAmount %{ 0x5 %}
10450     iRegIdst tmp1;
10451     countLeadingZerosI(tmp1, src);
10452     urShiftI_reg_imm(dst, tmp1, shiftAmount);
10453   %}
10454 %}
10455 
10456 instruct xorI_convI2Bool_reg_immIvalue1__cmove(iRegIdst dst, iRegIsrc src, flagsReg crx, immI_1 mask) %{
10457   match(Set dst (XorI (Conv2B src) mask));
10458   effect(TEMP crx);
10459   predicate(!UseCountLeadingZerosInstructionsPPC64);
10460   ins_cost(DEFAULT_COST);
10461 
  format %{ "CMPWI   $crx, $src, #0 \t// Xor(convI2B($src), $mask)\n\t"
10463             "LI      $dst, #1\n\t"
10464             "BEQ     $crx, done\n\t"
10465             "LI      $dst, #0\n"
10466             "done:" %}
10467   size(16);
10468   ins_encode( enc_convI2B_regI__cmove(dst, src, crx, 0x1, 0x0) );
10469   ins_pipe(pipe_class_compare);
10470 %}
10471 
10472 // AndI 0b0..010..0 + ConvI2B
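// The power-of-2 bit selected by $mask is rotated into the least significant bit
// position and all other bits are masked off, so the result is 1 if that bit of
// $src is set and 0 otherwise. For illustration: mask = 0x10 -> log2 = 4 ->
// rotate left by 32 - 4 = 28.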
10473 instruct convI2Bool_andI_reg_immIpowerOf2(iRegIdst dst, iRegIsrc src, immIpowerOf2 mask) %{
10474   match(Set dst (Conv2B (AndI src mask)));
10475   predicate(UseRotateAndMaskInstructionsPPC64);
10476   ins_cost(DEFAULT_COST);
10477 
10478   format %{ "RLWINM  $dst, $src, $mask \t// convI2B(AndI($src, $mask))" %}
10479   size(4);
10480   ins_encode %{
10481     __ rlwinm($dst$$Register, $src$$Register, 32 - log2i_exact((juint)($mask$$constant)), 31, 31);
10482   %}
10483   ins_pipe(pipe_class_default);
10484 %}
10485 
10486 // Convert pointer to boolean.
10487 //
10488 // ptr_to_bool(src) : { 1   if src != 0
10489 //                    { 0   else
10490 //
10491 // strategy:
10492 // 1) Count leading zeros of 64 bit-value src,
10493 //    this returns 64 (0b100.0000) iff src == 0 and <64 otherwise.
10494 // 2) Shift 6 bits to the right, result is 0b1 iff src == 0, 0b0 otherwise.
10495 // 3) Xori the result to get 0b1 if src != 0 and 0b0 if src == 0.
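//
// For illustration: src = 0x1000 -> cntlzd = 51, 51 >> 6 = 0, 0 ^ 1 = 1;
//                   src = 0      -> cntlzd = 64, 64 >> 6 = 1, 1 ^ 1 = 0.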
10496 
10497 // ConvP2B
10498 instruct convP2Bool_reg__cntlz_Ex(iRegIdst dst, iRegP_N2P src) %{
10499   match(Set dst (Conv2B src));
10500   predicate(UseCountLeadingZerosInstructionsPPC64);
10501   ins_cost(DEFAULT_COST);
10502 
10503   expand %{
10504     immI shiftAmount %{ 0x6 %}
10505     uimmI16 mask %{ 0x1 %}
10506     iRegIdst tmp1;
10507     iRegIdst tmp2;
10508     countLeadingZerosP(tmp1, src);
10509     urShiftI_reg_imm(tmp2, tmp1, shiftAmount);
10510     xorI_reg_uimm16(dst, tmp2, mask);
10511   %}
10512 %}
10513 
10514 instruct convP2Bool_reg__cmove(iRegIdst dst, iRegP_N2P src, flagsReg crx) %{
10515   match(Set dst (Conv2B src));
10516   effect(TEMP crx);
10517   predicate(!UseCountLeadingZerosInstructionsPPC64);
10518   ins_cost(DEFAULT_COST);
10519 
  format %{ "CMPDI   $crx, $src, #0 \t// convP2B\n\t"
10521             "LI      $dst, #0\n\t"
10522             "BEQ     $crx, done\n\t"
10523             "LI      $dst, #1\n"
10524             "done:" %}
10525   size(16);
10526   ins_encode( enc_convP2B_regP__cmove(dst, src, crx, 0x0, 0x1) );
10527   ins_pipe(pipe_class_compare);
10528 %}
10529 
10530 // ConvP2B + XorI
10531 instruct xorI_convP2Bool_reg__cntlz_Ex(iRegIdst dst, iRegP_N2P src, immI_1 mask) %{
10532   match(Set dst (XorI (Conv2B src) mask));
10533   predicate(UseCountLeadingZerosInstructionsPPC64);
10534   ins_cost(DEFAULT_COST);
10535 
10536   expand %{
10537     immI shiftAmount %{ 0x6 %}
10538     iRegIdst tmp1;
10539     countLeadingZerosP(tmp1, src);
10540     urShiftI_reg_imm(dst, tmp1, shiftAmount);
10541   %}
10542 %}
10543 
10544 instruct xorI_convP2Bool_reg_immIvalue1__cmove(iRegIdst dst, iRegP_N2P src, flagsReg crx, immI_1 mask) %{
10545   match(Set dst (XorI (Conv2B src) mask));
10546   effect(TEMP crx);
10547   predicate(!UseCountLeadingZerosInstructionsPPC64);
10548   ins_cost(DEFAULT_COST);
10549 
  format %{ "CMPDI   $crx, $src, #0 \t// XorI(convP2B($src), $mask)\n\t"
10551             "LI      $dst, #1\n\t"
10552             "BEQ     $crx, done\n\t"
10553             "LI      $dst, #0\n"
10554             "done:" %}
10555   size(16);
10556   ins_encode( enc_convP2B_regP__cmove(dst, src, crx, 0x1, 0x0) );
10557   ins_pipe(pipe_class_compare);
10558 %}
10559 
10560 // if src1 < src2, return -1 else return 0
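// The difference is computed in 64 bits because the 32-bit result can overflow.
// For illustration: src1 = -2, src2 = 0x7FFFFFFF gives a 64-bit difference of
// -0x80000001 (negative, so the mask is -1 as required), whereas a 32-bit
// subtraction would wrap around to the positive value 0x7FFFFFFF.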
10561 instruct cmpLTMask_reg_reg_Ex(iRegIdst dst, iRegIsrc src1, iRegIsrc src2) %{
10562   match(Set dst (CmpLTMask src1 src2));
10563   ins_cost(DEFAULT_COST*4);
10564 
10565   expand %{
10566     iRegLdst src1s;
10567     iRegLdst src2s;
10568     iRegLdst diff;
10569     convI2L_reg(src1s, src1); // Ensure proper sign extension.
10570     convI2L_reg(src2s, src2); // Ensure proper sign extension.
10571     subL_reg_reg(diff, src1s, src2s);
10572     // Need to consider >=33 bit result, therefore we need signmaskL.
10573     signmask64I_regL(dst, diff);
10574   %}
10575 %}
10576 
10577 instruct cmpLTMask_reg_immI0(iRegIdst dst, iRegIsrc src1, immI_0 src2) %{
10578   match(Set dst (CmpLTMask src1 src2)); // if src1 < src2, return -1 else return 0
  format %{ "SRAWI   $dst, $src1, #31 \t// CmpLTMask" %}
10580   size(4);
10581   ins_encode %{
10582     __ srawi($dst$$Register, $src1$$Register, 0x1f);
10583   %}
10584   ins_pipe(pipe_class_default);
10585 %}
10586 
10587 //----------Arithmetic Conversion Instructions---------------------------------
10588 
10589 // Convert to Byte  -- nop
10590 // Convert to Short -- nop
10591 
10592 // Convert to Int
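
// LShiftI 24 + RShiftI 24 converts byte to int.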
10593 
10594 instruct convB2I_reg(iRegIdst dst, iRegIsrc src, immI_24 amount) %{
10595   match(Set dst (RShiftI (LShiftI src amount) amount));
10596   format %{ "EXTSB   $dst, $src \t// byte->int" %}
10597   size(4);
10598   ins_encode %{
10599     __ extsb($dst$$Register, $src$$Register);
10600   %}
10601   ins_pipe(pipe_class_default);
10602 %}
10603 
10604 instruct extsh(iRegIdst dst, iRegIsrc src) %{
10605   effect(DEF dst, USE src);
10606 
10607   size(4);
10608   ins_encode %{
10609     __ extsh($dst$$Register, $src$$Register);
10610   %}
10611   ins_pipe(pipe_class_default);
10612 %}
10613 
10614 // LShiftI 16 + RShiftI 16 converts short to int.
10615 instruct convS2I_reg(iRegIdst dst, iRegIsrc src, immI_16 amount) %{
10616   match(Set dst (RShiftI (LShiftI src amount) amount));
10617   format %{ "EXTSH   $dst, $src \t// short->int" %}
10618   size(4);
10619   ins_encode %{
10620     __ extsh($dst$$Register, $src$$Register);
10621   %}
10622   ins_pipe(pipe_class_default);
10623 %}
10624 
10625 // ConvL2I + ConvI2L: Sign extend int in long register.
10626 instruct sxtI_L2L_reg(iRegLdst dst, iRegLsrc src) %{
10627   match(Set dst (ConvI2L (ConvL2I src)));
10628 
10629   format %{ "EXTSW   $dst, $src \t// long->long" %}
10630   size(4);
10631   ins_encode %{
10632     __ extsw($dst$$Register, $src$$Register);
10633   %}
10634   ins_pipe(pipe_class_default);
10635 %}
10636 
10637 instruct convL2I_reg(iRegIdst dst, iRegLsrc src) %{
10638   match(Set dst (ConvL2I src));
10639   format %{ "MR      $dst, $src \t// long->int" %}
10640   // variable size, 0 or 4
10641   ins_encode %{
10642     __ mr_if_needed($dst$$Register, $src$$Register);
10643   %}
10644   ins_pipe(pipe_class_default);
10645 %}
10646 
10647 instruct convD2IRaw_regD(regD dst, regD src) %{
10648   // no match-rule, false predicate
10649   effect(DEF dst, USE src);
10650   predicate(false);
10651 
10652   format %{ "FCTIWZ $dst, $src \t// convD2I, $src != NaN" %}
10653   size(4);
10654   ins_encode %{
10655     __ fctiwz($dst$$FloatRegister, $src$$FloatRegister);
10656   %}
10657   ins_pipe(pipe_class_default);
10658 %}
10659 
10660 instruct cmovI_bso_stackSlotL(iRegIdst dst, flagsRegSrc crx, stackSlotL src) %{
10661   // no match-rule, false predicate
10662   effect(DEF dst, USE crx, USE src);
10663   predicate(false);
10664 
10665   ins_variable_size_depending_on_alignment(true);
10666 
10667   format %{ "cmovI   $crx, $dst, $src" %}
10668   // Worst case is branch + move + stop, no stop without scheduler.
10669   size(8);
10670   ins_encode( enc_cmove_bso_stackSlotL(dst, crx, src) );
10671   ins_pipe(pipe_class_default);
10672 %}
10673 
10674 instruct cmovI_bso_reg(iRegIdst dst, flagsRegSrc crx, regD src) %{
10675   // no match-rule, false predicate
10676   effect(DEF dst, USE crx, USE src);
10677   predicate(false);
10678 
10679   ins_variable_size_depending_on_alignment(true);
10680 
10681   format %{ "cmovI   $crx, $dst, $src" %}
10682   // Worst case is branch + move + stop, no stop without scheduler.
10683   size(8);
10684   ins_encode( enc_cmove_bso_reg(dst, crx, src) );
10685   ins_pipe(pipe_class_default);
10686 %}
10687 
10688 instruct cmovI_bso_stackSlotL_conLvalue0_Ex(iRegIdst dst, flagsRegSrc crx, stackSlotL mem) %{
10689   // no match-rule, false predicate
10690   effect(DEF dst, USE crx, USE mem);
10691   predicate(false);
10692 
10693   format %{ "CmovI   $dst, $crx, $mem \t// postalloc expanded" %}
10694   postalloc_expand %{
10695     //
10696     // replaces
10697     //
10698     //   region  dst  crx  mem
10699     //    \       |    |   /
10700     //     dst=cmovI_bso_stackSlotL_conLvalue0
10701     //
10702     // with
10703     //
10704     //   region  dst
10705     //    \       /
10706     //     dst=loadConI16(0)
10707     //      |
10708     //      ^  region  dst  crx  mem
10709     //      |   \       |    |    /
10710     //      dst=cmovI_bso_stackSlotL
10711     //
10712 
10713     // Create new nodes.
10714     MachNode *m1 = new loadConI16Node();
10715     MachNode *m2 = new cmovI_bso_stackSlotLNode();
10716 
10717     // inputs for new nodes
10718     m1->add_req(n_region);
10719     m2->add_req(n_region, n_crx, n_mem);
10720 
10721     // precedences for new nodes
10722     m2->add_prec(m1);
10723 
10724     // operands for new nodes
10725     m1->_opnds[0] = op_dst;
10726     m1->_opnds[1] = new immI16Oper(0);
10727 
10728     m2->_opnds[0] = op_dst;
10729     m2->_opnds[1] = op_crx;
10730     m2->_opnds[2] = op_mem;
10731 
10732     // registers for new nodes
10733     ra_->set_pair(m1->_idx, ra_->get_reg_second(this), ra_->get_reg_first(this)); // dst
10734     ra_->set_pair(m2->_idx, ra_->get_reg_second(this), ra_->get_reg_first(this)); // dst
10735 
10736     // Insert new nodes.
10737     nodes->push(m1);
10738     nodes->push(m2);
10739   %}
10740 %}
10741 
10742 instruct cmovI_bso_reg_conLvalue0_Ex(iRegIdst dst, flagsRegSrc crx, regD src) %{
10743   // no match-rule, false predicate
10744   effect(DEF dst, USE crx, USE src);
10745   predicate(false);
10746 
10747   format %{ "CmovI   $dst, $crx, $src \t// postalloc expanded" %}
10748   postalloc_expand %{
10749     //
10750     // replaces
10751     //
10752     //   region  dst  crx  src
10753     //    \       |    |   /
10754     //     dst=cmovI_bso_reg_conLvalue0
10755     //
10756     // with
10757     //
10758     //   region  dst
10759     //    \       /
10760     //     dst=loadConI16(0)
10761     //      |
10762     //      ^  region  dst  crx  src
10763     //      |   \       |    |    /
10764     //      dst=cmovI_bso_reg
10765     //
10766 
10767     // Create new nodes.
10768     MachNode *m1 = new loadConI16Node();
10769     MachNode *m2 = new cmovI_bso_regNode();
10770 
10771     // inputs for new nodes
10772     m1->add_req(n_region);
10773     m2->add_req(n_region, n_crx, n_src);
10774 
10775     // precedences for new nodes
10776     m2->add_prec(m1);
10777 
10778     // operands for new nodes
10779     m1->_opnds[0] = op_dst;
10780     m1->_opnds[1] = new immI16Oper(0);
10781 
10782     m2->_opnds[0] = op_dst;
10783     m2->_opnds[1] = op_crx;
10784     m2->_opnds[2] = op_src;
10785 
10786     // registers for new nodes
10787     ra_->set_pair(m1->_idx, ra_->get_reg_second(this), ra_->get_reg_first(this)); // dst
10788     ra_->set_pair(m2->_idx, ra_->get_reg_second(this), ra_->get_reg_first(this)); // dst
10789 
10790     // Insert new nodes.
10791     nodes->push(m1);
10792     nodes->push(m2);
10793   %}
10794 %}
10795 
10796 // Double to Int conversion, NaN is mapped to 0.
10797 instruct convD2I_reg_ExEx(iRegIdst dst, regD src) %{
10798   match(Set dst (ConvD2I src));
10799   predicate(!VM_Version::has_mtfprd());
10800   ins_cost(DEFAULT_COST);
10801 
10802   expand %{
10803     regD tmpD;
10804     stackSlotL tmpS;
10805     flagsReg crx;
10806     cmpDUnordered_reg_reg(crx, src, src);               // Check whether src is NaN.
    convD2IRaw_regD(tmpD, src);                         // Convert double to int (speculated).
    moveD2L_reg_stack(tmpS, tmpD);                      // Store double to stack (speculated).
10809     cmovI_bso_stackSlotL_conLvalue0_Ex(dst, crx, tmpS); // Cmove based on NaN check.
10810   %}
10811 %}
10812 
10813 // Double to Int conversion, NaN is mapped to 0. Special version for Power8.
10814 instruct convD2I_reg_mffprd_ExEx(iRegIdst dst, regD src) %{
10815   match(Set dst (ConvD2I src));
10816   predicate(VM_Version::has_mtfprd());
10817   ins_cost(DEFAULT_COST);
10818 
10819   expand %{
10820     regD tmpD;
10821     flagsReg crx;
10822     cmpDUnordered_reg_reg(crx, src, src);               // Check whether src is NaN.
    convD2IRaw_regD(tmpD, src);                         // Convert double to int (speculated).
10824     cmovI_bso_reg_conLvalue0_Ex(dst, crx, tmpD);        // Cmove based on NaN check.
10825   %}
10826 %}
10827 
10828 instruct convF2IRaw_regF(regF dst, regF src) %{
10829   // no match-rule, false predicate
10830   effect(DEF dst, USE src);
10831   predicate(false);
10832 
10833   format %{ "FCTIWZ $dst, $src \t// convF2I, $src != NaN" %}
10834   size(4);
10835   ins_encode %{
10836     __ fctiwz($dst$$FloatRegister, $src$$FloatRegister);
10837   %}
10838   ins_pipe(pipe_class_default);
10839 %}
10840 
10841 // Float to Int conversion, NaN is mapped to 0.
10842 instruct convF2I_regF_ExEx(iRegIdst dst, regF src) %{
10843   match(Set dst (ConvF2I src));
10844   predicate(!VM_Version::has_mtfprd());
10845   ins_cost(DEFAULT_COST);
10846 
10847   expand %{
10848     regF tmpF;
10849     stackSlotL tmpS;
10850     flagsReg crx;
10851     cmpFUnordered_reg_reg(crx, src, src);               // Check whether src is NaN.
10852     convF2IRaw_regF(tmpF, src);                         // Convert float to int (speculated).
10853     moveF2L_reg_stack(tmpS, tmpF);                      // Store float to stack (speculated).
10854     cmovI_bso_stackSlotL_conLvalue0_Ex(dst, crx, tmpS); // Cmove based on NaN check.
10855   %}
10856 %}
10857 
10858 // Float to Int conversion, NaN is mapped to 0. Special version for Power8.
10859 instruct convF2I_regF_mffprd_ExEx(iRegIdst dst, regF src) %{
10860   match(Set dst (ConvF2I src));
10861   predicate(VM_Version::has_mtfprd());
10862   ins_cost(DEFAULT_COST);
10863 
10864   expand %{
10865     regF tmpF;
10866     flagsReg crx;
10867     cmpFUnordered_reg_reg(crx, src, src);               // Check whether src is NaN.
10868     convF2IRaw_regF(tmpF, src);                         // Convert float to int (speculated).
10869     cmovI_bso_reg_conLvalue0_Ex(dst, crx, tmpF);        // Cmove based on NaN check.
10870   %}
10871 %}
10872 
10873 // Convert to Long
10874 
10875 instruct convI2L_reg(iRegLdst dst, iRegIsrc src) %{
10876   match(Set dst (ConvI2L src));
10877   format %{ "EXTSW   $dst, $src \t// int->long" %}
10878   size(4);
10879   ins_encode %{
10880     __ extsw($dst$$Register, $src$$Register);
10881   %}
10882   ins_pipe(pipe_class_default);
10883 %}
10884 
10885 // Zero-extend: convert unsigned int to long (convUI2L).
10886 instruct zeroExtendL_regI(iRegLdst dst, iRegIsrc src, immL_32bits mask) %{
10887   match(Set dst (AndL (ConvI2L src) mask));
10888   ins_cost(DEFAULT_COST);
10889 
10890   format %{ "CLRLDI  $dst, $src, #32 \t// zero-extend int to long" %}
10891   size(4);
10892   ins_encode %{
10893     __ clrldi($dst$$Register, $src$$Register, 32);
10894   %}
10895   ins_pipe(pipe_class_default);
10896 %}
10897 
10898 // Zero-extend: convert unsigned int to long in long register.
10899 instruct zeroExtendL_regL(iRegLdst dst, iRegLsrc src, immL_32bits mask) %{
10900   match(Set dst (AndL src mask));
10901   ins_cost(DEFAULT_COST);
10902 
10903   format %{ "CLRLDI  $dst, $src, #32 \t// zero-extend int to long" %}
10904   size(4);
10905   ins_encode %{
10906     __ clrldi($dst$$Register, $src$$Register, 32);
10907   %}
10908   ins_pipe(pipe_class_default);
10909 %}
10910 
10911 instruct convF2LRaw_regF(regF dst, regF src) %{
10912   // no match-rule, false predicate
10913   effect(DEF dst, USE src);
10914   predicate(false);
10915 
10916   format %{ "FCTIDZ $dst, $src \t// convF2L, $src != NaN" %}
10917   size(4);
10918   ins_encode %{
10919     __ fctidz($dst$$FloatRegister, $src$$FloatRegister);
10920   %}
10921   ins_pipe(pipe_class_default);
10922 %}
10923 
10924 instruct cmovL_bso_stackSlotL(iRegLdst dst, flagsRegSrc crx, stackSlotL src) %{
10925   // no match-rule, false predicate
10926   effect(DEF dst, USE crx, USE src);
10927   predicate(false);
10928 
10929   ins_variable_size_depending_on_alignment(true);
10930 
10931   format %{ "cmovL   $crx, $dst, $src" %}
10932   // Worst case is branch + move + stop, no stop without scheduler.
10933   size(8);
10934   ins_encode( enc_cmove_bso_stackSlotL(dst, crx, src) );
10935   ins_pipe(pipe_class_default);
10936 %}
10937 
10938 instruct cmovL_bso_reg(iRegLdst dst, flagsRegSrc crx, regD src) %{
10939   // no match-rule, false predicate
10940   effect(DEF dst, USE crx, USE src);
10941   predicate(false);
10942 
10943   ins_variable_size_depending_on_alignment(true);
10944 
10945   format %{ "cmovL   $crx, $dst, $src" %}
10946   // Worst case is branch + move + stop, no stop without scheduler.
10947   size(8);
10948   ins_encode( enc_cmove_bso_reg(dst, crx, src) );
10949   ins_pipe(pipe_class_default);
10950 %}
10951 
10952 instruct cmovL_bso_stackSlotL_conLvalue0_Ex(iRegLdst dst, flagsRegSrc crx, stackSlotL mem) %{
10953   // no match-rule, false predicate
10954   effect(DEF dst, USE crx, USE mem);
10955   predicate(false);
10956 
10957   format %{ "CmovL   $dst, $crx, $mem \t// postalloc expanded" %}
10958   postalloc_expand %{
10959     //
10960     // replaces
10961     //
10962     //   region  dst  crx  mem
10963     //    \       |    |   /
10964     //     dst=cmovL_bso_stackSlotL_conLvalue0
10965     //
10966     // with
10967     //
10968     //   region  dst
10969     //    \       /
10970     //     dst=loadConL16(0)
10971     //      |
10972     //      ^  region  dst  crx  mem
10973     //      |   \       |    |    /
10974     //      dst=cmovL_bso_stackSlotL
10975     //
10976 
10977     // Create new nodes.
10978     MachNode *m1 = new loadConL16Node();
10979     MachNode *m2 = new cmovL_bso_stackSlotLNode();
10980 
10981     // inputs for new nodes
10982     m1->add_req(n_region);
10983     m2->add_req(n_region, n_crx, n_mem);
10984     m2->add_prec(m1);
10985 
10986     // operands for new nodes
10987     m1->_opnds[0] = op_dst;
10988     m1->_opnds[1] = new immL16Oper(0);
10989     m2->_opnds[0] = op_dst;
10990     m2->_opnds[1] = op_crx;
10991     m2->_opnds[2] = op_mem;
10992 
10993     // registers for new nodes
10994     ra_->set_pair(m1->_idx, ra_->get_reg_second(this), ra_->get_reg_first(this)); // dst
10995     ra_->set_pair(m2->_idx, ra_->get_reg_second(this), ra_->get_reg_first(this)); // dst
10996 
10997     // Insert new nodes.
10998     nodes->push(m1);
10999     nodes->push(m2);
11000   %}
11001 %}
11002 
11003 instruct cmovL_bso_reg_conLvalue0_Ex(iRegLdst dst, flagsRegSrc crx, regD src) %{
11004   // no match-rule, false predicate
11005   effect(DEF dst, USE crx, USE src);
11006   predicate(false);
11007 
11008   format %{ "CmovL   $dst, $crx, $src \t// postalloc expanded" %}
11009   postalloc_expand %{
11010     //
11011     // replaces
11012     //
11013     //   region  dst  crx  src
11014     //    \       |    |   /
11015     //     dst=cmovL_bso_reg_conLvalue0
11016     //
11017     // with
11018     //
11019     //   region  dst
11020     //    \       /
11021     //     dst=loadConL16(0)
11022     //      |
11023     //      ^  region  dst  crx  src
11024     //      |   \       |    |    /
11025     //      dst=cmovL_bso_reg
11026     //
11027 
11028     // Create new nodes.
11029     MachNode *m1 = new loadConL16Node();
11030     MachNode *m2 = new cmovL_bso_regNode();
11031 
11032     // inputs for new nodes
11033     m1->add_req(n_region);
11034     m2->add_req(n_region, n_crx, n_src);
11035     m2->add_prec(m1);
11036 
11037     // operands for new nodes
11038     m1->_opnds[0] = op_dst;
11039     m1->_opnds[1] = new immL16Oper(0);
11040     m2->_opnds[0] = op_dst;
11041     m2->_opnds[1] = op_crx;
11042     m2->_opnds[2] = op_src;
11043 
11044     // registers for new nodes
11045     ra_->set_pair(m1->_idx, ra_->get_reg_second(this), ra_->get_reg_first(this)); // dst
11046     ra_->set_pair(m2->_idx, ra_->get_reg_second(this), ra_->get_reg_first(this)); // dst
11047 
11048     // Insert new nodes.
11049     nodes->push(m1);
11050     nodes->push(m2);
11051   %}
11052 %}
11053 
11054 // Float to Long conversion, NaN is mapped to 0.
11055 instruct convF2L_reg_ExEx(iRegLdst dst, regF src) %{
11056   match(Set dst (ConvF2L src));
11057   predicate(!VM_Version::has_mtfprd());
11058   ins_cost(DEFAULT_COST);
11059 
11060   expand %{
11061     regF tmpF;
11062     stackSlotL tmpS;
11063     flagsReg crx;
11064     cmpFUnordered_reg_reg(crx, src, src);               // Check whether src is NaN.
11065     convF2LRaw_regF(tmpF, src);                         // Convert float to long (speculated).
11066     moveF2L_reg_stack(tmpS, tmpF);                      // Store float to stack (speculated).
11067     cmovL_bso_stackSlotL_conLvalue0_Ex(dst, crx, tmpS); // Cmove based on NaN check.
11068   %}
11069 %}
11070 
11071 // Float to Long conversion, NaN is mapped to 0. Special version for Power8.
11072 instruct convF2L_reg_mffprd_ExEx(iRegLdst dst, regF src) %{
11073   match(Set dst (ConvF2L src));
11074   predicate(VM_Version::has_mtfprd());
11075   ins_cost(DEFAULT_COST);
11076 
11077   expand %{
11078     regF tmpF;
11079     flagsReg crx;
11080     cmpFUnordered_reg_reg(crx, src, src);               // Check whether src is NaN.
11081     convF2LRaw_regF(tmpF, src);                         // Convert float to long (speculated).
11082     cmovL_bso_reg_conLvalue0_Ex(dst, crx, tmpF);        // Cmove based on NaN check.
11083   %}
11084 %}
11085 
11086 instruct convD2LRaw_regD(regD dst, regD src) %{
11087   // no match-rule, false predicate
11088   effect(DEF dst, USE src);
11089   predicate(false);
11090 
11091   format %{ "FCTIDZ $dst, $src \t// convD2L $src != NaN" %}
11092   size(4);
11093   ins_encode %{
11094     __ fctidz($dst$$FloatRegister, $src$$FloatRegister);
11095   %}
11096   ins_pipe(pipe_class_default);
11097 %}
11098 
11099 // Double to Long conversion, NaN is mapped to 0.
11100 instruct convD2L_reg_ExEx(iRegLdst dst, regD src) %{
11101   match(Set dst (ConvD2L src));
11102   predicate(!VM_Version::has_mtfprd());
11103   ins_cost(DEFAULT_COST);
11104 
11105   expand %{
11106     regD tmpD;
11107     stackSlotL tmpS;
11108     flagsReg crx;
11109     cmpDUnordered_reg_reg(crx, src, src);               // Check whether src is NaN.
    convD2LRaw_regD(tmpD, src);                         // Convert double to long (speculated).
    moveD2L_reg_stack(tmpS, tmpD);                      // Store double to stack (speculated).
11112     cmovL_bso_stackSlotL_conLvalue0_Ex(dst, crx, tmpS); // Cmove based on NaN check.
11113   %}
11114 %}
11115 
11116 // Double to Long conversion, NaN is mapped to 0. Special version for Power8.
11117 instruct convD2L_reg_mffprd_ExEx(iRegLdst dst, regD src) %{
11118   match(Set dst (ConvD2L src));
11119   predicate(VM_Version::has_mtfprd());
11120   ins_cost(DEFAULT_COST);
11121 
11122   expand %{
11123     regD tmpD;
11124     flagsReg crx;
11125     cmpDUnordered_reg_reg(crx, src, src);               // Check whether src is NaN.
    convD2LRaw_regD(tmpD, src);                         // Convert double to long (speculated).
11127     cmovL_bso_reg_conLvalue0_Ex(dst, crx, tmpD);        // Cmove based on NaN check.
11128   %}
11129 %}
11130 
11131 // Convert to Float
11132 
11133 // Placed here as needed in expand.
11134 instruct convL2DRaw_regD(regD dst, regD src) %{
11135   // no match-rule, false predicate
11136   effect(DEF dst, USE src);
11137   predicate(false);
11138 
11139   format %{ "FCFID $dst, $src \t// convL2D" %}
11140   size(4);
11141   ins_encode %{
11142     __ fcfid($dst$$FloatRegister, $src$$FloatRegister);
11143   %}
11144   ins_pipe(pipe_class_default);
11145 %}
11146 
11147 // Placed here as needed in expand.
11148 instruct convD2F_reg(regF dst, regD src) %{
11149   match(Set dst (ConvD2F src));
11150   format %{ "FRSP    $dst, $src \t// convD2F" %}
11151   size(4);
11152   ins_encode %{
11153     __ frsp($dst$$FloatRegister, $src$$FloatRegister);
11154   %}
11155   ins_pipe(pipe_class_default);
11156 %}
11157 
11158 // Integer to Float conversion.
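// Without FCFIDS the int value is moved to the FP unit through memory:
// sign-extend to long (EXTSW), store to a stack slot (STD), reload into an FP
// register (LFD), convert with FCFID, and round the result to single precision
// with FRSP.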
11159 instruct convI2F_ireg_Ex(regF dst, iRegIsrc src) %{
11160   match(Set dst (ConvI2F src));
11161   predicate(!VM_Version::has_fcfids());
11162   ins_cost(DEFAULT_COST);
11163 
11164   expand %{
11165     iRegLdst tmpL;
11166     stackSlotL tmpS;
11167     regD tmpD;
11168     regD tmpD2;
    convI2L_reg(tmpL, src);              // Sign-extend int to long.
11170     regL_to_stkL(tmpS, tmpL);            // Store long to stack.
11171     moveL2D_stack_reg(tmpD, tmpS);       // Load long into double register.
11172     convL2DRaw_regD(tmpD2, tmpD);        // Convert to double.
11173     convD2F_reg(dst, tmpD2);             // Convert double to float.
11174   %}
11175 %}
11176 
11177 instruct convL2FRaw_regF(regF dst, regD src) %{
11178   // no match-rule, false predicate
11179   effect(DEF dst, USE src);
11180   predicate(false);
11181 
11182   format %{ "FCFIDS $dst, $src \t// convL2F" %}
11183   size(4);
11184   ins_encode %{
11185     __ fcfids($dst$$FloatRegister, $src$$FloatRegister);
11186   %}
11187   ins_pipe(pipe_class_default);
11188 %}
11189 
11190 // Integer to Float conversion. Special version for Power7.
11191 instruct convI2F_ireg_fcfids_Ex(regF dst, iRegIsrc src) %{
11192   match(Set dst (ConvI2F src));
11193   predicate(VM_Version::has_fcfids() && !VM_Version::has_mtfprd());
11194   ins_cost(DEFAULT_COST);
11195 
11196   expand %{
11197     iRegLdst tmpL;
11198     stackSlotL tmpS;
11199     regD tmpD;
    convI2L_reg(tmpL, src);              // Sign-extend int to long.
11201     regL_to_stkL(tmpS, tmpL);            // Store long to stack.
11202     moveL2D_stack_reg(tmpD, tmpS);       // Load long into double register.
11203     convL2FRaw_regF(dst, tmpD);          // Convert to float.
11204   %}
11205 %}
11206 
11207 // Integer to Float conversion. Special version for Power8.
11208 instruct convI2F_ireg_mtfprd_Ex(regF dst, iRegIsrc src) %{
11209   match(Set dst (ConvI2F src));
11210   predicate(VM_Version::has_fcfids() && VM_Version::has_mtfprd());
11211   ins_cost(DEFAULT_COST);
11212 
11213   expand %{
11214     regD tmpD;
11215     moveI2D_reg(tmpD, src);
11216     convL2FRaw_regF(dst, tmpD);          // Convert to float.
11217   %}
11218 %}
11219 
11220 // L2F to avoid runtime call.
11221 instruct convL2F_ireg_fcfids_Ex(regF dst, iRegLsrc src) %{
11222   match(Set dst (ConvL2F src));
11223   predicate(VM_Version::has_fcfids() && !VM_Version::has_mtfprd());
11224   ins_cost(DEFAULT_COST);
11225 
11226   expand %{
11227     stackSlotL tmpS;
11228     regD tmpD;
11229     regL_to_stkL(tmpS, src);             // Store long to stack.
11230     moveL2D_stack_reg(tmpD, tmpS);       // Load long into double register.
11231     convL2FRaw_regF(dst, tmpD);          // Convert to float.
11232   %}
11233 %}
11234 
11235 // L2F to avoid runtime call.  Special version for Power8.
11236 instruct convL2F_ireg_mtfprd_Ex(regF dst, iRegLsrc src) %{
11237   match(Set dst (ConvL2F src));
11238   predicate(VM_Version::has_fcfids() && VM_Version::has_mtfprd());
11239   ins_cost(DEFAULT_COST);
11240 
11241   expand %{
11242     regD tmpD;
11243     moveL2D_reg(tmpD, src);
11244     convL2FRaw_regF(dst, tmpD);          // Convert to float.
11245   %}
11246 %}
11247 
11248 // Moved up as used in expand.
11249 //instruct convD2F_reg(regF dst, regD src) %{%}
11250 
11251 // Convert to Double
11252 
11253 // Integer to Double conversion.
11254 instruct convI2D_reg_Ex(regD dst, iRegIsrc src) %{
11255   match(Set dst (ConvI2D src));
11256   predicate(!VM_Version::has_mtfprd());
11257   ins_cost(DEFAULT_COST);
11258 
11259   expand %{
11260     iRegLdst tmpL;
11261     stackSlotL tmpS;
11262     regD tmpD;
    convI2L_reg(tmpL, src);              // Sign-extend int to long.
11264     regL_to_stkL(tmpS, tmpL);            // Store long to stack.
11265     moveL2D_stack_reg(tmpD, tmpS);       // Load long into double register.
11266     convL2DRaw_regD(dst, tmpD);          // Convert to double.
11267   %}
11268 %}
11269 
11270 // Integer to Double conversion. Special version for Power8.
11271 instruct convI2D_reg_mtfprd_Ex(regD dst, iRegIsrc src) %{
11272   match(Set dst (ConvI2D src));
11273   predicate(VM_Version::has_mtfprd());
11274   ins_cost(DEFAULT_COST);
11275 
11276   expand %{
11277     regD tmpD;
11278     moveI2D_reg(tmpD, src);
11279     convL2DRaw_regD(dst, tmpD);          // Convert to double.
11280   %}
11281 %}
11282 
11283 // Long to Double conversion
11284 instruct convL2D_reg_Ex(regD dst, stackSlotL src) %{
11285   match(Set dst (ConvL2D src));
11286   ins_cost(DEFAULT_COST + MEMORY_REF_COST);
11287 
11288   expand %{
11289     regD tmpD;
11290     moveL2D_stack_reg(tmpD, src);
11291     convL2DRaw_regD(dst, tmpD);
11292   %}
11293 %}
11294 
11295 // Long to Double conversion. Special version for Power8.
11296 instruct convL2D_reg_mtfprd_Ex(regD dst, iRegLsrc src) %{
11297   match(Set dst (ConvL2D src));
11298   predicate(VM_Version::has_mtfprd());
11299   ins_cost(DEFAULT_COST);
11300 
11301   expand %{
11302     regD tmpD;
11303     moveL2D_reg(tmpD, src);
11304     convL2DRaw_regD(dst, tmpD);          // Convert to double.
11305   %}
11306 %}
11307 
11308 instruct convF2D_reg(regD dst, regF src) %{
11309   match(Set dst (ConvF2D src));
11310   format %{ "FMR     $dst, $src \t// float->double" %}
11311   // variable size, 0 or 4
11312   ins_encode %{
11313     __ fmr_if_needed($dst$$FloatRegister, $src$$FloatRegister);
11314   %}
11315   ins_pipe(pipe_class_default);
11316 %}
11317 
11318 //----------Control Flow Instructions------------------------------------------
11319 // Compare Instructions
11320 
11321 // Compare Integers
11322 instruct cmpI_reg_reg(flagsReg crx, iRegIsrc src1, iRegIsrc src2) %{
11323   match(Set crx (CmpI src1 src2));
11324   size(4);
11325   format %{ "CMPW    $crx, $src1, $src2" %}
11326   ins_encode %{
11327     __ cmpw($crx$$CondRegister, $src1$$Register, $src2$$Register);
11328   %}
11329   ins_pipe(pipe_class_compare);
11330 %}
11331 
11332 instruct cmpI_reg_imm16(flagsReg crx, iRegIsrc src1, immI16 src2) %{
11333   match(Set crx (CmpI src1 src2));
11334   format %{ "CMPWI   $crx, $src1, $src2" %}
11335   size(4);
11336   ins_encode %{
11337     __ cmpwi($crx$$CondRegister, $src1$$Register, $src2$$constant);
11338   %}
11339   ins_pipe(pipe_class_compare);
11340 %}
11341 
11342 // (src1 & src2) == 0?
11343 instruct testI_reg_imm(flagsRegCR0 cr0, iRegIsrc src1, uimmI16 src2, immI_0 zero) %{
11344   match(Set cr0 (CmpI (AndI src1 src2) zero));
11345   // r0 is killed
11346   format %{ "ANDI    R0, $src1, $src2 \t// BTST int" %}
11347   size(4);
11348   ins_encode %{
11349     __ andi_(R0, $src1$$Register, $src2$$constant);
11350   %}
11351   ins_pipe(pipe_class_compare);
11352 %}
11353 
11354 instruct cmpL_reg_reg(flagsReg crx, iRegLsrc src1, iRegLsrc src2) %{
11355   match(Set crx (CmpL src1 src2));
11356   format %{ "CMPD    $crx, $src1, $src2" %}
11357   size(4);
11358   ins_encode %{
11359     __ cmpd($crx$$CondRegister, $src1$$Register, $src2$$Register);
11360   %}
11361   ins_pipe(pipe_class_compare);
11362 %}
11363 
11364 instruct cmpL_reg_imm16(flagsReg crx, iRegLsrc src1, immL16 src2) %{
11365   match(Set crx (CmpL src1 src2));
11366   format %{ "CMPDI   $crx, $src1, $src2" %}
11367   size(4);
11368   ins_encode %{
11369     __ cmpdi($crx$$CondRegister, $src1$$Register, $src2$$constant);
11370   %}
11371   ins_pipe(pipe_class_compare);
11372 %}
11373 
11374 // Added CmpUL for LoopPredicate.
11375 instruct cmpUL_reg_reg(flagsReg crx, iRegLsrc src1, iRegLsrc src2) %{
11376   match(Set crx (CmpUL src1 src2));
11377   format %{ "CMPLD   $crx, $src1, $src2" %}
11378   size(4);
11379   ins_encode %{
11380     __ cmpld($crx$$CondRegister, $src1$$Register, $src2$$Register);
11381   %}
11382   ins_pipe(pipe_class_compare);
11383 %}
11384 
11385 instruct cmpUL_reg_imm16(flagsReg crx, iRegLsrc src1, uimmL16 src2) %{
11386   match(Set crx (CmpUL src1 src2));
11387   format %{ "CMPLDI  $crx, $src1, $src2" %}
11388   size(4);
11389   ins_encode %{
11390     __ cmpldi($crx$$CondRegister, $src1$$Register, $src2$$constant);
11391   %}
11392   ins_pipe(pipe_class_compare);
11393 %}
11394 
11395 instruct testL_reg_reg(flagsRegCR0 cr0, iRegLsrc src1, iRegLsrc src2, immL_0 zero) %{
11396   match(Set cr0 (CmpL (AndL src1 src2) zero));
11397   // r0 is killed
11398   format %{ "AND     R0, $src1, $src2 \t// BTST long" %}
11399   size(4);
11400   ins_encode %{
11401     __ and_(R0, $src1$$Register, $src2$$Register);
11402   %}
11403   ins_pipe(pipe_class_compare);
11404 %}
11405 
11406 instruct testL_reg_imm(flagsRegCR0 cr0, iRegLsrc src1, uimmL16 src2, immL_0 zero) %{
11407   match(Set cr0 (CmpL (AndL src1 src2) zero));
11408   // r0 is killed
11409   format %{ "ANDI    R0, $src1, $src2 \t// BTST long" %}
11410   size(4);
11411   ins_encode %{
11412     __ andi_(R0, $src1$$Register, $src2$$constant);
11413   %}
11414   ins_pipe(pipe_class_compare);
11415 %}
11416 
11417 // Manifest a CmpL3 result in an integer register.
11418 instruct cmpL3_reg_reg(iRegIdst dst, iRegLsrc src1, iRegLsrc src2, flagsRegCR0 cr0) %{
11419   match(Set dst (CmpL3 src1 src2));
11420   effect(KILL cr0);
11421   ins_cost(DEFAULT_COST * 5);
11422   size(VM_Version::has_brw() ? 16 : 20);
11423 
11424   format %{ "cmpL3_reg_reg $dst, $src1, $src2" %}
11425 
11426   ins_encode %{
11427     __ cmpd(CCR0, $src1$$Register, $src2$$Register);
11428     __ set_cmp3($dst$$Register);
11429   %}
11430   ins_pipe(pipe_class_default);
11431 %}
11432 
11433 // Implicit range checks.
11434 // A range check in the ideal world has one of the following shapes:
11435 //  - (If le (CmpU length index)), (IfTrue  throw exception)
11436 //  - (If lt (CmpU index length)), (IfFalse throw exception)
11437 //
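// For illustration, a Java array access a[i] is guarded by
//   if (i >=u a.length) goto uncommon_trap;
// and, when TrapBasedRangeChecks is enabled and the failing path leads to an
// uncommon trap, compare and branch collapse into a single trapping TW/TWI
// instruction as matched below.
//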
11438 // Match range check 'If le (CmpU length index)'.
11439 instruct rangeCheck_iReg_uimm15(cmpOp cmp, iRegIsrc src_length, uimmI15 index, label labl) %{
11440   match(If cmp (CmpU src_length index));
11441   effect(USE labl);
11442   predicate(TrapBasedRangeChecks &&
11443             _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le &&
11444             PROB_UNLIKELY(_leaf->as_If()->_prob) >= PROB_ALWAYS &&
11445             (Matcher::branches_to_uncommon_trap(_leaf)));
11446 
11447   ins_is_TrapBasedCheckNode(true);
11448 
11449   format %{ "TWI     $index $cmp $src_length \t// RangeCheck => trap $labl" %}
11450   size(4);
11451   ins_encode %{
11452     if ($cmp$$cmpcode == 0x1 /* less_equal */) {
11453       __ trap_range_check_le($src_length$$Register, $index$$constant);
11454     } else {
11455       // Both successors are uncommon traps, probability is 0.
11456       // Node got flipped during fixup flow.
11457       assert($cmp$$cmpcode == 0x9, "must be greater");
11458       __ trap_range_check_g($src_length$$Register, $index$$constant);
11459     }
11460   %}
11461   ins_pipe(pipe_class_trap);
11462 %}
11463 
11464 // Match range check 'If lt (CmpU index length)'.
11465 instruct rangeCheck_iReg_iReg(cmpOp cmp, iRegIsrc src_index, iRegIsrc src_length, label labl) %{
11466   match(If cmp (CmpU src_index src_length));
11467   effect(USE labl);
11468   predicate(TrapBasedRangeChecks &&
11469             _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt &&
11470             _leaf->as_If()->_prob >= PROB_ALWAYS &&
11471             (Matcher::branches_to_uncommon_trap(_leaf)));
11472 
11473   ins_is_TrapBasedCheckNode(true);
11474 
11475   format %{ "TW      $src_index $cmp $src_length \t// RangeCheck => trap $labl" %}
11476   size(4);
11477   ins_encode %{
11478     if ($cmp$$cmpcode == 0x0 /* greater_equal */) {
11479       __ trap_range_check_ge($src_index$$Register, $src_length$$Register);
11480     } else {
11481       // Both successors are uncommon traps, probability is 0.
11482       // Node got flipped during fixup flow.
11483       assert($cmp$$cmpcode == 0x8, "must be less");
11484       __ trap_range_check_l($src_index$$Register, $src_length$$Register);
11485     }
11486   %}
11487   ins_pipe(pipe_class_trap);
11488 %}
11489 
11490 // Match range check 'If lt (CmpU index length)'.
11491 instruct rangeCheck_uimm15_iReg(cmpOp cmp, iRegIsrc src_index, uimmI15 length, label labl) %{
11492   match(If cmp (CmpU src_index length));
11493   effect(USE labl);
11494   predicate(TrapBasedRangeChecks &&
11495             _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt &&
11496             _leaf->as_If()->_prob >= PROB_ALWAYS &&
11497             (Matcher::branches_to_uncommon_trap(_leaf)));
11498 
11499   ins_is_TrapBasedCheckNode(true);
11500 
11501   format %{ "TWI     $src_index $cmp $length \t// RangeCheck => trap $labl" %}
11502   size(4);
11503   ins_encode %{
11504     if ($cmp$$cmpcode == 0x0 /* greater_equal */) {
11505       __ trap_range_check_ge($src_index$$Register, $length$$constant);
11506     } else {
11507       // Both successors are uncommon traps, probability is 0.
11508       // Node got flipped during fixup flow.
11509       assert($cmp$$cmpcode == 0x8, "must be less");
11510       __ trap_range_check_l($src_index$$Register, $length$$constant);
11511     }
11512   %}
11513   ins_pipe(pipe_class_trap);
11514 %}
11515 
11516 instruct compU_reg_reg(flagsReg crx, iRegIsrc src1, iRegIsrc src2) %{
11517   match(Set crx (CmpU src1 src2));
11518   format %{ "CMPLW   $crx, $src1, $src2 \t// unsigned" %}
11519   size(4);
11520   ins_encode %{
11521     __ cmplw($crx$$CondRegister, $src1$$Register, $src2$$Register);
11522   %}
11523   ins_pipe(pipe_class_compare);
11524 %}
11525 
11526 instruct compU_reg_uimm16(flagsReg crx, iRegIsrc src1, uimmI16 src2) %{
11527   match(Set crx (CmpU src1 src2));
11528   size(4);
11529   format %{ "CMPLWI  $crx, $src1, $src2" %}
11530   ins_encode %{
11531     __ cmplwi($crx$$CondRegister, $src1$$Register, $src2$$constant);
11532   %}
11533   ins_pipe(pipe_class_compare);
11534 %}
11535 
11536 // Implicit zero checks (more implicit null checks).
11537 // No constant pool entries required.
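// For illustration, a test of a compressed oop against zero whose failing path
// leads to an uncommon trap (e.g. an implicit null check) is matched by the
// single trapping TDI instruction below when TrapBasedNullChecks is enabled.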
11538 instruct zeroCheckN_iReg_imm0(cmpOp cmp, iRegNsrc value, immN_0 zero, label labl) %{
11539   match(If cmp (CmpN value zero));
11540   effect(USE labl);
11541   predicate(TrapBasedNullChecks &&
11542             _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne &&
11543             _leaf->as_If()->_prob >= PROB_LIKELY_MAG(4) &&
11544             Matcher::branches_to_uncommon_trap(_leaf));
11545   ins_cost(1);
11546 
11547   ins_is_TrapBasedCheckNode(true);
11548 
11549   format %{ "TDI     $value $cmp $zero \t// ZeroCheckN => trap $labl" %}
11550   size(4);
11551   ins_encode %{
11552     if ($cmp$$cmpcode == 0xA) {
11553       __ trap_null_check($value$$Register);
11554     } else {
11555       // Both successors are uncommon traps, probability is 0.
11556       // Node got flipped during fixup flow.
      assert($cmp$$cmpcode == 0x2, "must be equal(0xA) or notEqual(0x2)");
11558       __ trap_null_check($value$$Register, Assembler::traptoGreaterThanUnsigned);
11559     }
11560   %}
11561   ins_pipe(pipe_class_trap);
11562 %}
11563 
11564 // Compare narrow oops.
11565 instruct cmpN_reg_reg(flagsReg crx, iRegNsrc src1, iRegNsrc src2) %{
11566   match(Set crx (CmpN src1 src2));
11567 
11568   size(4);
11569   ins_cost(2);
11570   format %{ "CMPLW   $crx, $src1, $src2 \t// compressed ptr" %}
11571   ins_encode %{
11572     __ cmplw($crx$$CondRegister, $src1$$Register, $src2$$Register);
11573   %}
11574   ins_pipe(pipe_class_compare);
11575 %}
11576 
11577 instruct cmpN_reg_imm0(flagsReg crx, iRegNsrc src1, immN_0 src2) %{
11578   match(Set crx (CmpN src1 src2));
11579   // Make this more expensive than zeroCheckN_iReg_imm0.
11580   ins_cost(2);
11581 
11582   format %{ "CMPLWI  $crx, $src1, $src2 \t// compressed ptr" %}
11583   size(4);
11584   ins_encode %{
11585     __ cmplwi($crx$$CondRegister, $src1$$Register, $src2$$constant);
11586   %}
11587   ins_pipe(pipe_class_compare);
11588 %}
11589 
11590 // Implicit zero checks (more implicit null checks).
11591 // No constant pool entries required.
11592 instruct zeroCheckP_reg_imm0(cmpOp cmp, iRegP_N2P value, immP_0 zero, label labl) %{
11593   match(If cmp (CmpP value zero));
11594   effect(USE labl);
11595   predicate(TrapBasedNullChecks &&
11596             _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne &&
11597             _leaf->as_If()->_prob >= PROB_LIKELY_MAG(4) &&
11598             Matcher::branches_to_uncommon_trap(_leaf));
11599   ins_cost(1); // Should not be cheaper than zeroCheckN.
11600 
11601   ins_is_TrapBasedCheckNode(true);
11602 
11603   format %{ "TDI     $value $cmp $zero \t// ZeroCheckP => trap $labl" %}
11604   size(4);
11605   ins_encode %{
11606     if ($cmp$$cmpcode == 0xA) {
11607       __ trap_null_check($value$$Register);
11608     } else {
11609       // Both successors are uncommon traps, probability is 0.
11610       // Node got flipped during fixup flow.
      assert($cmp$$cmpcode == 0x2, "must be equal(0xA) or notEqual(0x2)");
11612       __ trap_null_check($value$$Register, Assembler::traptoGreaterThanUnsigned);
11613     }
11614   %}
11615   ins_pipe(pipe_class_trap);
11616 %}
11617 
11618 // Compare Pointers
11619 instruct cmpP_reg_reg(flagsReg crx, iRegP_N2P src1, iRegP_N2P src2) %{
11620   match(Set crx (CmpP src1 src2));
11621   format %{ "CMPLD   $crx, $src1, $src2 \t// ptr" %}
11622   size(4);
11623   ins_encode %{
11624     __ cmpld($crx$$CondRegister, $src1$$Register, $src2$$Register);
11625   %}
11626   ins_pipe(pipe_class_compare);
11627 %}
11628 
11629 instruct cmpP_reg_null(flagsReg crx, iRegP_N2P src1, immP_0or1 src2) %{
11630   match(Set crx (CmpP src1 src2));
  format %{ "CMPLDI  $crx, $src1, $src2 \t// ptr" %}
11632   size(4);
11633   ins_encode %{
11634     __ cmpldi($crx$$CondRegister, $src1$$Register, (int)((short)($src2$$constant & 0xFFFF)));
11635   %}
11636   ins_pipe(pipe_class_compare);
11637 %}
11638 
11639 // Used in postalloc expand.
11640 instruct cmpP_reg_imm16(flagsReg crx, iRegPsrc src1, immL16 src2) %{
11641   // This match rule prevents reordering of node before a safepoint.
  // This only makes sense if this instruction is used exclusively
11643   // for the expansion of EncodeP!
11644   match(Set crx (CmpP src1 src2));
11645   predicate(false);
11646 
11647   format %{ "CMPDI   $crx, $src1, $src2" %}
11648   size(4);
11649   ins_encode %{
11650     __ cmpdi($crx$$CondRegister, $src1$$Register, $src2$$constant);
11651   %}
11652   ins_pipe(pipe_class_compare);
11653 %}
11654 
11655 //----------Float Compares----------------------------------------------------
11656 
11657 instruct cmpFUnordered_reg_reg(flagsReg crx, regF src1, regF src2) %{
11658   // Needs matchrule, see cmpDUnordered.
11659   match(Set crx (CmpF src1 src2));
  // False predicate, shall not be matched.
11661   predicate(false);
11662 
11663   format %{ "cmpFUrd $crx, $src1, $src2" %}
11664   size(4);
11665   ins_encode %{
11666     __ fcmpu($crx$$CondRegister, $src1$$FloatRegister, $src2$$FloatRegister);
11667   %}
11668   ins_pipe(pipe_class_default);
11669 %}
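
// Helper for the float/double compare expansions below: if the preceding FCMPU
// left the 'unordered' bit set (at least one operand was NaN), overwrite $crx
// with a 'less' result, so an unordered compare is treated like 'less'.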
11670 
11671 instruct cmov_bns_less(flagsReg crx) %{
11672   // no match-rule, false predicate
11673   effect(DEF crx);
11674   predicate(false);
11675 
11676   ins_variable_size_depending_on_alignment(true);
11677 
11678   format %{ "cmov    $crx" %}
11679   // Worst case is branch + move + stop, no stop without scheduler.
11680   size(12);
11681   ins_encode %{
11682     Label done;
11683     __ bns($crx$$CondRegister, done);        // not unordered -> keep crx
11684     __ li(R0, 0);
11685     __ cmpwi($crx$$CondRegister, R0, 1);     // unordered -> set crx to 'less'
11686     __ bind(done);
11687   %}
11688   ins_pipe(pipe_class_default);
11689 %}
11690 
11691 // Compare floating, generate condition code.
11692 instruct cmpF_reg_reg_Ex(flagsReg crx, regF src1, regF src2) %{
  // FIXME: should we match 'If cmp (CmpF src1 src2)' ??
11694   //
11695   // The following code sequence occurs a lot in mpegaudio:
11696   //
11697   // block BXX:
11698   // 0: instruct cmpFUnordered_reg_reg (cmpF_reg_reg-0):
11699   //    cmpFUrd CCR6, F11, F9
11700   // 4: instruct cmov_bns_less (cmpF_reg_reg-1):
11701   //    cmov CCR6
11702   // 8: instruct branchConSched:
11703   //    B_FARle CCR6, B56  P=0.500000 C=-1.000000
11704   match(Set crx (CmpF src1 src2));
11705   ins_cost(DEFAULT_COST+BRANCH_COST);
11706 
11707   format %{ "CmpF    $crx, $src1, $src2 \t// postalloc expanded" %}
11708   postalloc_expand %{
11709     //
11710     // replaces
11711     //
11712     //   region  src1  src2
11713     //    \       |     |
11714     //     crx=cmpF_reg_reg
11715     //
11716     // with
11717     //
11718     //   region  src1  src2
11719     //    \       |     |
11720     //     crx=cmpFUnordered_reg_reg
11721     //      |
11722     //      ^  region
11723     //      |   \
11724     //      crx=cmov_bns_less
11725     //
11726 
11727     // Create new nodes.
11728     MachNode *m1 = new cmpFUnordered_reg_regNode();
11729     MachNode *m2 = new cmov_bns_lessNode();
11730 
11731     // inputs for new nodes
11732     m1->add_req(n_region, n_src1, n_src2);
11733     m2->add_req(n_region);
11734     m2->add_prec(m1);
11735 
11736     // operands for new nodes
11737     m1->_opnds[0] = op_crx;
11738     m1->_opnds[1] = op_src1;
11739     m1->_opnds[2] = op_src2;
11740     m2->_opnds[0] = op_crx;
11741 
11742     // registers for new nodes
11743     ra_->set_pair(m1->_idx, ra_->get_reg_second(this), ra_->get_reg_first(this)); // crx
11744     ra_->set_pair(m2->_idx, ra_->get_reg_second(this), ra_->get_reg_first(this)); // crx
11745 
11746     // Insert new nodes.
11747     nodes->push(m1);
11748     nodes->push(m2);
11749   %}
11750 %}
11751 
11752 // Compare float, generate -1,0,1
11753 instruct cmpF3_reg_reg(iRegIdst dst, regF src1, regF src2, flagsRegCR0 cr0) %{
11754   match(Set dst (CmpF3 src1 src2));
11755   effect(KILL cr0);
11756   ins_cost(DEFAULT_COST * 6);
11757   size(VM_Version::has_brw() ? 20 : 24);
11758 
11759   format %{ "cmpF3_reg_reg $dst, $src1, $src2" %}
11760 
11761   ins_encode %{
11762     __ fcmpu(CCR0, $src1$$FloatRegister, $src2$$FloatRegister);
11763     __ set_cmpu3($dst$$Register, true); // C2 requires unordered to get treated like less
11764   %}
11765   ins_pipe(pipe_class_default);
11766 %}
11767 
11768 instruct cmpDUnordered_reg_reg(flagsReg crx, regD src1, regD src2) %{
  // Needs a match rule so that the ideal opcode is Cmp. This makes gcm place the
  // node right before the conditional move that uses it.
  // In jck test api/java_awt/geom/QuadCurve2DFloat/index.html#SetCurveTesttestCase7,
  // compilation of java.awt.geom.RectangularShape::getBounds()Ljava/awt/Rectangle
  // crashed during register allocation when the flags register between cmpDUnordered
  // and a conditional move was supposed to be spilled.
11775   match(Set crx (CmpD src1 src2));
11776   // False predicate, shall not be matched.
11777   predicate(false);
11778 
11779   format %{ "cmpFUrd $crx, $src1, $src2" %}
11780   size(4);
11781   ins_encode %{
11782     __ fcmpu($crx$$CondRegister, $src1$$FloatRegister, $src2$$FloatRegister);
11783   %}
11784   ins_pipe(pipe_class_default);
11785 %}
11786 
11787 instruct cmpD_reg_reg_Ex(flagsReg crx, regD src1, regD src2) %{
11788   match(Set crx (CmpD src1 src2));
11789   ins_cost(DEFAULT_COST+BRANCH_COST);
11790 
11791   format %{ "CmpD    $crx, $src1, $src2 \t// postalloc expanded" %}
11792   postalloc_expand %{
11793     //
11794     // replaces
11795     //
11796     //   region  src1  src2
11797     //    \       |     |
11798     //     crx=cmpD_reg_reg
11799     //
11800     // with
11801     //
11802     //   region  src1  src2
11803     //    \       |     |
11804     //     crx=cmpDUnordered_reg_reg
11805     //      |
11806     //      ^  region
11807     //      |   \
11808     //      crx=cmov_bns_less
11809     //
11810 
11811     // create new nodes
11812     MachNode *m1 = new cmpDUnordered_reg_regNode();
11813     MachNode *m2 = new cmov_bns_lessNode();
11814 
11815     // inputs for new nodes
11816     m1->add_req(n_region, n_src1, n_src2);
11817     m2->add_req(n_region);
11818     m2->add_prec(m1);
11819 
11820     // operands for new nodes
11821     m1->_opnds[0] = op_crx;
11822     m1->_opnds[1] = op_src1;
11823     m1->_opnds[2] = op_src2;
11824     m2->_opnds[0] = op_crx;
11825 
11826     // registers for new nodes
11827     ra_->set_pair(m1->_idx, ra_->get_reg_second(this), ra_->get_reg_first(this)); // crx
11828     ra_->set_pair(m2->_idx, ra_->get_reg_second(this), ra_->get_reg_first(this)); // crx
11829 
11830     // Insert new nodes.
11831     nodes->push(m1);
11832     nodes->push(m2);
11833   %}
11834 %}
11835 
11836 // Compare double, generate -1,0,1
11837 instruct cmpD3_reg_reg(iRegIdst dst, regD src1, regD src2, flagsRegCR0 cr0) %{
11838   match(Set dst (CmpD3 src1 src2));
11839   effect(KILL cr0);
11840   ins_cost(DEFAULT_COST * 6);
11841   size(VM_Version::has_brw() ? 20 : 24);
11842 
11843   format %{ "cmpD3_reg_reg $dst, $src1, $src2" %}
11844 
11845   ins_encode %{
11846     __ fcmpu(CCR0, $src1$$FloatRegister, $src2$$FloatRegister);
11847     __ set_cmpu3($dst$$Register, true); // C2 requires unordered to get treated like less
11848   %}
11849   ins_pipe(pipe_class_default);
11850 %}
11851 
11852 // Compare char
11853 instruct cmprb_Digit_reg_reg(iRegIdst dst, iRegIsrc src1, iRegIsrc src2, flagsReg crx) %{
11854   match(Set dst (Digit src1));
11855   effect(TEMP src2, TEMP crx);
11856   ins_cost(3 * DEFAULT_COST);
11857 
11858   format %{ "LI      $src2, 0x3930\n\t"
11859             "CMPRB   $crx, 0, $src1, $src2\n\t"
11860             "SETB    $dst, $crx" %}
11861   size(12);
11862   ins_encode %{
11863     // 0x30: 0, 0x39: 9
11864     __ li($src2$$Register, 0x3930);
11865     // compare src1 with ranges 0x30 to 0x39
11866     __ cmprb($crx$$CondRegister, 0, $src1$$Register, $src2$$Register);
11867     __ setb($dst$$Register, $crx$$CondRegister);
11868   %}
11869   ins_pipe(pipe_class_default);
11870 %}
11871 
11872 instruct cmprb_LowerCase_reg_reg(iRegIdst dst, iRegIsrc src1, iRegIsrc src2, flagsReg crx) %{
11873   match(Set dst (LowerCase src1));
11874   effect(TEMP src2, TEMP crx);
11875   ins_cost(12 * DEFAULT_COST);
11876 
11877   format %{ "LI      $src2, 0x7A61\n\t"
11878             "CMPRB   $crx, 0, $src1, $src2\n\t"
11879             "BGT     $crx, done\n\t"
11880             "LIS     $src2, (signed short)0xF6DF\n\t"
11881             "ORI     $src2, $src2, 0xFFF8\n\t"
11882             "CMPRB   $crx, 1, $src1, $src2\n\t"
11883             "BGT     $crx, done\n\t"
11884             "LIS     $src2, (signed short)0xAAB5\n\t"
11885             "ORI     $src2, $src2, 0xBABA\n\t"
11886             "INSRDI  $src2, $src2, 32, 0\n\t"
11887             "CMPEQB  $crx, 1, $src1, $src2\n"
11888             "done:\n\t"
11889             "SETB    $dst, $crx" %}
11890 
11891   size(48);
11892   ins_encode %{
11893     Label done;
11894     // 0x61: a, 0x7A: z
11895     __ li($src2$$Register, 0x7A61);
11896     // compare src1 with ranges 0x61 to 0x7A
11897     __ cmprb($crx$$CondRegister, 0, $src1$$Register, $src2$$Register);
11898     __ bgt($crx$$CondRegister, done);
11899 
    // 0xDF: sharp s, 0xFF: y with diaeresis, 0xF7 (division sign) is not a lower case letter
11901     __ lis($src2$$Register, (signed short)0xF6DF);
11902     __ ori($src2$$Register, $src2$$Register, 0xFFF8);
11903     // compare src1 with ranges 0xDF to 0xF6 and 0xF8 to 0xFF
11904     __ cmprb($crx$$CondRegister, 1, $src1$$Register, $src2$$Register);
11905     __ bgt($crx$$CondRegister, done);
11906 
11907     // 0xAA: feminine ordinal indicator
11908     // 0xB5: micro sign
11909     // 0xBA: masculine ordinal indicator
11910     __ lis($src2$$Register, (signed short)0xAAB5);
11911     __ ori($src2$$Register, $src2$$Register, 0xBABA);
11912     __ insrdi($src2$$Register, $src2$$Register, 32, 0);
11913     // compare src1 with 0xAA, 0xB5, and 0xBA
11914     __ cmpeqb($crx$$CondRegister, $src1$$Register, $src2$$Register);
11915 
11916     __ bind(done);
11917     __ setb($dst$$Register, $crx$$CondRegister);
11918   %}
11919   ins_pipe(pipe_class_default);
11920 %}
11921 
11922 instruct cmprb_UpperCase_reg_reg(iRegIdst dst, iRegIsrc src1, iRegIsrc src2, flagsReg crx) %{
11923   match(Set dst (UpperCase src1));
11924   effect(TEMP src2, TEMP crx);
11925   ins_cost(7 * DEFAULT_COST);
11926 
11927   format %{ "LI      $src2, 0x5A41\n\t"
11928             "CMPRB   $crx, 0, $src1, $src2\n\t"
11929             "BGT     $crx, done\n\t"
11930             "LIS     $src2, (signed short)0xD6C0\n\t"
11931             "ORI     $src2, $src2, 0xDED8\n\t"
11932             "CMPRB   $crx, 1, $src1, $src2\n"
11933             "done:\n\t"
11934             "SETB    $dst, $crx" %}
11935 
11936   size(28);
11937   ins_encode %{
11938     Label done;
11939     // 0x41: A, 0x5A: Z
11940     __ li($src2$$Register, 0x5A41);
    // compare src1 with the range 0x41 to 0x5A
11942     __ cmprb($crx$$CondRegister, 0, $src1$$Register, $src2$$Register);
11943     __ bgt($crx$$CondRegister, done);
11944 
    // 0xC0: A with grave, 0xDE: capital thorn, 0xD7 (multiplication sign) is not an upper case letter
11946     __ lis($src2$$Register, (signed short)0xD6C0);
11947     __ ori($src2$$Register, $src2$$Register, 0xDED8);
11948     // compare src1 with ranges 0xC0 to 0xD6 and 0xD8 to 0xDE
11949     __ cmprb($crx$$CondRegister, 1, $src1$$Register, $src2$$Register);
11950 
11951     __ bind(done);
11952     __ setb($dst$$Register, $crx$$CondRegister);
11953   %}
11954   ins_pipe(pipe_class_default);
11955 %}
11956 
11957 instruct cmprb_Whitespace_reg_reg(iRegIdst dst, iRegIsrc src1, iRegIsrc src2, flagsReg crx) %{
11958   match(Set dst (Whitespace src1));
11959   predicate(PowerArchitecturePPC64 <= 9);
11960   effect(TEMP src2, TEMP crx);
11961   ins_cost(4 * DEFAULT_COST);
11962 
11963   format %{ "LI      $src2, 0x0D09\n\t"
11964             "ADDIS   $src2, 0x201C\n\t"
11965             "CMPRB   $crx, 1, $src1, $src2\n\t"
11966             "SETB    $dst, $crx" %}
11967   size(16);
11968   ins_encode %{
11969     // 0x09 to 0x0D, 0x1C to 0x20
11970     __ li($src2$$Register, 0x0D09);
11971     __ addis($src2$$Register, $src2$$Register, 0x0201C);
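    // $src2 = 0x0D09 + (0x201C << 16) = 0x201C0D09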
11972     // compare src with ranges 0x09 to 0x0D and 0x1C to 0x20
11973     __ cmprb($crx$$CondRegister, 1, $src1$$Register, $src2$$Register);
11974     __ setb($dst$$Register, $crx$$CondRegister);
11975   %}
11976   ins_pipe(pipe_class_default);
11977 %}
11978 
11979 // Power 10 version, using prefixed addi to load 32-bit constant
11980 instruct cmprb_Whitespace_reg_reg_prefixed(iRegIdst dst, iRegIsrc src1, iRegIsrc src2, flagsReg crx) %{
11981   match(Set dst (Whitespace src1));
11982   predicate(PowerArchitecturePPC64 >= 10);
11983   effect(TEMP src2, TEMP crx);
11984   ins_cost(3 * DEFAULT_COST);
11985 
11986   format %{ "PLI     $src2, 0x201C0D09\n\t"
11987             "CMPRB   $crx, 1, $src1, $src2\n\t"
11988             "SETB    $dst, $crx" %}
11989   size(16);
11990   ins_encode %{
11991     // 0x09 to 0x0D, 0x1C to 0x20
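    // pli is an 8-byte prefixed instruction and must not cross a 64-byte
    // instruction boundary; ins_alignment(2) below ensures the prefix cannot
    // land at offset 0x3c of a 64-byte block.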
11992     assert( ((intptr_t)(__ pc()) & 0x3c) != 0x3c, "Bad alignment for prefixed instruction at " INTPTR_FORMAT, (intptr_t)(__ pc()));
11993     __ pli($src2$$Register, 0x201C0D09);
11994     // compare src with ranges 0x09 to 0x0D and 0x1C to 0x20
11995     __ cmprb($crx$$CondRegister, 1, $src1$$Register, $src2$$Register);
11996     __ setb($dst$$Register, $crx$$CondRegister);
11997   %}
11998   ins_pipe(pipe_class_default);
11999   ins_alignment(2);
12000 %}
12001 
12002 //----------Branches---------------------------------------------------------
12003 // Jump
12004 
12005 // Direct Branch.
12006 instruct branch(label labl) %{
12007   match(Goto);
12008   effect(USE labl);
12009   ins_cost(BRANCH_COST);
12010 
12011   format %{ "B       $labl" %}
12012   size(4);
12013   ins_encode %{
12014      Label d;    // dummy
12015      __ bind(d);
12016      Label* p = $labl$$label;
12017      // `p' is `NULL' when this encoding class is used only to
12018      // determine the size of the encoded instruction.
12019      Label& l = (NULL == p)? d : *(p);
12020      __ b(l);
12021   %}
12022   ins_pipe(pipe_class_default);
12023 %}
12024 
12025 // Conditional Near Branch
12026 instruct branchCon(cmpOp cmp, flagsRegSrc crx, label lbl) %{
12027   // Same match rule as `branchConFar'.
12028   match(If cmp crx);
12029   effect(USE lbl);
12030   ins_cost(BRANCH_COST);
12031 
12032   // If set to 1 this indicates that the current instruction is a
12033   // short variant of a long branch. This avoids using this
12034   // instruction in first-pass matching. It will then only be used in
12035   // the `Shorten_branches' pass.
12036   ins_short_branch(1);
12037 
12038   format %{ "B$cmp     $crx, $lbl" %}
12039   size(4);
12040   ins_encode( enc_bc(crx, cmp, lbl) );
12041   ins_pipe(pipe_class_default);
12042 %}
12043 
12044 // This is for cases when the ppc64 `bc' instruction does not
12045 // reach far enough. So we emit a far branch here, which is more
12046 // expensive.
12047 //
12048 // Conditional Far Branch
12049 instruct branchConFar(cmpOp cmp, flagsRegSrc crx, label lbl) %{
12050   // Same match rule as `branchCon'.
12051   match(If cmp crx);
12052   effect(USE crx, USE lbl);
12053   // Higher cost than `branchCon'.
12054   ins_cost(5*BRANCH_COST);
12055 
12056   // This is not a short variant of a branch, but the long variant.
12057   ins_short_branch(0);
12058 
12059   format %{ "B_FAR$cmp $crx, $lbl" %}
12060   size(8);
12061   ins_encode( enc_bc_far(crx, cmp, lbl) );
12062   ins_pipe(pipe_class_default);
12063 %}
12064 
12065 instruct branchLoopEnd(cmpOp cmp, flagsRegSrc crx, label labl) %{
12066   match(CountedLoopEnd cmp crx);
12067   effect(USE labl);
12068   ins_cost(BRANCH_COST);
12069 
12070   // short variant.
12071   ins_short_branch(1);
12072 
12073   format %{ "B$cmp     $crx, $labl \t// counted loop end" %}
12074   size(4);
12075   ins_encode( enc_bc(crx, cmp, labl) );
12076   ins_pipe(pipe_class_default);
12077 %}
12078 
12079 instruct branchLoopEndFar(cmpOp cmp, flagsRegSrc crx, label labl) %{
12080   match(CountedLoopEnd cmp crx);
12081   effect(USE labl);
12082   ins_cost(BRANCH_COST);
12083 
12084   // Long variant.
12085   ins_short_branch(0);
12086 
12087   format %{ "B_FAR$cmp $crx, $labl \t// counted loop end" %}
12088   size(8);
12089   ins_encode( enc_bc_far(crx, cmp, labl) );
12090   ins_pipe(pipe_class_default);
12091 %}
12092 
12093 // ============================================================================
12094 // Java runtime operations, intrinsics and other complex operations.
12095 
// The second, slow half of a subtype check. Scan the subklass's secondary superklass
// array for an instance of the superklass. Set a hidden internal cache on a
// hit (the cache is checked with exposed code in gen_subtype_check()). Return
// non-zero for a miss or zero for a hit. The encoding ALSO sets flags.
//
// GL TODO: Improve this.
// - result should not be a TEMP
// - Add a match rule as on sparc to avoid the additional Cmp.
12104 instruct partialSubtypeCheck(iRegPdst result, iRegP_N2P subklass, iRegP_N2P superklass,
12105                              iRegPdst tmp_klass, iRegPdst tmp_arrayptr) %{
12106   match(Set result (PartialSubtypeCheck subklass superklass));
12107   effect(TEMP_DEF result, TEMP tmp_klass, TEMP tmp_arrayptr);
12108   ins_cost(DEFAULT_COST*10);
12109 
12110   format %{ "PartialSubtypeCheck $result = ($subklass instanceOf $superklass) tmp: $tmp_klass, $tmp_arrayptr" %}
12111   ins_encode %{
12112     __ check_klass_subtype_slow_path($subklass$$Register, $superklass$$Register, $tmp_arrayptr$$Register,
12113                                      $tmp_klass$$Register, NULL, $result$$Register);
12114   %}
12115   ins_pipe(pipe_class_default);
12116 %}
12117 
12118 // inlined locking and unlocking
12119 
12120 instruct cmpFastLock(flagsReg crx, iRegPdst oop, iRegPdst box, iRegPdst tmp1, iRegPdst tmp2) %{
12121   match(Set crx (FastLock oop box));
12122   effect(TEMP tmp1, TEMP tmp2);
12123   predicate(!Compile::current()->use_rtm());
12124 
12125   format %{ "FASTLOCK  $oop, $box, $tmp1, $tmp2" %}
12126   ins_encode %{
12127     __ compiler_fast_lock_object($crx$$CondRegister, $oop$$Register, $box$$Register,
12128                                  $tmp1$$Register, $tmp2$$Register, /*tmp3*/ R0);
    // If locking was successful, crx should indicate 'EQ'.
12130     // The compiler generates a branch to the runtime call to
12131     // _complete_monitor_locking_Java for the case where crx is 'NE'.
12132   %}
12133   ins_pipe(pipe_class_compare);
12134 %}
12135 
12136 // Separate version for TM. Use bound register for box to enable USE_KILL.
12137 instruct cmpFastLock_tm(flagsReg crx, iRegPdst oop, rarg2RegP box, iRegPdst tmp1, iRegPdst tmp2, iRegPdst tmp3) %{
12138   match(Set crx (FastLock oop box));
12139   effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, USE_KILL box);
12140   predicate(Compile::current()->use_rtm());
12141 
12142   format %{ "FASTLOCK  $oop, $box, $tmp1, $tmp2, $tmp3 (TM)" %}
12143   ins_encode %{
12144     __ compiler_fast_lock_object($crx$$CondRegister, $oop$$Register, $box$$Register,
12145                                  $tmp1$$Register, $tmp2$$Register, $tmp3$$Register,
12146                                  _rtm_counters, _stack_rtm_counters,
12147                                  ((Method*)(ra_->C->method()->constant_encoding()))->method_data(),
12148                                  /*RTM*/ true, ra_->C->profile_rtm());
    // If locking was successful, crx should indicate 'EQ'.
12150     // The compiler generates a branch to the runtime call to
12151     // _complete_monitor_locking_Java for the case where crx is 'NE'.
12152   %}
12153   ins_pipe(pipe_class_compare);
12154 %}
12155 
12156 instruct cmpFastUnlock(flagsReg crx, iRegPdst oop, iRegPdst box, iRegPdst tmp1, iRegPdst tmp2, iRegPdst tmp3) %{
12157   match(Set crx (FastUnlock oop box));
12158   effect(TEMP tmp1, TEMP tmp2, TEMP tmp3);
12159   predicate(!Compile::current()->use_rtm());
12160 
12161   format %{ "FASTUNLOCK  $oop, $box, $tmp1, $tmp2" %}
12162   ins_encode %{
12163     __ compiler_fast_unlock_object($crx$$CondRegister, $oop$$Register, $box$$Register,
12164                                    $tmp1$$Register, $tmp2$$Register, $tmp3$$Register,
12165                                    false);
    // If unlocking was successful, crx should indicate 'EQ'.
12167     // The compiler generates a branch to the runtime call to
12168     // _complete_monitor_unlocking_Java for the case where crx is 'NE'.
12169   %}
12170   ins_pipe(pipe_class_compare);
12171 %}
12172 
12173 instruct cmpFastUnlock_tm(flagsReg crx, iRegPdst oop, iRegPdst box, iRegPdst tmp1, iRegPdst tmp2, iRegPdst tmp3) %{
12174   match(Set crx (FastUnlock oop box));
12175   effect(TEMP tmp1, TEMP tmp2, TEMP tmp3);
12176   predicate(Compile::current()->use_rtm());
12177 
12178   format %{ "FASTUNLOCK  $oop, $box, $tmp1, $tmp2 (TM)" %}
12179   ins_encode %{
12180     __ compiler_fast_unlock_object($crx$$CondRegister, $oop$$Register, $box$$Register,
12181                                    $tmp1$$Register, $tmp2$$Register, $tmp3$$Register,
12182                                    /*RTM*/ true);
    // If unlocking was successful, crx should indicate 'EQ'.
12184     // The compiler generates a branch to the runtime call to
12185     // _complete_monitor_unlocking_Java for the case where crx is 'NE'.
12186   %}
12187   ins_pipe(pipe_class_compare);
12188 %}
12189 
12190 // Align address.
12191 instruct align_addr(iRegPdst dst, iRegPsrc src, immLnegpow2 mask) %{
12192   match(Set dst (CastX2P (AndL (CastP2X src) mask)));
12193 
12194   format %{ "ANDDI   $dst, $src, $mask \t// next aligned address" %}
12195   size(4);
12196   ins_encode %{
12197     __ clrrdi($dst$$Register, $src$$Register, log2i_exact(-(julong)$mask$$constant));
12198   %}
12199   ins_pipe(pipe_class_default);
12200 %}
12201 
12202 // Array size computation.
12203 instruct array_size(iRegLdst dst, iRegPsrc end, iRegPsrc start) %{
12204   match(Set dst (SubL (CastP2X end) (CastP2X start)));
12205 
12206   format %{ "SUB     $dst, $end, $start \t// array size in bytes" %}
12207   size(4);
12208   ins_encode %{
12209     __ subf($dst$$Register, $start$$Register, $end$$Register);
12210   %}
12211   ins_pipe(pipe_class_default);
12212 %}
12213 
12214 // Clear-array with constant short array length. The versions below can use dcbz with cnt > 30.
12215 instruct inlineCallClearArrayShort(immLmax30 cnt, rarg2RegP base, Universe dummy, regCTR ctr) %{
12216   match(Set dummy (ClearArray cnt base));
12217   effect(USE_KILL base, KILL ctr);
12218   ins_cost(2 * MEMORY_REF_COST);
12219 
12220   format %{ "ClearArray $cnt, $base" %}
12221   ins_encode %{
12222     __ clear_memory_constlen($base$$Register, $cnt$$constant, R0); // kills base, R0
12223   %}
12224   ins_pipe(pipe_class_default);
12225 %}
12226 
12227 // Clear-array with constant large array length.
12228 instruct inlineCallClearArrayLarge(immL cnt, rarg2RegP base, Universe dummy, iRegLdst tmp, regCTR ctr) %{
12229   match(Set dummy (ClearArray cnt base));
12230   effect(USE_KILL base, TEMP tmp, KILL ctr);
12231   ins_cost(3 * MEMORY_REF_COST);
12232 
12233   format %{ "ClearArray $cnt, $base \t// KILL $tmp" %}
12234   ins_encode %{
12235     __ clear_memory_doubleword($base$$Register, $tmp$$Register, R0, $cnt$$constant); // kills base, R0
12236   %}
12237   ins_pipe(pipe_class_default);
12238 %}
12239 
12240 // Clear-array with dynamic array length.
12241 instruct inlineCallClearArray(rarg1RegL cnt, rarg2RegP base, Universe dummy, regCTR ctr) %{
12242   match(Set dummy (ClearArray cnt base));
12243   effect(USE_KILL cnt, USE_KILL base, KILL ctr);
12244   ins_cost(4 * MEMORY_REF_COST);
12245 
12246   format %{ "ClearArray $cnt, $base" %}
12247   ins_encode %{
12248     __ clear_memory_doubleword($base$$Register, $cnt$$Register, R0); // kills cnt, base, R0
12249   %}
12250   ins_pipe(pipe_class_default);
12251 %}
12252 
12253 instruct string_compareL(rarg1RegP str1, rarg2RegP str2, rarg3RegI cnt1, rarg4RegI cnt2, iRegIdst result,
12254                          iRegIdst tmp, regCTR ctr, flagsRegCR0 cr0) %{
12255   predicate(((StrCompNode*)n)->encoding() == StrIntrinsicNode::LL);
12256   match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
12257   effect(TEMP_DEF result, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL ctr, KILL cr0, TEMP tmp);
12258   ins_cost(300);
12259   format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result \t// KILL $tmp" %}
12260   ins_encode %{
12261     __ string_compare($str1$$Register, $str2$$Register,
12262                       $cnt1$$Register, $cnt2$$Register,
12263                       $tmp$$Register,
12264                       $result$$Register, StrIntrinsicNode::LL);
12265   %}
12266   ins_pipe(pipe_class_default);
12267 %}
12268 
12269 instruct string_compareU(rarg1RegP str1, rarg2RegP str2, rarg3RegI cnt1, rarg4RegI cnt2, iRegIdst result,
12270                          iRegIdst tmp, regCTR ctr, flagsRegCR0 cr0) %{
12271   predicate(((StrCompNode*)n)->encoding() == StrIntrinsicNode::UU);
12272   match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
12273   effect(TEMP_DEF result, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL ctr, KILL cr0, TEMP tmp);
12274   ins_cost(300);
12275   format %{ "String Compare char[] $str1,$cnt1,$str2,$cnt2 -> $result \t// KILL $tmp" %}
12276   ins_encode %{
12277     __ string_compare($str1$$Register, $str2$$Register,
12278                       $cnt1$$Register, $cnt2$$Register,
12279                       $tmp$$Register,
12280                       $result$$Register, StrIntrinsicNode::UU);
12281   %}
12282   ins_pipe(pipe_class_default);
12283 %}
12284 
12285 instruct string_compareLU(rarg1RegP str1, rarg2RegP str2, rarg3RegI cnt1, rarg4RegI cnt2, iRegIdst result,
12286                           iRegIdst tmp, regCTR ctr, flagsRegCR0 cr0) %{
12287   predicate(((StrCompNode*)n)->encoding() == StrIntrinsicNode::LU);
12288   match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
12289   effect(TEMP_DEF result, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL ctr, KILL cr0, TEMP tmp);
12290   ins_cost(300);
12291   format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result \t// KILL $tmp" %}
12292   ins_encode %{
12293     __ string_compare($str1$$Register, $str2$$Register,
12294                       $cnt1$$Register, $cnt2$$Register,
12295                       $tmp$$Register,
12296                       $result$$Register, StrIntrinsicNode::LU);
12297   %}
12298   ins_pipe(pipe_class_default);
12299 %}
12300 
12301 instruct string_compareUL(rarg1RegP str1, rarg2RegP str2, rarg3RegI cnt1, rarg4RegI cnt2, iRegIdst result,
12302                           iRegIdst tmp, regCTR ctr, flagsRegCR0 cr0) %{
12303   predicate(((StrCompNode*)n)->encoding() == StrIntrinsicNode::UL);
12304   match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
12305   effect(TEMP_DEF result, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL ctr, KILL cr0, TEMP tmp);
12306   ins_cost(300);
12307   format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result \t// KILL $tmp" %}
12308   ins_encode %{
12309     __ string_compare($str2$$Register, $str1$$Register,
12310                       $cnt2$$Register, $cnt1$$Register,
12311                       $tmp$$Register,
12312                       $result$$Register, StrIntrinsicNode::UL);
12313   %}
12314   ins_pipe(pipe_class_default);
12315 %}
12316 
12317 instruct string_equalsL(rarg1RegP str1, rarg2RegP str2, rarg3RegI cnt, iRegIdst result,
12318                         iRegIdst tmp, regCTR ctr, flagsRegCR0 cr0) %{
12319   predicate(((StrEqualsNode*)n)->encoding() == StrIntrinsicNode::LL);
12320   match(Set result (StrEquals (Binary str1 str2) cnt));
12321   effect(TEMP_DEF result, USE_KILL str1, USE_KILL str2, USE_KILL cnt, TEMP tmp, KILL ctr, KILL cr0);
12322   ins_cost(300);
12323   format %{ "String Equals byte[] $str1,$str2,$cnt -> $result \t// KILL $tmp" %}
12324   ins_encode %{
12325     __ array_equals(false, $str1$$Register, $str2$$Register,
12326                     $cnt$$Register, $tmp$$Register,
12327                     $result$$Register, true /* byte */);
12328   %}
12329   ins_pipe(pipe_class_default);
12330 %}
12331 
12332 instruct string_equalsU(rarg1RegP str1, rarg2RegP str2, rarg3RegI cnt, iRegIdst result,
12333                         iRegIdst tmp, regCTR ctr, flagsRegCR0 cr0) %{
12334   predicate(((StrEqualsNode*)n)->encoding() == StrIntrinsicNode::UU);
12335   match(Set result (StrEquals (Binary str1 str2) cnt));
12336   effect(TEMP_DEF result, USE_KILL str1, USE_KILL str2, USE_KILL cnt, TEMP tmp, KILL ctr, KILL cr0);
12337   ins_cost(300);
12338   format %{ "String Equals char[]  $str1,$str2,$cnt -> $result \t// KILL $tmp" %}
12339   ins_encode %{
12340     __ array_equals(false, $str1$$Register, $str2$$Register,
12341                     $cnt$$Register, $tmp$$Register,
12342                     $result$$Register, false /* byte */);
12343   %}
12344   ins_pipe(pipe_class_default);
12345 %}
12346 
12347 instruct array_equalsB(rarg1RegP ary1, rarg2RegP ary2, iRegIdst result,
12348                        iRegIdst tmp1, iRegIdst tmp2, regCTR ctr, flagsRegCR0 cr0, flagsRegCR0 cr1) %{
12349   predicate(((AryEqNode*)n)->encoding() == StrIntrinsicNode::LL);
12350   match(Set result (AryEq ary1 ary2));
12351   effect(TEMP_DEF result, USE_KILL ary1, USE_KILL ary2, TEMP tmp1, TEMP tmp2, KILL ctr, KILL cr0, KILL cr1);
12352   ins_cost(300);
12353   format %{ "Array Equals $ary1,$ary2 -> $result \t// KILL $tmp1,$tmp2" %}
12354   ins_encode %{
12355     __ array_equals(true, $ary1$$Register, $ary2$$Register,
12356                     $tmp1$$Register, $tmp2$$Register,
12357                     $result$$Register, true /* byte */);
12358   %}
12359   ins_pipe(pipe_class_default);
12360 %}
12361 
12362 instruct array_equalsC(rarg1RegP ary1, rarg2RegP ary2, iRegIdst result,
12363                        iRegIdst tmp1, iRegIdst tmp2, regCTR ctr, flagsRegCR0 cr0, flagsRegCR0 cr1) %{
12364   predicate(((AryEqNode*)n)->encoding() == StrIntrinsicNode::UU);
12365   match(Set result (AryEq ary1 ary2));
12366   effect(TEMP_DEF result, USE_KILL ary1, USE_KILL ary2, TEMP tmp1, TEMP tmp2, KILL ctr, KILL cr0, KILL cr1);
12367   ins_cost(300);
12368   format %{ "Array Equals $ary1,$ary2 -> $result \t// KILL $tmp1,$tmp2" %}
12369   ins_encode %{
12370     __ array_equals(true, $ary1$$Register, $ary2$$Register,
12371                     $tmp1$$Register, $tmp2$$Register,
12372                     $result$$Register, false /* byte */);
12373   %}
12374   ins_pipe(pipe_class_default);
12375 %}
12376 
12377 instruct indexOf_imm1_char_U(iRegIdst result, iRegPsrc haystack, iRegIsrc haycnt,
12378                              immP needleImm, immL offsetImm, immI_1 needlecntImm,
12379                              iRegIdst tmp1, iRegIdst tmp2,
12380                              flagsRegCR0 cr0, flagsRegCR1 cr1, regCTR ctr) %{
12381   match(Set result (StrIndexOf (Binary haystack haycnt) (Binary (AddP needleImm offsetImm) needlecntImm)));
12382   effect(TEMP tmp1, TEMP tmp2, KILL cr0, KILL cr1, KILL ctr);
12383   // Required for EA: check if it is still a type_array.
12384   predicate(((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UU);
12385   ins_cost(150);
12386 
12387   format %{ "String IndexOf CSCL1 $haystack[0..$haycnt], $needleImm+$offsetImm[0..$needlecntImm]"
12388             "-> $result \t// KILL $haycnt, $tmp1, $tmp2, $cr0, $cr1" %}
12389 
12390   ins_encode %{
12391     immPOper *needleOper = (immPOper *)$needleImm;
12392     const TypeOopPtr *t = needleOper->type()->isa_oopptr();
12393     ciTypeArray* needle_values = t->const_oop()->as_type_array();  // Pointer to live char *
12394     jchar chr;
12395 #ifdef VM_LITTLE_ENDIAN
12396     chr = (((jchar)(unsigned char)needle_values->element_value(1).as_byte()) << 8) |
12397            ((jchar)(unsigned char)needle_values->element_value(0).as_byte());
12398 #else
12399     chr = (((jchar)(unsigned char)needle_values->element_value(0).as_byte()) << 8) |
12400            ((jchar)(unsigned char)needle_values->element_value(1).as_byte());
12401 #endif
12402     __ string_indexof_char($result$$Register,
12403                            $haystack$$Register, $haycnt$$Register,
12404                            R0, chr,
12405                            $tmp1$$Register, $tmp2$$Register, false /*is_byte*/);
12406   %}
12407   ins_pipe(pipe_class_compare);
12408 %}
12409 
12410 instruct indexOf_imm1_char_L(iRegIdst result, iRegPsrc haystack, iRegIsrc haycnt,
12411                              immP needleImm, immL offsetImm, immI_1 needlecntImm,
12412                              iRegIdst tmp1, iRegIdst tmp2,
12413                              flagsRegCR0 cr0, flagsRegCR1 cr1, regCTR ctr) %{
12414   match(Set result (StrIndexOf (Binary haystack haycnt) (Binary (AddP needleImm offsetImm) needlecntImm)));
12415   effect(TEMP tmp1, TEMP tmp2, KILL cr0, KILL cr1, KILL ctr);
12416   // Required for EA: check if it is still a type_array.
12417   predicate(((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::LL);
12418   ins_cost(150);
12419 
12420   format %{ "String IndexOf CSCL1 $haystack[0..$haycnt], $needleImm+$offsetImm[0..$needlecntImm]"
12421             "-> $result \t// KILL $haycnt, $tmp1, $tmp2, $cr0, $cr1" %}
12422 
12423   ins_encode %{
12424     immPOper *needleOper = (immPOper *)$needleImm;
12425     const TypeOopPtr *t = needleOper->type()->isa_oopptr();
12426     ciTypeArray* needle_values = t->const_oop()->as_type_array();  // Pointer to live char *
12427     jchar chr = (jchar)needle_values->element_value(0).as_byte();
12428     __ string_indexof_char($result$$Register,
12429                            $haystack$$Register, $haycnt$$Register,
12430                            R0, chr,
12431                            $tmp1$$Register, $tmp2$$Register, true /*is_byte*/);
12432   %}
12433   ins_pipe(pipe_class_compare);
12434 %}
12435 
12436 instruct indexOf_imm1_char_UL(iRegIdst result, iRegPsrc haystack, iRegIsrc haycnt,
12437                               immP needleImm, immL offsetImm, immI_1 needlecntImm,
12438                               iRegIdst tmp1, iRegIdst tmp2,
12439                               flagsRegCR0 cr0, flagsRegCR1 cr1, regCTR ctr) %{
12440   match(Set result (StrIndexOf (Binary haystack haycnt) (Binary (AddP needleImm offsetImm) needlecntImm)));
12441   effect(TEMP tmp1, TEMP tmp2, KILL cr0, KILL cr1, KILL ctr);
12442   // Required for EA: check if it is still a type_array.
12443   predicate(((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UL);
12444   ins_cost(150);
12445 
12446   format %{ "String IndexOf CSCL1 $haystack[0..$haycnt], $needleImm+$offsetImm[0..$needlecntImm]"
12447             "-> $result \t// KILL $haycnt, $tmp1, $tmp2, $cr0, $cr1" %}
12448 
12449   ins_encode %{
12450     immPOper *needleOper = (immPOper *)$needleImm;
12451     const TypeOopPtr *t = needleOper->type()->isa_oopptr();
12452     ciTypeArray* needle_values = t->const_oop()->as_type_array();  // Pointer to live char *
12453     jchar chr = (jchar)needle_values->element_value(0).as_byte();
12454     __ string_indexof_char($result$$Register,
12455                            $haystack$$Register, $haycnt$$Register,
12456                            R0, chr,
12457                            $tmp1$$Register, $tmp2$$Register, false /*is_byte*/);
12458   %}
12459   ins_pipe(pipe_class_compare);
12460 %}
12461 
12462 instruct indexOf_imm1_U(iRegIdst result, iRegPsrc haystack, iRegIsrc haycnt,
12463                         rscratch2RegP needle, immI_1 needlecntImm,
12464                         iRegIdst tmp1, iRegIdst tmp2,
12465                         flagsRegCR0 cr0, flagsRegCR1 cr1, regCTR ctr) %{
12466   match(Set result (StrIndexOf (Binary haystack haycnt) (Binary needle needlecntImm)));
12467   effect(USE_KILL needle, TEMP tmp1, TEMP tmp2, KILL cr0, KILL cr1, KILL ctr);
12468   // Required for EA: check if it is still a type_array.
12469   predicate(((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UU &&
12470             n->in(3)->in(1)->bottom_type()->is_aryptr()->const_oop() &&
12471             n->in(3)->in(1)->bottom_type()->is_aryptr()->const_oop()->is_type_array());
12472   ins_cost(180);
12473 
12474   format %{ "String IndexOf SCL1 $haystack[0..$haycnt], $needle[0..$needlecntImm]"
12475             " -> $result \t// KILL $haycnt, $needle, $tmp1, $tmp2, $cr0, $cr1" %}
12476   ins_encode %{
12477     Node *ndl = in(operand_index($needle));  // The node that defines needle.
12478     ciTypeArray* needle_values = ndl->bottom_type()->is_aryptr()->const_oop()->as_type_array();
12479     guarantee(needle_values, "sanity");
12480     jchar chr;
12481 #ifdef VM_LITTLE_ENDIAN
12482     chr = (((jchar)(unsigned char)needle_values->element_value(1).as_byte()) << 8) |
12483            ((jchar)(unsigned char)needle_values->element_value(0).as_byte());
12484 #else
12485     chr = (((jchar)(unsigned char)needle_values->element_value(0).as_byte()) << 8) |
12486            ((jchar)(unsigned char)needle_values->element_value(1).as_byte());
12487 #endif
12488     __ string_indexof_char($result$$Register,
12489                            $haystack$$Register, $haycnt$$Register,
12490                            R0, chr,
12491                            $tmp1$$Register, $tmp2$$Register, false /*is_byte*/);
12492   %}
12493   ins_pipe(pipe_class_compare);
12494 %}
12495 
12496 instruct indexOf_imm1_L(iRegIdst result, iRegPsrc haystack, iRegIsrc haycnt,
12497                         rscratch2RegP needle, immI_1 needlecntImm,
12498                         iRegIdst tmp1, iRegIdst tmp2,
12499                         flagsRegCR0 cr0, flagsRegCR1 cr1, regCTR ctr) %{
12500   match(Set result (StrIndexOf (Binary haystack haycnt) (Binary needle needlecntImm)));
12501   effect(USE_KILL needle, TEMP tmp1, TEMP tmp2, KILL cr0, KILL cr1, KILL ctr);
12502   // Required for EA: check if it is still a type_array.
12503   predicate(((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::LL &&
12504             n->in(3)->in(1)->bottom_type()->is_aryptr()->const_oop() &&
12505             n->in(3)->in(1)->bottom_type()->is_aryptr()->const_oop()->is_type_array());
12506   ins_cost(180);
12507 
12508   format %{ "String IndexOf SCL1 $haystack[0..$haycnt], $needle[0..$needlecntImm]"
12509             " -> $result \t// KILL $haycnt, $needle, $tmp1, $tmp2, $cr0, $cr1" %}
12510   ins_encode %{
12511     Node *ndl = in(operand_index($needle));  // The node that defines needle.
12512     ciTypeArray* needle_values = ndl->bottom_type()->is_aryptr()->const_oop()->as_type_array();
12513     guarantee(needle_values, "sanity");
12514     jchar chr = (jchar)needle_values->element_value(0).as_byte();
12515     __ string_indexof_char($result$$Register,
12516                            $haystack$$Register, $haycnt$$Register,
12517                            R0, chr,
12518                            $tmp1$$Register, $tmp2$$Register, true /*is_byte*/);
12519   %}
12520   ins_pipe(pipe_class_compare);
12521 %}
12522 
12523 instruct indexOf_imm1_UL(iRegIdst result, iRegPsrc haystack, iRegIsrc haycnt,
12524                          rscratch2RegP needle, immI_1 needlecntImm,
12525                          iRegIdst tmp1, iRegIdst tmp2,
12526                          flagsRegCR0 cr0, flagsRegCR1 cr1, regCTR ctr) %{
12527   match(Set result (StrIndexOf (Binary haystack haycnt) (Binary needle needlecntImm)));
12528   effect(USE_KILL needle, TEMP tmp1, TEMP tmp2, KILL cr0, KILL cr1, KILL ctr);
12529   // Required for EA: check if it is still a type_array.
12530   predicate(((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UL &&
12531             n->in(3)->in(1)->bottom_type()->is_aryptr()->const_oop() &&
12532             n->in(3)->in(1)->bottom_type()->is_aryptr()->const_oop()->is_type_array());
12533   ins_cost(180);
12534 
12535   format %{ "String IndexOf SCL1 $haystack[0..$haycnt], $needle[0..$needlecntImm]"
12536             " -> $result \t// KILL $haycnt, $needle, $tmp1, $tmp2, $cr0, $cr1" %}
12537   ins_encode %{
12538     Node *ndl = in(operand_index($needle));  // The node that defines needle.
12539     ciTypeArray* needle_values = ndl->bottom_type()->is_aryptr()->const_oop()->as_type_array();
12540     guarantee(needle_values, "sanity");
12541     jchar chr = (jchar)needle_values->element_value(0).as_byte();
12542     __ string_indexof_char($result$$Register,
12543                            $haystack$$Register, $haycnt$$Register,
12544                            R0, chr,
12545                            $tmp1$$Register, $tmp2$$Register, false /*is_byte*/);
12546   %}
12547   ins_pipe(pipe_class_compare);
12548 %}
12549 
12550 instruct indexOfChar_U(iRegIdst result, iRegPsrc haystack, iRegIsrc haycnt,
12551                        iRegIsrc ch, iRegIdst tmp1, iRegIdst tmp2,
12552                        flagsRegCR0 cr0, flagsRegCR1 cr1, regCTR ctr) %{
12553   match(Set result (StrIndexOfChar (Binary haystack haycnt) ch));
12554   effect(TEMP tmp1, TEMP tmp2, KILL cr0, KILL cr1, KILL ctr);
12555   predicate(((StrIndexOfCharNode*)n)->encoding() == StrIntrinsicNode::U);
12556   ins_cost(180);
12557 
12558   format %{ "StringUTF16 IndexOfChar $haystack[0..$haycnt], $ch"
12559             " -> $result \t// KILL $haycnt, $tmp1, $tmp2, $cr0, $cr1" %}
12560   ins_encode %{
12561     __ string_indexof_char($result$$Register,
12562                            $haystack$$Register, $haycnt$$Register,
12563                            $ch$$Register, 0 /* this is not used if the character is already in a register */,
12564                            $tmp1$$Register, $tmp2$$Register, false /*is_byte*/);
12565   %}
12566   ins_pipe(pipe_class_compare);
12567 %}
12568 
12569 instruct indexOfChar_L(iRegIdst result, iRegPsrc haystack, iRegIsrc haycnt,
12570                        iRegIsrc ch, iRegIdst tmp1, iRegIdst tmp2,
12571                        flagsRegCR0 cr0, flagsRegCR1 cr1, regCTR ctr) %{
12572   match(Set result (StrIndexOfChar (Binary haystack haycnt) ch));
12573   effect(TEMP tmp1, TEMP tmp2, KILL cr0, KILL cr1, KILL ctr);
12574   predicate(((StrIndexOfCharNode*)n)->encoding() == StrIntrinsicNode::L);
12575   ins_cost(180);
12576 
12577   format %{ "StringLatin1 IndexOfChar $haystack[0..$haycnt], $ch"
12578             " -> $result \t// KILL $haycnt, $tmp1, $tmp2, $cr0, $cr1" %}
12579   ins_encode %{
12580     __ string_indexof_char($result$$Register,
12581                            $haystack$$Register, $haycnt$$Register,
12582                            $ch$$Register, 0 /* this is not used if the character is already in a register */,
12583                            $tmp1$$Register, $tmp2$$Register, true /*is_byte*/);
12584   %}
12585   ins_pipe(pipe_class_compare);
12586 %}
12587 
12588 instruct indexOf_imm_U(iRegIdst result, iRegPsrc haystack, rscratch1RegI haycnt,
12589                        iRegPsrc needle, uimmI15 needlecntImm,
12590                        iRegIdst tmp1, iRegIdst tmp2, iRegIdst tmp3, iRegIdst tmp4, iRegIdst tmp5,
12591                        flagsRegCR0 cr0, flagsRegCR1 cr1, flagsRegCR6 cr6, regCTR ctr) %{
12592   match(Set result (StrIndexOf (Binary haystack haycnt) (Binary needle needlecntImm)));
12593   effect(USE_KILL haycnt, /* better: TDEF haycnt, */ TEMP_DEF result,
12594          TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, TEMP tmp5, KILL cr0, KILL cr1, KILL cr6, KILL ctr);
12595   // Required for EA: check if it is still a type_array.
12596   predicate(((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UU &&
12597             n->in(3)->in(1)->bottom_type()->is_aryptr()->const_oop() &&
12598             n->in(3)->in(1)->bottom_type()->is_aryptr()->const_oop()->is_type_array());
12599   ins_cost(250);
12600 
12601   format %{ "String IndexOf SCL $haystack[0..$haycnt], $needle[0..$needlecntImm]"
12602             " -> $result \t// KILL $haycnt, $tmp1, $tmp2, $tmp3, $tmp4, $tmp5, $cr0, $cr1" %}
12603   ins_encode %{
12604     Node *ndl = in(operand_index($needle));  // The node that defines needle.
12605     ciTypeArray* needle_values = ndl->bottom_type()->is_aryptr()->const_oop()->as_type_array();
12606 
12607     __ string_indexof($result$$Register,
12608                       $haystack$$Register, $haycnt$$Register,
12609                       $needle$$Register, needle_values, $tmp5$$Register, $needlecntImm$$constant,
12610                       $tmp1$$Register, $tmp2$$Register, $tmp3$$Register, $tmp4$$Register, StrIntrinsicNode::UU);
12611   %}
12612   ins_pipe(pipe_class_compare);
12613 %}
12614 
12615 instruct indexOf_imm_L(iRegIdst result, iRegPsrc haystack, rscratch1RegI haycnt,
12616                        iRegPsrc needle, uimmI15 needlecntImm,
12617                        iRegIdst tmp1, iRegIdst tmp2, iRegIdst tmp3, iRegIdst tmp4, iRegIdst tmp5,
12618                        flagsRegCR0 cr0, flagsRegCR1 cr1, flagsRegCR6 cr6, regCTR ctr) %{
12619   match(Set result (StrIndexOf (Binary haystack haycnt) (Binary needle needlecntImm)));
12620   effect(USE_KILL haycnt, /* better: TDEF haycnt, */ TEMP_DEF result,
12621          TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, TEMP tmp5, KILL cr0, KILL cr1, KILL cr6, KILL ctr);
12622   // Required for EA: check if it is still a type_array.
12623   predicate(((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::LL &&
12624             n->in(3)->in(1)->bottom_type()->is_aryptr()->const_oop() &&
12625             n->in(3)->in(1)->bottom_type()->is_aryptr()->const_oop()->is_type_array());
12626   ins_cost(250);
12627 
12628   format %{ "String IndexOf SCL $haystack[0..$haycnt], $needle[0..$needlecntImm]"
12629             " -> $result \t// KILL $haycnt, $tmp1, $tmp2, $tmp3, $tmp4, $tmp5, $cr0, $cr1" %}
12630   ins_encode %{
12631     Node *ndl = in(operand_index($needle));  // The node that defines needle.
12632     ciTypeArray* needle_values = ndl->bottom_type()->is_aryptr()->const_oop()->as_type_array();
12633 
12634     __ string_indexof($result$$Register,
12635                       $haystack$$Register, $haycnt$$Register,
12636                       $needle$$Register, needle_values, $tmp5$$Register, $needlecntImm$$constant,
12637                       $tmp1$$Register, $tmp2$$Register, $tmp3$$Register, $tmp4$$Register, StrIntrinsicNode::LL);
12638   %}
12639   ins_pipe(pipe_class_compare);
12640 %}
12641 
12642 instruct indexOf_imm_UL(iRegIdst result, iRegPsrc haystack, rscratch1RegI haycnt,
12643                         iRegPsrc needle, uimmI15 needlecntImm,
12644                         iRegIdst tmp1, iRegIdst tmp2, iRegIdst tmp3, iRegIdst tmp4, iRegIdst tmp5,
12645                         flagsRegCR0 cr0, flagsRegCR1 cr1, flagsRegCR6 cr6, regCTR ctr) %{
12646   match(Set result (StrIndexOf (Binary haystack haycnt) (Binary needle needlecntImm)));
12647   effect(USE_KILL haycnt, /* better: TDEF haycnt, */ TEMP_DEF result,
12648          TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, TEMP tmp5, KILL cr0, KILL cr1, KILL cr6, KILL ctr);
12649   // Required for EA: check if it is still a type_array.
12650   predicate(((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UL &&
12651             n->in(3)->in(1)->bottom_type()->is_aryptr()->const_oop() &&
12652             n->in(3)->in(1)->bottom_type()->is_aryptr()->const_oop()->is_type_array());
12653   ins_cost(250);
12654 
12655   format %{ "String IndexOf SCL $haystack[0..$haycnt], $needle[0..$needlecntImm]"
12656             " -> $result \t// KILL $haycnt, $tmp1, $tmp2, $tmp3, $tmp4, $tmp5, $cr0, $cr1" %}
12657   ins_encode %{
12658     Node *ndl = in(operand_index($needle));  // The node that defines needle.
12659     ciTypeArray* needle_values = ndl->bottom_type()->is_aryptr()->const_oop()->as_type_array();
12660 
12661     __ string_indexof($result$$Register,
12662                       $haystack$$Register, $haycnt$$Register,
12663                       $needle$$Register, needle_values, $tmp5$$Register, $needlecntImm$$constant,
12664                       $tmp1$$Register, $tmp2$$Register, $tmp3$$Register, $tmp4$$Register, StrIntrinsicNode::UL);
12665   %}
12666   ins_pipe(pipe_class_compare);
12667 %}
12668 
12669 instruct indexOf_U(iRegIdst result, iRegPsrc haystack, rscratch1RegI haycnt, iRegPsrc needle, rscratch2RegI needlecnt,
12670                    iRegLdst tmp1, iRegLdst tmp2, iRegLdst tmp3, iRegLdst tmp4,
12671                    flagsRegCR0 cr0, flagsRegCR1 cr1, flagsRegCR6 cr6, regCTR ctr) %{
12672   match(Set result (StrIndexOf (Binary haystack haycnt) (Binary needle needlecnt)));
12673   effect(USE_KILL haycnt, USE_KILL needlecnt, /*better: TDEF haycnt, TDEF needlecnt,*/
12674          TEMP_DEF result,
12675          TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, KILL cr0, KILL cr1, KILL cr6, KILL ctr);
12676   predicate(((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UU);
12677   ins_cost(300);
12678 
12679   format %{ "String IndexOf $haystack[0..$haycnt], $needle[0..$needlecnt]"
12680              " -> $result \t// KILL $haycnt, $needlecnt, $tmp1, $tmp2, $tmp3, $tmp4, $cr0, $cr1" %}
12681   ins_encode %{
12682     __ string_indexof($result$$Register,
12683                       $haystack$$Register, $haycnt$$Register,
12684                       $needle$$Register, NULL, $needlecnt$$Register, 0,  // needlecnt not constant.
12685                       $tmp1$$Register, $tmp2$$Register, $tmp3$$Register, $tmp4$$Register, StrIntrinsicNode::UU);
12686   %}
12687   ins_pipe(pipe_class_compare);
12688 %}
12689 
12690 instruct indexOf_L(iRegIdst result, iRegPsrc haystack, rscratch1RegI haycnt, iRegPsrc needle, rscratch2RegI needlecnt,
12691                    iRegLdst tmp1, iRegLdst tmp2, iRegLdst tmp3, iRegLdst tmp4,
12692                    flagsRegCR0 cr0, flagsRegCR1 cr1, flagsRegCR6 cr6, regCTR ctr) %{
12693   match(Set result (StrIndexOf (Binary haystack haycnt) (Binary needle needlecnt)));
12694   effect(USE_KILL haycnt, USE_KILL needlecnt, /*better: TDEF haycnt, TDEF needlecnt,*/
12695          TEMP_DEF result,
12696          TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, KILL cr0, KILL cr1, KILL cr6, KILL ctr);
12697   predicate(((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::LL);
12698   ins_cost(300);
12699 
12700   format %{ "String IndexOf $haystack[0..$haycnt], $needle[0..$needlecnt]"
12701              " -> $result \t// KILL $haycnt, $needlecnt, $tmp1, $tmp2, $tmp3, $tmp4, $cr0, $cr1" %}
12702   ins_encode %{
12703     __ string_indexof($result$$Register,
12704                       $haystack$$Register, $haycnt$$Register,
12705                       $needle$$Register, NULL, $needlecnt$$Register, 0,  // needlecnt not constant.
12706                       $tmp1$$Register, $tmp2$$Register, $tmp3$$Register, $tmp4$$Register, StrIntrinsicNode::LL);
12707   %}
12708   ins_pipe(pipe_class_compare);
12709 %}
12710 
12711 instruct indexOf_UL(iRegIdst result, iRegPsrc haystack, rscratch1RegI haycnt, iRegPsrc needle, rscratch2RegI needlecnt,
12712                     iRegLdst tmp1, iRegLdst tmp2, iRegLdst tmp3, iRegLdst tmp4,
12713                     flagsRegCR0 cr0, flagsRegCR1 cr1, flagsRegCR6 cr6, regCTR ctr) %{
12714   match(Set result (StrIndexOf (Binary haystack haycnt) (Binary needle needlecnt)));
12715   effect(USE_KILL haycnt, USE_KILL needlecnt, /*better: TDEF haycnt, TDEF needlecnt,*/
12716          TEMP_DEF result,
12717          TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, KILL cr0, KILL cr1, KILL cr6, KILL ctr);
12718   predicate(((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UL);
12719   ins_cost(300);
12720 
12721   format %{ "String IndexOf $haystack[0..$haycnt], $needle[0..$needlecnt]"
12722              " -> $result \t// KILL $haycnt, $needlecnt, $tmp1, $tmp2, $tmp3, $tmp4, $cr0, $cr1" %}
12723   ins_encode %{
12724     __ string_indexof($result$$Register,
12725                       $haystack$$Register, $haycnt$$Register,
12726                       $needle$$Register, NULL, $needlecnt$$Register, 0,  // needlecnt not constant.
12727                       $tmp1$$Register, $tmp2$$Register, $tmp3$$Register, $tmp4$$Register, StrIntrinsicNode::UL);
12728   %}
12729   ins_pipe(pipe_class_compare);
12730 %}
12731 
12732 // char[] to byte[] compression
12733 instruct string_compress(rarg1RegP src, rarg2RegP dst, iRegIsrc len, iRegIdst result, iRegLdst tmp1,
12734                          iRegLdst tmp2, iRegLdst tmp3, iRegLdst tmp4, iRegLdst tmp5, regCTR ctr, flagsRegCR0 cr0) %{
12735   match(Set result (StrCompressedCopy src (Binary dst len)));
12736   effect(TEMP_DEF result, TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, TEMP tmp5,
12737          USE_KILL src, USE_KILL dst, KILL ctr, KILL cr0);
12738   ins_cost(300);
12739   format %{ "String Compress $src,$dst,$len -> $result \t// KILL $tmp1, $tmp2, $tmp3, $tmp4, $tmp5" %}
12740   ins_encode %{
12741     Label Lskip, Ldone;
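    // string_compress_16 compresses the input in blocks of 8 chars and branches
    // to Ldone (with $result still 0) if it hits a char > 0xFF. The remaining
    // len % 8 chars are handled by string_compress; only if every char fits
    // into a byte is $result set to $len.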
12742     __ li($result$$Register, 0);
12743     __ string_compress_16($src$$Register, $dst$$Register, $len$$Register, $tmp1$$Register,
12744                           $tmp2$$Register, $tmp3$$Register, $tmp4$$Register, $tmp5$$Register, Ldone);
12745     __ rldicl_($tmp1$$Register, $len$$Register, 0, 64-3); // Remaining characters.
12746     __ beq(CCR0, Lskip);
12747     __ string_compress($src$$Register, $dst$$Register, $tmp1$$Register, $tmp2$$Register, Ldone);
12748     __ bind(Lskip);
12749     __ mr($result$$Register, $len$$Register);
12750     __ bind(Ldone);
12751   %}
12752   ins_pipe(pipe_class_default);
12753 %}
12754 
12755 // byte[] to char[] inflation
12756 instruct string_inflate(Universe dummy, rarg1RegP src, rarg2RegP dst, iRegIsrc len, iRegLdst tmp1,
12757                         iRegLdst tmp2, iRegLdst tmp3, iRegLdst tmp4, iRegLdst tmp5, regCTR ctr, flagsRegCR0 cr0) %{
12758   match(Set dummy (StrInflatedCopy src (Binary dst len)));
12759   effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, TEMP tmp5, USE_KILL src, USE_KILL dst, KILL ctr, KILL cr0);
12760   ins_cost(300);
12761   format %{ "String Inflate $src,$dst,$len \t// KILL $tmp1, $tmp2, $tmp3, $tmp4, $tmp5" %}
12762   ins_encode %{
12763     Label Ldone;
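    // Inflate in blocks of 8 chars first; the remaining len % 8 bytes are
    // widened by the scalar string_inflate loop.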
12764     __ string_inflate_16($src$$Register, $dst$$Register, $len$$Register, $tmp1$$Register,
12765                          $tmp2$$Register, $tmp3$$Register, $tmp4$$Register, $tmp5$$Register);
12766     __ rldicl_($tmp1$$Register, $len$$Register, 0, 64-3); // Remaining characters.
12767     __ beq(CCR0, Ldone);
12768     __ string_inflate($src$$Register, $dst$$Register, $tmp1$$Register, $tmp2$$Register);
12769     __ bind(Ldone);
12770   %}
12771   ins_pipe(pipe_class_default);
12772 %}
12773 
12774 // StringCoding.java intrinsics
12775 instruct has_negatives(rarg1RegP ary1, iRegIsrc len, iRegIdst result, iRegLdst tmp1, iRegLdst tmp2,
12776                        regCTR ctr, flagsRegCR0 cr0)
12777 %{
12778   match(Set result (HasNegatives ary1 len));
12779   effect(TEMP_DEF result, USE_KILL ary1, TEMP tmp1, TEMP tmp2, KILL ctr, KILL cr0);
12780   ins_cost(300);
12781   format %{ "has negatives byte[] $ary1,$len -> $result \t// KILL $tmp1, $tmp2" %}
12782   ins_encode %{
12783     __ has_negatives($ary1$$Register, $len$$Register, $result$$Register,
12784                      $tmp1$$Register, $tmp2$$Register);
12785   %}
12786   ins_pipe(pipe_class_default);
12787 %}
12788 
12789 // encode char[] to byte[] in ISO_8859_1
12790 instruct encode_iso_array(rarg1RegP src, rarg2RegP dst, iRegIsrc len, iRegIdst result, iRegLdst tmp1,
12791                           iRegLdst tmp2, iRegLdst tmp3, iRegLdst tmp4, iRegLdst tmp5, regCTR ctr, flagsRegCR0 cr0) %{
12792   predicate(!((EncodeISOArrayNode*)n)->is_ascii());
12793   match(Set result (EncodeISOArray src (Binary dst len)));
12794   effect(TEMP_DEF result, TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, TEMP tmp5,
12795          USE_KILL src, USE_KILL dst, KILL ctr, KILL cr0);
12796   ins_cost(300);
12797   format %{ "Encode array $src,$dst,$len -> $result \t// KILL $tmp1, $tmp2, $tmp3, $tmp4, $tmp5" %}
12798   ins_encode %{
12799     Label Lslow, Lfailure1, Lfailure2, Ldone;
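    // Like string_compress above, but on a char > 0xFF the failure paths
    // determine how many characters are still unprocessed, so that the final
    // subf yields the number of characters actually encoded.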
12800     __ string_compress_16($src$$Register, $dst$$Register, $len$$Register, $tmp1$$Register,
12801                           $tmp2$$Register, $tmp3$$Register, $tmp4$$Register, $tmp5$$Register, Lfailure1);
12802     __ rldicl_($result$$Register, $len$$Register, 0, 64-3); // Remaining characters.
12803     __ beq(CCR0, Ldone);
12804     __ bind(Lslow);
12805     __ string_compress($src$$Register, $dst$$Register, $result$$Register, $tmp2$$Register, Lfailure2);
12806     __ li($result$$Register, 0);
12807     __ b(Ldone);
12808 
12809     __ bind(Lfailure1);
12810     __ mr($result$$Register, $len$$Register);
12811     __ mfctr($tmp1$$Register);
12812     __ rldimi_($result$$Register, $tmp1$$Register, 3, 0); // Remaining characters.
12813     __ beq(CCR0, Ldone);
12814     __ b(Lslow);
12815 
12816     __ bind(Lfailure2);
12817     __ mfctr($result$$Register); // Remaining characters.
12818 
12819     __ bind(Ldone);
12820     __ subf($result$$Register, $result$$Register, $len$$Register);
12821   %}
12822   ins_pipe(pipe_class_default);
12823 %}
12824 
12825 
12826 //---------- Min/Max Instructions ---------------------------------------------
12827 
12828 instruct minI_reg_reg_Ex(iRegIdst dst, iRegIsrc src1, iRegIsrc src2) %{
12829   match(Set dst (MinI src1 src2));
12830   ins_cost(DEFAULT_COST*6);
12831 
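  // Branch-free min: dst = src1 + min(src2 - src1, 0). The 64-bit sign mask of
  // the difference selects the difference when it is negative and zero otherwise.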
12832   expand %{
12833     iRegLdst src1s;
12834     iRegLdst src2s;
12835     iRegLdst diff;
12836     iRegLdst sm;
12837     iRegLdst doz; // difference or zero
12838     convI2L_reg(src1s, src1); // Ensure proper sign extension.
12839     convI2L_reg(src2s, src2); // Ensure proper sign extension.
12840     subL_reg_reg(diff, src2s, src1s);
12841     // Need to consider >=33 bit result, therefore we need signmaskL.
12842     signmask64L_regL(sm, diff);
12843     andL_reg_reg(doz, diff, sm); // <=0
12844     addI_regL_regL(dst, doz, src1s);
12845   %}
12846 %}
12847 
12848 instruct minI_reg_reg_isel(iRegIdst dst, iRegIsrc src1, iRegIsrc src2, flagsRegCR0 cr0) %{
12849   match(Set dst (MinI src1 src2));
12850   effect(KILL cr0);
12851   predicate(VM_Version::has_isel());
12852   ins_cost(DEFAULT_COST*2);
12853 
12854   ins_encode %{
12855     __ cmpw(CCR0, $src1$$Register, $src2$$Register);
12856     __ isel($dst$$Register, CCR0, Assembler::less, /*invert*/false, $src1$$Register, $src2$$Register);
12857   %}
12858   ins_pipe(pipe_class_default);
12859 %}
12860 
12861 instruct maxI_reg_reg_Ex(iRegIdst dst, iRegIsrc src1, iRegIsrc src2) %{
12862   match(Set dst (MaxI src1 src2));
12863   ins_cost(DEFAULT_COST*6);
12864 
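  // Branch-free max: dst = src1 + max(src2 - src1, 0). andc with the sign mask
  // keeps the difference only when it is non-negative.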
12865   expand %{
12866     iRegLdst src1s;
12867     iRegLdst src2s;
12868     iRegLdst diff;
12869     iRegLdst sm;
12870     iRegLdst doz; // difference or zero
12871     convI2L_reg(src1s, src1); // Ensure proper sign extension.
12872     convI2L_reg(src2s, src2); // Ensure proper sign extension.
12873     subL_reg_reg(diff, src2s, src1s);
12874     // Need to consider >=33 bit result, therefore we need signmaskL.
12875     signmask64L_regL(sm, diff);
12876     andcL_reg_reg(doz, diff, sm); // >=0
12877     addI_regL_regL(dst, doz, src1s);
12878   %}
12879 %}
12880 
12881 instruct maxI_reg_reg_isel(iRegIdst dst, iRegIsrc src1, iRegIsrc src2, flagsRegCR0 cr0) %{
12882   match(Set dst (MaxI src1 src2));
12883   effect(KILL cr0);
12884   predicate(VM_Version::has_isel());
12885   ins_cost(DEFAULT_COST*2);
12886 
12887   ins_encode %{
12888     __ cmpw(CCR0, $src1$$Register, $src2$$Register);
12889     __ isel($dst$$Register, CCR0, Assembler::greater, /*invert*/false, $src1$$Register, $src2$$Register);
12890   %}
12891   ins_pipe(pipe_class_default);
12892 %}
12893 
12894 //---------- Population Count Instructions ------------------------------------
12895 
12896 // Popcnt for Power7.
12897 instruct popCountI(iRegIdst dst, iRegIsrc src) %{
12898   match(Set dst (PopCountI src));
12899   predicate(UsePopCountInstruction && VM_Version::has_popcntw());
12900   ins_cost(DEFAULT_COST);
12901 
12902   format %{ "POPCNTW $dst, $src" %}
12903   size(4);
12904   ins_encode %{
12905     __ popcntw($dst$$Register, $src$$Register);
12906   %}
12907   ins_pipe(pipe_class_default);
12908 %}
12909 
12910 // Popcnt for Power7.
12911 instruct popCountL(iRegIdst dst, iRegLsrc src) %{
12912   predicate(UsePopCountInstruction && VM_Version::has_popcntw());
12913   match(Set dst (PopCountL src));
12914   ins_cost(DEFAULT_COST);
12915 
12916   format %{ "POPCNTD $dst, $src" %}
12917   size(4);
12918   ins_encode %{
12919     __ popcntd($dst$$Register, $src$$Register);
12920   %}
12921   ins_pipe(pipe_class_default);
12922 %}
12923 
12924 instruct countLeadingZerosI(iRegIdst dst, iRegIsrc src) %{
12925   match(Set dst (CountLeadingZerosI src));
12926   predicate(UseCountLeadingZerosInstructionsPPC64);  // See Matcher::match_rule_supported.
12927   ins_cost(DEFAULT_COST);
12928 
12929   format %{ "CNTLZW  $dst, $src" %}
12930   size(4);
12931   ins_encode %{
12932     __ cntlzw($dst$$Register, $src$$Register);
12933   %}
12934   ins_pipe(pipe_class_default);
12935 %}
12936 
12937 instruct countLeadingZerosL(iRegIdst dst, iRegLsrc src) %{
12938   match(Set dst (CountLeadingZerosL src));
12939   predicate(UseCountLeadingZerosInstructionsPPC64);  // See Matcher::match_rule_supported.
12940   ins_cost(DEFAULT_COST);
12941 
12942   format %{ "CNTLZD  $dst, $src" %}
12943   size(4);
12944   ins_encode %{
12945     __ cntlzd($dst$$Register, $src$$Register);
12946   %}
12947   ins_pipe(pipe_class_default);
12948 %}
12949 
12950 instruct countLeadingZerosP(iRegIdst dst, iRegPsrc src) %{
12951   // no match-rule, false predicate
12952   effect(DEF dst, USE src);
12953   predicate(false);
12954 
12955   format %{ "CNTLZD  $dst, $src" %}
12956   size(4);
12957   ins_encode %{
12958     __ cntlzd($dst$$Register, $src$$Register);
12959   %}
12960   ins_pipe(pipe_class_default);
12961 %}
12962 
12963 instruct countTrailingZerosI_Ex(iRegIdst dst, iRegIsrc src) %{
12964   match(Set dst (CountTrailingZerosI src));
12965   predicate(UseCountLeadingZerosInstructionsPPC64 && !UseCountTrailingZerosInstructionsPPC64);
12966   ins_cost(DEFAULT_COST);
12967 
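  // Emulate cnttzw: (src - 1) & ~src is a mask of exactly the trailing zero
  // bits of src, so ntz(src) = 32 - nlz((src - 1) & ~src).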
12968   expand %{
12969     immI16 imm1 %{ (int)-1 %}
12970     immI16 imm2 %{ (int)32 %}
12971     immI_minus1 m1 %{ -1 %}
12972     iRegIdst tmpI1;
12973     iRegIdst tmpI2;
12974     iRegIdst tmpI3;
12975     addI_reg_imm16(tmpI1, src, imm1);
12976     andcI_reg_reg(tmpI2, src, m1, tmpI1);
12977     countLeadingZerosI(tmpI3, tmpI2);
12978     subI_imm16_reg(dst, imm2, tmpI3);
12979   %}
12980 %}
12981 
12982 instruct countTrailingZerosI_cnttzw(iRegIdst dst, iRegIsrc src) %{
12983   match(Set dst (CountTrailingZerosI src));
12984   predicate(UseCountTrailingZerosInstructionsPPC64);
12985   ins_cost(DEFAULT_COST);
12986 
12987   format %{ "CNTTZW  $dst, $src" %}
12988   size(4);
12989   ins_encode %{
12990     __ cnttzw($dst$$Register, $src$$Register);
12991   %}
12992   ins_pipe(pipe_class_default);
12993 %}
12994 
12995 instruct countTrailingZerosL_Ex(iRegIdst dst, iRegLsrc src) %{
12996   match(Set dst (CountTrailingZerosL src));
12997   predicate(UseCountLeadingZerosInstructionsPPC64 && !UseCountTrailingZerosInstructionsPPC64);
12998   ins_cost(DEFAULT_COST);
12999 
13000   expand %{
13001     immL16 imm1 %{ (long)-1 %}
13002     immI16 imm2 %{ (int)64 %}
13003     iRegLdst tmpL1;
13004     iRegLdst tmpL2;
13005     iRegIdst tmpL3;
13006     addL_reg_imm16(tmpL1, src, imm1);
13007     andcL_reg_reg(tmpL2, tmpL1, src);
13008     countLeadingZerosL(tmpL3, tmpL2);
13009     subI_imm16_reg(dst, imm2, tmpL3);
  %}
13011 %}
13012 
13013 instruct countTrailingZerosL_cnttzd(iRegIdst dst, iRegLsrc src) %{
13014   match(Set dst (CountTrailingZerosL src));
13015   predicate(UseCountTrailingZerosInstructionsPPC64);
13016   ins_cost(DEFAULT_COST);
13017 
13018   format %{ "CNTTZD  $dst, $src" %}
13019   size(4);
13020   ins_encode %{
13021     __ cnttzd($dst$$Register, $src$$Register);
13022   %}
13023   ins_pipe(pipe_class_default);
13024 %}
13025 
13026 // Expand nodes for byte_reverse_int.
13027 instruct insrwi_a(iRegIdst dst, iRegIsrc src, immI16 pos, immI16 shift) %{
13028   effect(DEF dst, USE src, USE pos, USE shift);
13029   predicate(false);
13030 
13031   format %{ "INSRWI  $dst, $src, $pos, $shift" %}
13032   size(4);
13033   ins_encode %{
13034     __ insrwi($dst$$Register, $src$$Register, $shift$$constant, $pos$$constant);
13035   %}
13036   ins_pipe(pipe_class_default);
13037 %}
13038 
13039 // As insrwi_a, but with USE_DEF.
13040 instruct insrwi(iRegIdst dst, iRegIsrc src, immI16 pos, immI16 shift) %{
13041   effect(USE_DEF dst, USE src, USE pos, USE shift);
13042   predicate(false);
13043 
13044   format %{ "INSRWI  $dst, $src, $pos, $shift" %}
13045   size(4);
13046   ins_encode %{
13047     __ insrwi($dst$$Register, $src$$Register, $shift$$constant, $pos$$constant);
13048   %}
13049   ins_pipe(pipe_class_default);
13050 %}
13051 
13052 // Just slightly faster than the Java implementation.
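      // Expands into a sequence of shifts and INSRWI inserts that places each
      // byte of $src at its mirrored position in $dst.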
13053 instruct bytes_reverse_int_Ex(iRegIdst dst, iRegIsrc src) %{
13054   match(Set dst (ReverseBytesI src));
13055   predicate(!UseByteReverseInstructions);
13056   ins_cost(7*DEFAULT_COST);
13057 
13058   expand %{
13059     immI16 imm24 %{ (int) 24 %}
13060     immI16 imm16 %{ (int) 16 %}
13061     immI16  imm8 %{ (int)  8 %}
13062     immI16  imm4 %{ (int)  4 %}
13063     immI16  imm0 %{ (int)  0 %}
13064     iRegLdst tmpI1;
13065     iRegLdst tmpI2;
13066     iRegLdst tmpI3;
13067 
13068     urShiftI_reg_imm(tmpI1, src, imm24);
13069     insrwi_a(dst, tmpI1, imm24, imm8);
13070     urShiftI_reg_imm(tmpI2, src, imm16);
13071     insrwi(dst, tmpI2, imm8, imm16);
13072     urShiftI_reg_imm(tmpI3, src, imm8);
13073     insrwi(dst, tmpI3, imm8, imm8);
13074     insrwi(dst, src, imm0, imm8);
13075   %}
13076 %}
13077 
13078 instruct bytes_reverse_int_vec(iRegIdst dst, iRegIsrc src, vecX tmpV) %{
13079   match(Set dst (ReverseBytesI src));
13080   predicate(UseVectorByteReverseInstructionsPPC64);
13081   effect(TEMP tmpV);
13082   ins_cost(DEFAULT_COST*3);
13083   size(12);
13084   format %{ "MTVSRWZ $tmpV, $src\n"
13085             "\tXXBRW   $tmpV, $tmpV\n"
13086             "\tMFVSRWZ $dst, $tmpV" %}
13087 
13088   ins_encode %{
13089     __ mtvsrwz($tmpV$$VectorSRegister, $src$$Register);
13090     __ xxbrw($tmpV$$VectorSRegister, $tmpV$$VectorSRegister);
13091     __ mfvsrwz($dst$$Register, $tmpV$$VectorSRegister);
13092   %}
13093   ins_pipe(pipe_class_default);
13094 %}
13095 
13096 instruct bytes_reverse_int(iRegIdst dst, iRegIsrc src) %{
13097   match(Set dst (ReverseBytesI src));
13098   predicate(UseByteReverseInstructions);
13099   ins_cost(DEFAULT_COST);
13100   size(4);
13101 
13102   format %{ "BRW  $dst, $src" %}
13103 
13104   ins_encode %{
13105     __ brw($dst$$Register, $src$$Register);
13106   %}
13107   ins_pipe(pipe_class_default);
13108 %}
13109 
13110 instruct bytes_reverse_long_Ex(iRegLdst dst, iRegLsrc src) %{
13111   match(Set dst (ReverseBytesL src));
13112   predicate(!UseByteReverseInstructions);
13113   ins_cost(15*DEFAULT_COST);
13114 
13115   expand %{
13116     immI16 imm56 %{ (int) 56 %}
13117     immI16 imm48 %{ (int) 48 %}
13118     immI16 imm40 %{ (int) 40 %}
13119     immI16 imm32 %{ (int) 32 %}
13120     immI16 imm24 %{ (int) 24 %}
13121     immI16 imm16 %{ (int) 16 %}
13122     immI16  imm8 %{ (int)  8 %}
13123     immI16  imm0 %{ (int)  0 %}
13124     iRegLdst tmpL1;
13125     iRegLdst tmpL2;
13126     iRegLdst tmpL3;
13127     iRegLdst tmpL4;
13128     iRegLdst tmpL5;
13129     iRegLdst tmpL6;
13130 
13131                                         // src   : |a|b|c|d|e|f|g|h|
13132     rldicl(tmpL1, src, imm8, imm24);    // tmpL1 : | | | |e|f|g|h|a|
13133     rldicl(tmpL2, tmpL1, imm32, imm24); // tmpL2 : | | | |a| | | |e|
13134     rldicl(tmpL3, tmpL2, imm32, imm0);  // tmpL3 : | | | |e| | | |a|
13135     rldicl(tmpL1, src, imm16, imm24);   // tmpL1 : | | | |f|g|h|a|b|
13136     rldicl(tmpL2, tmpL1, imm32, imm24); // tmpL2 : | | | |b| | | |f|
13137     rldicl(tmpL4, tmpL2, imm40, imm0);  // tmpL4 : | | |f| | | |b| |
13138     orL_reg_reg(tmpL5, tmpL3, tmpL4);   // tmpL5 : | | |f|e| | |b|a|
13139     rldicl(tmpL1, src, imm24, imm24);   // tmpL1 : | | | |g|h|a|b|c|
13140     rldicl(tmpL2, tmpL1, imm32, imm24); // tmpL2 : | | | |c| | | |g|
13141     rldicl(tmpL3, tmpL2, imm48, imm0);  // tmpL3 : | |g| | | |c| | |
13142     rldicl(tmpL1, src, imm32, imm24);   // tmpL1 : | | | |h|a|b|c|d|
13143     rldicl(tmpL2, tmpL1, imm32, imm24); // tmpL2 : | | | |d| | | |h|
13144     rldicl(tmpL4, tmpL2, imm56, imm0);  // tmpL4 : |h| | | |d| | | |
13145     orL_reg_reg(tmpL6, tmpL3, tmpL4);   // tmpL6 : |h|g| | |d|c| | |
13146     orL_reg_reg(dst, tmpL5, tmpL6);     // dst   : |h|g|f|e|d|c|b|a|
13147   %}
13148 %}
13149 
13150 instruct bytes_reverse_long_vec(iRegLdst dst, iRegLsrc src, vecX tmpV) %{
13151   match(Set dst (ReverseBytesL src));
13152   predicate(UseVectorByteReverseInstructionsPPC64);
13153   effect(TEMP tmpV);
13154   ins_cost(DEFAULT_COST*3);
13155   size(12);
13156   format %{ "MTVSRD  $tmpV, $src\n"
13157             "\tXXBRD   $tmpV, $tmpV\n"
13158             "\tMFVSRD  $dst, $tmpV" %}
13159 
13160   ins_encode %{
13161     __ mtvsrd($tmpV$$VectorSRegister, $src$$Register);
13162     __ xxbrd($tmpV$$VectorSRegister, $tmpV$$VectorSRegister);
13163     __ mfvsrd($dst$$Register, $tmpV$$VectorSRegister);
13164   %}
13165   ins_pipe(pipe_class_default);
13166 %}
13167 
13168 instruct bytes_reverse_long(iRegLdst dst, iRegLsrc src) %{
13169   match(Set dst (ReverseBytesL src));
13170   predicate(UseByteReverseInstructions);
13171   ins_cost(DEFAULT_COST);
13172   size(4);
13173 
13174   format %{ "BRD  $dst, $src" %}
13175 
13176   ins_encode %{
13177     __ brd($dst$$Register, $src$$Register);
13178   %}
13179   ins_pipe(pipe_class_default);
13180 %}
13181 
13182 instruct bytes_reverse_ushort_Ex(iRegIdst dst, iRegIsrc src) %{
13183   match(Set dst (ReverseBytesUS src));
13184   predicate(!UseByteReverseInstructions);
13185   ins_cost(2*DEFAULT_COST);
13186 
13187   expand %{
13188     immI16  imm16 %{ (int) 16 %}
13189     immI16   imm8 %{ (int)  8 %}
13190 
13191     urShiftI_reg_imm(dst, src, imm8);
13192     insrwi(dst, src, imm16, imm8);
13193   %}
13194 %}
13195 
13196 instruct bytes_reverse_ushort(iRegIdst dst, iRegIsrc src) %{
13197   match(Set dst (ReverseBytesUS src));
13198   predicate(UseByteReverseInstructions);
13199   ins_cost(DEFAULT_COST);
13200   size(4);
13201 
13202   format %{ "BRH  $dst, $src" %}
13203 
13204   ins_encode %{
13205     __ brh($dst$$Register, $src$$Register);
13206   %}
13207   ins_pipe(pipe_class_default);
13208 %}
13209 
13210 instruct bytes_reverse_short_Ex(iRegIdst dst, iRegIsrc src) %{
13211   match(Set dst (ReverseBytesS src));
13212   predicate(!UseByteReverseInstructions);
13213   ins_cost(3*DEFAULT_COST);
13214 
13215   expand %{
13216     immI16  imm16 %{ (int) 16 %}
13217     immI16   imm8 %{ (int)  8 %}
13218     iRegLdst tmpI1;
13219 
13220     urShiftI_reg_imm(tmpI1, src, imm8);
13221     insrwi(tmpI1, src, imm16, imm8);
13222     extsh(dst, tmpI1);
13223   %}
13224 %}
13225 
13226 instruct bytes_reverse_short(iRegIdst dst, iRegIsrc src) %{
13227   match(Set dst (ReverseBytesS src));
13228   predicate(UseByteReverseInstructions);
13229   ins_cost(DEFAULT_COST);
13230   size(8);
13231 
13232   format %{ "BRH   $dst, $src\n\t"
13233             "EXTSH $dst, $dst" %}
13234 
13235   ins_encode %{
13236     __ brh($dst$$Register, $src$$Register);
13237     __ extsh($dst$$Register, $dst$$Register);
13238   %}
13239   ins_pipe(pipe_class_default);
13240 %}
13241 
13242 // Load Integer reversed byte order
13243 instruct loadI_reversed(iRegIdst dst, indirect mem) %{
13244   match(Set dst (ReverseBytesI (LoadI mem)));
13245   predicate(n->in(1)->as_Load()->is_unordered() || followed_by_acquire(n->in(1)));
13246   ins_cost(MEMORY_REF_COST);
13247 
13248   size(4);
13249   ins_encode %{
13250     __ lwbrx($dst$$Register, $mem$$Register);
13251   %}
13252   ins_pipe(pipe_class_default);
13253 %}
13254 
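      // The *_acquire variants append the TWI/ISYNC idiom to the byte-reversed
      // load: the never-taken trap creates a dependency on the loaded value and
      // ISYNC then acts as an acquire barrier.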
13255 instruct loadI_reversed_acquire(iRegIdst dst, indirect mem) %{
13256   match(Set dst (ReverseBytesI (LoadI mem)));
13257   ins_cost(2 * MEMORY_REF_COST);
13258 
13259   size(12);
13260   ins_encode %{
13261     __ lwbrx($dst$$Register, $mem$$Register);
13262     __ twi_0($dst$$Register);
13263     __ isync();
13264   %}
13265   ins_pipe(pipe_class_default);
13266 %}
13267 
13268 // Load Long - aligned and reversed
13269 instruct loadL_reversed(iRegLdst dst, indirect mem) %{
13270   match(Set dst (ReverseBytesL (LoadL mem)));
13271   predicate(VM_Version::has_ldbrx() && (n->in(1)->as_Load()->is_unordered() || followed_by_acquire(n->in(1))));
13272   ins_cost(MEMORY_REF_COST);
13273 
13274   size(4);
13275   ins_encode %{
13276     __ ldbrx($dst$$Register, $mem$$Register);
13277   %}
13278   ins_pipe(pipe_class_default);
13279 %}
13280 
13281 instruct loadL_reversed_acquire(iRegLdst dst, indirect mem) %{
13282   match(Set dst (ReverseBytesL (LoadL mem)));
13283   predicate(VM_Version::has_ldbrx());
13284   ins_cost(2 * MEMORY_REF_COST);
13285 
13286   size(12);
13287   ins_encode %{
13288     __ ldbrx($dst$$Register, $mem$$Register);
13289     __ twi_0($dst$$Register);
13290     __ isync();
13291   %}
13292   ins_pipe(pipe_class_default);
13293 %}
13294 
13295 // Load unsigned short / char reversed byte order
13296 instruct loadUS_reversed(iRegIdst dst, indirect mem) %{
13297   match(Set dst (ReverseBytesUS (LoadUS mem)));
13298   predicate(n->in(1)->as_Load()->is_unordered() || followed_by_acquire(n->in(1)));
13299   ins_cost(MEMORY_REF_COST);
13300 
13301   size(4);
13302   ins_encode %{
13303     __ lhbrx($dst$$Register, $mem$$Register);
13304   %}
13305   ins_pipe(pipe_class_default);
13306 %}
13307 
13308 instruct loadUS_reversed_acquire(iRegIdst dst, indirect mem) %{
13309   match(Set dst (ReverseBytesUS (LoadUS mem)));
13310   ins_cost(2 * MEMORY_REF_COST);
13311 
13312   size(12);
13313   ins_encode %{
13314     __ lhbrx($dst$$Register, $mem$$Register);
13315     __ twi_0($dst$$Register);
13316     __ isync();
13317   %}
13318   ins_pipe(pipe_class_default);
13319 %}
13320 
13321 // Load short reversed byte order
13322 instruct loadS_reversed(iRegIdst dst, indirect mem) %{
13323   match(Set dst (ReverseBytesS (LoadS mem)));
13324   predicate(n->in(1)->as_Load()->is_unordered() || followed_by_acquire(n->in(1)));
13325   ins_cost(MEMORY_REF_COST + DEFAULT_COST);
13326 
13327   size(8);
13328   ins_encode %{
13329     __ lhbrx($dst$$Register, $mem$$Register);
13330     __ extsh($dst$$Register, $dst$$Register);
13331   %}
13332   ins_pipe(pipe_class_default);
13333 %}
13334 
13335 instruct loadS_reversed_acquire(iRegIdst dst, indirect mem) %{
13336   match(Set dst (ReverseBytesS (LoadS mem)));
13337   ins_cost(2 * MEMORY_REF_COST + DEFAULT_COST);
13338 
13339   size(16);
13340   ins_encode %{
13341     __ lhbrx($dst$$Register, $mem$$Register);
13342     __ twi_0($dst$$Register);
13343     __ extsh($dst$$Register, $dst$$Register);
13344     __ isync();
13345   %}
13346   ins_pipe(pipe_class_default);
13347 %}
13348 
13349 // Store Integer reversed byte order
13350 instruct storeI_reversed(iRegIsrc src, indirect mem) %{
13351   match(Set mem (StoreI mem (ReverseBytesI src)));
13352   ins_cost(MEMORY_REF_COST);
13353 
13354   size(4);
13355   ins_encode %{
13356     __ stwbrx($src$$Register, $mem$$Register);
13357   %}
13358   ins_pipe(pipe_class_default);
13359 %}
13360 
13361 // Store Long reversed byte order
13362 instruct storeL_reversed(iRegLsrc src, indirect mem) %{
13363   match(Set mem (StoreL mem (ReverseBytesL src)));
13364   predicate(VM_Version::has_stdbrx());
13365   ins_cost(MEMORY_REF_COST);
13366 
13367   size(4);
13368   ins_encode %{
13369     __ stdbrx($src$$Register, $mem$$Register);
13370   %}
13371   ins_pipe(pipe_class_default);
13372 %}
13373 
13374 // Store unsigned short / char reversed byte order
13375 instruct storeUS_reversed(iRegIsrc src, indirect mem) %{
13376   match(Set mem (StoreC mem (ReverseBytesUS src)));
13377   ins_cost(MEMORY_REF_COST);
13378 
13379   size(4);
13380   ins_encode %{
13381     __ sthbrx($src$$Register, $mem$$Register);
13382   %}
13383   ins_pipe(pipe_class_default);
13384 %}
13385 
13386 // Store short reversed byte order
13387 instruct storeS_reversed(iRegIsrc src, indirect mem) %{
13388   match(Set mem (StoreC mem (ReverseBytesS src)));
13389   ins_cost(MEMORY_REF_COST);
13390 
13391   size(4);
13392   ins_encode %{
13393     __ sthbrx($src$$Register, $mem$$Register);
13394   %}
13395   ins_pipe(pipe_class_default);
13396 %}
13397 
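      // Expand-only helper nodes (no match rule): move a scalar into a 16-byte
      // VSR and splat it there. They are used by the replicate rules below.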
13398 instruct mtvsrwz(vecX temp1, iRegIsrc src) %{
13399   effect(DEF temp1, USE src);
13400 
13401   format %{ "MTVSRWZ $temp1, $src \t// Move to 16-byte register" %}
13402   size(4);
13403   ins_encode %{
13404     __ mtvsrwz($temp1$$VectorSRegister, $src$$Register);
13405   %}
13406   ins_pipe(pipe_class_default);
13407 %}
13408 
13409 instruct xxspltw(vecX dst, vecX src, immI8 imm1) %{
13410   effect(DEF dst, USE src, USE imm1);
13411 
13412   format %{ "XXSPLTW $dst, $src, $imm1 \t// Splat word" %}
13413   size(4);
13414   ins_encode %{
13415     __ xxspltw($dst$$VectorSRegister, $src$$VectorSRegister, $imm1$$constant);
13416   %}
13417   ins_pipe(pipe_class_default);
13418 %}
13419 
13420 instruct xscvdpspn_regF(vecX dst, regF src) %{
13421   effect(DEF dst, USE src);
13422 
13423   format %{ "XSCVDPSPN $dst, $src \t// Convert scalar single precision to vector single precision" %}
13424   size(4);
13425   ins_encode %{
13426     __ xscvdpspn($dst$$VectorSRegister, $src$$FloatRegister->to_vsr());
13427   %}
13428   ins_pipe(pipe_class_default);
13429 %}
13430 
13431 //---------- Replicate Vector Instructions ------------------------------------
13432 
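      // INSRDI with src == dst copies the low n bits into the high n bits, so
      // chaining repl56/repl48/repl32 doubles the replicated pattern until it
      // fills the 64-bit GPR. The 16-byte forms then move the result into a VSR
      // and splat it.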
13433 // Insrdi does replicate if src == dst.
13434 instruct repl32(iRegLdst dst) %{
13435   predicate(false);
13436   effect(USE_DEF dst);
13437 
13438   format %{ "INSRDI  $dst, #0, $dst, #32 \t// replicate" %}
13439   size(4);
13440   ins_encode %{
13441     __ insrdi($dst$$Register, $dst$$Register, 32, 0);
13442   %}
13443   ins_pipe(pipe_class_default);
13444 %}
13445 
13446 // Insrdi does replicate if src == dst.
13447 instruct repl48(iRegLdst dst) %{
13448   predicate(false);
13449   effect(USE_DEF dst);
13450 
13451   format %{ "INSRDI  $dst, #0, $dst, #48 \t// replicate" %}
13452   size(4);
13453   ins_encode %{
13454     __ insrdi($dst$$Register, $dst$$Register, 48, 0);
13455   %}
13456   ins_pipe(pipe_class_default);
13457 %}
13458 
13459 // Insrdi does replicate if src == dst.
13460 instruct repl56(iRegLdst dst) %{
13461   predicate(false);
13462   effect(USE_DEF dst);
13463 
13464   format %{ "INSRDI  $dst, #0, $dst, #56 \t// replicate" %}
13465   size(4);
13466   ins_encode %{
13467     __ insrdi($dst$$Register, $dst$$Register, 56, 0);
13468   %}
13469   ins_pipe(pipe_class_default);
13470 %}
13471 
13472 instruct repl8B_reg_Ex(iRegLdst dst, iRegIsrc src) %{
13473   match(Set dst (ReplicateB src));
13474   predicate(n->as_Vector()->length() == 8);
13475   expand %{
13476     moveReg(dst, src);
13477     repl56(dst);
13478     repl48(dst);
13479     repl32(dst);
13480   %}
13481 %}
13482 
13483 instruct repl8B_immI0(iRegLdst dst, immI_0 zero) %{
13484   match(Set dst (ReplicateB zero));
13485   predicate(n->as_Vector()->length() == 8);
13486   format %{ "LI      $dst, #0 \t// replicate8B" %}
13487   size(4);
13488   ins_encode %{
13489     __ li($dst$$Register, (int)((short)($zero$$constant & 0xFFFF)));
13490   %}
13491   ins_pipe(pipe_class_default);
13492 %}
13493 
13494 instruct repl8B_immIminus1(iRegLdst dst, immI_minus1 src) %{
13495   match(Set dst (ReplicateB src));
13496   predicate(n->as_Vector()->length() == 8);
13497   format %{ "LI      $dst, #-1 \t// replicate8B" %}
13498   size(4);
13499   ins_encode %{
13500     __ li($dst$$Register, (int)((short)($src$$constant & 0xFFFF)));
13501   %}
13502   ins_pipe(pipe_class_default);
13503 %}
13504 
13505 instruct repl16B_reg_Ex(vecX dst, iRegIsrc src) %{
13506   match(Set dst (ReplicateB src));
13507   predicate(n->as_Vector()->length() == 16);
13508 
13509   expand %{
13510     iRegLdst tmpL;
13511     vecX tmpV;
13512     immI8  imm1 %{ (int)  1 %}
13513     moveReg(tmpL, src);
13514     repl56(tmpL);
13515     repl48(tmpL);
13516     mtvsrwz(tmpV, tmpL);
13517     xxspltw(dst, tmpV, imm1);
13518   %}
13519 %}
13520 
13521 instruct repl16B_immI0(vecX dst, immI_0 zero) %{
13522   match(Set dst (ReplicateB zero));
13523   predicate(n->as_Vector()->length() == 16);
13524 
13525   format %{ "XXLXOR      $dst, $zero \t// replicate16B" %}
13526   size(4);
13527   ins_encode %{
13528     __ xxlxor($dst$$VectorSRegister, $dst$$VectorSRegister, $dst$$VectorSRegister);
13529   %}
13530   ins_pipe(pipe_class_default);
13531 %}
13532 
13533 instruct repl16B_immIminus1(vecX dst, immI_minus1 src) %{
13534   match(Set dst (ReplicateB src));
13535   predicate(n->as_Vector()->length() == 16);
13536 
13537   format %{ "XXLEQV      $dst, $src \t// replicate16B" %}
13538   size(4);
13539   ins_encode %{
13540     __ xxleqv($dst$$VectorSRegister, $dst$$VectorSRegister, $dst$$VectorSRegister);
13541   %}
13542   ins_pipe(pipe_class_default);
13543 %}
13544 
13545 instruct repl4S_reg_Ex(iRegLdst dst, iRegIsrc src) %{
13546   match(Set dst (ReplicateS src));
13547   predicate(n->as_Vector()->length() == 4);
13548   expand %{
13549     moveReg(dst, src);
13550     repl48(dst);
13551     repl32(dst);
13552   %}
13553 %}
13554 
13555 instruct repl4S_immI0(iRegLdst dst, immI_0 zero) %{
13556   match(Set dst (ReplicateS zero));
13557   predicate(n->as_Vector()->length() == 4);
13558   format %{ "LI      $dst, #0 \t// replicate4S" %}
13559   size(4);
13560   ins_encode %{
13561     __ li($dst$$Register, (int)((short)($zero$$constant & 0xFFFF)));
13562   %}
13563   ins_pipe(pipe_class_default);
13564 %}
13565 
13566 instruct repl4S_immIminus1(iRegLdst dst, immI_minus1 src) %{
13567   match(Set dst (ReplicateS src));
13568   predicate(n->as_Vector()->length() == 4);
13569   format %{ "LI      $dst, -1 \t// replicate4S" %}
13570   size(4);
13571   ins_encode %{
13572     __ li($dst$$Register, (int)((short)($src$$constant & 0xFFFF)));
13573   %}
13574   ins_pipe(pipe_class_default);
13575 %}
13576 
13577 instruct repl8S_reg_Ex(vecX dst, iRegIsrc src) %{
13578   match(Set dst (ReplicateS src));
13579   predicate(n->as_Vector()->length() == 8);
13580 
13581   expand %{
13582     iRegLdst tmpL;
13583     vecX tmpV;
13584     immI8  zero %{ (int)  0 %}
13585     moveReg(tmpL, src);
13586     repl48(tmpL);
13587     repl32(tmpL);
13588     mtvsrd(tmpV, tmpL);
13589     xxpermdi(dst, tmpV, tmpV, zero);
13590   %}
13591 %}
13592 
13593 instruct repl8S_immI0(vecX dst, immI_0 zero) %{
13594   match(Set dst (ReplicateS zero));
13595   predicate(n->as_Vector()->length() == 8);
13596 
13597   format %{ "XXLXOR      $dst, $zero \t// replicate8S" %}
13598   size(4);
13599   ins_encode %{
13600     __ xxlxor($dst$$VectorSRegister, $dst$$VectorSRegister, $dst$$VectorSRegister);
13601   %}
13602   ins_pipe(pipe_class_default);
13603 %}
13604 
13605 instruct repl8S_immIminus1(vecX dst, immI_minus1 src) %{
13606   match(Set dst (ReplicateS src));
13607   predicate(n->as_Vector()->length() == 8);
13608 
13609   format %{ "XXLEQV      $dst, $src \t// replicate8S" %}
13610   size(4);
13611   ins_encode %{
13612     __ xxleqv($dst$$VectorSRegister, $dst$$VectorSRegister, $dst$$VectorSRegister);
13613   %}
13614   ins_pipe(pipe_class_default);
13615 %}
13616 
13617 instruct repl2I_reg_Ex(iRegLdst dst, iRegIsrc src) %{
13618   match(Set dst (ReplicateI src));
13619   predicate(n->as_Vector()->length() == 2);
13620   ins_cost(2 * DEFAULT_COST);
13621   expand %{
13622     moveReg(dst, src);
13623     repl32(dst);
13624   %}
13625 %}
13626 
13627 instruct repl2I_immI0(iRegLdst dst, immI_0 zero) %{
13628   match(Set dst (ReplicateI zero));
13629   predicate(n->as_Vector()->length() == 2);
13630   format %{ "LI      $dst, #0 \t// replicate2I" %}
13631   size(4);
13632   ins_encode %{
13633     __ li($dst$$Register, (int)((short)($zero$$constant & 0xFFFF)));
13634   %}
13635   ins_pipe(pipe_class_default);
13636 %}
13637 
13638 instruct repl2I_immIminus1(iRegLdst dst, immI_minus1 src) %{
13639   match(Set dst (ReplicateI src));
13640   predicate(n->as_Vector()->length() == 2);
13641   format %{ "LI      $dst, -1 \t// replicate2I" %}
13642   size(4);
13643   ins_encode %{
13644     __ li($dst$$Register, (int)((short)($src$$constant & 0xFFFF)));
13645   %}
13646   ins_pipe(pipe_class_default);
13647 %}
13648 
13649 instruct repl4I_reg_Ex(vecX dst, iRegIsrc src) %{
13650   match(Set dst (ReplicateI src));
13651   predicate(n->as_Vector()->length() == 4);
13652   ins_cost(2 * DEFAULT_COST);
13653 
13654   expand %{
13655     iRegLdst tmpL;
13656     vecX tmpV;
13657     immI8  zero %{ (int)  0 %}
13658     moveReg(tmpL, src);
13659     repl32(tmpL);
13660     mtvsrd(tmpV, tmpL);
13661     xxpermdi(dst, tmpV, tmpV, zero);
13662   %}
13663 %}
13664 
13665 instruct repl4I_immI0(vecX dst, immI_0 zero) %{
13666   match(Set dst (ReplicateI zero));
13667   predicate(n->as_Vector()->length() == 4);
13668 
13669   format %{ "XXLXOR      $dst, $zero \t// replicate4I" %}
13670   size(4);
13671   ins_encode %{
13672     __ xxlxor($dst$$VectorSRegister, $dst$$VectorSRegister, $dst$$VectorSRegister);
13673   %}
13674   ins_pipe(pipe_class_default);
13675 %}
13676 
13677 instruct repl4I_immIminus1(vecX dst, immI_minus1 src) %{
13678   match(Set dst (ReplicateI src));
13679   predicate(n->as_Vector()->length() == 4);
13680 
13681   format %{ "XXLEQV      $dst, $dst, $dst \t// replicate4I" %}
13682   size(4);
13683   ins_encode %{
13684     __ xxleqv($dst$$VectorSRegister, $dst$$VectorSRegister, $dst$$VectorSRegister);
13685   %}
13686   ins_pipe(pipe_class_default);
13687 %}
13688 
13689 // Move float to int register via stack, replicate.
13690 instruct repl2F_reg_Ex(iRegLdst dst, regF src) %{
13691   match(Set dst (ReplicateF src));
13692   predicate(n->as_Vector()->length() == 2);
13693   ins_cost(2 * MEMORY_REF_COST + DEFAULT_COST);
13694   expand %{
13695     stackSlotL tmpS;
13696     iRegIdst tmpI;
13697     moveF2I_reg_stack(tmpS, src);   // Move float to stack.
13698     moveF2I_stack_reg(tmpI, tmpS);  // Move stack to int reg.
13699     moveReg(dst, tmpI);             // Move int to long reg.
13700     repl32(dst);                    // Replicate bitpattern.
13701   %}
13702 %}
13703 
13704 // Replicate scalar constant to packed float values in Double register
13705 instruct repl2F_immF_Ex(iRegLdst dst, immF src) %{
13706   match(Set dst (ReplicateF src));
13707   predicate(n->as_Vector()->length() == 2);
13708   ins_cost(5 * DEFAULT_COST);
13709 
13710   format %{ "LD      $dst, offset, $constanttablebase\t// load replicated float $src $src from table, postalloc expanded" %}
13711   postalloc_expand( postalloc_expand_load_replF_constant(dst, src, constanttablebase) );
13712 %}
13713 
13714 // Replicate scalar zero constant to packed float values in Double register
13715 instruct repl2F_immF0(iRegLdst dst, immF_0 zero) %{
13716   match(Set dst (ReplicateF zero));
13717   predicate(n->as_Vector()->length() == 2);
13718 
13719   format %{ "LI      $dst, #0 \t// replicate2F" %}
13720   ins_encode %{
13721     __ li($dst$$Register, 0x0);
13722   %}
13723   ins_pipe(pipe_class_default);
13724 %}
13725 
13726 
13727 //----------Vector Arithmetic Instructions--------------------------------------
13728 
13729 // Vector Addition Instructions
13730 
13731 instruct vadd16B_reg(vecX dst, vecX src1, vecX src2) %{
13732   match(Set dst (AddVB src1 src2));
13733   predicate(n->as_Vector()->length() == 16);
13734   format %{ "VADDUBM  $dst,$src1,$src2\t// add packed16B" %}
13735   size(4);
13736   ins_encode %{
13737     __ vaddubm($dst$$VectorSRegister->to_vr(), $src1$$VectorSRegister->to_vr(), $src2$$VectorSRegister->to_vr());
13738   %}
13739   ins_pipe(pipe_class_default);
13740 %}
13741 
13742 instruct vadd8S_reg(vecX dst, vecX src1, vecX src2) %{
13743   match(Set dst (AddVS src1 src2));
13744   predicate(n->as_Vector()->length() == 8);
13745   format %{ "VADDUHM  $dst,$src1,$src2\t// add packed8S" %}
13746   size(4);
13747   ins_encode %{
13748     __ vadduhm($dst$$VectorSRegister->to_vr(), $src1$$VectorSRegister->to_vr(), $src2$$VectorSRegister->to_vr());
13749   %}
13750   ins_pipe(pipe_class_default);
13751 %}
13752 
13753 instruct vadd4I_reg(vecX dst, vecX src1, vecX src2) %{
13754   match(Set dst (AddVI src1 src2));
13755   predicate(n->as_Vector()->length() == 4);
13756   format %{ "VADDUWM  $dst,$src1,$src2\t// add packed4I" %}
13757   size(4);
13758   ins_encode %{
13759     __ vadduwm($dst$$VectorSRegister->to_vr(), $src1$$VectorSRegister->to_vr(), $src2$$VectorSRegister->to_vr());
13760   %}
13761   ins_pipe(pipe_class_default);
13762 %}
13763 
13764 instruct vadd4F_reg(vecX dst, vecX src1, vecX src2) %{
13765   match(Set dst (AddVF src1 src2));
13766   predicate(n->as_Vector()->length() == 4);
13767   format %{ "VADDFP  $dst,$src1,$src2\t// add packed4F" %}
13768   size(4);
13769   ins_encode %{
13770     __ vaddfp($dst$$VectorSRegister->to_vr(), $src1$$VectorSRegister->to_vr(), $src2$$VectorSRegister->to_vr());
13771   %}
13772   ins_pipe(pipe_class_default);
13773 %}
13774 
13775 instruct vadd2L_reg(vecX dst, vecX src1, vecX src2) %{
13776   match(Set dst (AddVL src1 src2));
13777   predicate(n->as_Vector()->length() == 2);
13778   format %{ "VADDUDM  $dst,$src1,$src2\t// add packed2L" %}
13779   size(4);
13780   ins_encode %{
13781     __ vaddudm($dst$$VectorSRegister->to_vr(), $src1$$VectorSRegister->to_vr(), $src2$$VectorSRegister->to_vr());
13782   %}
13783   ins_pipe(pipe_class_default);
13784 %}
13785 
13786 instruct vadd2D_reg(vecX dst, vecX src1, vecX src2) %{
13787   match(Set dst (AddVD src1 src2));
13788   predicate(n->as_Vector()->length() == 2);
13789   format %{ "XVADDDP  $dst,$src1,$src2\t// add packed2D" %}
13790   size(4);
13791   ins_encode %{
13792     __ xvadddp($dst$$VectorSRegister, $src1$$VectorSRegister, $src2$$VectorSRegister);
13793   %}
13794   ins_pipe(pipe_class_default);
13795 %}
13796 
13797 // Vector Subtraction Instructions
13798 
13799 instruct vsub16B_reg(vecX dst, vecX src1, vecX src2) %{
13800   match(Set dst (SubVB src1 src2));
13801   predicate(n->as_Vector()->length() == 16);
13802   format %{ "VSUBUBM  $dst,$src1,$src2\t// sub packed16B" %}
13803   size(4);
13804   ins_encode %{
13805     __ vsububm($dst$$VectorSRegister->to_vr(), $src1$$VectorSRegister->to_vr(), $src2$$VectorSRegister->to_vr());
13806   %}
13807   ins_pipe(pipe_class_default);
13808 %}
13809 
13810 instruct vsub8S_reg(vecX dst, vecX src1, vecX src2) %{
13811   match(Set dst (SubVS src1 src2));
13812   predicate(n->as_Vector()->length() == 8);
13813   format %{ "VSUBUHM  $dst,$src1,$src2\t// sub packed8S" %}
13814   size(4);
13815   ins_encode %{
13816     __ vsubuhm($dst$$VectorSRegister->to_vr(), $src1$$VectorSRegister->to_vr(), $src2$$VectorSRegister->to_vr());
13817   %}
13818   ins_pipe(pipe_class_default);
13819 %}
13820 
13821 instruct vsub4I_reg(vecX dst, vecX src1, vecX src2) %{
13822   match(Set dst (SubVI src1 src2));
13823   predicate(n->as_Vector()->length() == 4);
13824   format %{ "VSUBUWM  $dst,$src1,$src2\t// sub packed4I" %}
13825   size(4);
13826   ins_encode %{
13827     __ vsubuwm($dst$$VectorSRegister->to_vr(), $src1$$VectorSRegister->to_vr(), $src2$$VectorSRegister->to_vr());
13828   %}
13829   ins_pipe(pipe_class_default);
13830 %}
13831 
13832 instruct vsub4F_reg(vecX dst, vecX src1, vecX src2) %{
13833   match(Set dst (SubVF src1 src2));
13834   predicate(n->as_Vector()->length() == 4);
13835   format %{ "VSUBFP  $dst,$src1,$src2\t// sub packed4F" %}
13836   size(4);
13837   ins_encode %{
13838     __ vsubfp($dst$$VectorSRegister->to_vr(), $src1$$VectorSRegister->to_vr(), $src2$$VectorSRegister->to_vr());
13839   %}
13840   ins_pipe(pipe_class_default);
13841 %}
13842 
13843 instruct vsub2L_reg(vecX dst, vecX src1, vecX src2) %{
13844   match(Set dst (SubVL src1 src2));
13845   predicate(n->as_Vector()->length() == 2);
13846   format %{ "VSUBUDM  $dst,$src1,$src2\t// sub packed2L" %}
13847   size(4);
13848   ins_encode %{
13849     __ vsubudm($dst$$VectorSRegister->to_vr(), $src1$$VectorSRegister->to_vr(), $src2$$VectorSRegister->to_vr());
13850   %}
13851   ins_pipe(pipe_class_default);
13852 %}
13853 
13854 instruct vsub2D_reg(vecX dst, vecX src1, vecX src2) %{
13855   match(Set dst (SubVD src1 src2));
13856   predicate(n->as_Vector()->length() == 2);
13857   format %{ "XVSUBDP  $dst,$src1,$src2\t// sub packed2D" %}
13858   size(4);
13859   ins_encode %{
13860     __ xvsubdp($dst$$VectorSRegister, $src1$$VectorSRegister, $src2$$VectorSRegister);
13861   %}
13862   ins_pipe(pipe_class_default);
13863 %}
13864 
13865 // Vector Multiplication Instructions
13866 
13867 instruct vmul8S_reg(vecX dst, vecX src1, vecX src2, vecX tmp) %{
13868   match(Set dst (MulVS src1 src2));
13869   predicate(n->as_Vector()->length() == 8);
13870   effect(TEMP tmp);
13871   format %{ "VSPLTISH  $tmp,0\t// mul packed8S" %}
13872   format %{ "VMLADDUHM  $dst,$src1,$src2\t// mul packed8S" %}
13873   size(8);
13874   ins_encode %{
13875     __ vspltish($tmp$$VectorSRegister->to_vr(), 0);
13876     __ vmladduhm($dst$$VectorSRegister->to_vr(), $src1$$VectorSRegister->to_vr(), $src2$$VectorSRegister->to_vr(), $tmp$$VectorSRegister->to_vr());
13877   %}
13878   ins_pipe(pipe_class_default);
13879 %}
13880 
13881 instruct vmul4I_reg(vecX dst, vecX src1, vecX src2) %{
13882   match(Set dst (MulVI src1 src2));
13883   predicate(n->as_Vector()->length() == 4);
13884   format %{ "VMULUWM  $dst,$src1,$src2\t// mul packed4I" %}
13885   size(4);
13886   ins_encode %{
13887     __ vmuluwm($dst$$VectorSRegister->to_vr(), $src1$$VectorSRegister->to_vr(), $src2$$VectorSRegister->to_vr());
13888   %}
13889   ins_pipe(pipe_class_default);
13890 %}
13891 
13892 instruct vmul4F_reg(vecX dst, vecX src1, vecX src2) %{
13893   match(Set dst (MulVF src1 src2));
13894   predicate(n->as_Vector()->length() == 4);
13895   format %{ "XVMULSP  $dst,$src1,$src2\t// mul packed4F" %}
13896   size(4);
13897   ins_encode %{
13898     __ xvmulsp($dst$$VectorSRegister, $src1$$VectorSRegister, $src2$$VectorSRegister);
13899   %}
13900   ins_pipe(pipe_class_default);
13901 %}
13902 
13903 instruct vmul2D_reg(vecX dst, vecX src1, vecX src2) %{
13904   match(Set dst (MulVD src1 src2));
13905   predicate(n->as_Vector()->length() == 2);
13906   format %{ "XVMULDP  $dst,$src1,$src2\t// mul packed2D" %}
13907   size(4);
13908   ins_encode %{
13909     __ xvmuldp($dst$$VectorSRegister, $src1$$VectorSRegister, $src2$$VectorSRegister);
13910   %}
13911   ins_pipe(pipe_class_default);
13912 %}
13913 
13914 // Vector Division Instructions
13915 
13916 instruct vdiv4F_reg(vecX dst, vecX src1, vecX src2) %{
13917   match(Set dst (DivVF src1 src2));
13918   predicate(n->as_Vector()->length() == 4);
13919   format %{ "XVDIVSP  $dst,$src1,$src2\t// div packed4F" %}
13920   size(4);
13921   ins_encode %{
13922     __ xvdivsp($dst$$VectorSRegister, $src1$$VectorSRegister, $src2$$VectorSRegister);
13923   %}
13924   ins_pipe(pipe_class_default);
13925 %}
13926 
13927 instruct vdiv2D_reg(vecX dst, vecX src1, vecX src2) %{
13928   match(Set dst (DivVD src1 src2));
13929   predicate(n->as_Vector()->length() == 2);
13930   format %{ "XVDIVDP  $dst,$src1,$src2\t// div packed2D" %}
13931   size(4);
13932   ins_encode %{
13933     __ xvdivdp($dst$$VectorSRegister, $src1$$VectorSRegister, $src2$$VectorSRegister);
13934   %}
13935   ins_pipe(pipe_class_default);
13936 %}
13937 
13938 // Vector Absolute Instructions
13939 
13940 instruct vabs4F_reg(vecX dst, vecX src) %{
13941   match(Set dst (AbsVF src));
13942   predicate(n->as_Vector()->length() == 4);
13943   format %{ "XVABSSP $dst,$src\t// absolute packed4F" %}
13944   size(4);
13945   ins_encode %{
13946     __ xvabssp($dst$$VectorSRegister, $src$$VectorSRegister);
13947   %}
13948   ins_pipe(pipe_class_default);
13949 %}
13950 
13951 instruct vabs2D_reg(vecX dst, vecX src) %{
13952   match(Set dst (AbsVD src));
13953   predicate(n->as_Vector()->length() == 2);
13954   format %{ "XVABSDP $dst,$src\t// absolute packed2D" %}
13955   size(4);
13956   ins_encode %{
13957     __ xvabsdp($dst$$VectorSRegister, $src$$VectorSRegister);
13958   %}
13959   ins_pipe(pipe_class_default);
13960 %}
13961 
13962 // Round Instructions
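      // FRIM/FRIP (and XVRDPIM/XVRDPIP for vectors) round toward -/+ infinity,
      // i.e. floor/ceil; XVRDPIC rounds according to the current rounding mode
      // (round to nearest even in the VM), which implements rint.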
13963 instruct roundD_reg(regD dst, regD src, immI8 rmode) %{
13964   match(Set dst (RoundDoubleMode src rmode));
13965   format %{ "RoundDoubleMode $src,$rmode" %}
13966   size(4);
13967   ins_encode %{
13968     switch ($rmode$$constant) {
13969       case RoundDoubleModeNode::rmode_rint:
13970         __ xvrdpic($dst$$FloatRegister->to_vsr(), $src$$FloatRegister->to_vsr());
13971         break;
13972       case RoundDoubleModeNode::rmode_floor:
13973         __ frim($dst$$FloatRegister, $src$$FloatRegister);
13974         break;
13975       case RoundDoubleModeNode::rmode_ceil:
13976         __ frip($dst$$FloatRegister, $src$$FloatRegister);
13977         break;
13978       default:
13979         ShouldNotReachHere();
13980     }
13981   %}
13982   ins_pipe(pipe_class_default);
13983 %}
13984 
13985 // Vector Round Instructions
13986 instruct vround2D_reg(vecX dst, vecX src, immI8 rmode) %{
13987   match(Set dst (RoundDoubleModeV src rmode));
13988   predicate(n->as_Vector()->length() == 2);
13989   format %{ "RoundDoubleModeV $src,$rmode" %}
13990   size(4);
13991   ins_encode %{
13992     switch ($rmode$$constant) {
13993       case RoundDoubleModeNode::rmode_rint:
13994         __ xvrdpic($dst$$VectorSRegister, $src$$VectorSRegister);
13995         break;
13996       case RoundDoubleModeNode::rmode_floor:
13997         __ xvrdpim($dst$$VectorSRegister, $src$$VectorSRegister);
13998         break;
13999       case RoundDoubleModeNode::rmode_ceil:
14000         __ xvrdpip($dst$$VectorSRegister, $src$$VectorSRegister);
14001         break;
14002       default:
14003         ShouldNotReachHere();
14004     }
14005   %}
14006   ins_pipe(pipe_class_default);
14007 %}
14008 
14009 // Vector Negate Instructions
14010 
14011 instruct vneg4F_reg(vecX dst, vecX src) %{
14012   match(Set dst (NegVF src));
14013   predicate(n->as_Vector()->length() == 4);
14014   format %{ "XVNEGSP $dst,$src\t// negate packed4F" %}
14015   size(4);
14016   ins_encode %{
14017     __ xvnegsp($dst$$VectorSRegister, $src$$VectorSRegister);
14018   %}
14019   ins_pipe(pipe_class_default);
14020 %}
14021 
14022 instruct vneg2D_reg(vecX dst, vecX src) %{
14023   match(Set dst (NegVD src));
14024   predicate(n->as_Vector()->length() == 2);
14025   format %{ "XVNEGDP $dst,$src\t// negate packed2D" %}
14026   size(4);
14027   ins_encode %{
14028     __ xvnegdp($dst$$VectorSRegister, $src$$VectorSRegister);
14029   %}
14030   ins_pipe(pipe_class_default);
14031 %}
14032 
14033 // Vector Square Root Instructions
14034 
14035 instruct vsqrt4F_reg(vecX dst, vecX src) %{
14036   match(Set dst (SqrtVF src));
14037   predicate(n->as_Vector()->length() == 4);
14038   format %{ "XVSQRTSP $dst,$src\t// sqrt packed4F" %}
14039   size(4);
14040   ins_encode %{
14041     __ xvsqrtsp($dst$$VectorSRegister, $src$$VectorSRegister);
14042   %}
14043   ins_pipe(pipe_class_default);
14044 %}
14045 
14046 instruct vsqrt2D_reg(vecX dst, vecX src) %{
14047   match(Set dst (SqrtVD src));
14048   predicate(n->as_Vector()->length() == 2);
14049   format %{ "XVSQRTDP  $dst,$src\t// sqrt packed2D" %}
14050   size(4);
14051   ins_encode %{
14052     __ xvsqrtdp($dst$$VectorSRegister, $src$$VectorSRegister);
14053   %}
14054   ins_pipe(pipe_class_default);
14055 %}
14056 
14057 // Vector Population Count Instructions
14058 
14059 instruct vpopcnt4I_reg(vecX dst, vecX src) %{
14060   match(Set dst (PopCountVI src));
14061   predicate(n->as_Vector()->length() == 4);
14062   format %{ "VPOPCNTW $dst,$src\t// pop count packed4I" %}
14063   size(4);
14064   ins_encode %{
14065     __ vpopcntw($dst$$VectorSRegister->to_vr(), $src$$VectorSRegister->to_vr());
14066   %}
14067   ins_pipe(pipe_class_default);
14068 %}
14069 
14070 // --------------------------------- FMA --------------------------------------
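      // The A-form VSX FMA instructions accumulate into $dst (e.g. XVMADDASP:
      // dst = src1 * src2 + dst), which is why dst also appears as an input in
      // the match rules below.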
14071 // dst + src1 * src2
14072 instruct vfma4F(vecX dst, vecX src1, vecX src2) %{
14073   match(Set dst (FmaVF dst (Binary src1 src2)));
14074   predicate(n->as_Vector()->length() == 4);
14075 
14076   format %{ "XVMADDASP   $dst, $src1, $src2" %}
14077 
14078   size(4);
14079   ins_encode %{
14080     __ xvmaddasp($dst$$VectorSRegister, $src1$$VectorSRegister, $src2$$VectorSRegister);
14081   %}
14082   ins_pipe(pipe_class_default);
14083 %}
14084 
14085 // dst - src1 * src2
14086 instruct vfma4F_neg1(vecX dst, vecX src1, vecX src2) %{
14087   match(Set dst (FmaVF dst (Binary (NegVF src1) src2)));
14088   match(Set dst (FmaVF dst (Binary src1 (NegVF src2))));
14089   predicate(n->as_Vector()->length() == 4);
14090 
14091   format %{ "XVNMSUBASP   $dst, $src1, $src2" %}
14092 
14093   size(4);
14094   ins_encode %{
14095     __ xvnmsubasp($dst$$VectorSRegister, $src1$$VectorSRegister, $src2$$VectorSRegister);
14096   %}
14097   ins_pipe(pipe_class_default);
14098 %}
14099 
14100 // - dst + src1 * src2
14101 instruct vfma4F_neg2(vecX dst, vecX src1, vecX src2) %{
14102   match(Set dst (FmaVF (NegVF dst) (Binary src1 src2)));
14103   predicate(n->as_Vector()->length() == 4);
14104 
14105   format %{ "XVMSUBASP   $dst, $src1, $src2" %}
14106 
14107   size(4);
14108   ins_encode %{
14109     __ xvmsubasp($dst$$VectorSRegister, $src1$$VectorSRegister, $src2$$VectorSRegister);
14110   %}
14111   ins_pipe(pipe_class_default);
14112 %}
14113 
14114 // dst + src1 * src2
14115 instruct vfma2D(vecX dst, vecX src1, vecX src2) %{
14116   match(Set dst (FmaVD  dst (Binary src1 src2)));
14117   predicate(n->as_Vector()->length() == 2);
14118 
14119   format %{ "XVMADDADP   $dst, $src1, $src2" %}
14120 
14121   size(4);
14122   ins_encode %{
14123     __ xvmaddadp($dst$$VectorSRegister, $src1$$VectorSRegister, $src2$$VectorSRegister);
14124   %}
14125   ins_pipe(pipe_class_default);
14126 %}
14127 
14128 // dst - src1 * src2
14129 instruct vfma2D_neg1(vecX dst, vecX src1, vecX src2) %{
14130   match(Set dst (FmaVD  dst (Binary (NegVD src1) src2)));
14131   match(Set dst (FmaVD  dst (Binary src1 (NegVD src2))));
14132   predicate(n->as_Vector()->length() == 2);
14133 
14134   format %{ "XVNMSUBADP   $dst, $src1, $src2" %}
14135 
14136   size(4);
14137   ins_encode %{
14138     __ xvnmsubadp($dst$$VectorSRegister, $src1$$VectorSRegister, $src2$$VectorSRegister);
14139   %}
14140   ins_pipe(pipe_class_default);
14141 %}
14142 
14143 // - dst + src1 * src2
14144 instruct vfma2D_neg2(vecX dst, vecX src1, vecX src2) %{
14145   match(Set dst (FmaVD (NegVD dst) (Binary src1 src2)));
14146   predicate(n->as_Vector()->length() == 2);
14147 
14148   format %{ "XVMSUBADP   $dst, $src1, $src2" %}
14149 
14150   size(4);
14151   ins_encode %{
14152     __ xvmsubadp($dst$$VectorSRegister, $src1$$VectorSRegister, $src2$$VectorSRegister);
14153   %}
14154   ins_pipe(pipe_class_default);
14155 %}
14156 
14157 //----------Overflow Math Instructions-----------------------------------------
14158 
14159 // Note that we have to make sure that XER.SO is reset before using overflow instructions.
14160 // Simple Overflow operations can be matched by very few instructions (e.g. addExact: xor, and_, bc).
14161 // It seems that only the Long intrinsics have an advantage. (The only expensive one is OverflowMulL.)
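      // Each pattern therefore clears XER.SO (li/mtxer) and then uses the
      // record + overflow form of the operation, so that CR0.SO reflects whether
      // the operation overflowed.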
14162 
14163 instruct overflowAddL_reg_reg(flagsRegCR0 cr0, iRegLsrc op1, iRegLsrc op2) %{
14164   match(Set cr0 (OverflowAddL op1 op2));
14165 
14166   format %{ "add_    $op1, $op2\t# overflow check long" %}
14167   ins_encode %{
14168     __ li(R0, 0);
14169     __ mtxer(R0); // clear XER.SO
14170     __ addo_(R0, $op1$$Register, $op2$$Register);
14171   %}
14172   ins_pipe(pipe_class_default);
14173 %}
14174 
14175 instruct overflowSubL_reg_reg(flagsRegCR0 cr0, iRegLsrc op1, iRegLsrc op2) %{
14176   match(Set cr0 (OverflowSubL op1 op2));
14177 
14178   format %{ "subfo_  R0, $op2, $op1\t# overflow check long" %}
14179   ins_encode %{
14180     __ li(R0, 0);
14181     __ mtxer(R0); // clear XER.SO
14182     __ subfo_(R0, $op2$$Register, $op1$$Register);
14183   %}
14184   ins_pipe(pipe_class_default);
14185 %}
14186 
14187 instruct overflowNegL_reg(flagsRegCR0 cr0, immL_0 zero, iRegLsrc op2) %{
14188   match(Set cr0 (OverflowSubL zero op2));
14189 
14190   format %{ "nego_   R0, $op2\t# overflow check long" %}
14191   ins_encode %{
14192     __ li(R0, 0);
14193     __ mtxer(R0); // clear XER.SO
14194     __ nego_(R0, $op2$$Register);
14195   %}
14196   ins_pipe(pipe_class_default);
14197 %}
14198 
14199 instruct overflowMulL_reg_reg(flagsRegCR0 cr0, iRegLsrc op1, iRegLsrc op2) %{
14200   match(Set cr0 (OverflowMulL op1 op2));
14201 
14202   format %{ "mulldo_ R0, $op1, $op2\t# overflow check long" %}
14203   ins_encode %{
14204     __ li(R0, 0);
14205     __ mtxer(R0); // clear XER.SO
14206     __ mulldo_(R0, $op1$$Register, $op2$$Register);
14207   %}
14208   ins_pipe(pipe_class_default);
14209 %}
14210 
14211 instruct repl4F_reg_Ex(vecX dst, regF src) %{
14212   match(Set dst (ReplicateF src));
14213   predicate(n->as_Vector()->length() == 4);
14214   ins_cost(DEFAULT_COST);
14215   expand %{
14216     vecX tmpV;
14217     immI8  zero %{ (int)  0 %}
14218 
14219     xscvdpspn_regF(tmpV, src);
14220     xxspltw(dst, tmpV, zero);
14221   %}
14222 %}
14223 
14224 instruct repl4F_immF_Ex(vecX dst, immF src, iRegLdst tmp) %{
14225   match(Set dst (ReplicateF src));
14226   predicate(n->as_Vector()->length() == 4);
14227   effect(TEMP tmp);
14228   ins_cost(10 * DEFAULT_COST);
14229 
14230   postalloc_expand( postalloc_expand_load_replF_constant_vsx(dst, src, constanttablebase, tmp) );
14231 %}
14232 
14233 instruct repl4F_immF0(vecX dst, immF_0 zero) %{
14234   match(Set dst (ReplicateF zero));
14235   predicate(n->as_Vector()->length() == 4);
14236 
14237   format %{ "XXLXOR      $dst, $zero \t// replicate4F" %}
14238   ins_encode %{
14239     __ xxlxor($dst$$VectorSRegister, $dst$$VectorSRegister, $dst$$VectorSRegister);
14240   %}
14241   ins_pipe(pipe_class_default);
14242 %}
14243 
14244 instruct repl2D_reg_Ex(vecX dst, regD src) %{
14245   match(Set dst (ReplicateD src));
14246   predicate(n->as_Vector()->length() == 2);
14247 
14248   format %{ "XXPERMDI      $dst, $src, $src, 0 \t// Splat doubleword" %}
14249   size(4);
14250   ins_encode %{
14251     __ xxpermdi($dst$$VectorSRegister, $src$$FloatRegister->to_vsr(), $src$$FloatRegister->to_vsr(), 0);
14252   %}
14253   ins_pipe(pipe_class_default);
14254 %}
14255 
14256 instruct repl2D_immD0(vecX dst, immD_0 zero) %{
14257   match(Set dst (ReplicateD zero));
14258   predicate(n->as_Vector()->length() == 2);
14259 
14260   format %{ "XXLXOR      $dst, $zero \t// replicate2D" %}
14261   size(4);
14262   ins_encode %{
14263     __ xxlxor($dst$$VectorSRegister, $dst$$VectorSRegister, $dst$$VectorSRegister);
14264   %}
14265   ins_pipe(pipe_class_default);
14266 %}
14267 
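      // More expand-only helpers: MTVSRD moves a 64-bit GPR value into a VSR,
      // and XXPERMDI/XXSPLTD splat a doubleword. They are used by the 2L/4I/8S
      // replicate rules.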
14268 instruct mtvsrd(vecX dst, iRegLsrc src) %{
14269   predicate(false);
14270   effect(DEF dst, USE src);
14271 
14272   format %{ "MTVSRD      $dst, $src \t// Move to 16-byte register" %}
14273   size(4);
14274   ins_encode %{
14275     __ mtvsrd($dst$$VectorSRegister, $src$$Register);
14276   %}
14277   ins_pipe(pipe_class_default);
14278 %}
14279 
14280 instruct xxspltd(vecX dst, vecX src, immI8 zero) %{
14281   effect(DEF dst, USE src, USE zero);
14282 
14283   format %{ "XXSPLATD      $dst, $src, $zero \t// Splat doubleword" %}
14284   size(4);
14285   ins_encode %{
14286     __ xxpermdi($dst$$VectorSRegister, $src$$VectorSRegister, $src$$VectorSRegister, $zero$$constant);
14287   %}
14288   ins_pipe(pipe_class_default);
14289 %}
14290 
14291 instruct xxpermdi(vecX dst, vecX src1, vecX src2, immI8 zero) %{
14292   effect(DEF dst, USE src1, USE src2, USE zero);
14293 
14294   format %{ "XXPERMDI      $dst, $src1, $src2, $zero \t// Splat doubleword" %}
14295   size(4);
14296   ins_encode %{
14297     __ xxpermdi($dst$$VectorSRegister, $src1$$VectorSRegister, $src2$$VectorSRegister, $zero$$constant);
14298   %}
14299   ins_pipe(pipe_class_default);
14300 %}
14301 
14302 instruct repl2L_reg_Ex(vecX dst, iRegLsrc src) %{
14303   match(Set dst (ReplicateL src));
14304   predicate(n->as_Vector()->length() == 2);
14305   expand %{
14306     vecX tmpV;
14307     immI8  zero %{ (int)  0 %}
14308     mtvsrd(tmpV, src);
14309     xxpermdi(dst, tmpV, tmpV, zero);
14310   %}
14311 %}
14312 
14313 instruct repl2L_immI0(vecX dst, immI_0 zero) %{
14314   match(Set dst (ReplicateL zero));
14315   predicate(n->as_Vector()->length() == 2);
14316 
14317   format %{ "XXLXOR      $dst, $zero \t// replicate2L" %}
14318   size(4);
14319   ins_encode %{
14320     __ xxlxor($dst$$VectorSRegister, $dst$$VectorSRegister, $dst$$VectorSRegister);
14321   %}
14322   ins_pipe(pipe_class_default);
14323 %}
14324 
14325 instruct repl2L_immIminus1(vecX dst, immI_minus1 src) %{
14326   match(Set dst (ReplicateL src));
14327   predicate(n->as_Vector()->length() == 2);
14328 
14329   format %{ "XXLEQV      $dst, $src \t// replicate2L" %}
14330   size(4);
14331   ins_encode %{
14332     __ xxleqv($dst$$VectorSRegister, $dst$$VectorSRegister, $dst$$VectorSRegister);
14333   %}
14334   ins_pipe(pipe_class_default);
14335 %}
14336 
14337 // ============================================================================
14338 // Safepoint Instruction
14339 
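      // The load dereferences the polling page; when a safepoint is pending the
      // page is protected, so the access traps into the signal handler.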
14340 instruct safePoint_poll(iRegPdst poll) %{
14341   match(SafePoint poll);
14342 
14343   // Adding an effect that kills R0 caused problems, but the effect no
14344   // longer needs to be mentioned, since R0 is not contained in any
14345   // reg_class.
14346 
14347   format %{ "LD      R0, #0, $poll \t// Safepoint poll for GC" %}
14348   size(4);
14349   ins_encode( enc_poll(0x0, poll) );
14350   ins_pipe(pipe_class_default);
14351 %}
14352 
14353 // ============================================================================
14354 // Call Instructions
14355 
14356 // Call Java Static Instruction
14357 
14358 // Schedulable version of call static node.
14359 instruct CallStaticJavaDirect(method meth) %{
14360   match(CallStaticJava);
14361   effect(USE meth);
14362   ins_cost(CALL_COST);
14363 
14364   ins_num_consts(3 /* up to 3 patchable constants: inline cache, 2 call targets. */);
14365 
14366   format %{ "CALL,static $meth \t// ==> " %}
14367   size(4);
14368   ins_encode( enc_java_static_call(meth) );
14369   ins_pipe(pipe_class_call);
14370 %}
14371 
14372 // Call Java Dynamic Instruction
14373 
14374 // Used by postalloc expand of CallDynamicJavaDirectSchedEx (actual call).
14375 // Loading of IC was postalloc expanded. The nodes loading the IC are reachable
14376 // via fields ins_field_load_ic_hi_node and ins_field_load_ic_node.
14377 // The call destination must still be placed in the constant pool.
14378 instruct CallDynamicJavaDirectSched(method meth) %{
14379   match(CallDynamicJava); // To get all the data fields we need ...
14380   effect(USE meth);
14381   predicate(false);       // ... but never match.
14382 
14383   ins_field_load_ic_hi_node(loadConL_hiNode*);
14384   ins_field_load_ic_node(loadConLNode*);
14385   ins_num_consts(1 /* 1 patchable constant: call destination */);
14386 
14387   format %{ "BL        \t// dynamic $meth ==> " %}
14388   size(4);
14389   ins_encode( enc_java_dynamic_call_sched(meth) );
14390   ins_pipe(pipe_class_call);
14391 %}
14392 
14393 // Schedulable (i.e. postalloc expanded) version of call dynamic java.
14394 // We use postalloc expanded calls if we use inline caches
14395 // and do not update method data.
14396 //
14397 // This instruction has two constants: inline cache (IC) and call destination.
14398 // Loading the inline cache will be postalloc expanded, thus leaving a call with
14399 // one constant.
14400 instruct CallDynamicJavaDirectSched_Ex(method meth) %{
14401   match(CallDynamicJava);
14402   effect(USE meth);
14403   predicate(UseInlineCaches);
14404   ins_cost(CALL_COST);
14405 
14406   ins_num_consts(2 /* 2 patchable constants: inline cache, call destination. */);
14407 
14408   format %{ "CALL,dynamic $meth \t// postalloc expanded" %}
14409   postalloc_expand( postalloc_expand_java_dynamic_call_sched(meth, constanttablebase) );
14410 %}
14411 
14412 // Compound version of call dynamic java
14413 // We use postalloc expanded calls if we use inline caches
14414 // and do not update method data.
14415 instruct CallDynamicJavaDirect(method meth) %{
14416   match(CallDynamicJava);
14417   effect(USE meth);
14418   predicate(!UseInlineCaches);
14419   ins_cost(CALL_COST);
14420 
14421   // Enc_java_to_runtime_call needs up to 4 constants (method data oop).
14422   ins_num_consts(4);
14423 
14424   format %{ "CALL,dynamic $meth \t// ==> " %}
14425   ins_encode( enc_java_dynamic_call(meth, constanttablebase) );
14426   ins_pipe(pipe_class_call);
14427 %}
14428 
14429 // Call Runtime Instruction
14430 
14431 instruct CallRuntimeDirect(method meth) %{
14432   match(CallRuntime);
14433   effect(USE meth);
14434   ins_cost(CALL_COST);
14435 
14436   // Enc_java_to_runtime_call needs up to 3 constants: call target,
14437   // env for callee, C-toc.
14438   ins_num_consts(3);
14439 
14440   format %{ "CALL,runtime" %}
14441   ins_encode( enc_java_to_runtime_call(meth) );
14442   ins_pipe(pipe_class_call);
14443 %}
14444 
14445 // Call Leaf
14446 
14447 // Used by postalloc expand of CallLeafDirect_Ex (mtctr).
14448 instruct CallLeafDirect_mtctr(iRegLdst dst, iRegLsrc src) %{
14449   effect(DEF dst, USE src);
14450 
14451   ins_num_consts(1);
14452 
14453   format %{ "MTCTR   $src" %}
14454   size(4);
14455   ins_encode( enc_leaf_call_mtctr(src) );
14456   ins_pipe(pipe_class_default);
14457 %}
14458 
14459 // Used by postalloc expand of CallLeafDirect_Ex (actual call).
14460 instruct CallLeafDirect(method meth) %{
14461   match(CallLeaf);   // To get all the data fields we need ...
14462   effect(USE meth);
14463   predicate(false);  // but never match.
14464 
14465   format %{ "BCTRL     \t// leaf call $meth ==> " %}
14466   size(4);
14467   ins_encode %{
14468     __ bctrl();
14469   %}
14470   ins_pipe(pipe_class_call);
14471 %}
14472 
14473 // postalloc expand of CallLeafDirect.
14474 // Load address to call from TOC, then bl to it.
14475 instruct CallLeafDirect_Ex(method meth) %{
14476   match(CallLeaf);
14477   effect(USE meth);
14478   ins_cost(CALL_COST);
14479 
14480   // Postalloc_expand_java_to_runtime_call needs up to 3 constants: call target,
14481   // env for callee, C-toc.
14482   ins_num_consts(3);
14483 
14484   format %{ "CALL,runtime leaf $meth \t// postalloc expanded" %}
14485   postalloc_expand( postalloc_expand_java_to_runtime_call(meth, constanttablebase) );
14486 %}
14487 
14488 // Call runtime without safepoint - same as CallLeaf.
14489 // postalloc expand of CallLeafNoFPDirect.
14490 // Load address to call from TOC, then bl to it.
14491 instruct CallLeafNoFPDirect_Ex(method meth) %{
14492   match(CallLeafNoFP);
14493   effect(USE meth);
14494   ins_cost(CALL_COST);
14495 
14496   // Enc_java_to_runtime_call needs up to 3 constants: call target,
14497   // env for callee, C-toc.
14498   ins_num_consts(3);
14499 
14500   format %{ "CALL,runtime leaf nofp $meth \t// postalloc expanded" %}
14501   postalloc_expand( postalloc_expand_java_to_runtime_call(meth, constanttablebase) );
14502 %}
14503 
14504 // Tail Call; Jump from runtime stub to Java code.
14505 // Also known as an 'interprocedural jump'.
14506 // Target of jump will eventually return to caller.
14507 // TailJump below removes the return address.
14508 instruct TailCalljmpInd(iRegPdstNoScratch jump_target, inline_cache_regP method_ptr) %{
14509   match(TailCall jump_target method_ptr);
14510   ins_cost(CALL_COST);
14511 
14512   format %{ "MTCTR   $jump_target \t// $method_ptr holds method\n\t"
14513             "BCTR         \t// tail call" %}
14514   size(8);
14515   ins_encode %{
14516     __ mtctr($jump_target$$Register);
14517     __ bctr();
14518   %}
14519   ins_pipe(pipe_class_call);
14520 %}
14521 
14522 // Return Instruction
14523 instruct Ret() %{
14524   match(Return);
14525   format %{ "BLR      \t// branch to link register" %}
14526   size(4);
14527   ins_encode %{
14528     // LR is restored in MachEpilogNode. Just do the RET here.
14529     __ blr();
14530   %}
14531   ins_pipe(pipe_class_default);
14532 %}
14533 
14534 // Tail Jump; remove the return address; jump to target.
14535 // TailCall above leaves the return address around.
14536 // TailJump is used in only one place, the rethrow_Java stub (fancy_jump=2).
14537 // The exception oop (ex_oop) must be live in its argument register at the
14538 // jump; the issuing pc is reloaded from the LR save slot on the stack into
14539 // R4_ARG2 (see the encoding below).
14540 instruct tailjmpInd(iRegPdstNoScratch jump_target, rarg1RegP ex_oop) %{
14541   match(TailJump jump_target ex_oop);
14542   ins_cost(CALL_COST);
14543 
14544   format %{ "LD      R4_ARG2 = LR\n\t"
14545             "MTCTR   $jump_target\n\t"
14546             "BCTR     \t// TailJump, exception oop: $ex_oop" %}
14547   size(12);
14548   ins_encode %{
14549     __ ld(R4_ARG2/* issuing pc */, _abi0(lr), R1_SP);
14550     __ mtctr($jump_target$$Register);
14551     __ bctr();
14552   %}
14553   ins_pipe(pipe_class_call);
14554 %}
14555 
14556 // Create exception oop: created by stack-crawling runtime code.
14557 // Created exception is now available to this handler, and is setup
14558 // just prior to jumping to this handler. No code emitted.
14559 instruct CreateException(rarg1RegP ex_oop) %{
14560   match(Set ex_oop (CreateEx));
14561   ins_cost(0);
14562 
14563   format %{ " -- \t// exception oop; no code emitted" %}
14564   size(0);
14565   ins_encode( /*empty*/ );
14566   ins_pipe(pipe_class_default);
14567 %}
14568 
14569 // Rethrow exception: The exception oop will come in the first
14570 // argument position. Then JUMP (not call) to the rethrow stub code.
14571 instruct RethrowException() %{
14572   match(Rethrow);
14573   ins_cost(CALL_COST);
14574 
14575   format %{ "Jmp     rethrow_stub" %}
14576   ins_encode %{
14577     cbuf.set_insts_mark();
14578     __ b64_patchable((address)OptoRuntime::rethrow_stub(), relocInfo::runtime_call_type);
14579   %}
14580   ins_pipe(pipe_class_call);
14581 %}
14582 
14583 // Die now.
14584 instruct ShouldNotReachHere() %{
14585   match(Halt);
14586   ins_cost(CALL_COST);
14587 
14588   format %{ "ShouldNotReachHere" %}
14589   ins_encode %{
14590     if (is_reachable()) {
14591       __ stop(_halt_reason);
14592     }
14593   %}
14594   ins_pipe(pipe_class_default);
14595 %}
14596 
14597 // This name is KNOWN by the ADLC and cannot be changed.  The ADLC
14598 // forces a 'TypeRawPtr::BOTTOM' output type for this guy.
14599 // Get a DEF on threadRegP, no costs, no encoding, use
14600 // 'ins_should_rematerialize(true)' to avoid spilling.
14601 instruct tlsLoadP(threadRegP dst) %{
14602   match(Set dst (ThreadLocal));
14603   ins_cost(0);
14604 
14605   ins_should_rematerialize(true);
14606 
14607   format %{ " -- \t// $dst=Thread::current(), empty" %}
14608   size(0);
14609   ins_encode( /*empty*/ );
14610   ins_pipe(pipe_class_empty);
14611 %}
14612 
14613 //---Some PPC specific nodes---------------------------------------------------
14614 
14615 // Stop a group.
14616 instruct endGroup() %{
14617   ins_cost(0);
14618 
14619   ins_is_nop(true);
14620 
14621   format %{ "End Bundle (ori r1, r1, 0)" %}
14622   size(4);
14623   ins_encode %{
14624     __ endgroup();
14625   %}
14626   ins_pipe(pipe_class_default);
14627 %}
14628 
14629 // Nop instructions
14630 
14631 instruct fxNop() %{
14632   ins_cost(0);
14633 
14634   ins_is_nop(true);
14635 
14636   format %{ "fxNop" %}
14637   size(4);
14638   ins_encode %{
14639     __ nop();
14640   %}
14641   ins_pipe(pipe_class_default);
14642 %}
14643 
14644 instruct fpNop0() %{
14645   ins_cost(0);
14646 
14647   ins_is_nop(true);
14648 
14649   format %{ "fpNop0" %}
14650   size(4);
14651   ins_encode %{
14652     __ fpnop0();
14653   %}
14654   ins_pipe(pipe_class_default);
14655 %}
14656 
14657 instruct fpNop1() %{
14658   ins_cost(0);
14659 
14660   ins_is_nop(true);
14661 
14662   format %{ "fpNop1" %}
14663   size(4);
14664   ins_encode %{
14665     __ fpnop1();
14666   %}
14667   ins_pipe(pipe_class_default);
14668 %}
14669 
14670 instruct brNop0() %{
14671   ins_cost(0);
14672   size(4);
14673   format %{ "brNop0" %}
14674   ins_encode %{
14675     __ brnop0();
14676   %}
14677   ins_is_nop(true);
14678   ins_pipe(pipe_class_default);
14679 %}
14680 
14681 instruct brNop1() %{
14682   ins_cost(0);
14683 
14684   ins_is_nop(true);
14685 
14686   format %{ "brNop1" %}
14687   size(4);
14688   ins_encode %{
14689     __ brnop1();
14690   %}
14691   ins_pipe(pipe_class_default);
14692 %}
14693 
14694 instruct brNop2() %{
14695   ins_cost(0);
14696 
14697   ins_is_nop(true);
14698 
14699   format %{ "brNop2" %}
14700   size(4);
14701   ins_encode %{
14702     __ brnop2();
14703   %}
14704   ins_pipe(pipe_class_default);
14705 %}
14706 
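// Cache writeback nodes. These implement the CacheWB / CacheWBPreSync /
// CacheWBPostSync ideal nodes, e.g. as used by the cache-writeback
// intrinsics for flushing writes to memory-mapped persistent memory.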
14707 instruct cacheWB(indirect addr)
14708 %{
14709   match(CacheWB addr);
14710 
14711   ins_cost(100);
14712   format %{ "cache writeback, address = $addr" %}
14713   ins_encode %{
14714     assert($addr->index_position() < 0, "should be");
14715     assert($addr$$disp == 0, "should be");
14716     __ cache_wb(Address($addr$$base$$Register));
14717   %}
14718   ins_pipe(pipe_class_default);
14719 %}
14720 
14721 instruct cacheWBPreSync()
14722 %{
14723   match(CacheWBPreSync);
14724 
14725   ins_cost(0);
14726   format %{ "cache writeback presync" %}
14727   ins_encode %{
14728     __ cache_wbsync(true);
14729   %}
14730   ins_pipe(pipe_class_default);
14731 %}
14732 
14733 instruct cacheWBPostSync()
14734 %{
14735   match(CacheWBPostSync);
14736 
14737   ins_cost(100);
14738   format %{ "cache writeback postsync" %}
14739   ins_encode %{
14740     __ cache_wbsync(false);
14741   %}
14742   ins_pipe(pipe_class_default);
14743 %}
14744 
14745 //----------PEEPHOLE RULES-----------------------------------------------------
14746 // These must follow all instruction definitions as they use the names
14747 // defined in the instruction definitions.
14748 //
14749 // peepmatch ( root_instr_name [preceding_instruction]* );
14750 //
14751 // peepconstraint %{
14752 // (instruction_number.operand_name relational_op instruction_number.operand_name
14753 //  [, ...] );
14754 // // instruction numbers are zero-based using left to right order in peepmatch
14755 //
14756 // peepreplace ( instr_name ( [instruction_number.operand_name]* ) );
14757 // // provide an instruction_number.operand_name for each operand that appears
14758 // // in the replacement instruction's match rule
14759 //
14760 // ---------VM FLAGS---------------------------------------------------------
14761 //
14762 // All peephole optimizations can be turned off using -XX:-OptoPeephole
14763 //
14764 // Each peephole rule is given an identifying number starting with zero and
14765 // increasing by one in the order seen by the parser. An individual peephole
14766 // can be enabled, and all others disabled, by using -XX:OptoPeepholeAt=#
14767 // on the command-line.
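//
// For example (assuming a build of the VM in which these flags are available):
//
//   java -XX:-OptoPeephole ...        (disable all peephole rules)
//   java -XX:OptoPeepholeAt=2 ...     (enable only peephole rule number 2)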
14768 //
14769 // ---------CURRENT LIMITATIONS----------------------------------------------
14770 //
14771 // Only match adjacent instructions in same basic block
14772 // Only equality constraints
14773 // Only constraints between operands, not (0.dest_reg == EAX_enc)
14774 // Only one replacement instruction
14775 //
14776 // ---------EXAMPLE----------------------------------------------------------
14777 //
14778 // // pertinent parts of existing instructions in architecture description
14779 // instruct movI(eRegI dst, eRegI src) %{
14780 //   match(Set dst (CopyI src));
14781 // %}
14782 //
14783 // instruct incI_eReg(eRegI dst, immI1 src, eFlagsReg cr) %{
14784 //   match(Set dst (AddI dst src));
14785 //   effect(KILL cr);
14786 // %}
14787 //
14788 // // Change (inc mov) to lea
14789 // peephole %{
14790 //   // increment preceded by register-register move
14791 //   peepmatch ( incI_eReg movI );
14792 //   // require that the destination register of the increment
14793 //   // match the destination register of the move
14794 //   peepconstraint ( 0.dst == 1.dst );
14795 //   // construct a replacement instruction that sets
14796 //   // the destination to ( move's source register + one )
14797 //   peepreplace ( leaI_eReg_immI( 0.dst 1.src 0.src ) );
14798 // %}
14799 //
14800 // Implementation no longer uses movX instructions since
14801 // machine-independent system no longer uses CopyX nodes.
14802 //
14803 // peephole %{
14804 //   peepmatch ( incI_eReg movI );
14805 //   peepconstraint ( 0.dst == 1.dst );
14806 //   peepreplace ( leaI_eReg_immI( 0.dst 1.src 0.src ) );
14807 // %}
14808 //
14809 // peephole %{
14810 //   peepmatch ( decI_eReg movI );
14811 //   peepconstraint ( 0.dst == 1.dst );
14812 //   peepreplace ( leaI_eReg_immI( 0.dst 1.src 0.src ) );
14813 // %}
14814 //
14815 // peephole %{
14816 //   peepmatch ( addI_eReg_imm movI );
14817 //   peepconstraint ( 0.dst == 1.dst );
14818 //   peepreplace ( leaI_eReg_immI( 0.dst 1.src 0.src ) );
14819 // %}
14820 //
14821 // peephole %{
14822 //   peepmatch ( addP_eReg_imm movP );
14823 //   peepconstraint ( 0.dst == 1.dst );
14824 //   peepreplace ( leaP_eReg_immI( 0.dst 1.src 0.src ) );
14825 // %}
14826 
14827 // // Change load of spilled value to only a spill
14828 // instruct storeI(memory mem, eRegI src) %{
14829 //   match(Set mem (StoreI mem src));
14830 // %}
14831 //
14832 // instruct loadI(eRegI dst, memory mem) %{
14833 //   match(Set dst (LoadI mem));
14834 // %}
14835 //
14836 peephole %{
14837   peepmatch ( loadI storeI );
14838   peepconstraint ( 1.src == 0.dst, 1.mem == 0.mem );
14839   peepreplace ( storeI( 1.mem 1.mem 1.src ) );
14840 %}
14841 
14842 peephole %{
14843   peepmatch ( loadL storeL );
14844   peepconstraint ( 1.src == 0.dst, 1.mem == 0.mem );
14845   peepreplace ( storeL( 1.mem 1.mem 1.src ) );
14846 %}
14847 
14848 peephole %{
14849   peepmatch ( loadP storeP );
14850   peepconstraint ( 1.src == 0.dst, 1.mem == 0.mem );
14851   peepreplace ( storeP( 1.mem 1.mem 1.src ) );
14852 %}
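
// A similar load-after-store rule could in principle be written for other
// access kinds. The sketch below is illustrative only and therefore left
// commented out; it assumes compressed-oop instructions named loadN and
// storeN whose operands (dst, mem, src) mirror the integer forms above.
//
// peephole %{
//   peepmatch ( loadN storeN );
//   peepconstraint ( 1.src == 0.dst, 1.mem == 0.mem );
//   peepreplace ( storeN( 1.mem 1.mem 1.src ) );
// %}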
14853 
14854 //----------SMARTSPILL RULES---------------------------------------------------
14855 // These must follow all instruction definitions as they use the names
14856 // defined in the instruction definitions.