1 //
    2 // Copyright (c) 2011, 2026, Oracle and/or its affiliates. All rights reserved.
    3 // Copyright (c) 2012, 2026 SAP SE. All rights reserved.
    4 // DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
    5 //
    6 // This code is free software; you can redistribute it and/or modify it
    7 // under the terms of the GNU General Public License version 2 only, as
    8 // published by the Free Software Foundation.
    9 //
   10 // This code is distributed in the hope that it will be useful, but WITHOUT
   11 // ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
   12 // FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
   13 // version 2 for more details (a copy is included in the LICENSE file that
   14 // accompanied this code).
   15 //
   16 // You should have received a copy of the GNU General Public License version
   17 // 2 along with this work; if not, write to the Free Software Foundation,
   18 // Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
   19 //
   20 // Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
   21 // or visit www.oracle.com if you need additional information or have any
   22 // questions.
   23 //
   24 //
   25 
   26 //
   27 // PPC64 Architecture Description File
   28 //
   29 
   30 //----------REGISTER DEFINITION BLOCK------------------------------------------
   31 // This information is used by the matcher and the register allocator to
   32 // describe individual registers and classes of registers within the target
   33 // architecture.
   34 register %{
   35 //----------Architecture Description Register Definitions----------------------
   36 // General Registers
   37 // "reg_def"  name (register save type, C convention save type,
   38 //                  ideal register type, encoding);
   39 //
   40 // Register Save Types:
   41 //
   42 //   NS  = No-Save:     The register allocator assumes that these registers
   43 //                      can be used without saving upon entry to the method, &
   44 //                      that they do not need to be saved at call sites.
   45 //
   46 //   SOC = Save-On-Call: The register allocator assumes that these registers
   47 //                      can be used without saving upon entry to the method,
   48 //                      but that they must be saved at call sites.
   49 //                      These are called "volatiles" on ppc.
   50 //
   51 //   SOE = Save-On-Entry: The register allocator assumes that these registers
   52 //                      must be saved before using them upon entry to the
   53 //                      method, but they do not need to be saved at call
   54 //                      sites.
   55 //                      These are called "nonvolatiles" on ppc.
   56 //
   57 //   AS  = Always-Save:   The register allocator assumes that these registers
   58 //                      must be saved before using them upon entry to the
   59 //                      method, & that they must be saved at call sites.
   60 //
   61 // Ideal Register Type is used to determine how to save & restore a
   62 // register. Op_RegI will get spilled with LoadI/StoreI, Op_RegP will get
   63 // spilled with LoadP/StoreP. If the register supports both, use Op_RegI.
   64 //
   65 // The encoding number is the actual bit-pattern placed into the opcodes.
   66 //
   67 // PPC64 register definitions, based on the 64-bit PowerPC ELF ABI
   68 // Supplement Version 1.7 as of 2003-10-29.
   69 //
   70 // For each 64-bit register we must define two registers: the register
   71 // itself, e.g. R3, and a corresponding virtual other (32-bit-)'half',
   72 // e.g. R3_H, which is needed by the allocator, but is not used
   73 // for stores, loads, etc.
   74 
   75 // ----------------------------
   76 // Integer/Long Registers
   77 // ----------------------------
   78 
   79   // PPC64 has 32 64-bit integer registers.
   80   // Each GPR has two reg_defs: Rn (real encoding) and Rn_H, its virtual half (encoding 99, next VMReg slot).
   81   // types: v = volatile, nv = non-volatile, s = system
   82   reg_def R0   ( SOC, SOC, Op_RegI,  0, R0->as_VMReg()         );  // v   used in prologs
   83   reg_def R0_H ( SOC, SOC, Op_RegI, 99, R0->as_VMReg()->next() );
   84   reg_def R1   ( NS,  NS,  Op_RegI,  1, R1->as_VMReg()         );  // s   SP
   85   reg_def R1_H ( NS,  NS,  Op_RegI, 99, R1->as_VMReg()->next() );
   86   reg_def R2   ( SOC, SOC, Op_RegI,  2, R2->as_VMReg()         );  // v   TOC
   87   reg_def R2_H ( SOC, SOC, Op_RegI, 99, R2->as_VMReg()->next() );
   88   reg_def R3   ( SOC, SOC, Op_RegI,  3, R3->as_VMReg()         );  // v   iarg1 & iret
   89   reg_def R3_H ( SOC, SOC, Op_RegI, 99, R3->as_VMReg()->next() );
   90   reg_def R4   ( SOC, SOC, Op_RegI,  4, R4->as_VMReg()         );  // v   iarg2
   91   reg_def R4_H ( SOC, SOC, Op_RegI, 99, R4->as_VMReg()->next() );
   92   reg_def R5   ( SOC, SOC, Op_RegI,  5, R5->as_VMReg()         );  // v   iarg3
   93   reg_def R5_H ( SOC, SOC, Op_RegI, 99, R5->as_VMReg()->next() );
   94   reg_def R6   ( SOC, SOC, Op_RegI,  6, R6->as_VMReg()         );  // v   iarg4
   95   reg_def R6_H ( SOC, SOC, Op_RegI, 99, R6->as_VMReg()->next() );
   96   reg_def R7   ( SOC, SOC, Op_RegI,  7, R7->as_VMReg()         );  // v   iarg5
   97   reg_def R7_H ( SOC, SOC, Op_RegI, 99, R7->as_VMReg()->next() );
   98   reg_def R8   ( SOC, SOC, Op_RegI,  8, R8->as_VMReg()         );  // v   iarg6
   99   reg_def R8_H ( SOC, SOC, Op_RegI, 99, R8->as_VMReg()->next() );
  100   reg_def R9   ( SOC, SOC, Op_RegI,  9, R9->as_VMReg()         );  // v   iarg7
  101   reg_def R9_H ( SOC, SOC, Op_RegI, 99, R9->as_VMReg()->next() );
  102   reg_def R10  ( SOC, SOC, Op_RegI, 10, R10->as_VMReg()        );  // v   iarg8
  103   reg_def R10_H( SOC, SOC, Op_RegI, 99, R10->as_VMReg()->next());
  104   reg_def R11  ( SOC, SOC, Op_RegI, 11, R11->as_VMReg()        );  // v   ENV / scratch
  105   reg_def R11_H( SOC, SOC, Op_RegI, 99, R11->as_VMReg()->next());
  106   reg_def R12  ( SOC, SOC, Op_RegI, 12, R12->as_VMReg()        );  // v   scratch
  107   reg_def R12_H( SOC, SOC, Op_RegI, 99, R12->as_VMReg()->next());
  108   reg_def R13  ( NS,  NS,  Op_RegI, 13, R13->as_VMReg()        );  // s   system thread id
  109   reg_def R13_H( NS,  NS,  Op_RegI, 99, R13->as_VMReg()->next());
  110   reg_def R14  ( SOC, SOE, Op_RegI, 14, R14->as_VMReg()        );  // nv
  111   reg_def R14_H( SOC, SOE, Op_RegI, 99, R14->as_VMReg()->next());
  112   reg_def R15  ( SOC, SOE, Op_RegI, 15, R15->as_VMReg()        );  // nv
  113   reg_def R15_H( SOC, SOE, Op_RegI, 99, R15->as_VMReg()->next());
  114   reg_def R16  ( SOC, SOE, Op_RegI, 16, R16->as_VMReg()        );  // nv  R16_thread (listed as not allocatable in chunk0)
  115   reg_def R16_H( SOC, SOE, Op_RegI, 99, R16->as_VMReg()->next());
  116   reg_def R17  ( SOC, SOE, Op_RegI, 17, R17->as_VMReg()        );  // nv
  117   reg_def R17_H( SOC, SOE, Op_RegI, 99, R17->as_VMReg()->next());
  118   reg_def R18  ( SOC, SOE, Op_RegI, 18, R18->as_VMReg()        );  // nv
  119   reg_def R18_H( SOC, SOE, Op_RegI, 99, R18->as_VMReg()->next());
  120   reg_def R19  ( SOC, SOE, Op_RegI, 19, R19->as_VMReg()        );  // nv
  121   reg_def R19_H( SOC, SOE, Op_RegI, 99, R19->as_VMReg()->next());
  122   reg_def R20  ( SOC, SOE, Op_RegI, 20, R20->as_VMReg()        );  // nv
  123   reg_def R20_H( SOC, SOE, Op_RegI, 99, R20->as_VMReg()->next());
  124   reg_def R21  ( SOC, SOE, Op_RegI, 21, R21->as_VMReg()        );  // nv
  125   reg_def R21_H( SOC, SOE, Op_RegI, 99, R21->as_VMReg()->next());
  126   reg_def R22  ( SOC, SOE, Op_RegI, 22, R22->as_VMReg()        );  // nv
  127   reg_def R22_H( SOC, SOE, Op_RegI, 99, R22->as_VMReg()->next());
  128   reg_def R23  ( SOC, SOE, Op_RegI, 23, R23->as_VMReg()        );  // nv
  129   reg_def R23_H( SOC, SOE, Op_RegI, 99, R23->as_VMReg()->next());
  130   reg_def R24  ( SOC, SOE, Op_RegI, 24, R24->as_VMReg()        );  // nv
  131   reg_def R24_H( SOC, SOE, Op_RegI, 99, R24->as_VMReg()->next());
  132   reg_def R25  ( SOC, SOE, Op_RegI, 25, R25->as_VMReg()        );  // nv
  133   reg_def R25_H( SOC, SOE, Op_RegI, 99, R25->as_VMReg()->next());
  134   reg_def R26  ( SOC, SOE, Op_RegI, 26, R26->as_VMReg()        );  // nv
  135   reg_def R26_H( SOC, SOE, Op_RegI, 99, R26->as_VMReg()->next());
  136   reg_def R27  ( SOC, SOE, Op_RegI, 27, R27->as_VMReg()        );  // nv
  137   reg_def R27_H( SOC, SOE, Op_RegI, 99, R27->as_VMReg()->next());
  138   reg_def R28  ( SOC, SOE, Op_RegI, 28, R28->as_VMReg()        );  // nv
  139   reg_def R28_H( SOC, SOE, Op_RegI, 99, R28->as_VMReg()->next());
  140   reg_def R29  ( SOC, SOE, Op_RegI, 29, R29->as_VMReg()        );  // nv  global TOC (left out of the bits32 classes)
  141   reg_def R29_H( SOC, SOE, Op_RegI, 99, R29->as_VMReg()->next());
  142   reg_def R30  ( SOC, SOE, Op_RegI, 30, R30->as_VMReg()        );  // nv
  143   reg_def R30_H( SOC, SOE, Op_RegI, 99, R30->as_VMReg()->next());
  144   reg_def R31  ( SOC, SOE, Op_RegI, 31, R31->as_VMReg()        );  // nv
  145   reg_def R31_H( SOC, SOE, Op_RegI, 99, R31->as_VMReg()->next());
  146 
  147 
  148 // ----------------------------
  149 // Float/Double Registers
  150 // ----------------------------
  151 
  152   // Double Registers
  153   // The rules of ADL require that double registers be defined in pairs.
  154   // Each pair must be two 32-bit values, but not necessarily a pair of
  155   // single float registers. In each pair, ADLC-assigned register numbers
  156   // must be adjacent, with the lower number even. Finally, when the
  157   // CPU stores such a register pair to memory, the word associated with
  158   // the lower ADLC-assigned number must be stored to the lower address.
  159 
  160   // PPC64 has 32 64-bit floating-point registers. Each can store a single
  161   // or double precision floating-point value.
  162   // Each FPR has two reg_defs: Fn (real encoding) and Fn_H, its second 32-bit half (encoding 99, next VMReg slot).
  163   // types: v = volatile, nv = non-volatile, s = system
  164   reg_def F0   ( SOC, SOC, Op_RegF,  0, F0->as_VMReg()         );  // v   scratch
  165   reg_def F0_H ( SOC, SOC, Op_RegF, 99, F0->as_VMReg()->next() );
  166   reg_def F1   ( SOC, SOC, Op_RegF,  1, F1->as_VMReg()         );  // v   farg1 & fret
  167   reg_def F1_H ( SOC, SOC, Op_RegF, 99, F1->as_VMReg()->next() );
  168   reg_def F2   ( SOC, SOC, Op_RegF,  2, F2->as_VMReg()         );  // v   farg2
  169   reg_def F2_H ( SOC, SOC, Op_RegF, 99, F2->as_VMReg()->next() );
  170   reg_def F3   ( SOC, SOC, Op_RegF,  3, F3->as_VMReg()         );  // v   farg3
  171   reg_def F3_H ( SOC, SOC, Op_RegF, 99, F3->as_VMReg()->next() );
  172   reg_def F4   ( SOC, SOC, Op_RegF,  4, F4->as_VMReg()         );  // v   farg4
  173   reg_def F4_H ( SOC, SOC, Op_RegF, 99, F4->as_VMReg()->next() );
  174   reg_def F5   ( SOC, SOC, Op_RegF,  5, F5->as_VMReg()         );  // v   farg5
  175   reg_def F5_H ( SOC, SOC, Op_RegF, 99, F5->as_VMReg()->next() );
  176   reg_def F6   ( SOC, SOC, Op_RegF,  6, F6->as_VMReg()         );  // v   farg6
  177   reg_def F6_H ( SOC, SOC, Op_RegF, 99, F6->as_VMReg()->next() );
  178   reg_def F7   ( SOC, SOC, Op_RegF,  7, F7->as_VMReg()         );  // v   farg7
  179   reg_def F7_H ( SOC, SOC, Op_RegF, 99, F7->as_VMReg()->next() );
  180   reg_def F8   ( SOC, SOC, Op_RegF,  8, F8->as_VMReg()         );  // v   farg8
  181   reg_def F8_H ( SOC, SOC, Op_RegF, 99, F8->as_VMReg()->next() );
  182   reg_def F9   ( SOC, SOC, Op_RegF,  9, F9->as_VMReg()         );  // v   farg9
  183   reg_def F9_H ( SOC, SOC, Op_RegF, 99, F9->as_VMReg()->next() );
  184   reg_def F10  ( SOC, SOC, Op_RegF, 10, F10->as_VMReg()        );  // v   farg10
  185   reg_def F10_H( SOC, SOC, Op_RegF, 99, F10->as_VMReg()->next());
  186   reg_def F11  ( SOC, SOC, Op_RegF, 11, F11->as_VMReg()        );  // v   farg11
  187   reg_def F11_H( SOC, SOC, Op_RegF, 99, F11->as_VMReg()->next());
  188   reg_def F12  ( SOC, SOC, Op_RegF, 12, F12->as_VMReg()        );  // v   farg12
  189   reg_def F12_H( SOC, SOC, Op_RegF, 99, F12->as_VMReg()->next());
  190   reg_def F13  ( SOC, SOC, Op_RegF, 13, F13->as_VMReg()        );  // v   farg13
  191   reg_def F13_H( SOC, SOC, Op_RegF, 99, F13->as_VMReg()->next());
  192   reg_def F14  ( SOC, SOE, Op_RegF, 14, F14->as_VMReg()        );  // nv
  193   reg_def F14_H( SOC, SOE, Op_RegF, 99, F14->as_VMReg()->next());
  194   reg_def F15  ( SOC, SOE, Op_RegF, 15, F15->as_VMReg()        );  // nv
  195   reg_def F15_H( SOC, SOE, Op_RegF, 99, F15->as_VMReg()->next());
  196   reg_def F16  ( SOC, SOE, Op_RegF, 16, F16->as_VMReg()        );  // nv
  197   reg_def F16_H( SOC, SOE, Op_RegF, 99, F16->as_VMReg()->next());
  198   reg_def F17  ( SOC, SOE, Op_RegF, 17, F17->as_VMReg()        );  // nv
  199   reg_def F17_H( SOC, SOE, Op_RegF, 99, F17->as_VMReg()->next());
  200   reg_def F18  ( SOC, SOE, Op_RegF, 18, F18->as_VMReg()        );  // nv
  201   reg_def F18_H( SOC, SOE, Op_RegF, 99, F18->as_VMReg()->next());
  202   reg_def F19  ( SOC, SOE, Op_RegF, 19, F19->as_VMReg()        );  // nv
  203   reg_def F19_H( SOC, SOE, Op_RegF, 99, F19->as_VMReg()->next());
  204   reg_def F20  ( SOC, SOE, Op_RegF, 20, F20->as_VMReg()        );  // nv
  205   reg_def F20_H( SOC, SOE, Op_RegF, 99, F20->as_VMReg()->next());
  206   reg_def F21  ( SOC, SOE, Op_RegF, 21, F21->as_VMReg()        );  // nv
  207   reg_def F21_H( SOC, SOE, Op_RegF, 99, F21->as_VMReg()->next());
  208   reg_def F22  ( SOC, SOE, Op_RegF, 22, F22->as_VMReg()        );  // nv
  209   reg_def F22_H( SOC, SOE, Op_RegF, 99, F22->as_VMReg()->next());
  210   reg_def F23  ( SOC, SOE, Op_RegF, 23, F23->as_VMReg()        );  // nv
  211   reg_def F23_H( SOC, SOE, Op_RegF, 99, F23->as_VMReg()->next());
  212   reg_def F24  ( SOC, SOE, Op_RegF, 24, F24->as_VMReg()        );  // nv
  213   reg_def F24_H( SOC, SOE, Op_RegF, 99, F24->as_VMReg()->next());
  214   reg_def F25  ( SOC, SOE, Op_RegF, 25, F25->as_VMReg()        );  // nv
  215   reg_def F25_H( SOC, SOE, Op_RegF, 99, F25->as_VMReg()->next());
  216   reg_def F26  ( SOC, SOE, Op_RegF, 26, F26->as_VMReg()        );  // nv
  217   reg_def F26_H( SOC, SOE, Op_RegF, 99, F26->as_VMReg()->next());
  218   reg_def F27  ( SOC, SOE, Op_RegF, 27, F27->as_VMReg()        );  // nv
  219   reg_def F27_H( SOC, SOE, Op_RegF, 99, F27->as_VMReg()->next());
  220   reg_def F28  ( SOC, SOE, Op_RegF, 28, F28->as_VMReg()        );  // nv
  221   reg_def F28_H( SOC, SOE, Op_RegF, 99, F28->as_VMReg()->next());
  222   reg_def F29  ( SOC, SOE, Op_RegF, 29, F29->as_VMReg()        );  // nv
  223   reg_def F29_H( SOC, SOE, Op_RegF, 99, F29->as_VMReg()->next());
  224   reg_def F30  ( SOC, SOE, Op_RegF, 30, F30->as_VMReg()        );  // nv
  225   reg_def F30_H( SOC, SOE, Op_RegF, 99, F30->as_VMReg()->next());
  226   reg_def F31  ( SOC, SOE, Op_RegF, 31, F31->as_VMReg()        );  // nv
  227   reg_def F31_H( SOC, SOE, Op_RegF, 99, F31->as_VMReg()->next());
  228 
  229 // ----------------------------
  230 // Special Registers
  231 // ----------------------------
  232 
  233 // Condition Codes Flag Registers
  234 
  235   // PPC64 has 8 condition code "registers" which are all contained
  236   // in the CR register.
  237   // NOTE(review): CR2-CR4 are tagged nv below, yet all eight are declared SOC/SOC; confirm this is intended.
  238   // types: v = volatile, nv = non-volatile, s = system
  239   reg_def CR0(SOC, SOC, Op_RegFlags, 0, CR0->as_VMReg());  // v
  240   reg_def CR1(SOC, SOC, Op_RegFlags, 1, CR1->as_VMReg());  // v
  241   reg_def CR2(SOC, SOC, Op_RegFlags, 2, CR2->as_VMReg());  // nv
  242   reg_def CR3(SOC, SOC, Op_RegFlags, 3, CR3->as_VMReg());  // nv
  243   reg_def CR4(SOC, SOC, Op_RegFlags, 4, CR4->as_VMReg());  // nv
  244   reg_def CR5(SOC, SOC, Op_RegFlags, 5, CR5->as_VMReg());  // v
  245   reg_def CR6(SOC, SOC, Op_RegFlags, 6, CR6->as_VMReg());  // v
  246   reg_def CR7(SOC, SOC, Op_RegFlags, 7, CR7->as_VMReg());  // v
  247 
  248   // Special registers of PPC64
  249   // All six are declared SOC/SOC with ideal type Op_RegP and encodings 0-5; none has a virtual _H half.
  250   reg_def SR_XER(    SOC, SOC, Op_RegP, 0, SR_XER->as_VMReg());     // v
  251   reg_def SR_LR(     SOC, SOC, Op_RegP, 1, SR_LR->as_VMReg());      // v
  252   reg_def SR_CTR(    SOC, SOC, Op_RegP, 2, SR_CTR->as_VMReg());     // v
  253   reg_def SR_VRSAVE( SOC, SOC, Op_RegP, 3, SR_VRSAVE->as_VMReg());  // v
  254   reg_def SR_SPEFSCR(SOC, SOC, Op_RegP, 4, SR_SPEFSCR->as_VMReg()); // v
  255   reg_def SR_PPR(    SOC, SOC, Op_RegP, 5, SR_PPR->as_VMReg());     // v
  256 
  257 // ----------------------------
  258 // Vector Registers
  259 // ----------------------------
  260   // Four reg_defs per vector register (VRn, _H, _J, _K) sharing one encoding; VR20-VR31 use C save type SOE.
  261   reg_def VR0  (SOC, SOC, Op_RegF, 0, VR0->as_VMReg()         );
  262   reg_def VR0_H(SOC, SOC, Op_RegF, 0, VR0->as_VMReg()->next() );
  263   reg_def VR0_J(SOC, SOC, Op_RegF, 0, VR0->as_VMReg()->next(2));
  264   reg_def VR0_K(SOC, SOC, Op_RegF, 0, VR0->as_VMReg()->next(3));
  265 
  266   reg_def VR1  (SOC, SOC, Op_RegF, 1, VR1->as_VMReg()         );
  267   reg_def VR1_H(SOC, SOC, Op_RegF, 1, VR1->as_VMReg()->next() );
  268   reg_def VR1_J(SOC, SOC, Op_RegF, 1, VR1->as_VMReg()->next(2));
  269   reg_def VR1_K(SOC, SOC, Op_RegF, 1, VR1->as_VMReg()->next(3));
  270 
  271   reg_def VR2  (SOC, SOC, Op_RegF, 2, VR2->as_VMReg()         );
  272   reg_def VR2_H(SOC, SOC, Op_RegF, 2, VR2->as_VMReg()->next() );
  273   reg_def VR2_J(SOC, SOC, Op_RegF, 2, VR2->as_VMReg()->next(2));
  274   reg_def VR2_K(SOC, SOC, Op_RegF, 2, VR2->as_VMReg()->next(3));
  275 
  276   reg_def VR3  (SOC, SOC, Op_RegF, 3, VR3->as_VMReg()         );
  277   reg_def VR3_H(SOC, SOC, Op_RegF, 3, VR3->as_VMReg()->next() );
  278   reg_def VR3_J(SOC, SOC, Op_RegF, 3, VR3->as_VMReg()->next(2));
  279   reg_def VR3_K(SOC, SOC, Op_RegF, 3, VR3->as_VMReg()->next(3));
  280 
  281   reg_def VR4  (SOC, SOC, Op_RegF, 4, VR4->as_VMReg()         );
  282   reg_def VR4_H(SOC, SOC, Op_RegF, 4, VR4->as_VMReg()->next() );
  283   reg_def VR4_J(SOC, SOC, Op_RegF, 4, VR4->as_VMReg()->next(2));
  284   reg_def VR4_K(SOC, SOC, Op_RegF, 4, VR4->as_VMReg()->next(3));
  285 
  286   reg_def VR5  (SOC, SOC, Op_RegF, 5, VR5->as_VMReg()         );
  287   reg_def VR5_H(SOC, SOC, Op_RegF, 5, VR5->as_VMReg()->next() );
  288   reg_def VR5_J(SOC, SOC, Op_RegF, 5, VR5->as_VMReg()->next(2));
  289   reg_def VR5_K(SOC, SOC, Op_RegF, 5, VR5->as_VMReg()->next(3));
  290 
  291   reg_def VR6  (SOC, SOC, Op_RegF, 6, VR6->as_VMReg()         );
  292   reg_def VR6_H(SOC, SOC, Op_RegF, 6, VR6->as_VMReg()->next() );
  293   reg_def VR6_J(SOC, SOC, Op_RegF, 6, VR6->as_VMReg()->next(2));
  294   reg_def VR6_K(SOC, SOC, Op_RegF, 6, VR6->as_VMReg()->next(3));
  295 
  296   reg_def VR7  (SOC, SOC, Op_RegF, 7, VR7->as_VMReg()         );
  297   reg_def VR7_H(SOC, SOC, Op_RegF, 7, VR7->as_VMReg()->next() );
  298   reg_def VR7_J(SOC, SOC, Op_RegF, 7, VR7->as_VMReg()->next(2));
  299   reg_def VR7_K(SOC, SOC, Op_RegF, 7, VR7->as_VMReg()->next(3));
  300 
  301   reg_def VR8  (SOC, SOC, Op_RegF, 8, VR8->as_VMReg()         );
  302   reg_def VR8_H(SOC, SOC, Op_RegF, 8, VR8->as_VMReg()->next() );
  303   reg_def VR8_J(SOC, SOC, Op_RegF, 8, VR8->as_VMReg()->next(2));
  304   reg_def VR8_K(SOC, SOC, Op_RegF, 8, VR8->as_VMReg()->next(3));
  305 
  306   reg_def VR9  (SOC, SOC, Op_RegF, 9, VR9->as_VMReg()         );
  307   reg_def VR9_H(SOC, SOC, Op_RegF, 9, VR9->as_VMReg()->next() );
  308   reg_def VR9_J(SOC, SOC, Op_RegF, 9, VR9->as_VMReg()->next(2));
  309   reg_def VR9_K(SOC, SOC, Op_RegF, 9, VR9->as_VMReg()->next(3));
  310 
  311   reg_def VR10  (SOC, SOC, Op_RegF, 10, VR10->as_VMReg()         );
  312   reg_def VR10_H(SOC, SOC, Op_RegF, 10, VR10->as_VMReg()->next() );
  313   reg_def VR10_J(SOC, SOC, Op_RegF, 10, VR10->as_VMReg()->next(2));
  314   reg_def VR10_K(SOC, SOC, Op_RegF, 10, VR10->as_VMReg()->next(3));
  315 
  316   reg_def VR11  (SOC, SOC, Op_RegF, 11, VR11->as_VMReg()         );
  317   reg_def VR11_H(SOC, SOC, Op_RegF, 11, VR11->as_VMReg()->next() );
  318   reg_def VR11_J(SOC, SOC, Op_RegF, 11, VR11->as_VMReg()->next(2));
  319   reg_def VR11_K(SOC, SOC, Op_RegF, 11, VR11->as_VMReg()->next(3));
  320 
  321   reg_def VR12  (SOC, SOC, Op_RegF, 12, VR12->as_VMReg()         );
  322   reg_def VR12_H(SOC, SOC, Op_RegF, 12, VR12->as_VMReg()->next() );
  323   reg_def VR12_J(SOC, SOC, Op_RegF, 12, VR12->as_VMReg()->next(2));
  324   reg_def VR12_K(SOC, SOC, Op_RegF, 12, VR12->as_VMReg()->next(3));
  325 
  326   reg_def VR13  (SOC, SOC, Op_RegF, 13, VR13->as_VMReg()         );
  327   reg_def VR13_H(SOC, SOC, Op_RegF, 13, VR13->as_VMReg()->next() );
  328   reg_def VR13_J(SOC, SOC, Op_RegF, 13, VR13->as_VMReg()->next(2));
  329   reg_def VR13_K(SOC, SOC, Op_RegF, 13, VR13->as_VMReg()->next(3));
  330 
  331   reg_def VR14  (SOC, SOC, Op_RegF, 14, VR14->as_VMReg()         );
  332   reg_def VR14_H(SOC, SOC, Op_RegF, 14, VR14->as_VMReg()->next() );
  333   reg_def VR14_J(SOC, SOC, Op_RegF, 14, VR14->as_VMReg()->next(2));
  334   reg_def VR14_K(SOC, SOC, Op_RegF, 14, VR14->as_VMReg()->next(3));
  335 
  336   reg_def VR15  (SOC, SOC, Op_RegF, 15, VR15->as_VMReg()         );
  337   reg_def VR15_H(SOC, SOC, Op_RegF, 15, VR15->as_VMReg()->next() );
  338   reg_def VR15_J(SOC, SOC, Op_RegF, 15, VR15->as_VMReg()->next(2));
  339   reg_def VR15_K(SOC, SOC, Op_RegF, 15, VR15->as_VMReg()->next(3));
  340 
  341   reg_def VR16  (SOC, SOC, Op_RegF, 16, VR16->as_VMReg()         );
  342   reg_def VR16_H(SOC, SOC, Op_RegF, 16, VR16->as_VMReg()->next() );
  343   reg_def VR16_J(SOC, SOC, Op_RegF, 16, VR16->as_VMReg()->next(2));
  344   reg_def VR16_K(SOC, SOC, Op_RegF, 16, VR16->as_VMReg()->next(3));
  345 
  346   reg_def VR17  (SOC, SOC, Op_RegF, 17, VR17->as_VMReg()         );
  347   reg_def VR17_H(SOC, SOC, Op_RegF, 17, VR17->as_VMReg()->next() );
  348   reg_def VR17_J(SOC, SOC, Op_RegF, 17, VR17->as_VMReg()->next(2));
  349   reg_def VR17_K(SOC, SOC, Op_RegF, 17, VR17->as_VMReg()->next(3));
  350 
  351   reg_def VR18  (SOC, SOC, Op_RegF, 18, VR18->as_VMReg()         );
  352   reg_def VR18_H(SOC, SOC, Op_RegF, 18, VR18->as_VMReg()->next() );
  353   reg_def VR18_J(SOC, SOC, Op_RegF, 18, VR18->as_VMReg()->next(2));
  354   reg_def VR18_K(SOC, SOC, Op_RegF, 18, VR18->as_VMReg()->next(3));
  355 
  356   reg_def VR19  (SOC, SOC, Op_RegF, 19, VR19->as_VMReg()         );
  357   reg_def VR19_H(SOC, SOC, Op_RegF, 19, VR19->as_VMReg()->next() );
  358   reg_def VR19_J(SOC, SOC, Op_RegF, 19, VR19->as_VMReg()->next(2));
  359   reg_def VR19_K(SOC, SOC, Op_RegF, 19, VR19->as_VMReg()->next(3));
  360 
  361   reg_def VR20  (SOC, SOE, Op_RegF, 20, VR20->as_VMReg()         );
  362   reg_def VR20_H(SOC, SOE, Op_RegF, 20, VR20->as_VMReg()->next() );
  363   reg_def VR20_J(SOC, SOE, Op_RegF, 20, VR20->as_VMReg()->next(2));
  364   reg_def VR20_K(SOC, SOE, Op_RegF, 20, VR20->as_VMReg()->next(3));
  365 
  366   reg_def VR21  (SOC, SOE, Op_RegF, 21, VR21->as_VMReg()         );
  367   reg_def VR21_H(SOC, SOE, Op_RegF, 21, VR21->as_VMReg()->next() );
  368   reg_def VR21_J(SOC, SOE, Op_RegF, 21, VR21->as_VMReg()->next(2));
  369   reg_def VR21_K(SOC, SOE, Op_RegF, 21, VR21->as_VMReg()->next(3));
  370 
  371   reg_def VR22  (SOC, SOE, Op_RegF, 22, VR22->as_VMReg()         );
  372   reg_def VR22_H(SOC, SOE, Op_RegF, 22, VR22->as_VMReg()->next() );
  373   reg_def VR22_J(SOC, SOE, Op_RegF, 22, VR22->as_VMReg()->next(2));
  374   reg_def VR22_K(SOC, SOE, Op_RegF, 22, VR22->as_VMReg()->next(3));
  375 
  376   reg_def VR23  (SOC, SOE, Op_RegF, 23, VR23->as_VMReg()         );
  377   reg_def VR23_H(SOC, SOE, Op_RegF, 23, VR23->as_VMReg()->next() );
  378   reg_def VR23_J(SOC, SOE, Op_RegF, 23, VR23->as_VMReg()->next(2));
  379   reg_def VR23_K(SOC, SOE, Op_RegF, 23, VR23->as_VMReg()->next(3));
  380 
  381   reg_def VR24  (SOC, SOE, Op_RegF, 24, VR24->as_VMReg()         );
  382   reg_def VR24_H(SOC, SOE, Op_RegF, 24, VR24->as_VMReg()->next() );
  383   reg_def VR24_J(SOC, SOE, Op_RegF, 24, VR24->as_VMReg()->next(2));
  384   reg_def VR24_K(SOC, SOE, Op_RegF, 24, VR24->as_VMReg()->next(3));
  385 
  386   reg_def VR25  (SOC, SOE, Op_RegF, 25, VR25->as_VMReg()         );
  387   reg_def VR25_H(SOC, SOE, Op_RegF, 25, VR25->as_VMReg()->next() );
  388   reg_def VR25_J(SOC, SOE, Op_RegF, 25, VR25->as_VMReg()->next(2));
  389   reg_def VR25_K(SOC, SOE, Op_RegF, 25, VR25->as_VMReg()->next(3));
  390 
  391   reg_def VR26  (SOC, SOE, Op_RegF, 26, VR26->as_VMReg()         );
  392   reg_def VR26_H(SOC, SOE, Op_RegF, 26, VR26->as_VMReg()->next() );
  393   reg_def VR26_J(SOC, SOE, Op_RegF, 26, VR26->as_VMReg()->next(2));
  394   reg_def VR26_K(SOC, SOE, Op_RegF, 26, VR26->as_VMReg()->next(3));
  395 
  396   reg_def VR27  (SOC, SOE, Op_RegF, 27, VR27->as_VMReg()         );
  397   reg_def VR27_H(SOC, SOE, Op_RegF, 27, VR27->as_VMReg()->next() );
  398   reg_def VR27_J(SOC, SOE, Op_RegF, 27, VR27->as_VMReg()->next(2));
  399   reg_def VR27_K(SOC, SOE, Op_RegF, 27, VR27->as_VMReg()->next(3));
  400 
  401   reg_def VR28  (SOC, SOE, Op_RegF, 28, VR28->as_VMReg()         );
  402   reg_def VR28_H(SOC, SOE, Op_RegF, 28, VR28->as_VMReg()->next() );
  403   reg_def VR28_J(SOC, SOE, Op_RegF, 28, VR28->as_VMReg()->next(2));
  404   reg_def VR28_K(SOC, SOE, Op_RegF, 28, VR28->as_VMReg()->next(3));
  405 
  406   reg_def VR29  (SOC, SOE, Op_RegF, 29, VR29->as_VMReg()         );
  407   reg_def VR29_H(SOC, SOE, Op_RegF, 29, VR29->as_VMReg()->next() );
  408   reg_def VR29_J(SOC, SOE, Op_RegF, 29, VR29->as_VMReg()->next(2));
  409   reg_def VR29_K(SOC, SOE, Op_RegF, 29, VR29->as_VMReg()->next(3));
  410 
  411   reg_def VR30  (SOC, SOE, Op_RegF, 30, VR30->as_VMReg()         );
  412   reg_def VR30_H(SOC, SOE, Op_RegF, 30, VR30->as_VMReg()->next() );
  413   reg_def VR30_J(SOC, SOE, Op_RegF, 30, VR30->as_VMReg()->next(2));
  414   reg_def VR30_K(SOC, SOE, Op_RegF, 30, VR30->as_VMReg()->next(3));
  415 
  416   reg_def VR31  (SOC, SOE, Op_RegF, 31, VR31->as_VMReg()         );
  417   reg_def VR31_H(SOC, SOE, Op_RegF, 31, VR31->as_VMReg()->next() );
  418   reg_def VR31_J(SOC, SOE, Op_RegF, 31, VR31->as_VMReg()->next(2));
  419   reg_def VR31_K(SOC, SOE, Op_RegF, 31, VR31->as_VMReg()->next(3));
  420 
  421 // ----------------------------
  422 // Specify priority of register selection within phases of register
  423 // allocation. Highest priority is first. A useful heuristic is to
  424 // give registers a low priority when they are required by machine
  425 // instructions, like EAX and EDX on I486, and choose no-save registers
  426 // before save-on-call, & save-on-call before save-on-entry. Registers
  427 // which participate in fixed calling sequences should come last.
  428 // Registers which are used as pairs must fall on an even boundary.
  429 
  430 // It's worth about 1% on SPEC geomean to get this right.
  431 
  432 // Chunk0, chunk1, and chunk2 form the MachRegisterNumbers enumeration
  433 // in adGlobals_ppc.hpp which defines the <register>_num values, e.g.
  434 // R3_num. Therefore, R3_num may not be (and in reality is not)
  435 // the same as R3->encoding()! Furthermore, we cannot make any
  436 // assumptions on ordering, e.g. R3_num may be less than R2_num.
  437 // Additionally, the function
  438 //   static enum RC rc_class(OptoReg::Name reg )
  439 // maps a given <register>_num value to its chunk type (except for flags)
  440 // and its current implementation relies on chunk0 and chunk1 having a
  441 // size of 64 each.
  442 
  443 // If you change this allocation class, please have a look at the
  444 // default values for the parameters RoundRobinIntegerRegIntervalStart
  445 // and RoundRobinFloatRegIntervalStart
  446 
  447 alloc_class chunk0 (
  448   // Chunk0 contains *all* 64 integer register halves.
  449   // Entries are ordered by allocation priority, highest first.
  450   // "non-volatile" registers (R16 is listed with the special registers below)
  451   R14, R14_H,
  452   R15, R15_H,
  453   R17, R17_H,
  454   R18, R18_H,
  455   R19, R19_H,
  456   R20, R20_H,
  457   R21, R21_H,
  458   R22, R22_H,
  459   R23, R23_H,
  460   R24, R24_H,
  461   R25, R25_H,
  462   R26, R26_H,
  463   R27, R27_H,
  464   R28, R28_H,
  465   R29, R29_H,
  466   R30, R30_H,
  467   R31, R31_H,
  468 
  469   // scratch/special registers
  470   R11, R11_H,
  471   R12, R12_H,
  472 
  473   // argument registers
  474   R10, R10_H,
  475   R9,  R9_H,
  476   R8,  R8_H,
  477   R7,  R7_H,
  478   R6,  R6_H,
  479   R5,  R5_H,
  480   R4,  R4_H,
  481   R3,  R3_H,
  482 
  483   // special registers, not available for allocation
  484   R16, R16_H,     // R16_thread
  485   R13, R13_H,     // system thread id
  486   R2,  R2_H,      // may be used for TOC
  487   R1,  R1_H,      // SP
  488   R0,  R0_H       // R0 (scratch)
  489 );
  490 
  491 // If you change this allocation class, please have a look at the
  492 // default values for the parameters RoundRobinIntegerRegIntervalStart
  493 // and RoundRobinFloatRegIntervalStart
  494 
  495 alloc_class chunk1 (
  496   // Chunk1 contains *all* 64 floating-point register halves.
  497   // Entries are ordered by allocation priority, highest first.
  498   // scratch register
  499   F0,  F0_H,
  500 
  501   // argument registers
  502   F13, F13_H,
  503   F12, F12_H,
  504   F11, F11_H,
  505   F10, F10_H,
  506   F9,  F9_H,
  507   F8,  F8_H,
  508   F7,  F7_H,
  509   F6,  F6_H,
  510   F5,  F5_H,
  511   F4,  F4_H,
  512   F3,  F3_H,
  513   F2,  F2_H,
  514   F1,  F1_H,
  515 
  516   // non-volatile registers
  517   F14, F14_H,
  518   F15, F15_H,
  519   F16, F16_H,
  520   F17, F17_H,
  521   F18, F18_H,
  522   F19, F19_H,
  523   F20, F20_H,
  524   F21, F21_H,
  525   F22, F22_H,
  526   F23, F23_H,
  527   F24, F24_H,
  528   F25, F25_H,
  529   F26, F26_H,
  530   F27, F27_H,
  531   F28, F28_H,
  532   F29, F29_H,
  533   F30, F30_H,
  534   F31, F31_H
  535 );
  536 // Chunk2 contains *all* 128 vector register slots (VR0-VR31, four slots each), in ascending register order.
  537 alloc_class chunk2 (
  538   VR0 , VR0_H , VR0_J , VR0_K ,
  539   VR1 , VR1_H , VR1_J , VR1_K ,
  540   VR2 , VR2_H , VR2_J , VR2_K ,
  541   VR3 , VR3_H , VR3_J , VR3_K ,
  542   VR4 , VR4_H , VR4_J , VR4_K ,
  543   VR5 , VR5_H , VR5_J , VR5_K ,
  544   VR6 , VR6_H , VR6_J , VR6_K ,
  545   VR7 , VR7_H , VR7_J , VR7_K ,
  546   VR8 , VR8_H , VR8_J , VR8_K ,
  547   VR9 , VR9_H , VR9_J , VR9_K ,
  548   VR10, VR10_H, VR10_J, VR10_K,
  549   VR11, VR11_H, VR11_J, VR11_K,
  550   VR12, VR12_H, VR12_J, VR12_K,
  551   VR13, VR13_H, VR13_J, VR13_K,
  552   VR14, VR14_H, VR14_J, VR14_K,
  553   VR15, VR15_H, VR15_J, VR15_K,
  554   VR16, VR16_H, VR16_J, VR16_K,
  555   VR17, VR17_H, VR17_J, VR17_K,
  556   VR18, VR18_H, VR18_J, VR18_K,
  557   VR19, VR19_H, VR19_J, VR19_K,
  558   VR20, VR20_H, VR20_J, VR20_K,
  559   VR21, VR21_H, VR21_J, VR21_K,
  560   VR22, VR22_H, VR22_J, VR22_K,
  561   VR23, VR23_H, VR23_J, VR23_K,
  562   VR24, VR24_H, VR24_J, VR24_K,
  563   VR25, VR25_H, VR25_J, VR25_K,
  564   VR26, VR26_H, VR26_J, VR26_K,
  565   VR27, VR27_H, VR27_J, VR27_K,
  566   VR28, VR28_H, VR28_J, VR28_K,
  567   VR29, VR29_H, VR29_J, VR29_K,
  568   VR30, VR30_H, VR30_J, VR30_K,
  569   VR31, VR31_H, VR31_J, VR31_K
  570 );
  571 
  572 alloc_class chunk3 (
  573   // Chunk3 contains *all* 8 condition code registers.
  574   CR0,
  575   CR1,
  576   CR2,
  577   CR3,
  578   CR4,
  579   CR5,
  580   CR6,
  581   CR7
  582 );
  583 
  584 alloc_class chunk4 (
  585   // Chunk4 contains the six special registers.
  586   // These registers are not allocated, but are used for nodes generated by postalloc expand.
  587   SR_XER,
  588   SR_LR,
  589   SR_CTR,
  590   SR_VRSAVE,
  591   SR_SPEFSCR,
  592   SR_PPR
  593 );
  594 
  595 //-------Architecture Description Register Classes-----------------------
  596 
  597 // Several register classes are automatically defined based upon
  598 // information in this architecture description.
  599 
  600 // 1) reg_class inline_cache_reg           ( as defined in frame section )
  601 // 2) reg_class stack_slots( /* one chunk of stack-based "registers" */ )
  602 //
  603 
  604 // ----------------------------
  605 // 32 Bit Register Classes
  606 // ----------------------------
  607 
  608 // We specify registers twice, once as read/write, and once read-only.
  609 // We use the read-only registers for source operands. With this, we
  610 // can include preset read only registers in this class, as a hard-coded
  611 // '0'-register. (We used to simulate this on ppc.)
  612 
  613 // 32 bit registers that can be read and written i.e. these registers
  614 // can be dest (or src) of normal instructions.
  615 reg_class bits32_reg_rw(
  616 /*R0*/              // R0
  617 /*R1*/              // SP
  618   R2,               // TOC
  619   R3,
  620   R4,
  621   R5,
  622   R6,
  623   R7,
  624   R8,
  625   R9,
  626   R10,
  627   R11,
  628   R12,
  629 /*R13*/             // system thread id
  630   R14,
  631   R15,
  632 /*R16*/             // R16_thread
  633   R17,
  634   R18,
  635   R19,
  636   R20,
  637   R21,
  638   R22,
  639   R23,
  640   R24,
  641   R25,
  642   R26,
  643   R27,
  644   R28,
  645 /*R29,*/             // global TOC
  646   R30,
  647   R31
  648 );
  649 
  650 // 32 bit registers that can only be read i.e. these registers can
  651 // only be src of all instructions.
  652 reg_class bits32_reg_ro(
  653 /*R0*/              // R0
  654 /*R1*/              // SP
  655   R2                // TOC
  656   R3,
  657   R4,
  658   R5,
  659   R6,
  660   R7,
  661   R8,
  662   R9,
  663   R10,
  664   R11,
  665   R12,
  666 /*R13*/             // system thread id
  667   R14,
  668   R15,
  669 /*R16*/             // R16_thread
  670   R17,
  671   R18,
  672   R19,
  673   R20,
  674   R21,
  675   R22,
  676   R23,
  677   R24,
  678   R25,
  679   R26,
  680   R27,
  681   R28,
  682 /*R29,*/
  683   R30,
  684   R31
  685 );
  686 
// Single-register 32 bit classes: each class contains exactly the one
// register named in its parentheses.
reg_class rscratch1_bits32_reg(R11);
reg_class rscratch2_bits32_reg(R12);
reg_class rarg1_bits32_reg(R3);
reg_class rarg2_bits32_reg(R4);
reg_class rarg3_bits32_reg(R5);
reg_class rarg4_bits32_reg(R6);
  693 
  694 // ----------------------------
  695 // 64 Bit Register Classes
  696 // ----------------------------
  697 // 64-bit build means 64-bit pointers means hi/lo pairs
  698 
// Single-register 64 bit classes: each class holds the _H half and the
// base half of exactly one register.
reg_class rscratch1_bits64_reg(R11_H, R11);
reg_class rscratch2_bits64_reg(R12_H, R12);
reg_class rarg1_bits64_reg(R3_H, R3);
reg_class rarg2_bits64_reg(R4_H, R4);
reg_class rarg3_bits64_reg(R5_H, R5);
reg_class rarg4_bits64_reg(R6_H, R6);
reg_class rarg5_bits64_reg(R7_H, R7);
reg_class rarg6_bits64_reg(R8_H, R8);
// Thread register, 'written' by tlsLoadP, see there.
reg_class thread_bits64_reg(R16_H, R16);

// Class containing only R19.
reg_class r19_bits64_reg(R19_H, R19);
  711 
  712 // 64 bit registers that can be read and written i.e. these registers
  713 // can be dest (or src) of normal instructions.
  714 reg_class bits64_reg_rw(
  715 /*R0_H,  R0*/     // R0
  716 /*R1_H,  R1*/     // SP
  717   R2_H,  R2,      // TOC
  718   R3_H,  R3,
  719   R4_H,  R4,
  720   R5_H,  R5,
  721   R6_H,  R6,
  722   R7_H,  R7,
  723   R8_H,  R8,
  724   R9_H,  R9,
  725   R10_H, R10,
  726   R11_H, R11,
  727   R12_H, R12,
  728 /*R13_H, R13*/   // system thread id
  729   R14_H, R14,
  730   R15_H, R15,
  731 /*R16_H, R16*/   // R16_thread
  732   R17_H, R17,
  733   R18_H, R18,
  734   R19_H, R19,
  735   R20_H, R20,
  736   R21_H, R21,
  737   R22_H, R22,
  738   R23_H, R23,
  739   R24_H, R24,
  740   R25_H, R25,
  741   R26_H, R26,
  742   R27_H, R27,
  743   R28_H, R28,
  744 /*R29_H, R29,*/
  745   R30_H, R30,
  746   R31_H, R31
  747 );
  748 
  749 // 64 bit registers used excluding r2, r11 and r12
  750 // Used to hold the TOC to avoid collisions with expanded LeafCall which uses
  751 // r2, r11 and r12 internally.
  752 reg_class bits64_reg_leaf_call(
  753 /*R0_H,  R0*/     // R0
  754 /*R1_H,  R1*/     // SP
  755 /*R2_H,  R2*/     // TOC
  756   R3_H,  R3,
  757   R4_H,  R4,
  758   R5_H,  R5,
  759   R6_H,  R6,
  760   R7_H,  R7,
  761   R8_H,  R8,
  762   R9_H,  R9,
  763   R10_H, R10,
  764 /*R11_H, R11*/
  765 /*R12_H, R12*/
  766 /*R13_H, R13*/   // system thread id
  767   R14_H, R14,
  768   R15_H, R15,
  769 /*R16_H, R16*/   // R16_thread
  770   R17_H, R17,
  771   R18_H, R18,
  772   R19_H, R19,
  773   R20_H, R20,
  774   R21_H, R21,
  775   R22_H, R22,
  776   R23_H, R23,
  777   R24_H, R24,
  778   R25_H, R25,
  779   R26_H, R26,
  780   R27_H, R27,
  781   R28_H, R28,
  782 /*R29_H, R29,*/
  783   R30_H, R30,
  784   R31_H, R31
  785 );
  786 
  787 // Used to hold the TOC to avoid collisions with expanded DynamicCall
  788 // which uses r19 as inline cache internally and expanded LeafCall which uses
  789 // r2, r11 and r12 internally.
  790 reg_class bits64_constant_table_base(
  791 /*R0_H,  R0*/     // R0
  792 /*R1_H,  R1*/     // SP
  793 /*R2_H,  R2*/     // TOC
  794   R3_H,  R3,
  795   R4_H,  R4,
  796   R5_H,  R5,
  797   R6_H,  R6,
  798   R7_H,  R7,
  799   R8_H,  R8,
  800   R9_H,  R9,
  801   R10_H, R10,
  802 /*R11_H, R11*/
  803 /*R12_H, R12*/
  804 /*R13_H, R13*/   // system thread id
  805   R14_H, R14,
  806   R15_H, R15,
  807 /*R16_H, R16*/   // R16_thread
  808   R17_H, R17,
  809   R18_H, R18,
  810 /*R19_H, R19*/
  811   R20_H, R20,
  812   R21_H, R21,
  813   R22_H, R22,
  814   R23_H, R23,
  815   R24_H, R24,
  816   R25_H, R25,
  817   R26_H, R26,
  818   R27_H, R27,
  819   R28_H, R28,
  820 /*R29_H, R29,*/
  821   R30_H, R30,
  822   R31_H, R31
  823 );
  824 
  825 // 64 bit registers that can only be read i.e. these registers can
  826 // only be src of all instructions.
  827 reg_class bits64_reg_ro(
  828 /*R0_H,  R0*/     // R0
  829   R1_H,  R1,
  830   R2_H,  R2,       // TOC
  831   R3_H,  R3,
  832   R4_H,  R4,
  833   R5_H,  R5,
  834   R6_H,  R6,
  835   R7_H,  R7,
  836   R8_H,  R8,
  837   R9_H,  R9,
  838   R10_H, R10,
  839   R11_H, R11,
  840   R12_H, R12,
  841 /*R13_H, R13*/   // system thread id
  842   R14_H, R14,
  843   R15_H, R15,
  844   R16_H, R16,    // R16_thread
  845   R17_H, R17,
  846   R18_H, R18,
  847   R19_H, R19,
  848   R20_H, R20,
  849   R21_H, R21,
  850   R22_H, R22,
  851   R23_H, R23,
  852   R24_H, R24,
  853   R25_H, R25,
  854   R26_H, R26,
  855   R27_H, R27,
  856   R28_H, R28,
  857 /*R29_H, R29,*/ // TODO: let allocator handle TOC!!
  858   R30_H, R30,
  859   R31_H, R31
  860 );
  861 
  862 
  863 // ----------------------------
  864 // Special Class for Condition Code Flags Register
  865 
  866 reg_class int_flags(
  867 /*CR0*/             // scratch
  868 /*CR1*/             // scratch
  869 /*CR2*/             // nv!
  870 /*CR3*/             // nv!
  871 /*CR4*/             // nv!
  872   CR5,
  873   CR6,
  874   CR7
  875 );
  876 
  877 reg_class int_flags_ro(
  878   CR0,
  879   CR1,
  880   CR2,
  881   CR3,
  882   CR4,
  883   CR5,
  884   CR6,
  885   CR7
  886 );
  887 
// Single-register classes for individual condition registers and for SR_CTR.
reg_class int_flags_CR0(CR0);
reg_class int_flags_CR1(CR1);
reg_class int_flags_CR6(CR6);
reg_class ctr_reg(SR_CTR);
  892 
  893 // ----------------------------
  894 // Float Register Classes
  895 // ----------------------------
  896 
  897 reg_class flt_reg(
  898   F0,
  899   F1,
  900   F2,
  901   F3,
  902   F4,
  903   F5,
  904   F6,
  905   F7,
  906   F8,
  907   F9,
  908   F10,
  909   F11,
  910   F12,
  911   F13,
  912   F14,              // nv!
  913   F15,              // nv!
  914   F16,              // nv!
  915   F17,              // nv!
  916   F18,              // nv!
  917   F19,              // nv!
  918   F20,              // nv!
  919   F21,              // nv!
  920   F22,              // nv!
  921   F23,              // nv!
  922   F24,              // nv!
  923   F25,              // nv!
  924   F26,              // nv!
  925   F27,              // nv!
  926   F28,              // nv!
  927   F29,              // nv!
  928   F30,              // nv!
  929   F31               // nv!
  930 );
  931 
  932 // Double precision float registers have virtual `high halves' that
  933 // are needed by the allocator.
  934 reg_class dbl_reg(
  935   F0,  F0_H,
  936   F1,  F1_H,
  937   F2,  F2_H,
  938   F3,  F3_H,
  939   F4,  F4_H,
  940   F5,  F5_H,
  941   F6,  F6_H,
  942   F7,  F7_H,
  943   F8,  F8_H,
  944   F9,  F9_H,
  945   F10, F10_H,
  946   F11, F11_H,
  947   F12, F12_H,
  948   F13, F13_H,
  949   F14, F14_H,    // nv!
  950   F15, F15_H,    // nv!
  951   F16, F16_H,    // nv!
  952   F17, F17_H,    // nv!
  953   F18, F18_H,    // nv!
  954   F19, F19_H,    // nv!
  955   F20, F20_H,    // nv!
  956   F21, F21_H,    // nv!
  957   F22, F22_H,    // nv!
  958   F23, F23_H,    // nv!
  959   F24, F24_H,    // nv!
  960   F25, F25_H,    // nv!
  961   F26, F26_H,    // nv!
  962   F27, F27_H,    // nv!
  963   F28, F28_H,    // nv!
  964   F29, F29_H,    // nv!
  965   F30, F30_H,    // nv!
  966   F31, F31_H     // nv!
  967 );
  968 
  969 // ----------------------------
  970 // Vector-Scalar Register Class
  971 // ----------------------------
  972 
  973 reg_class v_reg(
  974   VR0 , VR0_H , VR0_J , VR0_K ,
  975   VR1 , VR1_H , VR1_J , VR1_K ,
  976   VR2 , VR2_H , VR2_J , VR2_K ,
  977   VR3 , VR3_H , VR3_J , VR3_K ,
  978   VR4 , VR4_H , VR4_J , VR4_K ,
  979   VR5 , VR5_H , VR5_J , VR5_K ,
  980   VR6 , VR6_H , VR6_J , VR6_K ,
  981   VR7 , VR7_H , VR7_J , VR7_K ,
  982   VR8 , VR8_H , VR8_J , VR8_K ,
  983   VR9 , VR9_H , VR9_J , VR9_K ,
  984   VR10, VR10_H, VR10_J, VR10_K,
  985   VR11, VR11_H, VR11_J, VR11_K,
  986   VR12, VR12_H, VR12_J, VR12_K,
  987   VR13, VR13_H, VR13_J, VR13_K,
  988   VR14, VR14_H, VR14_J, VR14_K,
  989   VR15, VR15_H, VR15_J, VR15_K,
  990   VR16, VR16_H, VR16_J, VR16_K,
  991   VR17, VR17_H, VR17_J, VR17_K,
  992   VR18, VR18_H, VR18_J, VR18_K,
  993   VR19, VR19_H, VR19_J, VR19_K,
  994   VR20, VR20_H, VR20_J, VR20_K,
  995   VR21, VR21_H, VR21_J, VR21_K,
  996   VR22, VR22_H, VR22_J, VR22_K,
  997   VR23, VR23_H, VR23_J, VR23_K,
  998   VR24, VR24_H, VR24_J, VR24_K,
  999   VR25, VR25_H, VR25_J, VR25_K,
 1000   VR26, VR26_H, VR26_J, VR26_K,
 1001   VR27, VR27_H, VR27_J, VR27_K,
 1002   VR28, VR28_H, VR28_J, VR28_K,
 1003   VR29, VR29_H, VR29_J, VR29_K,
 1004   VR30, VR30_H, VR30_J, VR30_K,
 1005   VR31, VR31_H, VR31_J, VR31_K
 1006 );
 1007 
 1008  %}
 1009 
 1010 //----------DEFINITION BLOCK---------------------------------------------------
 1011 // Define name --> value mappings to inform the ADLC of an integer valued name
 1012 // Current support includes integer values in the range [0, 0x7FFFFFFF]
 1013 // Format:
 1014 //        int_def  <name>         ( <int_value>, <expression>);
 1015 // Generated Code in ad_<arch>.hpp
 1016 //        #define  <name>   (<expression>)
 1017 //        // value == <int_value>
 1018 // Generated code in ad_<arch>.cpp adlc_verification()
 1019 //        assert( <name> == <int_value>, "Expect (<expression>) to equal <int_value>");
 1020 //
definitions %{
  // Each int_def lists the literal integer value first and the defining
  // expression second; the two must agree (see the format notes above).

  // The default cost (of an ALU instruction).
  int_def DEFAULT_COST_LOW        (     30,      30);
  int_def DEFAULT_COST            (    100,     100);
  int_def HUGE_COST               (1000000, 1000000);

  // Memory refs: two and three times DEFAULT_COST.
  int_def MEMORY_REF_COST_LOW     (    200, DEFAULT_COST * 2);
  int_def MEMORY_REF_COST         (    300, DEFAULT_COST * 3);

  // Branches are even more expensive: nine and thirteen times DEFAULT_COST.
  int_def BRANCH_COST             (    900, DEFAULT_COST * 9);
  int_def CALL_COST               (   1300, DEFAULT_COST * 13);
%}
 1035 
 1036 
 1037 //----------SOURCE BLOCK-------------------------------------------------------
 1038 // This is a block of C++ code which provides values, functions, and
 1039 // definitions necessary in the rest of the architecture description.
source_hpp %{
  // Header information of the source block.
  // Method declarations/definitions which are used outside
  // the ad-scope can conveniently be defined here.
  //
  // To keep related declarations/definitions/uses close together,
  // we switch between source %{ %} and source_hpp %{ %} freely as needed.

#include "opto/convertnode.hpp"

  // Returns true if Node n is followed by a MemBar node that
  // will do an acquire. If so, this node must not do the acquire
  // operation.
  bool followed_by_acquire(const Node *n);
%}
 1055 
 1056 source %{
 1057 
 1058 #include "opto/c2_CodeStubs.hpp"
 1059 #include "oops/klass.inline.hpp"
 1060 
// Intentionally empty: this file performs no mach node analysis in this hook.
void PhaseOutput::pd_perform_mach_node_analysis() {
}
 1063 
 1064 int MachNode::pd_alignment_required() const {
 1065   return 1;
 1066 }
 1067 
 1068 int MachNode::compute_padding(int current_offset) const {
 1069   return 0;
 1070 }
 1071 
 1072 // Should the matcher clone input 'm' of node 'n'?
 1073 bool Matcher::pd_clone_node(Node* n, Node* m, Matcher::MStack& mstack) {
 1074   if (is_encode_and_store_pattern(n, m)) {
 1075     mstack.push(m, Visit);
 1076     return true;
 1077   }
 1078   return false;
 1079 }
 1080 
 1081 // Should the Matcher clone shifts on addressing modes, expecting them
 1082 // to be subsumed into complex addressing expressions or compute them
 1083 // into registers?
 1084 bool Matcher::pd_clone_address_expressions(AddPNode* m, Matcher::MStack& mstack, VectorSet& address_visited) {
 1085   return clone_base_plus_offset_address(m, mstack, address_visited);
 1086 }
 1087 
 1088 // Optimize load-acquire.
 1089 //
 1090 // Check if acquire is unnecessary due to following operation that does
 1091 // acquire anyways.
 1092 // Walk the pattern:
 1093 //
 1094 //      n: Load.acq
 1095 //           |
 1096 //      MemBarAcquire
 1097 //       |         |
 1098 //  Proj(ctrl)  Proj(mem)
 1099 //       |         |
 1100 //   MemBarRelease/Volatile
 1101 //
// Returns true only if 'load' has a MemBarAcquire user whose memory
// projection feeds a MemBarRelease or MemBarVolatile that is also reached
// directly through a control projection of that same MemBarAcquire.
// Returns false in every other case, including when no MemBarAcquire is found.
bool followed_by_acquire(const Node *load) {
  assert(load->is_Load(), "So far implemented only for loads.");

  // Find MemBarAcquire.
  const Node *mba = nullptr;
  for (DUIterator_Fast imax, i = load->fast_outs(imax); i < imax; i++) {
    const Node *out = load->fast_out(i);
    if (out->Opcode() == Op_MemBarAcquire) {
      if (out->in(0) == load) continue; // Skip control edge, membar should be found via precedence edge.
      mba = out;
      break;
    }
  }
  // No MemBarAcquire among the users: nothing follows that could take over the acquire.
  if (!mba) return false;

  // Find following MemBar node.
  //
  // The following node must be reachable by control AND memory
  // edge to assure no other operations are in between the two nodes.
  //
  // So first get the Proj node, mem_proj, to use it to iterate forward.
  Node *mem_proj = nullptr;
  for (DUIterator_Fast imax, i = mba->fast_outs(imax); i < imax; i++) {
    mem_proj = mba->fast_out(i);      // Runs out of bounds and asserts if Proj not found.
    assert(mem_proj->is_Proj(), "only projections here");
    ProjNode *proj = mem_proj->as_Proj();
    // Stop at the memory projection that is not contained in the current node arena.
    if (proj->_con == TypeFunc::Memory &&
        !Compile::current()->node_arena()->contains(mem_proj)) // Unmatched old-space only
      break;
  }
  assert(mem_proj->as_Proj()->_con == TypeFunc::Memory, "Graph broken");

  // Search MemBar behind Proj. If there are other memory operations
  // behind the Proj we lost.
  for (DUIterator_Fast jmax, j = mem_proj->fast_outs(jmax); j < jmax; j++) {
    Node *x = mem_proj->fast_out(j);
    // Proj might have an edge to a store or load node which precedes the membar.
    if (x->is_Mem()) return false;

    // On PPC64 release and volatile are implemented by an instruction
    // that also has acquire semantics. I.e. there is no need for an
    // acquire before these.
    int xop = x->Opcode();
    if (xop == Op_MemBarRelease || xop == Op_MemBarVolatile) {
      // Make sure we're not missing Call/Phi/MergeMem by checking
      // control edges. The control edge must directly lead back
      // to the MemBarAcquire
      Node *ctrl_proj = x->in(0);
      if (ctrl_proj->is_Proj() && ctrl_proj->in(0) == mba) {
        return true;
      }
    }
  }

  // No suitable MemBarRelease/MemBarVolatile was found behind the memory projection.
  return false;
}
 1158 
// Shorthand used by the emit code below: '__ foo()' expands to 'masm->foo()',
// so a variable named 'masm' must be in scope wherever it is used.
#define __ masm->

// Tertiary op of a LoadP or StoreP encoding.
#define REGP_OP true
 1163 
 1164 // ****************************************************************************
 1165 
 1166 // REQUIRED FUNCTIONALITY
 1167 
 1168 // !!!!! Special hack to get all type of calls to specify the byte offset
 1169 //       from the start of the call to the point where the return address
 1170 //       will point.
 1171 
 1172 // PPC port: Removed use of lazy constant construct.
 1173 
 1174 int MachCallStaticJavaNode::ret_addr_offset() {
 1175   // It's only a single branch-and-link instruction.
 1176   return 4;
 1177 }
 1178 
 1179 int MachCallDynamicJavaNode::ret_addr_offset() {
 1180   return 12;
 1181 }
 1182 
 1183 int MachCallRuntimeNode::ret_addr_offset() {
 1184   if (rule() == CallRuntimeDirect_rule) {
 1185     // CallRuntimeDirectNode uses call_c.
 1186 #if defined(ABI_ELFv2)
 1187     return 28;
 1188 #else
 1189     return 40;
 1190 #endif
 1191   }
 1192   assert(rule() == CallLeafDirect_rule, "unexpected node with rule %u", rule());
 1193   // CallLeafDirectNode uses bl.
 1194   return 4;
 1195 }
 1196 
 1197 //=============================================================================
 1198 
 1199 // condition code conversions
 1200 
 1201 static int cc_to_boint(int cc) {
 1202   return Assembler::bcondCRbiIs0 | (cc & 8);
 1203 }
 1204 
 1205 static int cc_to_inverse_boint(int cc) {
 1206   return Assembler::bcondCRbiIs0 | (8-(cc & 8));
 1207 }
 1208 
 1209 static int cc_to_biint(int cc, int flags_reg) {
 1210   return (flags_reg << 2) | (cc & 3);
 1211 }
 1212 
 1213 //=============================================================================
 1214 
 1215 // Compute padding required for nodes which need alignment. The padding
 1216 // is the number of bytes (not instructions) which will be inserted before
 1217 // the instruction. The padding must match the size of a NOP instruction.
 1218 
 1219 // Add nop if a prefixed (two-word) instruction is going to cross a 64-byte boundary.
 1220 // (See Section 1.6 of Power ISA Version 3.1)
 1221 static int compute_prefix_padding(int current_offset) {
 1222   assert(PowerArchitecturePPC64 >= 10 && (CodeEntryAlignment & 63) == 0,
 1223          "Code buffer must be aligned to a multiple of 64 bytes");
 1224   if (is_aligned(current_offset + BytesPerInstWord, 64)) {
 1225     return BytesPerInstWord;
 1226   }
 1227   return 0;
 1228 }
 1229 
 1230 int loadConI32Node::compute_padding(int current_offset) const {
 1231   return compute_prefix_padding(current_offset);
 1232 }
 1233 
 1234 int loadConL34Node::compute_padding(int current_offset) const {
 1235   return compute_prefix_padding(current_offset);
 1236 }
 1237 
 1238 int addI_reg_imm32Node::compute_padding(int current_offset) const {
 1239   return compute_prefix_padding(current_offset);
 1240 }
 1241 
 1242 int addL_reg_imm34Node::compute_padding(int current_offset) const {
 1243   return compute_prefix_padding(current_offset);
 1244 }
 1245 
 1246 int addP_reg_imm34Node::compute_padding(int current_offset) const {
 1247   return compute_prefix_padding(current_offset);
 1248 }
 1249 
 1250 int cmprb_Whitespace_reg_reg_prefixedNode::compute_padding(int current_offset) const {
 1251   return compute_prefix_padding(current_offset);
 1252 }
 1253 
 1254 
 1255 //=============================================================================
 1256 
// Emit an interrupt that is caught by the debugger (for debugging compiler).
// Implemented as a single illtrap() emitted through masm.
void emit_break(C2_MacroAssembler *masm) {
  __ illtrap();
}
 1261 
#ifndef PRODUCT
// Non-PRODUCT builds only: prints the text "BREAKPOINT" to st.
void MachBreakpointNode::format(PhaseRegAlloc *ra_, outputStream *st) const {
  st->print("BREAKPOINT");
}
#endif
 1267 
// Emits the breakpoint by delegating to emit_break(); ra_ is not used.
void MachBreakpointNode::emit(C2_MacroAssembler *masm, PhaseRegAlloc *ra_) const {
  emit_break(masm);
}
 1271 
 1272 uint MachBreakpointNode::size(PhaseRegAlloc *ra_) const {
 1273   return MachNode::size(ra_);
 1274 }
 1275 
 1276 //=============================================================================
 1277 
// Emits a single nop() through masm.
void emit_nop(C2_MacroAssembler *masm) {
  __ nop();
}
 1281 
 1282 static inline void emit_long(C2_MacroAssembler *masm, int value) {
 1283   *((int*)(__ pc())) = value;
 1284   __ set_inst_end(__ pc() + BytesPerInstWord);
 1285 }
 1286 
 1287 //=============================================================================
 1288 
 1289 %} // interrupt source
 1290 
 1291 source_hpp %{ // Header information of the source block.
 1292 
 1293 //--------------------------------------------------------------
 1294 //---<  Used for optimization in Compile::Shorten_branches  >---
 1295 //--------------------------------------------------------------
 1296 
 1297 class C2_MacroAssembler;
 1298 
 1299 class CallStubImpl {
 1300 
 1301  public:
 1302 
 1303   // Size of call trampoline stub.
 1304   // This doesn't need to be accurate to the byte, but it
 1305   // must be larger than or equal to the real size of the stub.
 1306   static uint size_call_trampoline() {
 1307     return MacroAssembler::trampoline_stub_size;
 1308   }
 1309 
 1310   // number of relocations needed by a call trampoline stub
 1311   static uint reloc_call_trampoline() {
 1312     return 5;
 1313   }
 1314 
 1315 };
 1316 
 1317 %} // end source_hpp
 1318 
 1319 source %{
 1320 
 1321 // Factory for creating loadConL* nodes for large/small constant pool.
 1322 
 1323 static inline jlong replicate_immF(float con) {
 1324   // Replicate float con 2 times and pack into vector.
 1325   int val = *((int*)&con);
 1326   jlong lval = val;
 1327   lval = (lval << 32) | (lval & 0xFFFFFFFFl);
 1328   return lval;
 1329 }
 1330 
 1331 //=============================================================================
 1332 
// Output register mask of MachConstantBaseNode: the mask generated for
// reg_class bits64_constant_table_base.
const RegMask& MachConstantBaseNode::_out_RegMask = BITS64_CONSTANT_TABLE_BASE_mask();
 1334 int ConstantTable::calculate_table_base_offset() const {
 1335   return 0;  // absolute addressing, no offset
 1336 }
 1337 
 1338 bool MachConstantBaseNode::requires_postalloc_expand() const { return true; }
// Expands this node into a loadToc_hiNode followed by a loadToc_loNode and
// appends both to 'nodes'. Both new nodes share one destination operand and
// are given the register pair that the allocator assigned to this node.
void MachConstantBaseNode::postalloc_expand(GrowableArray <Node *> *nodes, PhaseRegAlloc *ra_) {
  iRegLdstOper *op_dst = new iRegLdstOper();
  MachNode *m1 = new loadToc_hiNode();
  MachNode *m2 = new loadToc_loNode();

  // m1 gets a single null input; m2 gets a null input followed by m1.
  m1->add_req(nullptr);
  m2->add_req(nullptr, m1);
  // The same operand object is used as destination of m1 and as
  // destination and first source operand of m2.
  m1->_opnds[0] = op_dst;
  m2->_opnds[0] = op_dst;
  m2->_opnds[1] = op_dst;
  // Copy this node's register assignment to both new nodes.
  ra_->set_pair(m1->_idx, ra_->get_reg_second(this), ra_->get_reg_first(this));
  ra_->set_pair(m2->_idx, ra_->get_reg_second(this), ra_->get_reg_first(this));
  nodes->push(m1);
  nodes->push(m2);
}
 1354 
// Must never be called: the node is replaced by postalloc_expand() before
// code emission, so reaching this point is reported via ShouldNotReachHere().
void MachConstantBaseNode::emit(C2_MacroAssembler* masm, PhaseRegAlloc* ra_) const {
  // Is postalloc expanded.
  ShouldNotReachHere();
}
 1359 
 1360 uint MachConstantBaseNode::size(PhaseRegAlloc* ra_) const {
 1361   return 0;
 1362 }
 1363 
#ifndef PRODUCT
// Non-PRODUCT builds only: prints a fixed placeholder line to st.
void MachConstantBaseNode::format(PhaseRegAlloc* ra_, outputStream* st) const {
  st->print("-- \t// MachConstantBaseNode (empty encoding)");
}
#endif
 1369 
 1370 //=============================================================================
 1371 
 1372 #ifndef PRODUCT
 1373 void MachPrologNode::format(PhaseRegAlloc *ra_, outputStream *st) const {
 1374   Compile* C = ra_->C;
 1375   const long framesize = C->output()->frame_slots() << LogBytesPerInt;
 1376 
 1377   st->print("PROLOG\n\t");
 1378   if (C->output()->need_stack_bang(framesize)) {
 1379     st->print("stack_overflow_check\n\t");
 1380   }
 1381 
 1382   if (!false /* TODO: PPC port C->is_frameless_method()*/) {
 1383     st->print("save return pc\n\t");
 1384     st->print("push frame %ld\n\t", -framesize);
 1385   }
 1386 
 1387   if (C->stub_function() == nullptr) {
 1388     st->print("nmethod entry barrier\n\t");
 1389   }
 1390 }
 1391 #endif
 1392 
// Emits the method prolog. In order:
//   1. copy LR into return_pc (R20),
//   2. if requested, emit the class initialization barrier,
//   3. if needed, bang the stack pages below SP,
//   4. remember the caller's SP in callers_sp (R21) and push the new frame,
//   5. store return_pc relative to the caller's SP,
//   6. emit the nmethod entry barrier unless a stub function is compiled,
//   7. record the current code offset as the frame-complete offset.
void MachPrologNode::emit(C2_MacroAssembler *masm, PhaseRegAlloc *ra_) const {
  Compile* C = ra_->C;

  const long framesize = C->output()->frame_size_in_bytes();
  assert(framesize % (2 * wordSize) == 0, "must preserve 2*wordSize alignment");

  const bool method_is_frameless      = false /* TODO: PPC port C->is_frameless_method()*/;

  const Register return_pc            = R20; // Must match return_addr() in frame section.
  const Register callers_sp           = R21;
  const Register push_frame_temp      = R22;
  const Register toc_temp             = R23;
  // R11 is used as temp by the stack banging code below, so it must not alias any of these.
  assert_different_registers(R11, return_pc, callers_sp, push_frame_temp, toc_temp);

  if (!method_is_frameless) {
    // Get return pc.
    __ mflr(return_pc);
  }

  if (C->clinit_barrier_on_entry()) {
    assert(!C->method()->holder()->is_not_initialized(), "initialization should have been started");

    Label L_skip_barrier;
    Register klass = toc_temp;

    // Notify OOP recorder (don't need the relocation)
    AddressLiteral md = __ constant_metadata_address(C->method()->holder()->constant_encoding());
    __ load_const_optimized(klass, md.value(), R0);
    __ clinit_barrier(klass, R16_thread, &L_skip_barrier /*L_fast_path*/);

    // Barrier not passed: branch via CTR to the handle_wrong_method stub.
    __ load_const_optimized(klass, SharedRuntime::get_handle_wrong_method_stub(), R0);
    __ mtctr(klass);
    __ bctr();

    __ bind(L_skip_barrier);
  }

  // Calls to C2R adapters often do not accept exceptional returns.
  // We require that their callers must bang for them. But be
  // careful, because some VM calls (such as call site linkage) can
  // use several kilobytes of stack. But the stack safety zone should
  // account for that. See bugs 4446381, 4468289, 4497237.

  int bangsize = C->output()->bang_size_in_bytes();
  assert(bangsize >= framesize || bangsize <= 0, "stack bang size incorrect");
  if (C->output()->need_stack_bang(bangsize)) {
    // Unfortunately we cannot use the function provided in
    // assembler.cpp as we have to emulate the pipes. So I had to
    // insert the code of generate_stack_overflow_check(), see
    // assembler.cpp for some illuminative comments.
    const int page_size = os::vm_page_size();
    int bang_end = StackOverflow::stack_shadow_zone_size();

    // This is how far the previous frame's stack banging extended.
    const int bang_end_safe = bang_end;

    if (bangsize > page_size) {
      bang_end += bangsize;
    }

    int bang_offset = bang_end_safe;

    // Touch one location per page from bang_end_safe up to and including bang_end.
    while (bang_offset <= bang_end) {
      // Need at least one stack bang at end of shadow zone.

      // Again I had to copy code, this time from assembler_ppc.cpp,
      // bang_stack_with_offset - see there for comments.

      // Stack grows down, caller passes positive offset.
      assert(bang_offset > 0, "must bang with positive offset");

      long stdoffset = -bang_offset;

      if (Assembler::is_simm(stdoffset, 16)) {
        // Signed 16 bit offset, a simple std is ok.
        if (UseLoadInstructionsForStackBangingPPC64) {
          __ ld(R0,  (int)(signed short)stdoffset, R1_SP);
        } else {
          __ std(R0, (int)(signed short)stdoffset, R1_SP);
        }
      } else if (Assembler::is_simm(stdoffset, 31)) {
        // Use largeoffset calculations for addis & ld/std.
        const int hi = MacroAssembler::largeoffset_si16_si16_hi(stdoffset);
        const int lo = MacroAssembler::largeoffset_si16_si16_lo(stdoffset);

        Register tmp = R11;
        __ addis(tmp, R1_SP, hi);
        if (UseLoadInstructionsForStackBangingPPC64) {
          __ ld(R0, lo, tmp);
        } else {
          __ std(R0, lo, tmp);
        }
      } else {
        ShouldNotReachHere();
      }

      bang_offset += page_size;
    }
    // R11 trashed
  } // C->output()->need_stack_bang(bangsize)

  unsigned int bytes = (unsigned int)framesize;
  long offset = Assembler::align_addr(bytes, frame::alignment_in_bytes);
  // NOTE(review): currMethod is not used anywhere below in this function.
  ciMethod *currMethod = C->method();

  if (!method_is_frameless) {
    // Get callers sp.
    __ mr(callers_sp, R1_SP);

    // Push method's frame, modifies SP.
    assert(Assembler::is_uimm(framesize, 32U), "wrong type");
    // The ABI is already accounted for in 'framesize' via the
    // 'out_preserve' area.
    Register tmp = push_frame_temp;
    // Had to insert code of push_frame((unsigned int)framesize, push_frame_temp).
    if (Assembler::is_simm(-offset, 16)) {
      __ stdu(R1_SP, -offset, R1_SP);
    } else {
      long x = -offset;
      // Had to insert load_const(tmp, -offset).
      // The 64 bit value is assembled from four 16 bit pieces, upper half first.
      __ lis( tmp, (int)((signed short)(((x >> 32) & 0xffff0000) >> 16)));
      __ ori( tmp, tmp, ((x >> 32) & 0x0000ffff));
      __ sldi(tmp, tmp, 32);
      __ oris(tmp, tmp, (x & 0xffff0000) >> 16);
      __ ori( tmp, tmp, (x & 0x0000ffff));

      __ stdux(R1_SP, R1_SP, tmp);
    }
  }
#if 0 // TODO: PPC port
  // For testing large constant pools, emit a lot of constants to constant pool.
  // "Randomize" const_size.
  if (ConstantsALot) {
    const int num_consts = const_size();
    for (int i = 0; i < num_consts; i++) {
      __ long_constant(0xB0B5B00BBABE);
    }
  }
#endif
  if (!method_is_frameless) {
    // Save return pc.
    __ std(return_pc, _abi0(lr), callers_sp);
  }

  if (C->stub_function() == nullptr) {
    BarrierSetAssembler* bs = BarrierSet::barrier_set()->barrier_set_assembler();
    bs->nmethod_entry_barrier(masm, push_frame_temp);
  }

  C->output()->set_frame_complete(__ offset());
}
 1544 
 1545 uint MachPrologNode::size(PhaseRegAlloc *ra_) const {
 1546   // Variable size. determine dynamically.
 1547   return MachNode::size(ra_);
 1548 }
 1549 
 1550 int MachPrologNode::reloc() const {
 1551   // Return number of relocatable values contained in this instruction.
 1552   return 1; // 1 reloc entry for load_const(toc).
 1553 }
 1554 
 1555 //=============================================================================
 1556 
 1557 #ifndef PRODUCT
 1558 void MachEpilogNode::format(PhaseRegAlloc *ra_, outputStream *st) const {
 1559   Compile* C = ra_->C;
 1560 
 1561   st->print("EPILOG\n\t");
 1562   st->print("restore return pc\n\t");
 1563   st->print("pop frame\n\t");
 1564 
 1565   if (do_polling() && C->is_method_compilation()) {
 1566     st->print("safepoint poll\n\t");
 1567   }
 1568 }
 1569 #endif
 1570 
// Emits the method epilog: reloads the return pc into LR and pops the frame,
// then emits the reserved stack check when StackReservedPages > 0 and the
// compilation has reserved stack access, and finally the return safepoint
// poll when polling is required.
void MachEpilogNode::emit(C2_MacroAssembler *masm, PhaseRegAlloc *ra_) const {
  Compile* C = ra_->C;

  const long framesize = ((long)C->output()->frame_slots()) << LogBytesPerInt;
  assert(framesize >= 0, "negative frame-size?");

  const bool method_needs_polling = do_polling() && C->is_method_compilation();
  const bool method_is_frameless  = false /* TODO: PPC port C->is_frameless_method()*/;
  const Register return_pc        = R31;  // Must survive C-call to enable_stack_reserved_zone().
  const Register temp             = R12;

  if (!method_is_frameless) {
    // Restore return pc relative to callers' sp.
    __ ld(return_pc, ((int)framesize) + _abi0(lr), R1_SP);
    // Move return pc to LR.
    __ mtlr(return_pc);
    // Pop frame (fixed frame-size).
    __ addi(R1_SP, R1_SP, (int)framesize);
  }

  if (StackReservedPages > 0 && C->has_reserved_stack_access()) {
    __ reserved_stack_check(return_pc);
  }

  if (method_needs_polling) {
    // The poll targets a local dummy label unless a real stub is created below.
    Label dummy_label;
    Label* code_stub = &dummy_label;
    if (!UseSIGTRAP && !C->output()->in_scratch_emit_size()) {
      // Create a C2SafepointPollStub for the current code offset and
      // let the poll branch to its entry.
      C2SafepointPollStub* stub = new (C->comp_arena()) C2SafepointPollStub(__ offset());
      C->output()->add_stub(stub);
      code_stub = &stub->entry();
      __ relocate(relocInfo::poll_return_type);
    }
    __ safepoint_poll(*code_stub, temp, true /* at_return */, true /* in_nmethod */);
  }
}
 1607 
// Size of the epilog code; not a fixed value for this node.
uint MachEpilogNode::size(PhaseRegAlloc *ra_) const {
  // Variable size. Determine dynamically by delegating to MachNode::size().
  return MachNode::size(ra_);
}
 1612 
// Relocation count reported for the epilog node.
int MachEpilogNode::reloc() const {
  // Return number of relocatable values contained in this instruction.
  return 1; // 1 for load_from_polling_page.
}
 1617 
// The epilog uses the generic pipeline class returned by MachNode::pipeline_class().
const Pipeline * MachEpilogNode::pipeline() const {
  return MachNode::pipeline_class();
}
 1621 
 1622 // =============================================================================
 1623 
 1624 // Figure out which register class each belongs in: rc_int, rc_float, rc_vec or
 1625 // rc_stack.
 1626 enum RC { rc_bad, rc_int, rc_float, rc_vec, rc_stack };
 1627 
 1628 static enum RC rc_class(OptoReg::Name reg) {
 1629   // Return the register class for the given register. The given register
 1630   // reg is a <register>_num value, which is an index into the MachRegisterNumbers
 1631   // enumeration in adGlobals_ppc.hpp.
 1632 
 1633   if (reg == OptoReg::Bad) return rc_bad;
 1634 
 1635   // We have 64 integer register halves, starting at index 0.
 1636   STATIC_ASSERT((int)ConcreteRegisterImpl::max_gpr == (int)MachRegisterNumbers::F0_num);
 1637   if (reg < ConcreteRegisterImpl::max_gpr) return rc_int;
 1638 
 1639   // We have 64 floating-point register halves, starting at index 64.
 1640   STATIC_ASSERT((int)ConcreteRegisterImpl::max_fpr == (int)MachRegisterNumbers::VR0_num);
 1641   if (reg < ConcreteRegisterImpl::max_fpr) return rc_float;
 1642 
 1643   // We have 64 vector-scalar registers, starting at index 128.
 1644   STATIC_ASSERT((int)ConcreteRegisterImpl::max_vr == (int)MachRegisterNumbers::CR0_num);
 1645   if (reg < ConcreteRegisterImpl::max_vr) return rc_vec;
 1646 
 1647   // Condition and special purpose registers are not allocated. We only accept stack from here.
 1648   assert(OptoReg::is_stack(reg), "what else is it?");
 1649   return rc_stack;
 1650 }
 1651 
 1652 static int ld_st_helper(C2_MacroAssembler *masm, const char *op_str, uint opcode, int reg, int offset,
 1653                         bool do_print, Compile* C, outputStream *st) {
 1654 
 1655   assert(opcode == Assembler::LD_OPCODE   ||
 1656          opcode == Assembler::STD_OPCODE  ||
 1657          opcode == Assembler::LWZ_OPCODE  ||
 1658          opcode == Assembler::STW_OPCODE  ||
 1659          opcode == Assembler::LFD_OPCODE  ||
 1660          opcode == Assembler::STFD_OPCODE ||
 1661          opcode == Assembler::LFS_OPCODE  ||
 1662          opcode == Assembler::STFS_OPCODE,
 1663          "opcode not supported");
 1664 
 1665   if (masm) {
 1666     int d =
 1667       (Assembler::LD_OPCODE == opcode || Assembler::STD_OPCODE == opcode) ?
 1668         Assembler::ds(offset+0 /* TODO: PPC port C->frame_slots_sp_bias_in_bytes()*/)
 1669       : Assembler::d1(offset+0 /* TODO: PPC port C->frame_slots_sp_bias_in_bytes()*/); // Makes no difference in opt build.
 1670     emit_long(masm, opcode | Assembler::rt(Matcher::_regEncode[reg]) | d | Assembler::ra(R1_SP));
 1671   }
 1672 #ifndef PRODUCT
 1673   else if (do_print) {
 1674     st->print("%-7s %s, [R1_SP + #%d+%d] \t// spill copy",
 1675               op_str,
 1676               Matcher::regName[reg],
 1677               offset, 0 /* TODO: PPC port C->frame_slots_sp_bias_in_bytes()*/);
 1678   }
 1679 #endif
 1680   return 4; // size
 1681 }
 1682 
// Shared worker behind format(), emit() and size() of a spill copy.
//
// Copies the value located where in(1) was allocated to the location
// allocated for this node. Depending on the arguments it
//   - emits the instructions (masm != nullptr),
//   - prints a textual form to st (masm == nullptr and do_size == false;
//     the vector path prints whenever st != nullptr; non-PRODUCT builds only),
//   - or only computes the size (masm == nullptr and do_size == true).
//
// Returns the size in bytes of the code for this copy (0 for a self copy).
uint MachSpillCopyNode::implementation(C2_MacroAssembler *masm, PhaseRegAlloc *ra_, bool do_size, outputStream *st) const {
  Compile* C = ra_->C;

  // Get registers to move.
  OptoReg::Name src_hi = ra_->get_reg_second(in(1));
  OptoReg::Name src_lo = ra_->get_reg_first(in(1));
  OptoReg::Name dst_hi = ra_->get_reg_second(this);
  OptoReg::Name dst_lo = ra_->get_reg_first(this);

  enum RC src_hi_rc = rc_class(src_hi);
  enum RC src_lo_rc = rc_class(src_lo);
  enum RC dst_hi_rc = rc_class(dst_hi);
  enum RC dst_lo_rc = rc_class(dst_lo);

  assert(src_lo != OptoReg::Bad && dst_lo != OptoReg::Bad, "must move at least 1 register");
  if (src_hi != OptoReg::Bad)
    assert((src_lo&1)==0 && src_lo+1==src_hi &&
           (dst_lo&1)==0 && dst_lo+1==dst_hi,
           "expected aligned-adjacent pairs");
  // Generate spill code!
  int size = 0;

  if (src_lo == dst_lo && src_hi == dst_hi)
    return size;            // Self copy, no move.

  // 16-byte vector (VecX) copies are handled separately from scalar copies.
  if (bottom_type()->isa_vect() != nullptr && ideal_reg() == Op_VecX) {
    int src_offset = ra_->reg2offset(src_lo);
    int dst_offset = ra_->reg2offset(dst_lo);
    DEBUG_ONLY(int algm = MIN2(RegMask::num_registers(ideal_reg()), (int)Matcher::stack_alignment_in_slots()) * VMRegImpl::stack_slot_size);
    assert((src_lo_rc != rc_stack) || is_aligned(src_offset, algm), "unaligned vector spill sp offset %d (src)", src_offset);
    assert((dst_lo_rc != rc_stack) || is_aligned(dst_offset, algm), "unaligned vector spill sp offset %d (dst)", dst_offset);
    // Memory->Memory Spill.
    // Copied as two doublewords through R0.
    if (src_lo_rc == rc_stack && dst_lo_rc == rc_stack) {
      if (masm) {
        __ ld(R0, src_offset, R1_SP);
        __ std(R0, dst_offset, R1_SP);
        __ ld(R0, src_offset+8, R1_SP);
        __ std(R0, dst_offset+8, R1_SP);
      }
      size += 16;
#ifndef PRODUCT
      if (st != nullptr) {
        st->print("%-7s [R1_SP + #%d] -> [R1_SP + #%d] \t// vector spill copy", "SPILL", src_offset, dst_offset);
      }
#endif // !PRODUCT
    }
    // VectorRegister->Memory Spill.
    else if (src_lo_rc == rc_vec && dst_lo_rc == rc_stack) {
      VectorSRegister Rsrc = as_VectorRegister(Matcher::_regEncode[src_lo]).to_vsr();
      if (masm) {
        __ stxv(Rsrc, dst_offset, R1_SP); // matches storeV16
      }
      size += 4;
#ifndef PRODUCT
      if (st != nullptr) {
        st->print("%-7s %s, [R1_SP + #%d] \t// vector spill copy", "STXV", Matcher::regName[src_lo], dst_offset);
      }
#endif // !PRODUCT
    }
    // Memory->VectorRegister Spill.
    else if (src_lo_rc == rc_stack && dst_lo_rc == rc_vec) {
      VectorSRegister Rdst = as_VectorRegister(Matcher::_regEncode[dst_lo]).to_vsr();
      if (masm) {
        __ lxv(Rdst, src_offset, R1_SP);
      }
      size += 4;
#ifndef PRODUCT
      if (st != nullptr) {
        st->print("%-7s %s, [R1_SP + #%d] \t// vector spill copy", "LXV", Matcher::regName[dst_lo], src_offset);
      }
#endif // !PRODUCT
    }
    // VectorRegister->VectorRegister.
    // The register move is an xxlor of the source with itself.
    else if (src_lo_rc == rc_vec && dst_lo_rc == rc_vec) {
      VectorSRegister Rsrc = as_VectorRegister(Matcher::_regEncode[src_lo]).to_vsr();
      VectorSRegister Rdst = as_VectorRegister(Matcher::_regEncode[dst_lo]).to_vsr();
      if (masm) {
        __ xxlor(Rdst, Rsrc, Rsrc);
      }
      size += 4;
#ifndef PRODUCT
      if (st != nullptr) {
        st->print("%-7s %s, %s, %s\t// vector spill copy",
                  "XXLOR", Matcher::regName[dst_lo], Matcher::regName[src_lo], Matcher::regName[src_lo]);
      }
#endif // !PRODUCT
    }
    else {
      ShouldNotReachHere(); // No VR spill.
    }
    return size;
  }

  // In the scalar cases below, a valid src_hi selects the doubleword forms
  // (LD/STD/LFD/STFD); otherwise the word forms (LWZ/STW/LFS/STFS) are used.

  // --------------------------------------
  // Memory->Memory Spill. Use R0 to hold the value.
  if (src_lo_rc == rc_stack && dst_lo_rc == rc_stack) {
    int src_offset = ra_->reg2offset(src_lo);
    int dst_offset = ra_->reg2offset(dst_lo);
    if (src_hi != OptoReg::Bad) {
      assert(src_hi_rc==rc_stack && dst_hi_rc==rc_stack,
             "expected same type of move for high parts");
      size += ld_st_helper(masm, "LD  ", Assembler::LD_OPCODE,  R0_num, src_offset, !do_size, C, st);
      if (!masm && !do_size) st->print("\n\t");
      size += ld_st_helper(masm, "STD ", Assembler::STD_OPCODE, R0_num, dst_offset, !do_size, C, st);
    } else {
      size += ld_st_helper(masm, "LWZ ", Assembler::LWZ_OPCODE, R0_num, src_offset, !do_size, C, st);
      if (!masm && !do_size) st->print("\n\t");
      size += ld_st_helper(masm, "STW ", Assembler::STW_OPCODE, R0_num, dst_offset, !do_size, C, st);
    }
    return size;
  }

  // --------------------------------------
  // Check for float->int copy; requires a trip through memory.
  // Not implemented on this platform.
  if (src_lo_rc == rc_float && dst_lo_rc == rc_int) {
    Unimplemented();
  }

  // --------------------------------------
  // Check for integer reg-reg copy.
  if (src_lo_rc == rc_int && dst_lo_rc == rc_int) {
      Register Rsrc = as_Register(Matcher::_regEncode[src_lo]);
      Register Rdst = as_Register(Matcher::_regEncode[dst_lo]);
      // No instruction is needed when both encodings denote the same register.
      size = (Rsrc != Rdst) ? 4 : 0;

      if (masm) {
        if (size) {
          __ mr(Rdst, Rsrc);
        }
      }
#ifndef PRODUCT
      else if (!do_size) {
        if (size) {
          st->print("%-7s %s, %s \t// spill copy", "MR", Matcher::regName[dst_lo], Matcher::regName[src_lo]);
        } else {
          st->print("%-7s %s, %s \t// spill copy", "MR-NOP", Matcher::regName[dst_lo], Matcher::regName[src_lo]);
        }
      }
#endif
      return size;
  }

  // Check for integer store.
  if (src_lo_rc == rc_int && dst_lo_rc == rc_stack) {
    int dst_offset = ra_->reg2offset(dst_lo);
    if (src_hi != OptoReg::Bad) {
      assert(src_hi_rc==rc_int && dst_hi_rc==rc_stack,
             "expected same type of move for high parts");
      size += ld_st_helper(masm, "STD ", Assembler::STD_OPCODE, src_lo, dst_offset, !do_size, C, st);
    } else {
      size += ld_st_helper(masm, "STW ", Assembler::STW_OPCODE, src_lo, dst_offset, !do_size, C, st);
    }
    return size;
  }

  // Check for integer load.
  if (dst_lo_rc == rc_int && src_lo_rc == rc_stack) {
    int src_offset = ra_->reg2offset(src_lo);
    if (src_hi != OptoReg::Bad) {
      assert(dst_hi_rc==rc_int && src_hi_rc==rc_stack,
             "expected same type of move for high parts");
      size += ld_st_helper(masm, "LD  ", Assembler::LD_OPCODE, dst_lo, src_offset, !do_size, C, st);
    } else {
      size += ld_st_helper(masm, "LWZ ", Assembler::LWZ_OPCODE, dst_lo, src_offset, !do_size, C, st);
    }
    return size;
  }

  // Check for float reg-reg copy.
  if (src_lo_rc == rc_float && dst_lo_rc == rc_float) {
    if (masm) {
      FloatRegister Rsrc = as_FloatRegister(Matcher::_regEncode[src_lo]);
      FloatRegister Rdst = as_FloatRegister(Matcher::_regEncode[dst_lo]);
      __ fmr(Rdst, Rsrc);
    }
#ifndef PRODUCT
    else if (!do_size) {
      st->print("%-7s %s, %s \t// spill copy", "FMR", Matcher::regName[dst_lo], Matcher::regName[src_lo]);
    }
#endif
    return 4;
  }

  // Check for float store.
  if (src_lo_rc == rc_float && dst_lo_rc == rc_stack) {
    int dst_offset = ra_->reg2offset(dst_lo);
    if (src_hi != OptoReg::Bad) {
      assert(src_hi_rc==rc_float && dst_hi_rc==rc_stack,
             "expected same type of move for high parts");
      size += ld_st_helper(masm, "STFD", Assembler::STFD_OPCODE, src_lo, dst_offset, !do_size, C, st);
    } else {
      size += ld_st_helper(masm, "STFS", Assembler::STFS_OPCODE, src_lo, dst_offset, !do_size, C, st);
    }
    return size;
  }

  // Check for float load.
  if (dst_lo_rc == rc_float && src_lo_rc == rc_stack) {
    int src_offset = ra_->reg2offset(src_lo);
    if (src_hi != OptoReg::Bad) {
      assert(dst_hi_rc==rc_float && src_hi_rc==rc_stack,
             "expected same type of move for high parts");
      size += ld_st_helper(masm, "LFD ", Assembler::LFD_OPCODE, dst_lo, src_offset, !do_size, C, st);
    } else {
      size += ld_st_helper(masm, "LFS ", Assembler::LFS_OPCODE, dst_lo, src_offset, !do_size, C, st);
    }
    return size;
  }

  // --------------------------------------------------------------------
  // Check for hi bits still needing moving. Only happens for misaligned
  // arguments to native calls.
  if (src_hi == dst_hi)
    return size;               // Self copy; no move.

  // No remaining combination is implemented.
  assert(src_hi_rc != rc_bad && dst_hi_rc != rc_bad, "src_hi & dst_hi cannot be Bad");
  ShouldNotReachHere(); // Unimplemented
  return 0;
}
 1902 
 1903 #ifndef PRODUCT
 1904 void MachSpillCopyNode::format(PhaseRegAlloc *ra_, outputStream *st) const {
 1905   if (!ra_)
 1906     st->print("N%d = SpillCopy(N%d)", _idx, in(1)->_idx);
 1907   else
 1908     implementation(nullptr, ra_, false, st);
 1909 }
 1910 #endif
 1911 
// Emit the spill copy: implementation() is run with an assembler, with
// do_size == false and without an output stream.
void MachSpillCopyNode::emit(C2_MacroAssembler *masm, PhaseRegAlloc *ra_) const {
  implementation(masm, ra_, false, nullptr);
}
 1915 
// Size of the spill copy: implementation() is run in sizing mode
// (do_size == true) with neither an assembler nor an output stream.
uint MachSpillCopyNode::size(PhaseRegAlloc *ra_) const {
  return implementation(nullptr, ra_, true, nullptr);
}
 1919 
 1920 #ifndef PRODUCT
// Print the nop node together with the number of nops (_count) it stands for.
void MachNopNode::format(PhaseRegAlloc *ra_, outputStream *st) const {
  st->print("NOP \t// %d nops to pad for loops or prefixed instructions.", _count);
}
 1924 #endif
 1925 
 1926 void MachNopNode::emit(C2_MacroAssembler *masm, PhaseRegAlloc *) const {
 1927   // _count contains the number of nops needed for padding.
 1928   for (int i = 0; i < _count; i++) {
 1929     __ nop();
 1930   }
 1931 }
 1932 
// Size in bytes of the padding: 4 bytes for each of the _count nops.
uint MachNopNode::size(PhaseRegAlloc *ra_) const {
  return _count * 4;
}
 1936 
 1937 #ifndef PRODUCT
 1938 void BoxLockNode::format(PhaseRegAlloc *ra_, outputStream *st) const {
 1939   int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
 1940   char reg_str[128];
 1941   ra_->dump_register(this, reg_str, sizeof(reg_str));
 1942   st->print("ADDI    %s, SP, %d \t// box node", reg_str, offset);
 1943 }
 1944 #endif
 1945 
 1946 void BoxLockNode::emit(C2_MacroAssembler *masm, PhaseRegAlloc *ra_) const {
 1947   int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
 1948   int reg    = ra_->get_encode(this);
 1949 
 1950   if (Assembler::is_simm(offset, 16)) {
 1951     __ addi(as_Register(reg), R1, offset);
 1952   } else {
 1953     ShouldNotReachHere();
 1954   }
 1955 }
 1956 
// Fixed size of the box node code: emit() produces a single addi.
uint BoxLockNode::size(PhaseRegAlloc *ra_) const {
  // BoxLockNode is not a MachNode, so we can't just call MachNode::size(ra_).
  return 4;
}
 1961 
 1962 #ifndef PRODUCT
// Print a placeholder for the unverified entry point; the emitted
// instructions are not listed.
void MachUEPNode::format(PhaseRegAlloc *ra_, outputStream *st) const {
  st->print_cr("---- MachUEPNode ----");
  st->print_cr("...");
}
 1967 #endif
 1968 
// Emit the unverified entry point via ic_check(), passing CodeEntryAlignment.
void MachUEPNode::emit(C2_MacroAssembler *masm, PhaseRegAlloc *ra_) const {
  // This is the unverified entry point.
  __ ic_check(CodeEntryAlignment);
  // Argument is valid and klass is as expected, continue.
}
 1974 
// Size of the unverified entry point code; not a fixed value for this node.
uint MachUEPNode::size(PhaseRegAlloc *ra_) const {
  // Variable size. Determine dynamically.
  return MachNode::size(ra_);
}
 1979 
 1980 //=============================================================================
 1981 
 1982 %} // interrupt source
 1983 
 1984 source_hpp %{ // Header information of the source block.
 1985 
// Platform-specific emitter for the deoptimization handler stub.
class HandlerImpl {

 public:

  // Emits the deopt handler stub; defined in the source block of this file.
  static int emit_deopt_handler(C2_MacroAssembler* masm);

  // Fixed size in bytes of the deopt handler stub.
  static uint size_deopt_handler() {
    // The deopt_handler is a bl64_patchable followed by one more instruction
    // (the branch emitted after the call in emit_deopt_handler()).
    return MacroAssembler::bl64_patchable_size + BytesPerInstWord;
  }

};
 1998 
// Platform-dependent node flags. _last_flag equals Node::_last_flag, i.e.
// no additional flags are defined here.
class Node::PD {
public:
  enum NodeFlags {
    _last_flag = Node::_last_flag
  };
};
 2005 
 2006 %} // end source_hpp
 2007 
 2008 source %{
 2009 
 2010 // The deopt_handler is like the exception handler, but it calls to
 2011 // the deoptimization blob instead of jumping to the exception blob.
 2012 int HandlerImpl::emit_deopt_handler(C2_MacroAssembler* masm) {
 2013   address base = __ start_a_stub(size_deopt_handler());
 2014   if (base == nullptr) {
 2015     ciEnv::current()->record_failure("CodeCache is full");
 2016     return 0;  // CodeBuffer::expand failed
 2017   }
 2018 
 2019   int offset = __ offset();
 2020 
 2021   Label start;
 2022   __ bind(start);
 2023 
 2024   __ bl64_patchable((address)SharedRuntime::deopt_blob()->unpack(),
 2025                         relocInfo::runtime_call_type);
 2026 
 2027   int entry_offset = __ offset();
 2028 
 2029   __ b(start);
 2030 
 2031   assert(__ offset() - offset == (int) size_deopt_handler(), "must be fixed size");
 2032   assert(__ offset() - entry_offset >= NativePostCallNop::first_check_size,
 2033          "out of bounds read in post-call NOP check");
 2034   __ end_a_stub();
 2035 
 2036   return entry_offset;
 2037 }
 2038 
 2039 //=============================================================================
 2040 
 2041 // Use a frame slots bias for frameless methods if accessing the stack.
 2042 static int frame_slots_bias(int reg_enc, PhaseRegAlloc* ra_) {
 2043   if (as_Register(reg_enc) == R1_SP) {
 2044     return 0; // TODO: PPC port ra_->C->frame_slots_sp_bias_in_bytes();
 2045   }
 2046   return 0;
 2047 }
 2048 
 2049 bool Matcher::match_rule_supported(int opcode) {
 2050   if (!has_match_rule(opcode)) {
 2051     return false; // no match rule present
 2052   }
 2053 
 2054   switch (opcode) {
 2055     case Op_CountLeadingZerosI:
 2056     case Op_CountLeadingZerosL:
 2057       return UseCountLeadingZerosInstructionsPPC64;
 2058     case Op_CountTrailingZerosI:
 2059     case Op_CountTrailingZerosL:
 2060       return (UseCountLeadingZerosInstructionsPPC64 || UseCountTrailingZerosInstructionsPPC64);
 2061     case Op_PopCountI:
 2062     case Op_PopCountL:
 2063       return UsePopCountInstruction;
 2064     case Op_ConvF2HF:
 2065     case Op_ConvHF2F:
 2066       return VM_Version::supports_float16();
 2067     case Op_AddVB:
 2068     case Op_AddVS:
 2069     case Op_AddVI:
 2070     case Op_AddVF:
 2071     case Op_AddVD:
 2072     case Op_SubVB:
 2073     case Op_SubVS:
 2074     case Op_SubVI:
 2075     case Op_SubVF:
 2076     case Op_SubVD:
 2077     case Op_MulVS:
 2078     case Op_MulVF:
 2079     case Op_MulVD:
 2080     case Op_DivVF:
 2081     case Op_DivVD:
 2082     case Op_AbsVF:
 2083     case Op_AbsVD:
 2084     case Op_NegVI:
 2085     case Op_NegVF:
 2086     case Op_NegVD:
 2087     case Op_SqrtVF:
 2088     case Op_SqrtVD:
 2089     case Op_AddVL:
 2090     case Op_SubVL:
 2091     case Op_MulVI:
 2092     case Op_RoundDoubleModeV:
 2093     case Op_MinV:
 2094     case Op_MaxV:
 2095     case Op_UMinV:
 2096     case Op_UMaxV:
 2097     case Op_AndV:
 2098     case Op_OrV:
 2099     case Op_XorV:
 2100     case Op_AddReductionVI:
 2101     case Op_MulReductionVI:
 2102     case Op_AndReductionV:
 2103     case Op_OrReductionV:
 2104     case Op_XorReductionV:
 2105     case Op_MinReductionV:
 2106     case Op_MaxReductionV:
 2107       return SuperwordUseVSX;
 2108     case Op_PopCountVI:
 2109     case Op_PopCountVL:
 2110       return (SuperwordUseVSX && UsePopCountInstruction);
 2111     case Op_CountLeadingZerosV:
 2112       return SuperwordUseVSX && UseCountLeadingZerosInstructionsPPC64;
 2113     case Op_CountTrailingZerosV:
 2114       return SuperwordUseVSX && UseCountTrailingZerosInstructionsPPC64;
 2115     case Op_FmaF:
 2116     case Op_FmaD:
 2117       return UseFMA;
 2118     case Op_FmaVF:
 2119     case Op_FmaVD:
 2120       return (SuperwordUseVSX && UseFMA);
 2121 
 2122     case Op_MinF:
 2123     case Op_MaxF:
 2124     case Op_MinD:
 2125     case Op_MaxD:
 2126       return (PowerArchitecturePPC64 >= 9);
 2127 
 2128     case Op_Digit:
 2129       return vmIntrinsics::is_intrinsic_available(vmIntrinsics::_isDigit);
 2130     case Op_LowerCase:
 2131       return vmIntrinsics::is_intrinsic_available(vmIntrinsics::_isLowerCase);
 2132     case Op_UpperCase:
 2133       return vmIntrinsics::is_intrinsic_available(vmIntrinsics::_isUpperCase);
 2134     case Op_Whitespace:
 2135       return vmIntrinsics::is_intrinsic_available(vmIntrinsics::_isWhitespace);
 2136 
 2137     case Op_CacheWB:
 2138     case Op_CacheWBPreSync:
 2139     case Op_CacheWBPostSync:
 2140       return VM_Version::supports_data_cache_line_flush();
 2141 
 2142     case Op_OnSpinWait:
 2143       return VM_Version::supports_on_spin_wait();
 2144   }
 2145 
 2146   return true; // Per default match rules are supported.
 2147 }
 2148 
// Auto-vectorization uses the same answer as match_rule_supported_vector().
bool Matcher::match_rule_supported_auto_vectorization(int opcode, int vlen, BasicType bt) {
  return match_rule_supported_vector(opcode, vlen, bt);
}
 2152 
 2153 bool Matcher::match_rule_supported_vector(int opcode, int vlen, BasicType bt) {
 2154   if (!match_rule_supported(opcode) || !vector_size_supported(bt, vlen)) {
 2155     return false;
 2156   }
 2157   // Special cases
 2158   switch (opcode) {
 2159     // Reductions only support INT at the moment.
 2160     case Op_AddReductionVI:
 2161     case Op_MulReductionVI:
 2162     case Op_AndReductionV:
 2163     case Op_OrReductionV:
 2164     case Op_XorReductionV:
 2165     case Op_MinReductionV:
 2166     case Op_MaxReductionV:
 2167       return bt == T_INT;
 2168     // MaxV, MinV need types == INT || LONG.
 2169     case Op_MaxV:
 2170     case Op_MinV:
 2171     case Op_UMinV:
 2172     case Op_UMaxV:
 2173       return bt == T_INT || bt == T_LONG;
 2174     case Op_NegVI:
 2175       return bt == T_INT;
 2176   }
 2177   return true; // Per default match rules are supported.
 2178 }
 2179 
// No masked vector match rule is reported as supported.
bool Matcher::match_rule_supported_vector_masked(int opcode, int vlen, BasicType bt) {
  return false;
}
 2183 
// Partial vector operations are never requested by this platform.
bool Matcher::vector_needs_partial_operations(Node* node, const TypeVect* vt) {
  return false;
}
 2187 
// Always answers false, independent of element type and vector length.
bool Matcher::vector_rearrange_requires_load_shuffle(BasicType elem_bt, int vlen) {
  return false;
}
 2191 
// Always answers false, independent of opcode and vector type.
bool Matcher::mask_op_prefers_predicate(int opcode, const TypeVect* vt) {
  return false;
}
 2195 
// No predicate register mask is provided; callers receive nullptr.
const RegMask* Matcher::predicate_reg_mask(void) {
  return nullptr;
}
 2199 
// Vector calling convention not yet implemented.
// See also vector_return_value(), which is Unimplemented() accordingly.
bool Matcher::supports_vector_calling_convention(void) {
  return false;
}
 2204 
// Not implemented. The return statement after Unimplemented() only supplies
// a placeholder value.
OptoRegPair Matcher::vector_return_value(uint ideal_reg) {
  Unimplemented();
  return OptoRegPair(0, 0);
}
 2209 
 2210 // Vector width in bytes.
 2211 int Matcher::vector_width_in_bytes(BasicType bt) {
 2212   if (SuperwordUseVSX) {
 2213     assert(MaxVectorSize == 16,
 2214            "SuperwordUseVSX requires MaxVectorSize 16, got " INT64_FORMAT, (int64_t)MaxVectorSize);
 2215     return 16;
 2216   } else {
 2217     assert(MaxVectorSize == 8,
 2218            "expected MaxVectorSize 8, got " INT64_FORMAT, (int64_t)MaxVectorSize);
 2219     return 8;
 2220   }
 2221 }
 2222 
 2223 // Vector ideal reg.
 2224 uint Matcher::vector_ideal_reg(int size) {
 2225   if (SuperwordUseVSX) {
 2226     assert(MaxVectorSize == 16 && size == 16,
 2227            "SuperwordUseVSX requires MaxVectorSize 16 and size 16, got MaxVectorSize=" INT64_FORMAT ", size=%d",
 2228            (int64_t)MaxVectorSize, size);
 2229     return Op_VecX;
 2230   } else {
 2231     assert(MaxVectorSize == 8 && size == 8,
 2232            "expected MaxVectorSize 8 and size 8, got MaxVectorSize=" INT64_FORMAT ", size=%d",
 2233            (int64_t)MaxVectorSize, size);
 2234     return Op_RegL;
 2235   }
 2236 }
 2237 
 2238 // Limits on vector size (number of elements) loaded into vector.
 2239 int Matcher::max_vector_size(const BasicType bt) {
 2240   assert(is_java_primitive(bt), "only primitive type vectors");
 2241   return vector_width_in_bytes(bt)/type2aelembytes(bt);
 2242 }
 2243 
// Minimum number of elements per vector; only full-width vectors are used.
int Matcher::min_vector_size(const BasicType bt) {
  return max_vector_size(bt); // Same as max.
}
 2247 
// Auto-vectorization uses the same limit as max_vector_size().
int Matcher::max_vector_size_auto_vectorization(const BasicType bt) {
  return Matcher::max_vector_size(bt);
}
 2251 
// Always returns -1, independent of the element type.
// NOTE(review): presumably -1 means "no scalable vector registers" - confirm
// against the shared Matcher interface.
int Matcher::scalable_vector_reg_size(const BasicType bt) {
  return -1;
}
 2255 
 2256 // RETURNS: whether this branch offset is short enough that a short
 2257 // branch can be used.
 2258 //
 2259 // If the platform does not provide any short branch variants, then
 2260 // this method should return `false' for offset 0.
 2261 //
 2262 // `Compile::Fill_buffer' will decide on basis of this information
 2263 // whether to do the pass `Compile::Shorten_branches' at all.
 2264 //
 2265 // And `Compile::Shorten_branches' will decide on basis of this
 2266 // information whether to replace particular branch sites by short
 2267 // ones.
 2268 bool Matcher::is_short_branch_offset(int rule, int br_size, int offset) {
 2269   // Is the offset within the range of a ppc64 pc relative branch?
 2270   bool b;
 2271 
 2272   const int safety_zone = 3 * BytesPerInstWord;
 2273   b = Assembler::is_simm((offset<0 ? offset-safety_zone : offset+safety_zone),
 2274                          29 - 16 + 1 + 2);
 2275   return b;
 2276 }
 2277 
 2278 /* TODO: PPC port
 2279 // Make a new machine dependent decode node (with its operands).
 2280 MachTypeNode *Matcher::make_decode_node() {
 2281   assert(CompressedOops::base() == nullptr && CompressedOops::shift() == 0,
 2282          "This method is only implemented for unscaled cOops mode so far");
 2283   MachTypeNode *decode = new decodeN_unscaledNode();
 2284   decode->set_opnd_array(0, new iRegPdstOper());
 2285   decode->set_opnd_array(1, new iRegNsrcOper());
 2286   return decode;
 2287 }
 2288 */
 2289 
// Must not be called on this platform; the return statement only supplies
// a placeholder value.
MachOper* Matcher::pd_specialize_generic_vector_operand(MachOper* original_opnd, uint ideal_reg, bool is_temp) {
  ShouldNotReachHere(); // generic vector operands not supported
  return nullptr;
}
 2294 
// Must not be called on this platform; the return statement only supplies
// a placeholder value.
bool Matcher::is_reg2reg_move(MachNode* m) {
  ShouldNotReachHere();  // generic vector operands not supported
  return false;
}
 2299 
// No node/operand combination is reported as a register biasing candidate.
bool Matcher::is_register_biasing_candidate(const MachNode* mdef, int oper_index) {
  return false;
}
 2303 
// Must not be called on this platform; the return statement only supplies
// a placeholder value.
bool Matcher::is_generic_vector(MachOper* opnd)  {
  ShouldNotReachHere();  // generic vector operands not supported
  return false;
}
 2308 
 2309 #ifdef ASSERT
 2310 // Return whether or not this register is ever used as an argument.
 2311 bool Matcher::can_be_java_arg(int reg) {
 2312   // We must include the virtual halves in order to get STDs and LDs
 2313   // instead of STWs and LWs in the trampoline stubs.
 2314 
 2315   if (   reg == R3_num  || reg == R3_H_num
 2316       || reg == R4_num  || reg == R4_H_num
 2317       || reg == R5_num  || reg == R5_H_num
 2318       || reg == R6_num  || reg == R6_H_num
 2319       || reg == R7_num  || reg == R7_H_num
 2320       || reg == R8_num  || reg == R8_H_num
 2321       || reg == R9_num  || reg == R9_H_num
 2322       || reg == R10_num || reg == R10_H_num)
 2323     return true;
 2324 
 2325   if (   reg == F1_num  || reg == F1_H_num
 2326       || reg == F2_num  || reg == F2_H_num
 2327       || reg == F3_num  || reg == F3_H_num
 2328       || reg == F4_num  || reg == F4_H_num
 2329       || reg == F5_num  || reg == F5_H_num
 2330       || reg == F6_num  || reg == F6_H_num
 2331       || reg == F7_num  || reg == F7_H_num
 2332       || reg == F8_num  || reg == F8_H_num
 2333       || reg == F9_num  || reg == F9_H_num
 2334       || reg == F10_num || reg == F10_H_num
 2335       || reg == F11_num || reg == F11_H_num
 2336       || reg == F12_num || reg == F12_H_num
 2337       || reg == F13_num || reg == F13_H_num)
 2338     return true;
 2339 
 2340   return false;
 2341 }
 2342 #endif
 2343 
 2344 uint Matcher::int_pressure_limit()
 2345 {
 2346   return (INTPRESSURE == -1) ? 26 : INTPRESSURE;
 2347 }
 2348 
 2349 uint Matcher::float_pressure_limit()
 2350 {
 2351   return (FLOATPRESSURE == -1) ? 28 : FLOATPRESSURE;
 2352 }
 2353 
// Register for DIVI projection of divmodI.
// Must not be called on this platform; RegMask::EMPTY is only a placeholder.
const RegMask& Matcher::divI_proj_mask() {
  ShouldNotReachHere();
  return RegMask::EMPTY;
}
 2359 
// Register for MODI projection of divmodI.
// Must not be called on this platform; RegMask::EMPTY is only a placeholder.
const RegMask& Matcher::modI_proj_mask() {
  ShouldNotReachHere();
  return RegMask::EMPTY;
}
 2365 
// Register for DIVL projection of divmodL.
// Must not be called on this platform; RegMask::EMPTY is only a placeholder.
const RegMask& Matcher::divL_proj_mask() {
  ShouldNotReachHere();
  return RegMask::EMPTY;
}
 2371 
// Register for MODL projection of divmodL.
// Must not be called on this platform; RegMask::EMPTY is only a placeholder.
const RegMask& Matcher::modL_proj_mask() {
  ShouldNotReachHere();
  return RegMask::EMPTY;
}
 2377 
 2378 %}
 2379 
 2380 //----------ENCODING BLOCK-----------------------------------------------------
 2381 // This block specifies the encoding classes used by the compiler to output
 2382 // byte streams. Encoding classes are parameterized macros used by
 2383 // Machine Instruction Nodes in order to generate the bit encoding of the
 2384 // instruction. Operands specify their base encoding interface with the
// interface keyword. Four interfaces are currently supported:
 2386 // REG_INTER, CONST_INTER, MEMORY_INTER, & COND_INTER. REG_INTER causes an
 2387 // operand to generate a function which returns its register number when
 2388 // queried. CONST_INTER causes an operand to generate a function which
 2389 // returns the value of the constant when queried. MEMORY_INTER causes an
 2390 // operand to generate four functions which return the Base Register, the
 2391 // Index Register, the Scale Value, and the Offset Value of the operand when
 2392 // queried. COND_INTER causes an operand to generate six functions which
 2393 // return the encoding code (ie - encoding bits for the instruction)
 2394 // associated with each basic boolean condition for a conditional instruction.
 2395 //
 2396 // Instructions specify two basic values for encoding. Again, a function
 2397 // is available to check if the constant displacement is an oop. They use the
 2398 // ins_encode keyword to specify their encoding classes (which must be
 2399 // a sequence of enc_class names, and their parameters, specified in
 2400 // the encoding block), and they use the
 2401 // opcode keyword to specify, in order, their primary, secondary, and
 2402 // tertiary opcode. Only the opcode sections which a particular instruction
 2403 // needs for encoding need to be specified.
 2404 encode %{
  // Placeholder encoding for mach nodes without a real encoding: calls the
  // assembler's unimplemented() with the message below and the value 13.
  enc_class enc_unimplemented %{
    __ unimplemented("Unimplemented mach node encoding in AD file.", 13);
  %}
 2408 
 2409   enc_class enc_untested %{
 2410 #ifdef ASSERT
 2411     __ untested("Untested mach node encoding in AD file.");
 2412 #else
 2413 #endif
 2414   %}
 2415 
 2416   enc_class enc_lbz(iRegIdst dst, memory mem) %{
 2417     int Idisp = $mem$$disp + frame_slots_bias($mem$$base, ra_);
 2418     __ lbz($dst$$Register, Idisp, $mem$$base$$Register);
 2419   %}
 2420 
 2421   // Load acquire.
 2422   enc_class enc_lbz_ac(iRegIdst dst, memory mem) %{
 2423     int Idisp = $mem$$disp + frame_slots_bias($mem$$base, ra_);
 2424     __ lbz($dst$$Register, Idisp, $mem$$base$$Register);
 2425     __ twi_0($dst$$Register);
 2426     __ isync();
 2427   %}
 2428 
 2429   enc_class enc_lhz(iRegIdst dst, memory mem) %{
 2430     int Idisp = $mem$$disp + frame_slots_bias($mem$$base, ra_);
 2431     __ lhz($dst$$Register, Idisp, $mem$$base$$Register);
 2432   %}
 2433 
 2434   // Load acquire.
 2435   enc_class enc_lhz_ac(iRegIdst dst, memory mem) %{
 2436     int Idisp = $mem$$disp + frame_slots_bias($mem$$base, ra_);
 2437     __ lhz($dst$$Register, Idisp, $mem$$base$$Register);
 2438     __ twi_0($dst$$Register);
 2439     __ isync();
 2440   %}
 2441 
 2442   enc_class enc_lwz(iRegIdst dst, memory mem) %{
 2443     int Idisp = $mem$$disp + frame_slots_bias($mem$$base, ra_);
 2444     __ lwz($dst$$Register, Idisp, $mem$$base$$Register);
 2445   %}
 2446 
 2447   // Load acquire.
 2448   enc_class enc_lwz_ac(iRegIdst dst, memory mem) %{
 2449     int Idisp = $mem$$disp + frame_slots_bias($mem$$base, ra_);
 2450     __ lwz($dst$$Register, Idisp, $mem$$base$$Register);
 2451     __ twi_0($dst$$Register);
 2452     __ isync();
 2453   %}
 2454 
 2455   enc_class enc_ld(iRegLdst dst, memoryAlg4 mem) %{
 2456     int Idisp = $mem$$disp + frame_slots_bias($mem$$base, ra_);
 2457     // Operand 'ds' requires 4-alignment.
 2458     assert((Idisp & 0x3) == 0, "unaligned offset");
 2459     __ ld($dst$$Register, Idisp, $mem$$base$$Register);
 2460   %}
 2461 
 2462   // Load acquire.
 2463   enc_class enc_ld_ac(iRegLdst dst, memoryAlg4 mem) %{
 2464     int Idisp = $mem$$disp + frame_slots_bias($mem$$base, ra_);
 2465     // Operand 'ds' requires 4-alignment.
 2466     assert((Idisp & 0x3) == 0, "unaligned offset");
 2467     __ ld($dst$$Register, Idisp, $mem$$base$$Register);
 2468     __ twi_0($dst$$Register);
 2469     __ isync();
 2470   %}
 2471 
 2472   enc_class enc_lfd(RegF dst, memory mem) %{
 2473     int Idisp = $mem$$disp + frame_slots_bias($mem$$base, ra_);
 2474     __ lfd($dst$$FloatRegister, Idisp, $mem$$base$$Register);
 2475   %}
 2476 
 2477   enc_class enc_load_long_constL(iRegLdst dst, immL src, iRegLdst toc) %{
 2478     int toc_offset = 0;
 2479 
 2480     address const_toc_addr;
 2481     // Create a non-oop constant, no relocation needed.
 2482     // If it is an IC, it has a virtual_call_Relocation.
 2483     const_toc_addr = __ long_constant((jlong)$src$$constant);
 2484     if (const_toc_addr == nullptr) {
 2485       ciEnv::current()->record_out_of_memory_failure();
 2486       return;
 2487     }
 2488 
 2489     // Get the constant's TOC offset.
 2490     toc_offset = __ offset_to_method_toc(const_toc_addr);
 2491 
 2492     // Keep the current instruction offset in mind.
 2493     ((loadConLNode*)this)->_cbuf_insts_offset = __ offset();
 2494 
 2495     __ ld($dst$$Register, toc_offset, $toc$$Register);
 2496   %}
 2497 
 2498   enc_class enc_load_long_constL_hi(iRegLdst dst, iRegLdst toc, immL src) %{
 2499     if (!ra_->C->output()->in_scratch_emit_size()) {
 2500       address const_toc_addr;
 2501       // Create a non-oop constant, no relocation needed.
 2502       // If it is an IC, it has a virtual_call_Relocation.
 2503       const_toc_addr = __ long_constant((jlong)$src$$constant);
 2504       if (const_toc_addr == nullptr) {
 2505         ciEnv::current()->record_out_of_memory_failure();
 2506         return;
 2507       }
 2508 
 2509       // Get the constant's TOC offset.
 2510       const int toc_offset = __ offset_to_method_toc(const_toc_addr);
 2511       // Store the toc offset of the constant.
 2512       ((loadConL_hiNode*)this)->_const_toc_offset = toc_offset;
 2513 
 2514       // Also keep the current instruction offset in mind.
 2515       ((loadConL_hiNode*)this)->_cbuf_insts_offset = __ offset();
 2516     }
 2517 
 2518     __ addis($dst$$Register, $toc$$Register, MacroAssembler::largeoffset_si16_si16_hi(_const_toc_offset));
 2519   %}
 2520 
 2521 %} // encode
 2522 
 2523 source %{
 2524 
 2525 typedef struct {
 2526   loadConL_hiNode *_large_hi;
 2527   loadConL_loNode *_large_lo;
 2528   loadConLNode    *_small;
 2529   MachNode        *_last;
 2530 } loadConLNodesTuple;
 2531 
 2532 loadConLNodesTuple loadConLNodesTuple_create(PhaseRegAlloc *ra_, Node *toc, immLOper *immSrc,
 2533                                              OptoReg::Name reg_second, OptoReg::Name reg_first) {
 2534   loadConLNodesTuple nodes;
 2535 
 2536   const bool large_constant_pool = true; // TODO: PPC port C->cfg()->_consts_size > 4000;
 2537   if (large_constant_pool) {
 2538     // Create new nodes.
 2539     loadConL_hiNode *m1 = new loadConL_hiNode();
 2540     loadConL_loNode *m2 = new loadConL_loNode();
 2541 
 2542     // inputs for new nodes
 2543     m1->add_req(nullptr, toc);
 2544     m2->add_req(nullptr, m1);
 2545 
 2546     // operands for new nodes
 2547     m1->_opnds[0] = new iRegLdstOper(); // dst
 2548     m1->_opnds[1] = immSrc;             // src
 2549     m1->_opnds[2] = new iRegLdstOper(); // toc
 2550     m2->_opnds[0] = new iRegLdstOper(); // dst
 2551     m2->_opnds[1] = immSrc;             // src
 2552     m2->_opnds[2] = new iRegLdstOper(); // base
 2553 
 2554     // Initialize ins_attrib TOC fields.
 2555     m1->_const_toc_offset = -1;
 2556     m2->_const_toc_offset_hi_node = m1;
 2557 
 2558     // Initialize ins_attrib instruction offset.
 2559     m1->_cbuf_insts_offset = -1;
 2560 
 2561     // register allocation for new nodes
 2562     ra_->set_pair(m1->_idx, reg_second, reg_first);
 2563     ra_->set_pair(m2->_idx, reg_second, reg_first);
 2564 
 2565     // Create result.
 2566     nodes._large_hi = m1;
 2567     nodes._large_lo = m2;
 2568     nodes._small = nullptr;
 2569     nodes._last = nodes._large_lo;
 2570     assert(m2->bottom_type()->isa_long(), "must be long");
 2571   } else {
 2572     loadConLNode *m2 = new loadConLNode();
 2573 
 2574     // inputs for new nodes
 2575     m2->add_req(nullptr, toc);
 2576 
 2577     // operands for new nodes
 2578     m2->_opnds[0] = new iRegLdstOper(); // dst
 2579     m2->_opnds[1] = immSrc;             // src
 2580     m2->_opnds[2] = new iRegLdstOper(); // toc
 2581 
 2582     // Initialize ins_attrib instruction offset.
 2583     m2->_cbuf_insts_offset = -1;
 2584 
 2585     // register allocation for new nodes
 2586     ra_->set_pair(m2->_idx, reg_second, reg_first);
 2587 
 2588     // Create result.
 2589     nodes._large_hi = nullptr;
 2590     nodes._large_lo = nullptr;
 2591     nodes._small = m2;
 2592     nodes._last = nodes._small;
 2593     assert(m2->bottom_type()->isa_long(), "must be long");
 2594   }
 2595 
 2596   return nodes;
 2597 }
 2598 
 2599 typedef struct {
 2600   loadConL_hiNode *_large_hi;
 2601   loadConL_loNode *_large_lo;
 2602   mtvsrdNode      *_moved;
 2603   xxspltdNode     *_replicated;
 2604   loadConLNode    *_small;
 2605   MachNode        *_last;
 2606 } loadConLReplicatedNodesTuple;
 2607 
 2608 loadConLReplicatedNodesTuple loadConLReplicatedNodesTuple_create(Compile *C, PhaseRegAlloc *ra_, Node *toc, immLOper *immSrc,
 2609                                                  vecXOper *dst, immI_0Oper *zero,
 2610                                                  OptoReg::Name reg_second, OptoReg::Name reg_first,
 2611                                                  OptoReg::Name reg_vec_second, OptoReg::Name reg_vec_first) {
 2612   loadConLReplicatedNodesTuple nodes;
 2613 
 2614   const bool large_constant_pool = true; // TODO: PPC port C->cfg()->_consts_size > 4000;
 2615   if (large_constant_pool) {
 2616     // Create new nodes.
 2617     loadConL_hiNode *m1 = new  loadConL_hiNode();
 2618     loadConL_loNode *m2 = new  loadConL_loNode();
 2619     mtvsrdNode *m3 = new  mtvsrdNode();
 2620     xxspltdNode *m4 = new  xxspltdNode();
 2621 
 2622     // inputs for new nodes
 2623     m1->add_req(nullptr, toc);
 2624     m2->add_req(nullptr, m1);
 2625     m3->add_req(nullptr, m2);
 2626     m4->add_req(nullptr, m3);
 2627 
 2628     // operands for new nodes
 2629     m1->_opnds[0] = new  iRegLdstOper(); // dst
 2630     m1->_opnds[1] = immSrc;              // src
 2631     m1->_opnds[2] = new  iRegLdstOper(); // toc
 2632 
 2633     m2->_opnds[0] = new  iRegLdstOper(); // dst
 2634     m2->_opnds[1] = immSrc;              // src
 2635     m2->_opnds[2] = new  iRegLdstOper(); // base
 2636 
 2637     m3->_opnds[0] = new  vecXOper();     // dst
 2638     m3->_opnds[1] = new  iRegLdstOper(); // src
 2639 
 2640     m4->_opnds[0] = new  vecXOper();     // dst
 2641     m4->_opnds[1] = new  vecXOper();     // src
 2642     m4->_opnds[2] = zero;
 2643 
 2644     // Initialize ins_attrib TOC fields.
 2645     m1->_const_toc_offset = -1;
 2646     m2->_const_toc_offset_hi_node = m1;
 2647 
 2648     // Initialize ins_attrib instruction offset.
 2649     m1->_cbuf_insts_offset = -1;
 2650 
 2651     // register allocation for new nodes
 2652     ra_->set_pair(m1->_idx, reg_second, reg_first);
 2653     ra_->set_pair(m2->_idx, reg_second, reg_first);
 2654     ra_->set1(m3->_idx, reg_second);
 2655     ra_->set2(m3->_idx, reg_vec_first);
 2656     ra_->set_pair(m4->_idx, reg_vec_second, reg_vec_first);
 2657 
 2658     // Create result.
 2659     nodes._large_hi = m1;
 2660     nodes._large_lo = m2;
 2661     nodes._moved = m3;
 2662     nodes._replicated = m4;
 2663     nodes._small = nullptr;
 2664     nodes._last = nodes._replicated;
 2665     assert(m2->bottom_type()->isa_long(), "must be long");
 2666   } else {
 2667     loadConLNode *m2 = new  loadConLNode();
 2668     mtvsrdNode *m3 = new  mtvsrdNode();
 2669     xxspltdNode *m4 = new  xxspltdNode();
 2670 
 2671     // inputs for new nodes
 2672     m2->add_req(nullptr, toc);
 2673 
 2674     // operands for new nodes
 2675     m2->_opnds[0] = new  iRegLdstOper(); // dst
 2676     m2->_opnds[1] = immSrc;              // src
 2677     m2->_opnds[2] = new  iRegLdstOper(); // toc
 2678 
 2679     m3->_opnds[0] = new  vecXOper();     // dst
 2680     m3->_opnds[1] = new  iRegLdstOper(); // src
 2681 
 2682     m4->_opnds[0] = new  vecXOper();     // dst
 2683     m4->_opnds[1] = new  vecXOper();     // src
 2684     m4->_opnds[2] = zero;
 2685 
 2686     // Initialize ins_attrib instruction offset.
 2687     m2->_cbuf_insts_offset = -1;
 2688     ra_->set1(m3->_idx, reg_second);
 2689     ra_->set2(m3->_idx, reg_vec_first);
 2690     ra_->set_pair(m4->_idx, reg_vec_second, reg_vec_first);
 2691 
 2692     // register allocation for new nodes
 2693     ra_->set_pair(m2->_idx, reg_second, reg_first);
 2694 
 2695     // Create result.
 2696     nodes._large_hi = nullptr;
 2697     nodes._large_lo = nullptr;
 2698     nodes._small = m2;
 2699     nodes._moved = m3;
 2700     nodes._replicated = m4;
 2701     nodes._last = nodes._replicated;
 2702     assert(m2->bottom_type()->isa_long(), "must be long");
 2703   }
 2704 
 2705   return nodes;
 2706 }
 2707 
 2708 %} // source
 2709 
 2710 encode %{
 2711   // Postalloc expand emitter for loading a long constant from the method's TOC.
 2712   // Enc_class needed as consttanttablebase is not supported by postalloc
 2713   // expand.
 2714   enc_class postalloc_expand_load_long_constant(iRegLdst dst, immL src, iRegLdst toc) %{
 2715     // Create new nodes.
 2716     loadConLNodesTuple loadConLNodes =
 2717       loadConLNodesTuple_create(ra_, n_toc, op_src,
 2718                                 ra_->get_reg_second(this), ra_->get_reg_first(this));
 2719 
 2720     // Push new nodes.
 2721     if (loadConLNodes._large_hi) nodes->push(loadConLNodes._large_hi);
 2722     if (loadConLNodes._last)     nodes->push(loadConLNodes._last);
 2723 
 2724     // some asserts
 2725     assert(nodes->length() >= 1, "must have created at least 1 node");
 2726     assert(loadConLNodes._last->bottom_type()->isa_long(), "must be long");
 2727   %}
 2728 
 2729   enc_class enc_load_long_constP(iRegLdst dst, immP src, iRegLdst toc) %{
 2730     int toc_offset = 0;
 2731 
 2732     intptr_t val = $src$$constant;
 2733     relocInfo::relocType constant_reloc = $src->constant_reloc();  // src
 2734     address const_toc_addr;
 2735     RelocationHolder r; // Initializes type to none.
 2736     if (constant_reloc == relocInfo::oop_type) {
 2737       // Create an oop constant and a corresponding relocation.
 2738       AddressLiteral a = __ constant_oop_address((jobject)val);
 2739       const_toc_addr = __ address_constant((address)a.value(), RelocationHolder::none);
 2740       r = a.rspec();
 2741     } else if (constant_reloc == relocInfo::metadata_type) {
 2742       // Notify OOP recorder (don't need the relocation)
 2743       AddressLiteral a = __ constant_metadata_address((Metadata *)val);
 2744       const_toc_addr = __ address_constant((address)a.value(), RelocationHolder::none);
 2745     } else {
 2746       // Create a non-oop constant, no relocation needed.
 2747       const_toc_addr = __ long_constant((jlong)$src$$constant);
 2748     }
 2749 
 2750     if (const_toc_addr == nullptr) {
 2751       ciEnv::current()->record_out_of_memory_failure();
 2752       return;
 2753     }
 2754     __ relocate(r); // If set above.
 2755     // Get the constant's TOC offset.
 2756     toc_offset = __ offset_to_method_toc(const_toc_addr);
 2757 
 2758     __ ld($dst$$Register, toc_offset, $toc$$Register);
 2759   %}
 2760 
 2761   enc_class enc_load_long_constP_hi(iRegLdst dst, immP src, iRegLdst toc) %{
 2762     if (!ra_->C->output()->in_scratch_emit_size()) {
 2763       intptr_t val = $src$$constant;
 2764       relocInfo::relocType constant_reloc = $src->constant_reloc();  // src
 2765       address const_toc_addr;
 2766       RelocationHolder r; // Initializes type to none.
 2767       if (constant_reloc == relocInfo::oop_type) {
 2768         // Create an oop constant and a corresponding relocation.
 2769         AddressLiteral a = __ constant_oop_address((jobject)val);
 2770         const_toc_addr = __ address_constant((address)a.value(), RelocationHolder::none);
 2771         r = a.rspec();
 2772       } else if (constant_reloc == relocInfo::metadata_type) {
 2773         // Notify OOP recorder (don't need the relocation)
 2774         AddressLiteral a = __ constant_metadata_address((Metadata *)val);
 2775         const_toc_addr = __ address_constant((address)a.value(), RelocationHolder::none);
 2776       } else {  // non-oop pointers, e.g. card mark base, heap top
 2777         // Create a non-oop constant, no relocation needed.
 2778         const_toc_addr = __ long_constant((jlong)$src$$constant);
 2779       }
 2780 
 2781       if (const_toc_addr == nullptr) {
 2782         ciEnv::current()->record_out_of_memory_failure();
 2783         return;
 2784       }
 2785       __ relocate(r); // If set above.
 2786       // Get the constant's TOC offset.
 2787       const int toc_offset = __ offset_to_method_toc(const_toc_addr);
 2788       // Store the toc offset of the constant.
 2789       ((loadConP_hiNode*)this)->_const_toc_offset = toc_offset;
 2790     }
 2791 
 2792     __ addis($dst$$Register, $toc$$Register, MacroAssembler::largeoffset_si16_si16_hi(_const_toc_offset));
 2793   %}
 2794 
 2795   // Postalloc expand emitter for loading a ptr constant from the method's TOC.
 2796   // Enc_class needed as consttanttablebase is not supported by postalloc
 2797   // expand.
 2798   enc_class postalloc_expand_load_ptr_constant(iRegPdst dst, immP src, iRegLdst toc) %{
 2799     const bool large_constant_pool = true; // TODO: PPC port C->cfg()->_consts_size > 4000;
 2800     if (large_constant_pool) {
 2801       // Create new nodes.
 2802       loadConP_hiNode *m1 = new loadConP_hiNode();
 2803       loadConP_loNode *m2 = new loadConP_loNode();
 2804 
 2805       // If this is an oop, both m1 and m2 must be consider oops so postalloc scheduling does not
 2806       // put a safepoint between them
 2807       m1->_bottom_type = bottom_type();
 2808       m2->_bottom_type = bottom_type();
 2809 
 2810       // inputs for new nodes
 2811       m1->add_req(nullptr, n_toc);
 2812       m2->add_req(nullptr, m1);
 2813 
 2814       // operands for new nodes
 2815       m1->_opnds[0] = new iRegPdstOper(); // dst
 2816       m1->_opnds[1] = op_src;             // src
 2817       m1->_opnds[2] = new iRegLdstOper(); // toc
 2818 
 2819       m2->_opnds[0] = new iRegPdstOper(); // dst
 2820       m2->_opnds[1] = op_src;             // src
 2821       m2->_opnds[2] = new iRegLdstOper(); // base
 2822 
 2823       // Initialize ins_attrib TOC fields.
 2824       m1->_const_toc_offset = -1;
 2825       m2->_const_toc_offset_hi_node = m1;
 2826 
 2827       // Register allocation for new nodes.
 2828       ra_->set_pair(m1->_idx, ra_->get_reg_second(this), ra_->get_reg_first(this));
 2829       ra_->set_pair(m2->_idx, ra_->get_reg_second(this), ra_->get_reg_first(this));
 2830 
 2831       nodes->push(m1);
 2832       nodes->push(m2);
 2833       assert(m2->bottom_type()->isa_ptr(), "must be ptr");
 2834     } else {
 2835       loadConPNode *m2 = new loadConPNode();
 2836 
 2837       // inputs for new nodes
 2838       m2->add_req(nullptr, n_toc);
 2839 
 2840       // operands for new nodes
 2841       m2->_opnds[0] = new iRegPdstOper(); // dst
 2842       m2->_opnds[1] = op_src;             // src
 2843       m2->_opnds[2] = new iRegLdstOper(); // toc
 2844 
 2845       // Register allocation for new nodes.
 2846       ra_->set_pair(m2->_idx, ra_->get_reg_second(this), ra_->get_reg_first(this));
 2847 
 2848       nodes->push(m2);
 2849       assert(m2->bottom_type()->isa_ptr(), "must be ptr");
 2850     }
 2851   %}
 2852 
 2853   // Enc_class needed as consttanttablebase is not supported by postalloc
 2854   // expand.
 2855   enc_class postalloc_expand_load_float_constant(regF dst, immF src, iRegLdst toc) %{
 2856     bool large_constant_pool = true; // TODO: PPC port C->cfg()->_consts_size > 4000;
 2857 
 2858     MachNode *m2;
 2859     if (large_constant_pool) {
 2860       m2 = new loadConFCompNode();
 2861     } else {
 2862       m2 = new loadConFNode();
 2863     }
 2864     // inputs for new nodes
 2865     m2->add_req(nullptr, n_toc);
 2866 
 2867     // operands for new nodes
 2868     m2->_opnds[0] = op_dst;
 2869     m2->_opnds[1] = op_src;
 2870     m2->_opnds[2] = new iRegLdstOper(); // constanttablebase
 2871 
 2872     // register allocation for new nodes
 2873     ra_->set_pair(m2->_idx, ra_->get_reg_second(this), ra_->get_reg_first(this));
 2874     nodes->push(m2);
 2875   %}
 2876 
 2877   // Enc_class needed as consttanttablebase is not supported by postalloc
 2878   // expand.
 2879   enc_class postalloc_expand_load_double_constant(regD dst, immD src, iRegLdst toc) %{
 2880     bool large_constant_pool = true; // TODO: PPC port C->cfg()->_consts_size > 4000;
 2881 
 2882     MachNode *m2;
 2883     if (large_constant_pool) {
 2884       m2 = new loadConDCompNode();
 2885     } else {
 2886       m2 = new loadConDNode();
 2887     }
 2888     // inputs for new nodes
 2889     m2->add_req(nullptr, n_toc);
 2890 
 2891     // operands for new nodes
 2892     m2->_opnds[0] = op_dst;
 2893     m2->_opnds[1] = op_src;
 2894     m2->_opnds[2] = new iRegLdstOper(); // constanttablebase
 2895 
 2896     // register allocation for new nodes
 2897     ra_->set_pair(m2->_idx, ra_->get_reg_second(this), ra_->get_reg_first(this));
 2898     nodes->push(m2);
 2899   %}
 2900 
 2901   enc_class enc_stw(iRegIsrc src, memory mem) %{
 2902     int Idisp = $mem$$disp + frame_slots_bias($mem$$base, ra_);
 2903     __ stw($src$$Register, Idisp, $mem$$base$$Register);
 2904   %}
 2905 
 2906   enc_class enc_std(iRegIsrc src, memoryAlg4 mem) %{
 2907     int Idisp = $mem$$disp + frame_slots_bias($mem$$base, ra_);
 2908     // Operand 'ds' requires 4-alignment.
 2909     assert((Idisp & 0x3) == 0, "unaligned offset");
 2910     __ std($src$$Register, Idisp, $mem$$base$$Register);
 2911   %}
 2912 
 2913   enc_class enc_stfs(RegF src, memory mem) %{
 2914     int Idisp = $mem$$disp + frame_slots_bias($mem$$base, ra_);
 2915     __ stfs($src$$FloatRegister, Idisp, $mem$$base$$Register);
 2916   %}
 2917 
 2918   enc_class enc_stfd(RegF src, memory mem) %{
 2919     int Idisp = $mem$$disp + frame_slots_bias($mem$$base, ra_);
 2920     __ stfd($src$$FloatRegister, Idisp, $mem$$base$$Register);
 2921   %}
 2922 
 2923   enc_class postalloc_expand_encode_oop(iRegNdst dst, iRegPdst src, flagsReg crx) %{
 2924     cmpP_reg_imm16Node *n_compare  = new cmpP_reg_imm16Node();
 2925     encodeP_subNode    *n_sub_base = new encodeP_subNode();
 2926     encodeP_shiftNode  *n_shift    = new encodeP_shiftNode();
 2927     cond_set_0_oopNode *n_cond_set = new cond_set_0_oopNode();
 2928 
 2929     n_compare->add_req(n_region, n_src);
 2930     n_compare->_opnds[0] = op_crx;
 2931     n_compare->_opnds[1] = op_src;
 2932     n_compare->_opnds[2] = new immL16Oper(0);
 2933 
 2934     n_sub_base->add_req(n_region, n_src);
 2935     n_sub_base->_opnds[0] = op_dst;
 2936     n_sub_base->_opnds[1] = op_src;
 2937     n_sub_base->_bottom_type = _bottom_type;
 2938 
 2939     n_shift->add_req(n_region, n_sub_base);
 2940     n_shift->_opnds[0] = op_dst;
 2941     n_shift->_opnds[1] = op_dst;
 2942     n_shift->_bottom_type = _bottom_type;
 2943 
 2944     n_cond_set->add_req(n_region, n_compare, n_shift);
 2945     n_cond_set->_opnds[0] = op_dst;
 2946     n_cond_set->_opnds[1] = op_crx;
 2947     n_cond_set->_opnds[2] = op_dst;
 2948     n_cond_set->_bottom_type = _bottom_type;
 2949 
 2950     ra_->set_pair(n_compare->_idx, ra_->get_reg_second(n_crx), ra_->get_reg_first(n_crx));
 2951     ra_->set_pair(n_sub_base->_idx, ra_->get_reg_second(this), ra_->get_reg_first(this));
 2952     ra_->set_pair(n_shift->_idx, ra_->get_reg_second(this), ra_->get_reg_first(this));
 2953     ra_->set_pair(n_cond_set->_idx, ra_->get_reg_second(this), ra_->get_reg_first(this));
 2954 
 2955     nodes->push(n_compare);
 2956     nodes->push(n_sub_base);
 2957     nodes->push(n_shift);
 2958     nodes->push(n_cond_set);
 2959 
 2960     assert(!(ra_->is_oop(this)), "sanity"); // This is not supposed to be GC'ed.
 2961   %}
 2962 
 2963   enc_class postalloc_expand_encode_oop_not_null(iRegNdst dst, iRegPdst src) %{
 2964 
 2965     encodeP_subNode *n1 = new encodeP_subNode();
 2966     n1->add_req(n_region, n_src);
 2967     n1->_opnds[0] = op_dst;
 2968     n1->_opnds[1] = op_src;
 2969     n1->_bottom_type = _bottom_type;
 2970 
 2971     encodeP_shiftNode *n2 = new encodeP_shiftNode();
 2972     n2->add_req(n_region, n1);
 2973     n2->_opnds[0] = op_dst;
 2974     n2->_opnds[1] = op_dst;
 2975     n2->_bottom_type = _bottom_type;
 2976     ra_->set_pair(n1->_idx, ra_->get_reg_second(this), ra_->get_reg_first(this));
 2977     ra_->set_pair(n2->_idx, ra_->get_reg_second(this), ra_->get_reg_first(this));
 2978 
 2979     nodes->push(n1);
 2980     nodes->push(n2);
 2981     assert(!(ra_->is_oop(this)), "sanity"); // This is not supposed to be GC'ed.
 2982   %}
 2983 
 2984   enc_class postalloc_expand_decode_oop(iRegPdst dst, iRegNsrc src, flagsReg crx) %{
 2985     decodeN_shiftNode *n_shift    = new decodeN_shiftNode();
 2986     cmpN_reg_imm0Node *n_compare  = new cmpN_reg_imm0Node();
 2987 
 2988     n_compare->add_req(n_region, n_src);
 2989     n_compare->_opnds[0] = op_crx;
 2990     n_compare->_opnds[1] = op_src;
 2991     n_compare->_opnds[2] = new immN_0Oper(TypeNarrowOop::NULL_PTR);
 2992 
 2993     n_shift->add_req(n_region, n_src);
 2994     n_shift->_opnds[0] = op_dst;
 2995     n_shift->_opnds[1] = op_src;
 2996     n_shift->_bottom_type = _bottom_type;
 2997 
 2998     decodeN_addNode *n_add_base = new decodeN_addNode();
 2999     n_add_base->add_req(n_region, n_shift);
 3000     n_add_base->_opnds[0] = op_dst;
 3001     n_add_base->_opnds[1] = op_dst;
 3002     n_add_base->_bottom_type = _bottom_type;
 3003 
 3004     cond_set_0_ptrNode *n_cond_set = new cond_set_0_ptrNode();
 3005     n_cond_set->add_req(n_region, n_compare, n_add_base);
 3006     n_cond_set->_opnds[0] = op_dst;
 3007     n_cond_set->_opnds[1] = op_crx;
 3008     n_cond_set->_opnds[2] = op_dst;
 3009     n_cond_set->_bottom_type = _bottom_type;
 3010 
 3011     assert(ra_->is_oop(this) == true, "A decodeN node must produce an oop!");
 3012     ra_->set_oop(n_cond_set, true);
 3013 
 3014     ra_->set_pair(n_shift->_idx, ra_->get_reg_second(this), ra_->get_reg_first(this));
 3015     ra_->set_pair(n_compare->_idx, ra_->get_reg_second(n_crx), ra_->get_reg_first(n_crx));
 3016     ra_->set_pair(n_add_base->_idx, ra_->get_reg_second(this), ra_->get_reg_first(this));
 3017     ra_->set_pair(n_cond_set->_idx, ra_->get_reg_second(this), ra_->get_reg_first(this));
 3018 
 3019     nodes->push(n_compare);
 3020     nodes->push(n_shift);
 3021     nodes->push(n_add_base);
 3022     nodes->push(n_cond_set);
 3023 
 3024   %}
 3025 
 3026   enc_class postalloc_expand_decode_oop_not_null(iRegPdst dst, iRegNsrc src) %{
 3027     decodeN_shiftNode *n1 = new decodeN_shiftNode();
 3028     n1->add_req(n_region, n_src);
 3029     n1->_opnds[0] = op_dst;
 3030     n1->_opnds[1] = op_src;
 3031     n1->_bottom_type = _bottom_type;
 3032 
 3033     decodeN_addNode *n2 = new decodeN_addNode();
 3034     n2->add_req(n_region, n1);
 3035     n2->_opnds[0] = op_dst;
 3036     n2->_opnds[1] = op_dst;
 3037     n2->_bottom_type = _bottom_type;
 3038     ra_->set_pair(n1->_idx, ra_->get_reg_second(this), ra_->get_reg_first(this));
 3039     ra_->set_pair(n2->_idx, ra_->get_reg_second(this), ra_->get_reg_first(this));
 3040 
 3041     assert(ra_->is_oop(this) == true, "A decodeN node must produce an oop!");
 3042     ra_->set_oop(n2, true);
 3043 
 3044     nodes->push(n1);
 3045     nodes->push(n2);
 3046   %}
 3047 
 3048 
  // This enc_class is needed so that scheduler gets proper
  // input mapping for latency computation.
  // Emits a single andc with dst as target and src1, src2 as sources.
  enc_class enc_andc(iRegIdst dst, iRegIsrc src1, iRegIsrc src2) %{
    __ andc($dst$$Register, $src1$$Register, $src2$$Register);
  %}
 3054 
 3055   enc_class enc_convI2B_regI__cmove(iRegIdst dst, iRegIsrc src, flagsReg crx, immI16 zero, immI16 notzero) %{
 3056     Label done;
 3057     __ cmpwi($crx$$CondRegister, $src$$Register, 0);
 3058     __ li($dst$$Register, $zero$$constant);
 3059     __ beq($crx$$CondRegister, done);
 3060     __ li($dst$$Register, $notzero$$constant);
 3061     __ bind(done);
 3062   %}
 3063 
 3064   enc_class enc_convP2B_regP__cmove(iRegIdst dst, iRegPsrc src, flagsReg crx, immI16 zero, immI16 notzero) %{
 3065     Label done;
 3066     __ cmpdi($crx$$CondRegister, $src$$Register, 0);
 3067     __ li($dst$$Register, $zero$$constant);
 3068     __ beq($crx$$CondRegister, done);
 3069     __ li($dst$$Register, $notzero$$constant);
 3070     __ bind(done);
 3071   %}
 3072 
 3073   enc_class enc_cmove_bso_stackSlotL(iRegLdst dst, flagsRegSrc crx, stackSlotL mem ) %{
 3074     int Idisp = $mem$$disp + frame_slots_bias($mem$$base, ra_);
 3075     Label done;
 3076     __ bso($crx$$CondRegister, done);
 3077     __ ld($dst$$Register, Idisp, $mem$$base$$Register);
 3078     __ bind(done);
 3079   %}
 3080 
 3081   enc_class enc_cmove_bso_reg(iRegLdst dst, flagsRegSrc crx, regD src) %{
 3082     Label done;
 3083     __ bso($crx$$CondRegister, done);
 3084     __ mffprd($dst$$Register, $src$$FloatRegister);
 3085     __ bind(done);
 3086   %}
 3087 
 3088   enc_class enc_bc(flagsRegSrc crx, cmpOp cmp, Label lbl) %{
 3089     Label d;   // dummy
 3090     __ bind(d);
 3091     Label* p = ($lbl$$label);
 3092     // `p' is `nullptr' when this encoding class is used only to
 3093     // determine the size of the encoded instruction.
 3094     Label& l = (nullptr == p)? d : *(p);
 3095     int cc = $cmp$$cmpcode;
 3096     int flags_reg = $crx$$reg;
 3097     assert((Assembler::bcondCRbiIs1 & ~Assembler::bcondCRbiIs0) == 8, "check encoding");
 3098     int bhint = Assembler::bhintNoHint;
 3099 
 3100     if (UseStaticBranchPredictionForUncommonPathsPPC64) {
 3101       if (_prob <= PROB_NEVER) {
 3102         bhint = Assembler::bhintIsNotTaken;
 3103       } else if (_prob >= PROB_ALWAYS) {
 3104         bhint = Assembler::bhintIsTaken;
 3105       }
 3106     }
 3107 
 3108     __ bc(Assembler::add_bhint_to_boint(bhint, cc_to_boint(cc)),
 3109           cc_to_biint(cc, flags_reg),
 3110           l);
 3111   %}
 3112 
 3113   enc_class enc_bc_far(flagsRegSrc crx, cmpOp cmp, Label lbl) %{
 3114     // The scheduler doesn't know about branch shortening, so we set the opcode
 3115     // to ppc64Opcode_bc in order to hide this detail from the scheduler.
 3116     Label d;    // dummy
 3117     __ bind(d);
 3118     Label* p = ($lbl$$label);
 3119     // `p' is `nullptr' when this encoding class is used only to
 3120     // determine the size of the encoded instruction.
 3121     Label& l = (nullptr == p)? d : *(p);
 3122     int cc = $cmp$$cmpcode;
 3123     int flags_reg = $crx$$reg;
 3124     int bhint = Assembler::bhintNoHint;
 3125 
 3126     if (UseStaticBranchPredictionForUncommonPathsPPC64) {
 3127       if (_prob <= PROB_NEVER) {
 3128         bhint = Assembler::bhintIsNotTaken;
 3129       } else if (_prob >= PROB_ALWAYS) {
 3130         bhint = Assembler::bhintIsTaken;
 3131       }
 3132     }
 3133 
 3134     // Tell the conditional far branch to optimize itself when being relocated.
 3135     __ bc_far(Assembler::add_bhint_to_boint(bhint, cc_to_boint(cc)),
 3136                   cc_to_biint(cc, flags_reg),
 3137                   l,
 3138                   MacroAssembler::bc_far_optimize_on_relocate);
 3139   %}
 3140 
 3141   // Postalloc expand emitter for loading a replicatef float constant from
 3142   // the method's TOC.
 3143   // Enc_class needed as consttanttablebase is not supported by postalloc
 3144   // expand.
 3145   enc_class postalloc_expand_load_replF_constant(iRegLdst dst, immF src, iRegLdst toc) %{
 3146     // Create new nodes.
 3147 
 3148     // Make an operand with the bit pattern to load as float.
 3149     immLOper *op_repl = new immLOper((jlong)replicate_immF(op_src->constantF()));
 3150 
 3151     loadConLNodesTuple loadConLNodes =
 3152       loadConLNodesTuple_create(ra_, n_toc, op_repl,
 3153                                 ra_->get_reg_second(this), ra_->get_reg_first(this));
 3154 
 3155     // Push new nodes.
 3156     if (loadConLNodes._large_hi) nodes->push(loadConLNodes._large_hi);
 3157     if (loadConLNodes._last)     nodes->push(loadConLNodes._last);
 3158 
 3159     assert(nodes->length() >= 1, "must have created at least 1 node");
 3160     assert(loadConLNodes._last->bottom_type()->isa_long(), "must be long");
 3161   %}
 3162 
 3163   enc_class postalloc_expand_load_replF_constant_vsx(vecX dst, immF src, iRegLdst toc, iRegLdst tmp) %{
          // Postalloc expand emitter for loading a replicated float constant
          // into a vecX destination. Node creation is delegated to
          // loadConLReplicatedNodesTuple_create(); the resulting nodes are
          // appended to 'nodes'.
 3164     // Create new nodes.
 3165 
 3166     // Make a long immediate operand holding the replicate_immF() bit pattern
          // of the float constant, plus an int 0 immediate operand.
 3167     immLOper *op_repl = new  immLOper((jlong)replicate_immF(op_src->constantF()));
 3168     immI_0Oper *op_zero = new  immI_0Oper(0);
 3169 
          // Register pairs passed: first the pair assigned to 'tmp', then the
          // pair assigned to this node.
 3170     loadConLReplicatedNodesTuple loadConLNodes =
 3171       loadConLReplicatedNodesTuple_create(C, ra_, n_toc, op_repl, op_dst, op_zero,
 3172                                 ra_->get_reg_second(n_tmp), ra_->get_reg_first(n_tmp),
 3173                                 ra_->get_reg_second(this), ra_->get_reg_first(this));
 3174 
 3175     // Push new nodes. Each tuple member is only pushed if it is non-null.
 3176     if (loadConLNodes._large_hi) { nodes->push(loadConLNodes._large_hi); }
 3177     if (loadConLNodes._large_lo) { nodes->push(loadConLNodes._large_lo); }
 3178     if (loadConLNodes._moved)    { nodes->push(loadConLNodes._moved); }
 3179     if (loadConLNodes._last)     { nodes->push(loadConLNodes._last); }
 3180 
 3181     assert(nodes->length() >= 1, "must have created at least 1 node");
 3182   %}
 3183 
 3184   // Safepoint poll emitter.
 3185   // This enc_class is needed so that scheduler gets proper
        // input mapping for latency computation.
        //   dst:  fake operand, its constant must be 0x0 (asserted below).
        //   poll: register passed to load_from_polling_page().
 3186   enc_class enc_poll(immI dst, iRegLdst poll) %{
 3187     // Fake operand dst needed for PPC scheduler.
 3188     assert($dst$$constant == 0x0, "dst must be 0x0");
 3189 
 3190     // Mark the code position where the load from the safepoint
 3191     // polling page was emitted as relocInfo::poll_type.
 3192     __ relocate(relocInfo::poll_type);
 3193     __ load_from_polling_page($poll$$Register);
 3194   %}
 3195 
 3196   // A Java static call or a runtime call.
 3197   //
 3198   // Branch-and-link relative to a trampoline.
 3199   // The trampoline loads the target address and does a long branch to there.
 3200   // In case we call java, the trampoline branches to an interpreter stub
 3201   // which loads the inline cache and the real call target from the constant pool.
 3202   //
 3203   // This basically looks like this:
 3204   //
 3205   // >>>> consts      -+  -+
 3206   //                   |   |- offset1
 3207   // [call target1]    | <-+
 3208   // [IC cache]        |- offset2
 3209   // [call target2] <--+
 3210   //
 3211   // <<<< consts
 3212   // >>>> insts
 3213   //
 3214   // bl offset16               -+  -+             ??? // How many bits available?
 3215   //                            |   |
 3216   // <<<< insts                 |   |
 3217   // >>>> stubs                 |   |
 3218   //                            |   |- trampoline_stub_Reloc
 3219   // trampoline stub:           | <-+
 3220   //   r2 = toc                 |
 3221   //   r2 = [r2 + offset1]      |       // Load call target1 from const section
 3222   //   mtctr r2                 |
 3223   //   bctr                     |- static_stub_Reloc
 3224   // comp_to_interp_stub:   <---+
 3225   //   r1 = toc
 3226   //   ICreg = [r1 + IC_offset]         // Load IC from const section
 3227   //   r1    = [r1 + offset2]           // Load call target2 from const section
 3228   //   mtctr r1
 3229   //   bctr
 3230   //
 3231   // <<<< stubs
 3232   //
 3233   // The call instruction in the code either
 3234   // - Branches directly to a compiled method if the offset is encodable in instruction.
 3235   // - Branches to the trampoline stub if the offset to the compiled method is not encodable.
 3236   // - Branches to the compiled_to_interp stub if the target is interpreted.
 3237   //
 3238   // Further there are three relocations from the loads to the constants in
 3239   // the constant section.
 3240   //
 3241   // Usage of r1 and r2 in the stubs allows to distinguish them.
        //
        // Whenever trampoline_call() or emit_to_interp_stub() returns nullptr, the
        // compilation is marked as failed ("CodeCache is full") and the emitter
        // returns without emitting the post-call nop.
 3242   enc_class enc_java_static_call(method meth) %{
 3243     address entry_point = (address)$meth$$method;
 3244     address call_pc;
 3245 
 3246     if (!_method) {
 3247       // A call to a runtime wrapper, e.g. new, new_typeArray_Java, uncommon_trap.
 3248       call_pc = __ trampoline_call(AddressLiteral(entry_point, relocInfo::runtime_call_type));
 3249       if (call_pc == nullptr) {
 3250         ciEnv::current()->record_failure("CodeCache is full");
 3251         return;
 3252       }
 3253     } else {
            // A call to a Java method: pick the relocation spec depending on
            // whether this is an optimized virtual call or a static call.
 3254       int method_index = resolved_method_index(masm);
 3255       RelocationHolder rspec = _optimized_virtual ? opt_virtual_call_Relocation::spec(method_index)
 3256                                                   : static_call_Relocation::spec(method_index);
 3257       call_pc = __ trampoline_call(AddressLiteral(entry_point, rspec));
 3258       if (call_pc == nullptr) {
 3259         ciEnv::current()->record_failure("CodeCache is full");
 3260         return;
 3261       }
 3262 
 3263       // Emit stub for static call
 3264       address stub = CompiledDirectCall::emit_to_interp_stub(masm, call_pc);
 3265       if (stub == nullptr) {
 3266         ciEnv::current()->record_failure("CodeCache is full");
 3267         return;
 3268       }
 3269     }
 3270     __ post_call_nop();
 3271   %}
 3272 
 3273   // Compound version of call dynamic
 3274   // Toc is only passed so that it can be used in ins_encode statement.
 3275   // In the code we have to use $constanttablebase.
        // Emits the call via ic_call() with fixed size; on failure the compilation
        // is marked as failed ("CodeCache is full") and nothing more is emitted.
 3276   enc_class enc_java_dynamic_call(method meth, iRegLdst toc) %{
 3277     int start_offset = __ offset();
 3278     int method_index = resolved_method_index(masm);
          // Without a register allocator (ra_ == nullptr) this is treated as a
          // scratch emit and R2_TOC is used instead of $constanttablebase.
 3279     bool scratch_emit = ra_ == nullptr;
 3280     Register Rtoc = scratch_emit ? R2_TOC : $constanttablebase;
 3281     bool success = __ ic_call(Rtoc, (address)$meth$$method, method_index, scratch_emit, true /*fixed_size*/);
 3282     if (!success) {
 3283       ciEnv::current()->record_failure("CodeCache is full");
 3284       return;
 3285     }
          // The emitted size (measured before the post-call nop) must agree with
          // the node's ret_addr_offset().
 3286     assert(((MachCallDynamicJavaNode*)this)->ret_addr_offset() == __ offset() - start_offset,
 3287            "Fix constant in ret_addr_offset(), expected %d", __ offset() - start_offset);
 3288     __ post_call_nop();
 3289   %}
 3290 
 3291   // A runtime call.
        // With ABI_ELFv2 the entry address is called directly via call_c();
        // otherwise the target is a FunctionDescriptor and the call is emitted
        // with call_c_using_toc(). Both variants emit a post-call nop.
 3292   enc_class enc_java_to_runtime_call (method meth) %{
 3293     const address start_pc = __ pc();
 3294 
 3295 #if defined(ABI_ELFv2)
          // A zero $meth$$method is passed on as nullptr.
 3296     address entry= !($meth$$method) ? nullptr : (address)$meth$$method;
 3297     __ call_c(entry, relocInfo::runtime_call_type);
 3298     __ post_call_nop();
 3299 #else
 3300     // The function we're going to call.
          // A zero $meth$$method is replaced by the default-constructed fdtemp.
 3301     FunctionDescriptor fdtemp;
 3302     const FunctionDescriptor* fd = !($meth$$method) ? &fdtemp : (FunctionDescriptor*)$meth$$method;
 3303 
 3304     Register Rtoc = R12_scratch2;
 3305     // Calculate the method's TOC.
 3306     __ calculate_address_from_global_toc(Rtoc, __ method_toc());
 3307     // Put entry, env, toc into the constant pool, this needs up to 3 constant
 3308     // pool entries; call_c_using_toc will optimize the call.
 3309     bool success = __ call_c_using_toc(fd, relocInfo::runtime_call_type, Rtoc);
 3310     if (!success) {
            // Record the failure and stop emitting; the ret_addr_offset check
            // below is skipped in this case.
 3311       ciEnv::current()->record_out_of_memory_failure();
 3312       return;
 3313     }
 3314     __ post_call_nop();
 3315 #endif
 3316 
 3317     // Check the ret_addr_offset.
          // It must equal the distance from the start of this encoding to the
          // return pc of the last emitted call.
 3318     assert(((MachCallRuntimeNode*)this)->ret_addr_offset() ==  __ last_calls_return_pc() - start_pc,
 3319            "Fix constant in ret_addr_offset()");
 3320   %}
 3321 
 3322   // Move to ctr for leaf call: emits a single mtctr of the 'src' register.
 3323   // This enc_class is needed so that scheduler gets proper
 3324   // input mapping for latency computation.
 3325   enc_class enc_leaf_call_mtctr(iRegLsrc src) %{
 3326     __ mtctr($src$$Register);
 3327   %}
 3328 
 3329   // Postalloc expand emitter for runtime leaf calls.
        // Replaces this node by a sequence of nodes pushed to 'nodes':
        //   - constant load(s) of the entry point into R12,
        //   - without ABI_ELFv2 additionally loads of the function descriptor's
        //     env into R11 and its toc into R2,
        //   - a CallLeafDirect_mtctrNode moving the entry point to CTR,
        //   - a CallLeafDirectNode that copies the fields and inputs of this node.
 3330   enc_class postalloc_expand_java_to_runtime_call(method meth, iRegLdst toc) %{
 3331     loadConLNodesTuple loadConLNodes_Entry;
 3332 #if defined(ABI_ELFv2)
          // ELFv2: entry_point() is used directly as the address to call.
 3333     jlong entry_address = (jlong) this->entry_point();
 3334     assert(entry_address, "need address here");
 3335     loadConLNodes_Entry = loadConLNodesTuple_create(ra_, n_toc, new immLOper(entry_address),
 3336                                                     OptoReg::Name(R12_H_num), OptoReg::Name(R12_num));
 3337 #else
 3338     // Get the struct that describes the function we are about to call.
 3339     FunctionDescriptor* fd = (FunctionDescriptor*) this->entry_point();
 3340     assert(fd, "need fd here");
 3341     jlong entry_address = (jlong) fd->entry();
 3342     // new nodes
 3343     loadConLNodesTuple loadConLNodes_Env;
 3344     loadConLNodesTuple loadConLNodes_Toc;
 3345 
 3346     // Create nodes and operands for loading the entry point.
 3347     loadConLNodes_Entry = loadConLNodesTuple_create(ra_, n_toc, new immLOper(entry_address),
 3348                                                     OptoReg::Name(R12_H_num), OptoReg::Name(R12_num));
 3349 
 3350 
 3351     // Create nodes and operands for loading the env pointer.
 3352     if (fd->env() != nullptr) {
 3353       loadConLNodes_Env = loadConLNodesTuple_create(ra_, n_toc, new immLOper((jlong) fd->env()),
 3354                                                     OptoReg::Name(R11_H_num), OptoReg::Name(R11_num));
 3355     } else {
            // No env: build a single loadConL16 node with immediate 0 by hand
            // and assign it to R11, instead of going through the constant pool.
 3356       loadConLNodes_Env._large_hi = nullptr;
 3357       loadConLNodes_Env._large_lo = nullptr;
 3358       loadConLNodes_Env._small    = nullptr;
 3359       loadConLNodes_Env._last = new loadConL16Node();
 3360       loadConLNodes_Env._last->_opnds[0] = new iRegLdstOper();
 3361       loadConLNodes_Env._last->_opnds[1] = new immL16Oper(0);
 3362       ra_->set_pair(loadConLNodes_Env._last->_idx, OptoReg::Name(R11_H_num), OptoReg::Name(R11_num));
 3363     }
 3364 
 3365     // Create nodes and operands for loading the Toc point.
 3366     loadConLNodes_Toc = loadConLNodesTuple_create(ra_, n_toc, new immLOper((jlong) fd->toc()),
 3367                                                   OptoReg::Name(R2_H_num), OptoReg::Name(R2_num));
 3368 #endif // ABI_ELFv2
 3369     // mtctr node
 3370     MachNode *mtctr = new CallLeafDirect_mtctrNode();
 3371 
 3372     assert(loadConLNodes_Entry._last != nullptr, "entry must exist");
          // The mtctr node takes the last entry-point load node as its input.
 3373     mtctr->add_req(nullptr, loadConLNodes_Entry._last);
 3374 
 3375     mtctr->_opnds[0] = new iRegLdstOper();
 3376     mtctr->_opnds[1] = new iRegLdstOper();
 3377 
 3378     // call node
 3379     MachCallLeafNode *call = new CallLeafDirectNode();
 3380 
 3381     call->_opnds[0] = _opnds[0];
 3382     call->_opnds[1] = new methodOper((intptr_t) entry_address); // May get set later.
 3383 
 3384     // Make the new call node look like the old one.
 3385     call->_name        = _name;
 3386     call->_tf          = _tf;
 3387     call->_entry_point = _entry_point;
 3388     call->_cnt         = _cnt;
 3389     call->_guaranteed_safepoint = false;
 3390     call->_oop_map     = _oop_map;
          // JVM state is not copied: the guarantee rejects nodes that carry one.
 3391     guarantee(!_jvms, "You must clone the jvms and adapt the offsets by fix_jvms().");
 3392     call->_jvms        = nullptr;
 3393     call->_jvmadj      = _jvmadj;
 3394     call->_in_rms      = _in_rms;
 3395     call->_nesting     = _nesting;
 3396 
 3397     // New call needs all inputs of old call.
 3398     // Req...
          // (except the constant table base input, which is skipped)
 3399     for (uint i = 0; i < req(); ++i) {
 3400       if (i != mach_constant_base_node_input()) {
 3401         call->add_req(in(i));
 3402       }
 3403     }
 3404 
 3405     // These must be required edges, as the registers are live up to
 3406     // the call. Else the constants are handled as kills.
 3407     call->add_req(mtctr);
 3408 #if !defined(ABI_ELFv2)
 3409     call->add_req(loadConLNodes_Env._last);
 3410     call->add_req(loadConLNodes_Toc._last);
 3411 #endif
 3412 
 3413     // ...as well as prec
 3414     for (uint i = req(); i < len(); ++i) {
 3415       call->add_prec(in(i));
 3416     }
 3417 
 3418     // registers
          // The mtctr node's result is assigned to the CTR special register.
 3419     ra_->set1(mtctr->_idx, OptoReg::Name(SR_CTR_num));
 3420 
 3421     // Insert the new nodes.
          // Order: entry load(s), then (non-ELFv2) env and toc loads, then mtctr,
          // then the call itself.
 3422     if (loadConLNodes_Entry._large_hi) nodes->push(loadConLNodes_Entry._large_hi);
 3423     if (loadConLNodes_Entry._last)     nodes->push(loadConLNodes_Entry._last);
 3424 #if !defined(ABI_ELFv2)
 3425     if (loadConLNodes_Env._large_hi)   nodes->push(loadConLNodes_Env._large_hi);
 3426     if (loadConLNodes_Env._last)       nodes->push(loadConLNodes_Env._last);
 3427     if (loadConLNodes_Toc._large_hi)   nodes->push(loadConLNodes_Toc._large_hi);
 3428     if (loadConLNodes_Toc._last)       nodes->push(loadConLNodes_Toc._last);
 3429 #endif
 3430     nodes->push(mtctr);
 3431     nodes->push(call);
 3432   %}
 3433 %}
 3434 
 3435 //----------FRAME--------------------------------------------------------------
 3436 // Definition of frame structure and management information.
 3437 
 3438 frame %{
 3439   // These two registers define part of the calling convention between
 3440   // compiled code and the interpreter.
 3441 
 3442   // Inline Cache Register or method for I2C.
 3443   inline_cache_reg(R19); // R19_method
 3444 
 3445   // Optional: name the operand used by cisc-spilling to access
 3446   // [stack_pointer + offset].
 3447   cisc_spilling_operand_name(indOffset);
 3448 
 3449   // Number of stack slots consumed by a Monitor enter.
 3450   sync_stack_slots((frame::jit_monitor_size / VMRegImpl::stack_slot_size));
 3451 
 3452   // Compiled code's Frame Pointer.
 3453   frame_pointer(R1); // R1_SP
 3454 
        // Stack alignment, taken from frame::alignment_in_bytes.
 3455   stack_alignment(frame::alignment_in_bytes);
 3456 
 3457   // Number of outgoing stack slots killed above the
 3458   // out_preserve_stack_slots for calls to C. Supports the var-args
 3459   // backing area for register parms.
 3460   //
 3461   varargs_C_out_slots_killed(((frame::native_abi_reg_args_size - frame::jit_out_preserve_size) / VMRegImpl::stack_slot_size));
 3462 
 3463   // The after-PROLOG location of the return address. Location of
 3464   // return address specifies a type (REG or STACK) and a number
 3465   // representing the register number (i.e. - use a register name) or
 3466   // stack slot.
 3467   //
 3468   // A: Link register is stored in stack slot ...
 3469   // M:  ... but it's in the caller's frame according to PPC-64 ABI.
 3470   // J: Therefore, we make sure that the link register is also in R11_scratch1
 3471   //    at the end of the prolog.
 3472   // B: We use R20, now.
 3473   //return_addr(REG R20);
 3474 
 3475   // G: After reading the comments made by all the luminaries on their
 3476   //    failure to tell the compiler where the return address really is,
 3477   //    I hardly dare to try myself.  However, I'm convinced it's in slot
 3478   //    4 what apparently works and saves us some spills.
 3479   return_addr(STACK 4);
 3480 
 3481   // Location of compiled Java return values.  Same as C
        // Maps ideal_reg to an OptoRegPair by indexing the two tables below:
        // typeToRegLo supplies the second (lo) constructor argument and
        // typeToRegHi the first (hi) one; OptoReg::Bad marks entries without a
        // hi register.
 3482   return_value %{
          // Only Op_RegI..Op_RegL are accepted, plus Op_RegN when compressed
          // oops use neither a base nor a shift.
 3483     assert((ideal_reg >= Op_RegI && ideal_reg <= Op_RegL) ||
 3484             (ideal_reg == Op_RegN && CompressedOops::base() == nullptr && CompressedOops::shift() == 0),
 3485             "only return normal values");
 3486     // enum names from opcodes.hpp
 3487     static int typeToRegLo[Op_RegL+1] = {
 3488       0,              // Op_Node
 3489       0,              // Op_Set
 3490       R3_num,         // Op_RegN
 3491       R3_num,         // Op_RegI
 3492       R3_num,         // Op_RegP
 3493       F1_num,         // Op_RegF
 3494       F1_num,         // Op_RegD
 3495       R3_num,         // Op_RegL
 3496     };
 3497 
 3498     static int typeToRegHi[Op_RegL+1] = {
 3499       0,              // Op_Node
 3500       0,              // Op_Set
 3501       OptoReg::Bad,   // Op_RegN
 3502       OptoReg::Bad,   // Op_RegI
 3503       R3_H_num,       // Op_RegP
 3504       OptoReg::Bad,   // Op_RegF
 3505       F1_H_num,       // Op_RegD
 3506       R3_H_num        // Op_RegL
 3507     };
 3508 
 3509     return OptoRegPair(typeToRegHi[ideal_reg], typeToRegLo[ideal_reg]);
 3510   %}
 3511 %}
 3512 
 3513 
 3514 //----------ATTRIBUTES---------------------------------------------------------
 3515 
 3516 //----------Operand Attributes-------------------------------------------------
 3517 op_attrib op_cost(1);          // Required cost attribute.
 3518 
 3519 //----------Instruction Attributes---------------------------------------------
 3520 
 3521 // Cost attribute. required.
 3522 ins_attrib ins_cost(DEFAULT_COST);
 3523 
 3524 // Is this instruction a non-matching short branch variant of some
 3525 // long branch? Not required.
 3526 ins_attrib ins_short_branch(0);
 3527 
      // NOTE(review): no description was given for this attribute; judging by
      // its name it marks trap-based check nodes -- confirm against its users.
 3528 ins_attrib ins_is_TrapBasedCheckNode(true);
 3529 
 3530 // Number of constants.
 3531 // This instruction uses the given number of constants
 3532 // (optional attribute).
 3533 // This is needed to determine in time whether the constant pool will
 3534 // exceed 4000 entries. Before postalloc_expand the overall number of constants
 3535 // is determined. It's also used to compute the constant pool size
 3536 // in Output().
 3537 ins_attrib ins_num_consts(0);
 3538 
 3539 // Required alignment attribute (must be a power of 2) specifies the
 3540 // alignment that some part of the instruction (not necessarily the
 3541 // start) requires. If > 1, a compute_padding() function must be
 3542 // provided for the instruction.
 3543 ins_attrib ins_alignment(1);
 3544 
 3545 // Enforce/prohibit rematerializations.
 3546 // - If an instruction is attributed with 'ins_cannot_rematerialize(true)'
 3547 //   then rematerialization of that instruction is prohibited and the
 3548 //   instruction's value will be spilled if necessary.
 3549 //   Causes that MachNode::rematerialize() returns false.
 3550 // - If an instruction is attributed with 'ins_should_rematerialize(true)'
 3551 //   then rematerialization should be enforced and a copy of the instruction
 3552 //   should be inserted if possible; rematerialization is not guaranteed.
 3553 //   Note: this may result in rematerializations in front of every use.
 3554 //   Causes that MachNode::rematerialize() can return true.
 3555 // (optional attribute)
 3556 ins_attrib ins_cannot_rematerialize(false);
 3557 ins_attrib ins_should_rematerialize(false);
 3558 
 3559 // Instruction has variable size depending on alignment.
 3560 ins_attrib ins_variable_size_depending_on_alignment(false);
 3561 
 3562 // Instruction is a nop.
 3563 ins_attrib ins_is_nop(false);
 3564 
 3565 // Instruction is mapped to a MachIfFastLock node (instead of MachFastLock).
 3566 ins_attrib ins_use_mach_if_fast_lock_node(false);
 3567 
 3568 // Field for the toc offset of a constant.
 3569 //
 3570 // This is needed if the toc offset is not encodable as an immediate in
 3571 // the PPC load instruction. If so, the upper (hi) bits of the offset are
 3572 // added to the toc, and from this a load with immediate is performed.
 3573 // With postalloc expand, we get two nodes that require the same offset
 3574 // but which don't know about each other. The offset is only known
 3575 // when the constant is added to the constant pool during emitting.
 3576 // It is generated in the 'hi'-node adding the upper bits, and saved
 3577 // in this node.  The 'lo'-node has a link to the 'hi'-node and reads
 3578 // the offset from there when it gets encoded.
 3579 ins_attrib ins_field_const_toc_offset(0);
 3580 ins_attrib ins_field_const_toc_offset_hi_node(0);
 3581 
 3582 // A field that can hold the instruction's offset in the code buffer.
 3583 // Set in the node's emitter.
 3584 ins_attrib ins_field_cbuf_insts_offset(-1);
 3585 
 3586 // Fields for referencing a call's load-IC-node.
 3587 // If the toc offset can not be encoded as an immediate in a load, we
 3588 // use two nodes.
 3589 ins_attrib ins_field_load_ic_hi_node(0);
 3590 ins_attrib ins_field_load_ic_node(0);
 3591 
 3592 // Whether this node is expanded during code emission into a sequence of
 3593 // instructions and the first instruction can perform an implicit null check.
 3594 ins_attrib ins_is_late_expanded_null_check_candidate(false);
 3595 
 3596 //----------OPERANDS-----------------------------------------------------------
 3597 // Operand definitions must precede instruction definitions for correct
 3598 // parsing in the ADLC because operands constitute user defined types
 3599 // which are used in instruction definitions.
 3600 //
 3601 // Formats are generated automatically for constants and base registers.
 3602 
 3603 operand vecX() %{
        // Vector register operand: matches VecX, allocated in register class v_reg.
 3604   constraint(ALLOC_IN_RC(v_reg));
 3605   match(VecX);
 3606 
 3607   format %{ %}
 3608   interface(REG_INTER);
 3609 %}
 3610 
 3611 //----------Simple Operands----------------------------------------------------
 3612 // Immediate Operands
 3613 
 3614 // Integer Immediate: 32-bit. Matches any ConI (no predicate), at op_cost 40.
 3615 operand immI() %{
 3616   match(ConI);
 3617   op_cost(40);
 3618   format %{ %}
 3619   interface(CONST_INTER);
 3620 %}
 3621 
 3622 operand immI8() %{
        // Integer Immediate: 8-bit. ConI accepted when
        // Assembler::is_simm(value, 8) holds.
 3623   predicate(Assembler::is_simm(n->get_int(), 8));
 3624   op_cost(0);
 3625   match(ConI);
 3626   format %{ %}
 3627   interface(CONST_INTER);
 3628 %}
 3629 
 3630 // Integer Immediate: 16-bit. ConI accepted when Assembler::is_simm(value, 16) holds.
 3631 operand immI16() %{
 3632   predicate(Assembler::is_simm(n->get_int(), 16));
 3633   op_cost(0);
 3634   match(ConI);
 3635   format %{ %}
 3636   interface(CONST_INTER);
 3637 %}
 3638 
 3639 // Integer Immediate: 32-bit, where lowest 16 bits are 0x0000 and the
      // highest 16 bits are not all zero (so the value 0 is excluded).
 3640 operand immIhi16() %{
 3641   predicate(((n->get_int() & 0xffff0000) != 0) && ((n->get_int() & 0xffff) == 0));
 3642   match(ConI);
 3643   op_cost(0);
 3644   format %{ %}
 3645   interface(CONST_INTER);
 3646 %}
 3647 
 3648 // Integer Immediate: 32-bit immediate for prefixed addi and load/store.
      // Any ConI is accepted, but only when PowerArchitecturePPC64 >= 10.
 3649 operand immI32() %{
 3650   predicate(PowerArchitecturePPC64 >= 10);
 3651   op_cost(0);
 3652   match(ConI);
 3653   format %{ %}
 3654   interface(CONST_INTER);
 3655 %}
 3656 
 3657 operand immInegpow2() %{
        // Integer Immediate: ConI whose negation, computed on the value cast
        // to juint, is a power of 2.
 3658   predicate(is_power_of_2(-(juint)(n->get_int())));
 3659   match(ConI);
 3660   op_cost(0);
 3661   format %{ %}
 3662   interface(CONST_INTER);
 3663 %}
 3664 
 3665 operand immIpow2minus1() %{
        // Integer Immediate: ConI for which value + 1, computed as juint, is a
        // power of 2.
 3666   predicate(is_power_of_2((juint)(n->get_int()) + 1u));
 3667   match(ConI);
 3668   op_cost(0);
 3669   format %{ %}
 3670   interface(CONST_INTER);
 3671 %}
 3672 
 3673 operand immIpowerOf2() %{
        // Integer Immediate: ConI whose value, cast to juint, is a power of 2.
 3674   predicate(is_power_of_2((juint)(n->get_int())));
 3675   match(ConI);
 3676   op_cost(0);
 3677   format %{ %}
 3678   interface(CONST_INTER);
 3679 %}
 3680 
 3681 // Unsigned Integer Immediate: 5-bit, the values 0-31
      // (ConI accepted when Assembler::is_uimm(value, 5) holds).
 3682 operand uimmI5() %{
 3683   predicate(Assembler::is_uimm(n->get_int(), 5));
 3684   match(ConI);
 3685   op_cost(0);
 3686   format %{ %}
 3687   interface(CONST_INTER);
 3688 %}
 3689 
 3690 // Unsigned Integer Immediate: 6-bit
      // (ConI accepted when Assembler::is_uimm(value, 6) holds).
 3691 operand uimmI6() %{
 3692   predicate(Assembler::is_uimm(n->get_int(), 6));
 3693   match(ConI);
 3694   op_cost(0);
 3695   format %{ %}
 3696   interface(CONST_INTER);
 3697 %}
 3698 
 3699 // Unsigned Integer Immediate: 6-bit int, greater than or equal to 32
 3700 operand uimmI6_ge32() %{
 3701   predicate(Assembler::is_uimm(n->get_int(), 6) && n->get_int() >= 32);
 3702   match(ConI);
 3703   op_cost(0);
 3704   format %{ %}
 3705   interface(CONST_INTER);
 3706 %}
 3707 
 3708 // Unsigned Integer Immediate: 15-bit
      // (ConI accepted when Assembler::is_uimm(value, 15) holds).
 3709 operand uimmI15() %{
 3710   predicate(Assembler::is_uimm(n->get_int(), 15));
 3711   match(ConI);
 3712   op_cost(0);
 3713   format %{ %}
 3714   interface(CONST_INTER);
 3715 %}
 3716 
 3717 // Unsigned Integer Immediate: 16-bit
      // (ConI accepted when Assembler::is_uimm(value, 16) holds).
 3718 operand uimmI16() %{
 3719   predicate(Assembler::is_uimm(n->get_int(), 16));
 3720   match(ConI);
 3721   op_cost(0);
 3722   format %{ %}
 3723   interface(CONST_INTER);
 3724 %}
 3725 
 3726 // Integer Immediate: the constant 'int 0'.
 3727 operand immI_0() %{
 3728   predicate(n->get_int() == 0);
 3729   match(ConI);
 3730   op_cost(0);
 3731   format %{ %}
 3732   interface(CONST_INTER);
 3733 %}
 3734 
 3735 // Integer Immediate: the constant 'int 1'.
 3736 operand immI_1() %{
 3737   predicate(n->get_int() == 1);
 3738   match(ConI);
 3739   op_cost(0);
 3740   format %{ %}
 3741   interface(CONST_INTER);
 3742 %}
 3743 
 3744 // Integer Immediate: the constant 'int -1'.
 3745 operand immI_minus1() %{
 3746   predicate(n->get_int() == -1);
 3747   match(ConI);
 3748   op_cost(0);
 3749   format %{ %}
 3750   interface(CONST_INTER);
 3751 %}
 3752 
 3753 // Integer Immediate: the constant 'int 16'.
 3754 operand immI_16() %{
 3755   predicate(n->get_int() == 16);
 3756   match(ConI);
 3757   op_cost(0);
 3758   format %{ %}
 3759   interface(CONST_INTER);
 3760 %}
 3761 
 3762 // Integer Immediate: the constant 'int 24'.
 3763 operand immI_24() %{
 3764   predicate(n->get_int() == 24);
 3765   match(ConI);
 3766   op_cost(0);
 3767   format %{ %}
 3768   interface(CONST_INTER);
 3769 %}
 3770 
 3771 // Compressed oops constants
 3772 // Narrow Pointer Immediate: matches any ConN (no predicate), at op_cost 10.
 3773 operand immN() %{
 3774   match(ConN);
 3775 
 3776   op_cost(10);
 3777   format %{ %}
 3778   interface(CONST_INTER);
 3779 %}
 3780 
 3781 // nullptr Narrow Pointer Immediate: ConN whose get_narrowcon() is 0.
 3782 operand immN_0() %{
 3783   predicate(n->get_narrowcon() == 0);
 3784   match(ConN);
 3785 
 3786   op_cost(0);
 3787   format %{ %}
 3788   interface(CONST_INTER);
 3789 %}
 3790 
 3791 // Compressed klass constants: matches any ConNKlass (no predicate).
 3792 operand immNKlass() %{
 3793   match(ConNKlass);
 3794 
 3795   op_cost(0);
 3796   format %{ %}
 3797   interface(CONST_INTER);
 3798 %}
 3799 
 3800 // This operand can be used to avoid matching of an instruct
 3801 // with chain rule.
      // Its predicate is constant false, so it never matches a ConNKlass.
 3802 operand immNKlass_NM() %{
 3803   match(ConNKlass);
 3804   predicate(false);
 3805   op_cost(0);
 3806   format %{ %}
 3807   interface(CONST_INTER);
 3808 %}
 3809 
 3810 // Pointer Immediate: 64-bit. Matches any ConP (no predicate).
 3811 operand immP() %{
 3812   match(ConP);
 3813   op_cost(0);
 3814   format %{ %}
 3815   interface(CONST_INTER);
 3816 %}
 3817 
 3818 // Operand to avoid match of loadConP.
 3819 // This operand can be used to avoid matching of an instruct
 3820 // with chain rule.
      // Its predicate is constant false, so it never matches a ConP.
 3821 operand immP_NM() %{
 3822   match(ConP);
 3823   predicate(false);
 3824   op_cost(0);
 3825   format %{ %}
 3826   interface(CONST_INTER);
 3827 %}
 3828 
 3829 // Pointer Immediate: the constant 'pointer 0'.
 3830 operand immP_0() %{
 3831   predicate(n->get_ptr() == 0);
 3832   match(ConP);
 3833   op_cost(0);
 3834   format %{ %}
 3835   interface(CONST_INTER);
 3836 %}
 3837 
 3838 // Pointer Immediate: pointer 0x0 or 0x1
 3839 operand immP_0or1() %{
 3840   predicate((n->get_ptr() == 0) || (n->get_ptr() == 1));
 3841   match(ConP);
 3842   op_cost(0);
 3843   format %{ %}
 3844   interface(CONST_INTER);
 3845 %}
 3846 
 3847 operand immL() %{
        // Long Immediate: 64-bit. Matches any ConL (no predicate), at op_cost 40.
 3848   match(ConL);
 3849   op_cost(40);
 3850   format %{ %}
 3851   interface(CONST_INTER);
 3852 %}
 3853 
 3854 operand immLmax30() %{
        // Long Immediate: ConL with value <= 30. Only the upper bound is
        // checked, so negative values are accepted as well.
 3855   predicate((n->get_long() <= 30));
 3856   match(ConL);
 3857   op_cost(0);
 3858   format %{ %}
 3859   interface(CONST_INTER);
 3860 %}
 3861 
 3862 // Long Immediate: 16-bit. ConL accepted when Assembler::is_simm(value, 16) holds.
 3863 operand immL16() %{
 3864   predicate(Assembler::is_simm(n->get_long(), 16));
 3865   match(ConL);
 3866   op_cost(0);
 3867   format %{ %}
 3868   interface(CONST_INTER);
 3869 %}
 3870 
 3871 // Long Immediate: 16-bit, 4-aligned (the two lowest bits must be zero).
 3872 operand immL16Alg4() %{
 3873   predicate(Assembler::is_simm(n->get_long(), 16) && ((n->get_long() & 0x3) == 0));
 3874   match(ConL);
 3875   op_cost(0);
 3876   format %{ %}
 3877   interface(CONST_INTER);
 3878 %}
 3879 
 3880 // Long Immediate: 16-bit, 16-aligned (the four lowest bits must be zero).
 3881 operand immL16Alg16() %{
 3882   predicate(Assembler::is_simm(n->get_long(), 16) && ((n->get_long() & 0xf) == 0));
 3883   match(ConL);
 3884   op_cost(0);
 3885   format %{ %}
 3886   interface(CONST_INTER);
 3887 %}
 3888 
 3889 // Long Immediate: 32-bit, where lowest 16 bits are 0x0000.
      // Unlike immIhi16 there is no check that the upper bits are non-zero.
 3890 operand immL32hi16() %{
 3891   predicate(Assembler::is_simm(n->get_long(), 32) && ((n->get_long() & 0xffffL) == 0L));
 3892   match(ConL);
 3893   op_cost(0);
 3894   format %{ %}
 3895   interface(CONST_INTER);
 3896 %}
 3897 
 3898 // Long Immediate: 32-bit. ConL accepted when Assembler::is_simm(value, 32) holds.
 3899 operand immL32() %{
 3900   predicate(Assembler::is_simm(n->get_long(), 32));
 3901   match(ConL);
 3902   op_cost(0);
 3903   format %{ %}
 3904   interface(CONST_INTER);
 3905 %}
 3906 
 3907 // Long Immediate: 34-bit, immediate field in prefixed addi and load/store.
      // Only matched when PowerArchitecturePPC64 >= 10.
 3908 operand immL34() %{
 3909   predicate(PowerArchitecturePPC64 >= 10 && Assembler::is_simm(n->get_long(), 34));
 3910   match(ConL);
 3911   op_cost(0);
 3912   format %{ %}
 3913   interface(CONST_INTER);
 3914 %}
 3915 
 3916 // Long Immediate: 64-bit, where highest 16 bits are not 0x0000 and the
      // lowest 48 bits are all zero.
 3917 operand immLhighest16() %{
 3918   predicate((n->get_long() & 0xffff000000000000L) != 0L && (n->get_long() & 0x0000ffffffffffffL) == 0L);
 3919   match(ConL);
 3920   op_cost(0);
 3921   format %{ %}
 3922   interface(CONST_INTER);
 3923 %}
 3924 
 3925 operand immLnegpow2() %{
        // Long Immediate: ConL whose negation, computed on the value cast to
        // julong, is a power of 2.
 3926   predicate(is_power_of_2(-(julong)(n->get_long())));
 3927   match(ConL);
 3928   op_cost(0);
 3929   format %{ %}
 3930   interface(CONST_INTER);
 3931 %}
 3932 
 3933 operand immLpow2minus1() %{
        // Long Immediate: ConL for which value + 1, computed as julong, is a
        // power of 2.
 3934   predicate(is_power_of_2((julong)(n->get_long()) + 1ull));
 3935   match(ConL);
 3936   op_cost(0);
 3937   format %{ %}
 3938   interface(CONST_INTER);
 3939 %}
 3940 
 3941 // Long Immediate: the constant 'long 0'.
 3942 operand immL_0() %{
 3943   predicate(n->get_long() == 0L);
 3944   match(ConL);
 3945   op_cost(0);
 3946   format %{ %}
 3947   interface(CONST_INTER);
 3948 %}
 3949 
 3950 // Long Immediate: the constant 'long -1'.
 3951 operand immL_minus1() %{
 3952   predicate(n->get_long() == -1L);
 3953   match(ConL);
 3954   op_cost(0);
 3955   format %{ %}
 3956   interface(CONST_INTER);
 3957 %}
 3958 
 3959 // Long Immediate: low 32-bit mask, i.e. exactly the value 0xFFFFFFFF.
 3960 operand immL_32bits() %{
 3961   predicate(n->get_long() == 0xFFFFFFFFL);
 3962   match(ConL);
 3963   op_cost(0);
 3964   format %{ %}
 3965   interface(CONST_INTER);
 3966 %}
 3967 
 3968 // Unsigned Long Immediate: 16-bit
      // (ConL accepted when Assembler::is_uimm(value, 16) holds).
 3969 operand uimmL16() %{
 3970   predicate(Assembler::is_uimm(n->get_long(), 16));
 3971   match(ConL);
 3972   op_cost(0);
 3973   format %{ %}
 3974   interface(CONST_INTER);
 3975 %}
 3976 
 3977 // Float Immediate: matches any ConF (no predicate), at op_cost 40.
 3978 operand immF() %{
 3979   match(ConF);
 3980   op_cost(40);
 3981   format %{ %}
 3982   interface(CONST_INTER);
 3983 %}
 3984 
 3985 // Float Immediate: +0.0f.
      // The check is done on jint_cast() of the value, which must be 0.
 3986 operand immF_0() %{
 3987   predicate(jint_cast(n->getf()) == 0);
 3988   match(ConF);
 3989 
 3990   op_cost(0);
 3991   format %{ %}
 3992   interface(CONST_INTER);
 3993 %}
 3994 
 3995 // Double Immediate: matches any ConD (no predicate), at op_cost 40.
 3996 operand immD() %{
 3997   match(ConD);
 3998   op_cost(40);
 3999   format %{ %}
 4000   interface(CONST_INTER);
 4001 %}
 4002 
 4003 // Double Immediate: +0.0d.
      // The check is done on jlong_cast() of the value, which must be 0.
 4004 operand immD_0() %{
 4005   predicate(jlong_cast(n->getd()) == 0);
 4006   match(ConD);
 4007 
 4008   op_cost(0);
 4009   format %{ %}
 4010   interface(CONST_INTER);
 4011 %}
 4012 
 4013 // Integer Register Operands
 4014 // Integer Destination Register
 4015 // See definition of reg_class bits32_reg_rw.
      // Matches RegI as well as the fixed-register int operands
      // rscratch1/2RegI and rarg1..4RegI.
 4016 operand iRegIdst() %{
 4017   constraint(ALLOC_IN_RC(bits32_reg_rw));
 4018   match(RegI);
 4019   match(rscratch1RegI);
 4020   match(rscratch2RegI);
 4021   match(rarg1RegI);
 4022   match(rarg2RegI);
 4023   match(rarg3RegI);
 4024   match(rarg4RegI);
 4025   format %{ %}
 4026   interface(REG_INTER);
 4027 %}
 4028 
 4029 // Integer Source Register
 4030 // See definition of reg_class bits32_reg_ro.
      // Matches RegI as well as the fixed-register int operands
      // rscratch1/2RegI and rarg1..4RegI.
 4031 operand iRegIsrc() %{
 4032   constraint(ALLOC_IN_RC(bits32_reg_ro));
 4033   match(RegI);
 4034   match(rscratch1RegI);
 4035   match(rscratch2RegI);
 4036   match(rarg1RegI);
 4037   match(rarg2RegI);
 4038   match(rarg3RegI);
 4039   match(rarg4RegI);
 4040   format %{ %}
 4041   interface(REG_INTER);
 4042 %}
 4043 
 4044 operand rscratch1RegI() %{
        // Int operand restricted to register class rscratch1_bits32_reg;
        // matches iRegIdst.
 4045   constraint(ALLOC_IN_RC(rscratch1_bits32_reg));
 4046   match(iRegIdst);
 4047   format %{ %}
 4048   interface(REG_INTER);
 4049 %}
 4050 
 4051 operand rscratch2RegI() %{
        // Int operand restricted to register class rscratch2_bits32_reg;
        // matches iRegIdst.
 4052   constraint(ALLOC_IN_RC(rscratch2_bits32_reg));
 4053   match(iRegIdst);
 4054   format %{ %}
 4055   interface(REG_INTER);
 4056 %}
 4057 
 4058 operand rarg1RegI() %{
        // Int operand restricted to register class rarg1_bits32_reg;
        // matches iRegIdst.
 4059   constraint(ALLOC_IN_RC(rarg1_bits32_reg));
 4060   match(iRegIdst);
 4061   format %{ %}
 4062   interface(REG_INTER);
 4063 %}
 4064 
 4065 operand rarg2RegI() %{
        // Int operand restricted to register class rarg2_bits32_reg;
        // matches iRegIdst.
 4066   constraint(ALLOC_IN_RC(rarg2_bits32_reg));
 4067   match(iRegIdst);
 4068   format %{ %}
 4069   interface(REG_INTER);
 4070 %}
 4071 
 4072 operand rarg3RegI() %{
        // Int operand restricted to register class rarg3_bits32_reg;
        // matches iRegIdst.
 4073   constraint(ALLOC_IN_RC(rarg3_bits32_reg));
 4074   match(iRegIdst);
 4075   format %{ %}
 4076   interface(REG_INTER);
 4077 %}
 4078 
 4079 operand rarg4RegI() %{
        // Int operand restricted to register class rarg4_bits32_reg;
        // matches iRegIdst.
 4080   constraint(ALLOC_IN_RC(rarg4_bits32_reg));
 4081   match(iRegIdst);
 4082   format %{ %}
 4083   interface(REG_INTER);
 4084 %}
 4085 
 4086 operand rarg1RegL() %{  // Long operand allocated in reg_class rarg1_bits64_reg (same class as rarg1RegP).
 4087   constraint(ALLOC_IN_RC(rarg1_bits64_reg));
 4088   match(iRegLdst);  // Tied to the general long destination operand.
 4089   format %{ %}
 4090   interface(REG_INTER);
 4091 %}
 4092 
 4093 // Pointer Destination Register
 4094 // See definition of reg_class bits64_reg_rw.
 4095 operand iRegPdst() %{  // Pointer register operand allocated in bits64_reg_rw.
 4096   constraint(ALLOC_IN_RC(bits64_reg_rw));
 4097   match(RegP);
 4098   match(rscratch1RegP);  // Only rarg1..rarg4 are listed here; rarg5RegP/rarg6RegP appear in iRegPsrc only.
 4099   match(rscratch2RegP);
 4100   match(rarg1RegP);
 4101   match(rarg2RegP);
 4102   match(rarg3RegP);
 4103   match(rarg4RegP);
 4104   format %{ %}
 4105   interface(REG_INTER);
 4106 %}
 4107 
 4108 // Pointer Destination Register
 4109 // Operand not using r11 and r12 (killed in epilog).
 4110 operand iRegPdstNoScratch() %{  // Allocated in bits64_reg_leaf_call instead of bits64_reg_rw.
 4111   constraint(ALLOC_IN_RC(bits64_reg_leaf_call));
 4112   match(RegP);
 4113   match(rarg1RegP);  // The rscratch1RegP/rscratch2RegP operands are deliberately absent from this list.
 4114   match(rarg2RegP);
 4115   match(rarg3RegP);
 4116   match(rarg4RegP);
 4117   format %{ %}
 4118   interface(REG_INTER);
 4119 %}
 4120 
 4121 // Pointer Source Register
 4122 // See definition of reg_class bits64_reg_ro.
 4123 operand iRegPsrc() %{  // Pointer register operand allocated in bits64_reg_ro.
 4124   constraint(ALLOC_IN_RC(bits64_reg_ro));
 4125   match(RegP);
 4126   match(iRegPdst);  // A destination pointer operand is also accepted as a source.
 4127   match(rscratch1RegP);
 4128   match(rscratch2RegP);
 4129   match(rarg1RegP);
 4130   match(rarg2RegP);
 4131   match(rarg3RegP);
 4132   match(rarg4RegP);
 4133   match(rarg5RegP);
 4134   match(rarg6RegP);
 4135   match(threadRegP);  // The thread operand is only listed for the source operand, not for iRegPdst.
 4136   format %{ %}
 4137   interface(REG_INTER);
 4138 %}
 4139 
 4140 // Thread operand. Allocated in reg_class thread_bits64_reg; printed with the fixed name "R16".
 4141 operand threadRegP() %{
 4142   constraint(ALLOC_IN_RC(thread_bits64_reg));
 4143   match(iRegPdst);
 4144   format %{ "R16" %}
 4145   interface(REG_INTER);
 4146 %}
 4147 
 4148 operand rscratch1RegP() %{  // Pointer operand allocated in reg_class rscratch1_bits64_reg; printed with the fixed name "R11".
 4149   constraint(ALLOC_IN_RC(rscratch1_bits64_reg));
 4150   match(iRegPdst);
 4151   format %{ "R11" %}
 4152   interface(REG_INTER);
 4153 %}
 4154 
 4155 operand rscratch2RegP() %{  // Pointer operand allocated in reg_class rscratch2_bits64_reg.
 4156   constraint(ALLOC_IN_RC(rscratch2_bits64_reg));
 4157   match(iRegPdst);  // Tied to the general pointer destination operand.
 4158   format %{ %}
 4159   interface(REG_INTER);
 4160 %}
 4161 
 4162 operand rarg1RegP() %{  // Pointer operand allocated in reg_class rarg1_bits64_reg.
 4163   constraint(ALLOC_IN_RC(rarg1_bits64_reg));
 4164   match(iRegPdst);  // Tied to the general pointer destination operand.
 4165   format %{ %}
 4166   interface(REG_INTER);
 4167 %}
 4168 
 4169 operand rarg2RegP() %{  // Pointer operand allocated in reg_class rarg2_bits64_reg.
 4170   constraint(ALLOC_IN_RC(rarg2_bits64_reg));
 4171   match(iRegPdst);  // Tied to the general pointer destination operand.
 4172   format %{ %}
 4173   interface(REG_INTER);
 4174 %}
 4175 
 4176 operand rarg3RegP() %{  // Pointer operand allocated in reg_class rarg3_bits64_reg.
 4177   constraint(ALLOC_IN_RC(rarg3_bits64_reg));
 4178   match(iRegPdst);  // Tied to the general pointer destination operand.
 4179   format %{ %}
 4180   interface(REG_INTER);
 4181 %}
 4182 
 4183 operand rarg4RegP() %{  // Pointer operand allocated in reg_class rarg4_bits64_reg.
 4184   constraint(ALLOC_IN_RC(rarg4_bits64_reg));
 4185   match(iRegPdst);  // Tied to the general pointer destination operand.
 4186   format %{ %}
 4187   interface(REG_INTER);
 4188 %}
 4189 
 4190 operand rarg5RegP() %{  // Pointer operand allocated in reg_class rarg5_bits64_reg.
 4191   constraint(ALLOC_IN_RC(rarg5_bits64_reg));
 4192   match(iRegPdst);  // Matches iRegPdst, although iRegPdst itself only lists rarg1..rarg4.
 4193   format %{ %}
 4194   interface(REG_INTER);
 4195 %}
 4196 
 4197 operand rarg6RegP() %{  // Pointer operand allocated in reg_class rarg6_bits64_reg.
 4198   constraint(ALLOC_IN_RC(rarg6_bits64_reg));
 4199   match(iRegPdst);  // Matches iRegPdst, although iRegPdst itself only lists rarg1..rarg4.
 4200   format %{ %}
 4201   interface(REG_INTER);
 4202 %}
 4203 
 4204 operand iRegNsrc() %{  // Source operand for RegN values, allocated in bits32_reg_ro.
 4205   constraint(ALLOC_IN_RC(bits32_reg_ro));
 4206   match(RegN);
 4207   match(iRegNdst);  // A RegN destination operand is also accepted as a source.
 4208 
 4209   format %{ %}
 4210   interface(REG_INTER);
 4211 %}
 4212 
 4213 operand iRegNdst() %{  // Destination operand for RegN values, allocated in bits32_reg_rw.
 4214   constraint(ALLOC_IN_RC(bits32_reg_rw));
 4215   match(RegN);
 4216 
 4217   format %{ %}
 4218   interface(REG_INTER);
 4219 %}
 4220 
 4221 // Long Destination Register
 4222 // See definition of reg_class bits64_reg_rw.
 4223 operand iRegLdst() %{  // Long register operand allocated in bits64_reg_rw.
 4224   constraint(ALLOC_IN_RC(bits64_reg_rw));
 4225   match(RegL);
 4226   match(rscratch1RegL);  // Only the two scratch long operands are listed; rarg1RegL is not.
 4227   match(rscratch2RegL);
 4228   format %{ %}
 4229   interface(REG_INTER);
 4230 %}
 4231 
 4232 // Long Source Register
 4233 // See definition of reg_class bits64_reg_ro.
 4234 operand iRegLsrc() %{  // Long register operand allocated in bits64_reg_ro.
 4235   constraint(ALLOC_IN_RC(bits64_reg_ro));
 4236   match(RegL);
 4237   match(iRegLdst);  // A long destination operand is also accepted as a source.
 4238   match(rscratch1RegL);
 4239   match(rscratch2RegL);
 4240   format %{ %}
 4241   interface(REG_INTER);
 4242 %}
 4243 
 4244 // Special operand for ConvL2I: matches (ConvL2I reg) on a long source register, so the
 4244 // conversion node itself becomes part of the operand (see opclass iRegIsrc_iRegL2Isrc).
 4245 operand iRegL2Isrc(iRegLsrc reg) %{
 4246   constraint(ALLOC_IN_RC(bits64_reg_ro));
 4247   match(ConvL2I reg);
 4248   format %{ "ConvL2I($reg)" %}
 4249   interface(REG_INTER)  // NOTE(review): no ';' here, unlike the plain register operands above - confirm this is intended before changing it.
 4250 %}
 4251 
 4252 operand rscratch1RegL() %{  // Long operand allocated in reg_class rscratch1_bits64_reg.
 4253   constraint(ALLOC_IN_RC(rscratch1_bits64_reg));
 4254   match(RegL);  // Matches the ideal type RegL directly (the int/pointer scratch operands match iRegIdst/iRegPdst instead).
 4255   format %{ %}
 4256   interface(REG_INTER);
 4257 %}
 4258 
 4259 operand rscratch2RegL() %{  // Long operand allocated in reg_class rscratch2_bits64_reg.
 4260   constraint(ALLOC_IN_RC(rscratch2_bits64_reg));
 4261   match(RegL);  // Matches the ideal type RegL directly (the int/pointer scratch operands match iRegIdst/iRegPdst instead).
 4262   format %{ %}
 4263   interface(REG_INTER);
 4264 %}
 4265 
 4266 // Condition Code Flag Registers
 4267 operand flagsReg() %{  // Condition-register operand allocated in reg_class int_flags.
 4268   constraint(ALLOC_IN_RC(int_flags));
 4269   match(RegFlags);
 4270   format %{ %}
 4271   interface(REG_INTER);
 4272 %}
 4273 
 4274 operand flagsRegSrc() %{  // Condition-register source operand allocated in reg_class int_flags_ro.
 4275   constraint(ALLOC_IN_RC(int_flags_ro));
 4276   match(RegFlags);
 4277   match(flagsReg);     // flagsReg and flagsRegCR0 are accepted as sources;
 4278   match(flagsRegCR0);  // flagsRegCR1 and flagsRegCR6 are not listed here.
 4279   format %{ %}
 4280   interface(REG_INTER);
 4281 %}
 4282 
 4283 // Condition Code Flag Register CR0. Allocated in reg_class int_flags_CR0; printed as "CR0".
 4284 operand flagsRegCR0() %{
 4285   constraint(ALLOC_IN_RC(int_flags_CR0));
 4286   match(RegFlags);
 4287   format %{ "CR0" %}
 4288   interface(REG_INTER);
 4289 %}
 4290 
 4291 operand flagsRegCR1() %{  // Condition-register operand allocated in reg_class int_flags_CR1; printed as "CR1".
 4292   constraint(ALLOC_IN_RC(int_flags_CR1));
 4293   match(RegFlags);
 4294   format %{ "CR1" %}
 4295   interface(REG_INTER);
 4296 %}
 4297 
 4298 operand flagsRegCR6() %{  // Condition-register operand allocated in reg_class int_flags_CR6; printed as "CR6".
 4299   constraint(ALLOC_IN_RC(int_flags_CR6));
 4300   match(RegFlags);
 4301   format %{ "CR6" %}
 4302   interface(REG_INTER);
 4303 %}
 4304 
 4305 operand regCTR() %{  // Operand allocated in reg_class ctr_reg; printed as "SR_CTR".
 4306   constraint(ALLOC_IN_RC(ctr_reg));
 4307   // RegFlags should work. Introducing a RegSpecial type would cause a
 4308   // lot of changes. The operand therefore reuses the ideal type RegFlags.
 4309   match(RegFlags);
 4310   format %{"SR_CTR" %}
 4311   interface(REG_INTER);
 4312 %}
 4313 
 4314 operand regD() %{  // Double register operand (ideal type RegD) allocated in reg_class dbl_reg.
 4315   constraint(ALLOC_IN_RC(dbl_reg));
 4316   match(RegD);
 4317   format %{ %}
 4318   interface(REG_INTER);
 4319 %}
 4320 
 4321 operand regF() %{  // Float register operand (ideal type RegF) allocated in reg_class flt_reg.
 4322   constraint(ALLOC_IN_RC(flt_reg));
 4323   match(RegF);
 4324   format %{ %}
 4325   interface(REG_INTER);
 4326 %}
 4327 
 4328 // Special Registers
 4329 
 4330 // Method Register: pointer operand restricted to reg_class r19_bits64_reg.
 4331 operand inline_cache_regP(iRegPdst reg) %{
 4332   constraint(ALLOC_IN_RC(r19_bits64_reg)); // inline_cache_reg
 4333   match(reg);  // Matches its own iRegPdst component.
 4334   format %{ %}
 4335   interface(REG_INTER);
 4336 %}
 4337 
 4338 // Operands to remove register moves in unscaled mode.
 4339 // Match read/write registers with an EncodeP node if neither shift nor add is required.
 4340 operand iRegP2N(iRegPsrc reg) %{
 4341   predicate(false /* TODO: PPC port MatchDecodeNodes*/&& CompressedOops::shift() == 0);  // Constant false first term: the operand is currently disabled.
 4342   constraint(ALLOC_IN_RC(bits64_reg_ro));
 4343   match(EncodeP reg);
 4344   format %{ "$reg" %}
 4345   interface(REG_INTER)  // NOTE(review): no ';' here, unlike the plain register operands - confirm this is intended.
 4346 %}
 4347 
 4348 operand iRegN2P(iRegNsrc reg) %{  // Would match (DecodeN reg) as an operand; member of opclass iRegP_N2P.
 4349   predicate(false /* TODO: PPC port MatchDecodeNodes*/);  // Constant false: the operand is currently disabled.
 4350   constraint(ALLOC_IN_RC(bits32_reg_ro));
 4351   match(DecodeN reg);
 4352   format %{ "$reg" %}
 4353   interface(REG_INTER)  // NOTE(review): no ';' here, unlike the plain register operands - confirm this is intended.
 4354 %}
 4355 
 4356 operand iRegN2P_klass(iRegNsrc reg) %{  // Matches (DecodeNKlass reg) as an operand; member of opclass iRegP_N2P.
 4357   predicate(CompressedKlassPointers::base() == nullptr && CompressedKlassPointers::shift() == 0);  // Only when the klass encoding uses neither base nor shift.
 4358   constraint(ALLOC_IN_RC(bits32_reg_ro));
 4359   match(DecodeNKlass reg);
 4360   format %{ "$reg" %}
 4361   interface(REG_INTER)  // NOTE(review): no ';' here, unlike the plain register operands - confirm this is intended.
 4362 %}
 4363 
 4364 //----------Complex Operands---------------------------------------------------
 4365 // Indirect Memory Reference: address is the pointer register itself, no index, scale or displacement.
 4366 operand indirect(iRegPsrc reg) %{
 4367   constraint(ALLOC_IN_RC(bits64_reg_ro));
 4368   match(reg);
 4369   op_cost(100);
 4370   format %{ "[$reg]" %}
 4371   interface(MEMORY_INTER) %{
 4372     base($reg);
 4373     index(0x0);
 4374     scale(0x0);
 4375     disp(0x0);
 4376   %}
 4377 %}
 4378 
 4379 // Indirect with Offset: matches (AddP reg offset) with an immL16 constant as displacement.
 4380 operand indOffset16(iRegPsrc reg, immL16 offset) %{
 4381   constraint(ALLOC_IN_RC(bits64_reg_ro));
 4382   match(AddP reg offset);
 4383   op_cost(100);
 4384   format %{ "[$reg + $offset]" %}
 4385   interface(MEMORY_INTER) %{
 4386     base($reg);
 4387     index(0x0);
 4388     scale(0x0);
 4389     disp($offset);
 4390   %}
 4391 %}
 4392 
 4393 // Indirect with 4-aligned Offset: same shape as indOffset16, but the offset operand is immL16Alg4.
 4394 operand indOffset16Alg4(iRegPsrc reg, immL16Alg4 offset) %{
 4395   constraint(ALLOC_IN_RC(bits64_reg_ro));
 4396   match(AddP reg offset);
 4397   op_cost(100);
 4398   format %{ "[$reg + $offset]" %}
 4399   interface(MEMORY_INTER) %{
 4400     base($reg);
 4401     index(0x0);
 4402     scale(0x0);
 4403     disp($offset);
 4404   %}
 4405 %}
 4406 
 4407 // Indirect with 16-aligned Offset: same shape as indOffset16, but the offset operand is immL16Alg16.
 4408 operand indOffset16Alg16(iRegPsrc reg, immL16Alg16 offset) %{
 4409   constraint(ALLOC_IN_RC(bits64_reg_ro));
 4410   match(AddP reg offset);
 4411   op_cost(100);
 4412   format %{ "[$reg + $offset]" %}
 4413   interface(MEMORY_INTER) %{
 4414     base($reg);
 4415     index(0x0);
 4416     scale(0x0);
 4417     disp($offset);
 4418   %}
 4419 %}
 4420 
 4421 //----------Complex Operands for Compressed OOPs-------------------------------
 4422 // Compressed OOPs with narrow_oop_shift == 0.
 4423 
 4424 // Indirect Memory Reference, compressed OOP: would use (DecodeN reg) directly as the base address.
 4425 operand indirectNarrow(iRegNsrc reg) %{
 4426   predicate(false /* TODO: PPC port MatchDecodeNodes*/);  // Constant false: the operand is currently disabled.
 4427   constraint(ALLOC_IN_RC(bits64_reg_ro));
 4428   match(DecodeN reg);
 4429   op_cost(100);
 4430   format %{ "[$reg]" %}
 4431   interface(MEMORY_INTER) %{
 4432     base($reg);
 4433     index(0x0);
 4434     scale(0x0);
 4435     disp(0x0);
 4436   %}
 4437 %}
 4438 
 4439 operand indirectNarrow_klass(iRegNsrc reg) %{  // Uses (DecodeNKlass reg) directly as the base address.
 4440   predicate(CompressedKlassPointers::base() == nullptr && CompressedKlassPointers::shift() == 0);  // Only when the klass encoding uses neither base nor shift.
 4441   constraint(ALLOC_IN_RC(bits64_reg_ro));
 4442   match(DecodeNKlass reg);
 4443   op_cost(100);
 4444   format %{ "[$reg]" %}
 4445   interface(MEMORY_INTER) %{
 4446     base($reg);
 4447     index(0x0);
 4448     scale(0x0);
 4449     disp(0x0);
 4450   %}
 4451 %}
 4452 
 4453 // Indirect with Offset, compressed OOP: would match (AddP (DecodeN reg) offset).
 4454 operand indOffset16Narrow(iRegNsrc reg, immL16 offset) %{
 4455   predicate(false /* TODO: PPC port MatchDecodeNodes*/);  // Constant false: the operand is currently disabled.
 4456   constraint(ALLOC_IN_RC(bits64_reg_ro));
 4457   match(AddP (DecodeN reg) offset);
 4458   op_cost(100);
 4459   format %{ "[$reg + $offset]" %}
 4460   interface(MEMORY_INTER) %{
 4461     base($reg);
 4462     index(0x0);
 4463     scale(0x0);
 4464     disp($offset);
 4465   %}
 4466 %}
 4467 
 4468 operand indOffset16Narrow_klass(iRegNsrc reg, immL16 offset) %{  // Matches (AddP (DecodeNKlass reg) offset) as a memory operand.
 4469   predicate(CompressedKlassPointers::base() == nullptr && CompressedKlassPointers::shift() == 0);  // Only when the klass encoding uses neither base nor shift.
 4470   constraint(ALLOC_IN_RC(bits64_reg_ro));
 4471   match(AddP (DecodeNKlass reg) offset);
 4472   op_cost(100);
 4473   format %{ "[$reg + $offset]" %}
 4474   interface(MEMORY_INTER) %{
 4475     base($reg);
 4476     index(0x0);
 4477     scale(0x0);
 4478     disp($offset);
 4479   %}
 4480 %}
 4481 
 4482 // Indirect with 4-aligned Offset, compressed OOP: would match (AddP (DecodeN reg) offset) with an immL16Alg4 offset.
 4483 operand indOffset16NarrowAlg4(iRegNsrc reg, immL16Alg4 offset) %{
 4484   predicate(false /* TODO: PPC port MatchDecodeNodes*/);  // Constant false: the operand is currently disabled.
 4485   constraint(ALLOC_IN_RC(bits64_reg_ro));
 4486   match(AddP (DecodeN reg) offset);
 4487   op_cost(100);
 4488   format %{ "[$reg + $offset]" %}
 4489   interface(MEMORY_INTER) %{
 4490     base($reg);
 4491     index(0x0);
 4492     scale(0x0);
 4493     disp($offset);
 4494   %}
 4495 %}
 4496 
 4497 operand indOffset16NarrowAlg4_klass(iRegNsrc reg, immL16Alg4 offset) %{  // Matches (AddP (DecodeNKlass reg) offset) with an immL16Alg4 offset.
 4498   predicate(CompressedKlassPointers::base() == nullptr && CompressedKlassPointers::shift() == 0);  // Only when the klass encoding uses neither base nor shift.
 4499   constraint(ALLOC_IN_RC(bits64_reg_ro));
 4500   match(AddP (DecodeNKlass reg) offset);
 4501   op_cost(100);
 4502   format %{ "[$reg + $offset]" %}
 4503   interface(MEMORY_INTER) %{
 4504     base($reg);
 4505     index(0x0);
 4506     scale(0x0);
 4507     disp($offset);
 4508   %}
 4509 %}
 4510 
 4511 //----------Special Memory Operands--------------------------------------------
 4512 // Stack Slot Operand
 4513 //
 4514 // This operand is used for loading and storing temporary values on
 4515 // the stack where a match requires a value to flow through memory.
 4516 operand stackSlotI(sRegI reg) %{  // Int stack slot: memory operand with fixed base 0x1 (R1_SP) and the slot as displacement.
 4517   constraint(ALLOC_IN_RC(stack_slots));
 4518   op_cost(100);
 4519   //match(RegI);
 4520   format %{ "[sp+$reg]" %}
 4521   interface(MEMORY_INTER) %{
 4522     base(0x1);   // R1_SP
 4523     index(0x0);
 4524     scale(0x0);
 4525     disp($reg);  // Stack Offset
 4526   %}
 4527 %}
 4528 
 4529 operand stackSlotL(sRegL reg) %{  // Long stack slot: memory operand with fixed base 0x1 (R1_SP) and the slot as displacement.
 4530   constraint(ALLOC_IN_RC(stack_slots));
 4531   op_cost(100);
 4532   //match(RegL);
 4533   format %{ "[sp+$reg]" %}
 4534   interface(MEMORY_INTER) %{
 4535     base(0x1);   // R1_SP
 4536     index(0x0);
 4537     scale(0x0);
 4538     disp($reg);  // Stack Offset
 4539   %}
 4540 %}
 4541 
 4542 operand stackSlotP(sRegP reg) %{  // Pointer stack slot: memory operand with fixed base 0x1 (R1_SP) and the slot as displacement.
 4543   constraint(ALLOC_IN_RC(stack_slots));
 4544   op_cost(100);
 4545   //match(RegP);
 4546   format %{ "[sp+$reg]" %}
 4547   interface(MEMORY_INTER) %{
 4548     base(0x1);   // R1_SP
 4549     index(0x0);
 4550     scale(0x0);
 4551     disp($reg);  // Stack Offset
 4552   %}
 4553 %}
 4554 
 4555 operand stackSlotF(sRegF reg) %{  // Float stack slot: memory operand with fixed base 0x1 (R1_SP) and the slot as displacement.
 4556   constraint(ALLOC_IN_RC(stack_slots));
 4557   op_cost(100);
 4558   //match(RegF);
 4559   format %{ "[sp+$reg]" %}
 4560   interface(MEMORY_INTER) %{
 4561     base(0x1);   // R1_SP
 4562     index(0x0);
 4563     scale(0x0);
 4564     disp($reg);  // Stack Offset
 4565   %}
 4566 %}
 4567 
 4568 operand stackSlotD(sRegD reg) %{  // Double stack slot: memory operand with fixed base 0x1 (R1_SP) and the slot as displacement.
 4569   constraint(ALLOC_IN_RC(stack_slots));
 4570   op_cost(100);
 4571   //match(RegD);
 4572   format %{ "[sp+$reg]" %}
 4573   interface(MEMORY_INTER) %{
 4574     base(0x1);   // R1_SP
 4575     index(0x0);
 4576     scale(0x0);
 4577     disp($reg);  // Stack Offset
 4578   %}
 4579 %}
 4580 
 4581 // Operands for expressing Control Flow
 4582 // NOTE: Label is a predefined operand which should not be redefined in
 4583 //       the AD file. It is generically handled within the ADLC.
 4584 
 4585 //----------Conditional Branch Operands----------------------------------------
 4586 // Comparison Op
 4587 //
 4588 // This is the operation of the comparison, and is limited to the
 4589 // following set of codes: L (<), LE (<=), G (>), GE (>=), E (==), NE
 4590 // (!=).
 4591 //
 4592 // Other attributes of the comparison, such as unsignedness, are specified
 4593 // by the comparison instruction that sets a condition code flags register.
 4594 // That result is represented by a flags operand whose subtype is appropriate
 4595 // to the unsignedness (etc.) of the comparison.
 4596 //
 4597 // Later, the instruction which matches both the Comparison Op (a Bool) and
 4598 // the flags (produced by the Cmp) specifies the coding of the comparison op
 4599 // by matching a specific subtype of Bool operand below.
 4600 
 4601 // When used for floating point comparisons: unordered same as less.
 4602 operand cmpOp() %{  // Maps each Bool condition to a 4-bit code: upper two bits BO, lower two bits BI (see table).
 4603   match(Bool);
 4604   format %{ "" %}
 4605   interface(COND_INTER) %{
 4606                            // BO only encodes bit 4 of bcondCRbiIsX, as bits 1-3 are always '100'.
 4607                            //           BO          &  BI
 4608     equal(0xA);            // 10 10:   bcondCRbiIs1 & Condition::equal
 4609     not_equal(0x2);        // 00 10:   bcondCRbiIs0 & Condition::equal
 4610     less(0x8);             // 10 00:   bcondCRbiIs1 & Condition::less
 4611     greater_equal(0x0);    // 00 00:   bcondCRbiIs0 & Condition::less
 4612     less_equal(0x1);       // 00 01:   bcondCRbiIs0 & Condition::greater
 4613     greater(0x9);          // 10 01:   bcondCRbiIs1 & Condition::greater
 4614     overflow(0xB);         // 10 11:   bcondCRbiIs1 & Condition::summary_overflow
 4615     no_overflow(0x3);      // 00 11:   bcondCRbiIs0 & Condition::summary_overflow
 4616   %}
 4617 %}
 4618 
 4619 //----------OPERAND CLASSES----------------------------------------------------
 4620 // Operand Classes are groups of operands that are used to simplify
 4621 // instruction definitions by not requiring the AD writer to specify
 4622 // separate instructions for every form of operand when the
 4623 // instruction accepts multiple operand types with the same basic
 4624 // encoding and format. The classic case of this is memory operands.
 4625 // Note that 'indirect' is a member of each memory opclass defined below.
 4626 
 4627 opclass memory(indirect, indOffset16 /*, indIndex, tlsReference*/, indirectNarrow, indirectNarrow_klass, indOffset16Narrow, indOffset16Narrow_klass);  // Memory operands whose offset operand is the plain immL16.
 4628 // Memory operand where offsets are 4-aligned. Required for ld, std.
 4629 opclass memoryAlg4(indirect, indOffset16Alg4, indirectNarrow, indOffset16NarrowAlg4, indOffset16NarrowAlg4_klass);  // NOTE(review): indirectNarrow_klass is in 'memory' but not listed here - confirm intended.
 4630 opclass memoryAlg16(indirect, indOffset16Alg16);  // Memory operands whose offset operand is immL16Alg16.
 4631 opclass indirectMemory(indirect, indirectNarrow);  // Register-indirect forms only, no offset.
 4632 
 4633 // Special opclass for I and ConvL2I: accepts either an int source register or a (ConvL2I long) operand.
 4634 opclass iRegIsrc_iRegL2Isrc(iRegIsrc, iRegL2Isrc);
 4635 
 4636 // Operand classes to match encode and decode. iRegN_P2N is only used
 4637 // for storeN. I have never seen an encode node elsewhere.
 4638 opclass iRegN_P2N(iRegNsrc, iRegP2N);  // iRegP2N currently has a constant-false predicate.
 4639 opclass iRegP_N2P(iRegPsrc, iRegN2P, iRegN2P_klass);  // iRegN2P currently has a constant-false predicate; iRegN2P_klass does not.
 4640 
 4641 //----------PIPELINE-----------------------------------------------------------
 4642 
 4643 pipeline %{  // Scheduling description: attributes, functional units, pipeline stages and pipe classes.
 4644 
 4645 // See J.M.Tendler et al. "Power4 system microarchitecture", IBM
 4646 // J. Res. & Dev., No. 1, Jan. 2002.
 4647 
 4648 //----------ATTRIBUTES---------------------------------------------------------
 4649 attributes %{
 4650 
 4651   // Power4 instructions are of fixed length.
 4652   fixed_size_instructions;
 4653 
 4654   // TODO: if `bundle' means number of instructions fetched
 4655   // per cycle, this is 8. If `bundle' means Power4 `group', that is
 4656   // max instructions issued per cycle, this is 5.
 4657   max_instructions_per_bundle = 8;
 4658 
 4659   // A Power4 instruction is 4 bytes long.
 4660   instruction_unit_size = 4;
 4661 
 4662   // The Power4 processor fetches 64 bytes...
 4663   instruction_fetch_unit_size = 64;
 4664 
 4665   // ...in one line
 4666   instruction_fetch_units = 1  // NOTE(review): no trailing ';' here, unlike the attributes above - confirm ADLC accepts this as intended.
 4667 %}
 4668 
 4669 //----------RESOURCES----------------------------------------------------------
 4670 // Resources are the functional units available to the machine
 4671 resources(
 4672    PPC_BR,         // branch unit
 4673    PPC_CR,         // condition unit
 4674    PPC_FX1,        // integer arithmetic unit 1
 4675    PPC_FX2,        // integer arithmetic unit 2
 4676    PPC_LDST1,      // load/store unit 1
 4677    PPC_LDST2,      // load/store unit 2
 4678    PPC_FP1,        // float arithmetic unit 1
 4679    PPC_FP2,        // float arithmetic unit 2
 4680    PPC_LDST = PPC_LDST1 | PPC_LDST2,  // either load/store unit
 4681    PPC_FX = PPC_FX1 | PPC_FX2,        // either integer arithmetic unit
 4682    PPC_FP = PPC_FP1 | PPC_FP2         // either float arithmetic unit
 4683  );
 4684 
 4685 //----------PIPELINE DESCRIPTION-----------------------------------------------
 4686 // Pipeline Description specifies the stages in the machine's pipeline
 4687 pipe_desc(
 4688    // Power4 longest pipeline path
 4689    PPC_IF,   // instruction fetch
 4690    PPC_IC,
 4691    //PPC_BP, // branch prediction
 4692    PPC_D0,   // decode
 4693    PPC_D1,   // decode
 4694    PPC_D2,   // decode
 4695    PPC_D3,   // decode
 4696    PPC_Xfer1,
 4697    PPC_GD,   // group definition
 4698    PPC_MP,   // map
 4699    PPC_ISS,  // issue
 4700    PPC_RF,   // resource fetch
 4701    PPC_EX1,  // execute (all units)
 4702    PPC_EX2,  // execute (FP, LDST)
 4703    PPC_EX3,  // execute (FP, LDST)
 4704    PPC_EX4,  // execute (FP)
 4705    PPC_EX5,  // execute (FP)
 4706    PPC_EX6,  // execute (FP)
 4707    PPC_WB,   // write back
 4708    PPC_Xfer2,
 4709    PPC_CP
 4710  );
 4711 
 4712 //----------PIPELINE CLASSES---------------------------------------------------
 4713 // Pipeline Classes describe the stages in which input and output are
 4714 // referenced by the hardware pipeline.
 4715 
 4716 // Simple pipeline classes. Each one only states a fixed latency; none of them
 4717 // references the resources or stages declared above.
 4718 // Default pipeline class.
 4719 pipe_class pipe_class_default() %{
 4720   single_instruction;
 4721   fixed_latency(2);
 4722 %}
 4723 
 4724 // Pipeline class for empty instructions.
 4725 pipe_class pipe_class_empty() %{
 4726   single_instruction;
 4727   fixed_latency(0);
 4728 %}
 4729 
 4730 // Pipeline class for compares.
 4731 pipe_class pipe_class_compare() %{
 4732   single_instruction;
 4733   fixed_latency(16);
 4734 %}
 4735 
 4736 // Pipeline class for traps.
 4737 pipe_class pipe_class_trap() %{
 4738   single_instruction;
 4739   fixed_latency(100);
 4740 %}
 4741 
 4742 // Pipeline class for memory operations.
 4743 pipe_class pipe_class_memory() %{
 4744   single_instruction;
 4745   fixed_latency(16);
 4746 %}
 4747 
 4748 // Pipeline class for call.
 4749 pipe_class pipe_class_call() %{
 4750   single_instruction;
 4751   fixed_latency(100);
 4752 %}
 4753 
 4754 // Define the class for the Nop node.
 4755 define %{
 4756    MachNop = pipe_class_default;
 4757 %}
 4758 
 4759 %}
 4760 
 4761 //----------INSTRUCTIONS-------------------------------------------------------
 4762 
 4763 // Naming of instructions:
 4764 //   opA_operB / opA_operB_operC:
 4765 //     Operation 'op' with one or two source operands 'oper'. Result
 4766 //     type is A, source operand types are B and C.
 4767 //     Iff A == B == C, B and C are left out.
 4768 //
 4769 // The instructions are ordered according to the following scheme:
 4770 //  - loads
 4771 //  - load constants
 4772 //  - prefetch
 4773 //  - store
 4774 //  - encode/decode
 4775 //  - membar
 4776 //  - conditional moves
 4777 //  - compare & swap
 4778 //  - arithmetic and logic operations
 4779 //    * int: Add, Sub, Mul, Div, Mod
 4780 //    * int: lShift, arShift, urShift, rot
 4781 //    * float: Add, Sub, Mul, Div
 4782 //    * and, or, xor ...
 4783 //  - register moves: float <-> int, reg <-> stack, repl
 4784 //  - cast (high level type cast, XtoP, castPP, castII, not_null etc.)
 4785 //  - conv (low level type cast requiring bit changes (sign extend etc.))
 4786 //  - compares, range & zero checks.
 4787 //  - branches
 4788 //  - complex operations, intrinsics, min, max, replicate
 4789 //  - lock
 4790 //  - Calls
 4791 //
 4792 // If there are similar instructions with different types they are sorted:
 4793 // int before float
 4794 // small before big
 4795 // signed before unsigned
 4796 // e.g., loadS before loadUS before loadI before loadF.
 4797 
 4798 
 4799 //----------Load/Store Instructions--------------------------------------------
 4800 
 4801 //----------Load Instructions--------------------------------------------------
 4802 
 4803 // Converts byte to int.
 4804 // As convB2I_reg, but without match rule.  The match rule of convB2I_reg
 4805 // reuses the 'amount' operand, but adlc expects that operand specification
 4806 // and operands in match rule are equivalent.
 4807 instruct convB2I_reg_2(iRegIdst dst, iRegIsrc src) %{
 4808   effect(DEF dst, USE src);  // No match rule: the node is only created by expand rules (e.g. loadB_indirect_Ex).
 4809   format %{ "EXTSB   $dst, $src \t// byte->int" %}
 4810   size(4);  // One instruction (extsb).
 4811   ins_encode %{
 4812     __ extsb($dst$$Register, $src$$Register);
 4813   %}
 4814   ins_pipe(pipe_class_default);
 4815 %}
 4816 
 4817 instruct loadUB_indirect(iRegIdst dst, indirectMemory mem) %{  // LBZ building block used by the expand rule of loadB_indirect_Ex.
 4818   // match-rule, false predicate: the rule is never selected by matching itself.
 4819   match(Set dst (LoadB mem));
 4820   predicate(false);
 4821 
 4822   format %{ "LBZ     $dst, $mem" %}
 4823   size(4);
 4824   ins_encode( enc_lbz(dst, mem) );
 4825   ins_pipe(pipe_class_memory);
 4826 %}
 4827 
 4828 instruct loadUB_indirect_ac(iRegIdst dst, indirectMemory mem) %{  // LBZ + TWI + ISYNC building block used by the expand rule of loadB_indirect_ac_Ex.
 4829   // match-rule, false predicate: the rule is never selected by matching itself.
 4830   match(Set dst (LoadB mem));
 4831   predicate(false);
 4832 
 4833   format %{ "LBZ     $dst, $mem\n\t"
 4834             "TWI     $dst\n\t"
 4835             "ISYNC" %}
 4836   size(12);  // Three 4-byte instructions, as listed in the format.
 4837   ins_encode( enc_lbz_ac(dst, mem) );
 4838   ins_pipe(pipe_class_memory);
 4839 %}
 4840 
 4841 // Load Byte (8bit signed). LoadB = LoadUB + ConvUB2B.
 4842 instruct loadB_indirect_Ex(iRegIdst dst, indirectMemory mem) %{
 4843   match(Set dst (LoadB mem));
 4844   predicate(n->as_Load()->is_unordered() || followed_by_acquire(n));  // Loads failing this test fall to loadB_indirect_ac_Ex, which has no predicate.
 4845   ins_cost(MEMORY_REF_COST + DEFAULT_COST);
 4846   expand %{
 4847     iRegIdst tmp;  // Holds the zero-extended byte between the two expanded nodes.
 4848     loadUB_indirect(tmp, mem);
 4849     convB2I_reg_2(dst, tmp);
 4850   %}
 4851 %}
 4852 
 4853 instruct loadB_indirect_ac_Ex(iRegIdst dst, indirectMemory mem) %{  // Signed byte load via the acquire sequence; no predicate, higher cost than loadB_indirect_Ex.
 4854   match(Set dst (LoadB mem));
 4855   ins_cost(3*MEMORY_REF_COST + DEFAULT_COST);
 4856   expand %{
 4857     iRegIdst tmp;  // Holds the zero-extended byte between the two expanded nodes.
 4858     loadUB_indirect_ac(tmp, mem);
 4859     convB2I_reg_2(dst, tmp);
 4860   %}
 4861 %}
 4862 
 4863 instruct loadUB_indOffset16(iRegIdst dst, indOffset16 mem) %{  // LBZ building block used by the expand rule of loadB_indOffset16_Ex.
 4864   // match-rule, false predicate: the rule is never selected by matching itself.
 4865   match(Set dst (LoadB mem));
 4866   predicate(false);
 4867 
 4868   format %{ "LBZ     $dst, $mem" %}
 4869   size(4);
 4870   ins_encode( enc_lbz(dst, mem) );
 4871   ins_pipe(pipe_class_memory);
 4872 %}
 4873 
 4874 instruct loadUB_indOffset16_ac(iRegIdst dst, indOffset16 mem) %{  // LBZ + TWI + ISYNC building block used by the expand rule of loadB_indOffset16_ac_Ex.
 4875   // match-rule, false predicate: the rule is never selected by matching itself.
 4876   match(Set dst (LoadB mem));
 4877   predicate(false);
 4878 
 4879   format %{ "LBZ     $dst, $mem\n\t"
 4880             "TWI     $dst\n\t"
 4881             "ISYNC" %}
 4882   size(12);  // Three 4-byte instructions, as listed in the format.
 4883   ins_encode( enc_lbz_ac(dst, mem) );
 4884   ins_pipe(pipe_class_memory);
 4885 %}
 4886 
 4887 // Load Byte (8bit signed). LoadB = LoadUB + ConvUB2B.
 4888 instruct loadB_indOffset16_Ex(iRegIdst dst, indOffset16 mem) %{
 4889   match(Set dst (LoadB mem));
 4890   predicate(n->as_Load()->is_unordered() || followed_by_acquire(n));  // Loads failing this test fall to loadB_indOffset16_ac_Ex, which has no predicate.
 4891   ins_cost(MEMORY_REF_COST + DEFAULT_COST);
 4892 
 4893   expand %{
 4894     iRegIdst tmp;  // Holds the zero-extended byte between the two expanded nodes.
 4895     loadUB_indOffset16(tmp, mem);
 4896     convB2I_reg_2(dst, tmp);
 4897   %}
 4898 %}
 4899 
 4900 instruct loadB_indOffset16_ac_Ex(iRegIdst dst, indOffset16 mem) %{  // Signed byte load via the acquire sequence; no predicate, higher cost than loadB_indOffset16_Ex.
 4901   match(Set dst (LoadB mem));
 4902   ins_cost(3*MEMORY_REF_COST + DEFAULT_COST);
 4903 
 4904   expand %{
 4905     iRegIdst tmp;  // Holds the zero-extended byte between the two expanded nodes.
 4906     loadUB_indOffset16_ac(tmp, mem);
 4907     convB2I_reg_2(dst, tmp);
 4908   %}
 4909 %}
 4910 
 4911 // Load Unsigned Byte (8bit UNsigned) into an int reg.
 4912 instruct loadUB(iRegIdst dst, memory mem) %{
 4913   predicate(n->as_Load()->is_unordered() || followed_by_acquire(n));  // Loads failing this test fall to loadUB_ac, which has no predicate.
 4914   match(Set dst (LoadUB mem));
 4915   ins_cost(MEMORY_REF_COST);
 4916 
 4917   format %{ "LBZ     $dst, $mem \t// byte, zero-extend to int" %}
 4918   size(4);
 4919   ins_encode( enc_lbz(dst, mem) );
 4920   ins_pipe(pipe_class_memory);
 4921 %}
 4922 
 4923 // Load Unsigned Byte (8bit UNsigned) acquire.
 4924 instruct loadUB_ac(iRegIdst dst, memory mem) %{
 4925   match(Set dst (LoadUB mem));
 4926   ins_cost(3*MEMORY_REF_COST);  // Three times the cost of loadUB, matching the three-instruction sequence.
 4927 
 4928   format %{ "LBZ     $dst, $mem \t// byte, zero-extend to int, acquire\n\t"
 4929             "TWI     $dst\n\t"
 4930             "ISYNC" %}
 4931   size(12);
 4932   ins_encode( enc_lbz_ac(dst, mem) );
 4933   ins_pipe(pipe_class_memory);
 4934 %}
 4935 
 4936 // Load Unsigned Byte (8bit UNsigned) into a Long Register.
 4937 instruct loadUB2L(iRegLdst dst, memory mem) %{
 4938   match(Set dst (ConvI2L (LoadUB mem)));  // The ConvI2L is folded into the load: a single LBZ is emitted.
 4939   predicate(_kids[0]->_leaf->as_Load()->is_unordered() || followed_by_acquire(_kids[0]->_leaf));  // The load node is the child of the matched ConvI2L, hence _kids[0]->_leaf.
 4940   ins_cost(MEMORY_REF_COST);
 4941 
 4942   format %{ "LBZ     $dst, $mem \t// byte, zero-extend to long" %}
 4943   size(4);
 4944   ins_encode( enc_lbz(dst, mem) );
 4945   ins_pipe(pipe_class_memory);
 4946 %}
 4947 
 4948 instruct loadUB2L_ac(iRegLdst dst, memory mem) %{  // Acquire form of loadUB2L: no predicate, three-instruction sequence.
 4949   match(Set dst (ConvI2L (LoadUB mem)));
 4950   ins_cost(3*MEMORY_REF_COST);
 4951 
 4952   format %{ "LBZ     $dst, $mem \t// byte, zero-extend to long, acquire\n\t"
 4953             "TWI     $dst\n\t"
 4954             "ISYNC" %}
 4955   size(12);
 4956   ins_encode( enc_lbz_ac(dst, mem) );
 4957   ins_pipe(pipe_class_memory);
 4958 %}
 4959 
 4960 // Load Short (16bit signed)
 4961 instruct loadS(iRegIdst dst, memory mem) %{
 4962   match(Set dst (LoadS mem));
 4963   predicate(n->as_Load()->is_unordered() || followed_by_acquire(n));  // Loads failing this test fall to loadS_ac, which has no predicate.
 4964   ins_cost(MEMORY_REF_COST);
 4965 
 4966   format %{ "LHA     $dst, $mem" %}
 4967   size(4);
 4968   ins_encode %{
 4969     int Idisp = $mem$$disp + frame_slots_bias($mem$$base, ra_);  // Operand displacement plus the bias frame_slots_bias() returns for this base.
 4970     __ lha($dst$$Register, Idisp, $mem$$base$$Register);
 4971   %}
 4972   ins_pipe(pipe_class_memory);
 4973 %}
 4974 
 4975 // Load Short (16bit signed) acquire.
 4976 instruct loadS_ac(iRegIdst dst, memory mem) %{
 4977   match(Set dst (LoadS mem));
 4978   ins_cost(3*MEMORY_REF_COST);
 4979 
 4980   format %{ "LHA     $dst, $mem\t acquire\n\t"
 4981             "TWI     $dst\n\t"
 4982             "ISYNC" %}
 4983   size(12);  // lha + twi_0 + isync.
 4984   ins_encode %{
 4985     int Idisp = $mem$$disp + frame_slots_bias($mem$$base, ra_);  // Operand displacement plus the bias frame_slots_bias() returns for this base.
 4986     __ lha($dst$$Register, Idisp, $mem$$base$$Register);
 4987     __ twi_0($dst$$Register);  // twi_0 on the loaded value followed by isync forms the acquire sequence shown in the format.
 4988     __ isync();
 4989   %}
 4990   ins_pipe(pipe_class_memory);
 4991 %}
 4992 
 4993 // Load Char (16bit unsigned)
 4994 instruct loadUS(iRegIdst dst, memory mem) %{
 4995   match(Set dst (LoadUS mem));
 4996   predicate(n->as_Load()->is_unordered() || followed_by_acquire(n));  // Loads failing this test fall to loadUS_ac, which has no predicate.
 4997   ins_cost(MEMORY_REF_COST);
 4998 
 4999   format %{ "LHZ     $dst, $mem" %}
 5000   size(4);
 5001   ins_encode( enc_lhz(dst, mem) );
 5002   ins_pipe(pipe_class_memory);
 5003 %}
 5004 
 5005 // Load Char (16bit unsigned) acquire.
 5006 instruct loadUS_ac(iRegIdst dst, memory mem) %{
 5007   match(Set dst (LoadUS mem));
 5008   ins_cost(3*MEMORY_REF_COST);  // Three times the cost of loadUS, matching the three-instruction sequence.
 5009 
 5010   format %{ "LHZ     $dst, $mem \t// acquire\n\t"
 5011             "TWI     $dst\n\t"
 5012             "ISYNC" %}
 5013   size(12);
 5014   ins_encode( enc_lhz_ac(dst, mem) );
 5015   ins_pipe(pipe_class_memory);
 5016 %}
 5017 
 5018 // Load Unsigned Short/Char (16bit UNsigned) into a Long Register.
 5019 instruct loadUS2L(iRegLdst dst, memory mem) %{
 5020   match(Set dst (ConvI2L (LoadUS mem)));  // The ConvI2L is folded into the load: a single LHZ is emitted.
 5021   predicate(_kids[0]->_leaf->as_Load()->is_unordered() || followed_by_acquire(_kids[0]->_leaf));  // The load node is the child of the matched ConvI2L, hence _kids[0]->_leaf.
 5022   ins_cost(MEMORY_REF_COST);
 5023 
 5024   format %{ "LHZ     $dst, $mem \t// short, zero-extend to long" %}
 5025   size(4);
 5026   ins_encode( enc_lhz(dst, mem) );
 5027   ins_pipe(pipe_class_memory);
 5028 %}
 5029 
 5030 // Load Unsigned Short/Char (16bit UNsigned) into a Long Register acquire.
 5031 instruct loadUS2L_ac(iRegLdst dst, memory mem) %{
 5032   match(Set dst (ConvI2L (LoadUS mem)));
 5033   ins_cost(3*MEMORY_REF_COST);  // Three times the cost of loadUS2L, matching the three-instruction sequence.
 5034 
 5035   format %{ "LHZ     $dst, $mem \t// short, zero-extend to long, acquire\n\t"
 5036             "TWI     $dst\n\t"
 5037             "ISYNC" %}
 5038   size(12);
 5039   ins_encode( enc_lhz_ac(dst, mem) );
 5040   ins_pipe(pipe_class_memory);
 5041 %}
 5042 
 5043 // Load Integer.
 5044 instruct loadI(iRegIdst dst, memory mem) %{
 5045   match(Set dst (LoadI mem));
 5046   predicate(n->as_Load()->is_unordered() || followed_by_acquire(n));  // Loads failing this test fall to loadI_ac, which has no predicate.
 5047   ins_cost(MEMORY_REF_COST);
 5048 
 5049   format %{ "LWZ     $dst, $mem" %}
 5050   size(4);
 5051   ins_encode( enc_lwz(dst, mem) );
 5052   ins_pipe(pipe_class_memory);
 5053 %}
 5054 
 5055 // Load Integer acquire.
 5056 instruct loadI_ac(iRegIdst dst, memory mem) %{
 5057   match(Set dst (LoadI mem));
 5058   ins_cost(3*MEMORY_REF_COST);  // Three times the cost of loadI, matching the three-instruction sequence.
 5059 
 5060   format %{ "LWZ     $dst, $mem \t// load acquire\n\t"
 5061             "TWI     $dst\n\t"
 5062             "ISYNC" %}
 5063   size(12);
 5064   ins_encode( enc_lwz_ac(dst, mem) );
 5065   ins_pipe(pipe_class_memory);
 5066 %}
 5067 
 5068 // Match loading integer and casting it to unsigned int in
 5069 // long register.
 5070 // LoadI + ConvI2L + AndL 0xffffffff.
 5071 instruct loadUI2L(iRegLdst dst, memory mem, immL_32bits mask) %{
 5072   match(Set dst (AndL (ConvI2L (LoadI mem)) mask));
 5073   predicate(_kids[0]->_kids[0]->_leaf->as_Load()->is_unordered());
 5074   ins_cost(MEMORY_REF_COST);
 5075 
 5076   format %{ "LWZ     $dst, $mem \t// zero-extend to long" %}
 5077   size(4);
 5078   ins_encode( enc_lwz(dst, mem) );
 5079   ins_pipe(pipe_class_memory);
 5080 %}
 5081 
 5082 // Match loading integer and casting it to long.
 5083 instruct loadI2L(iRegLdst dst, memoryAlg4 mem) %{
 5084   match(Set dst (ConvI2L (LoadI mem)));
 5085   predicate(_kids[0]->_leaf->as_Load()->is_unordered());
 5086   ins_cost(MEMORY_REF_COST);
 5087 
 5088   format %{ "LWA     $dst, $mem \t// loadI2L" %}
 5089   size(4);
 5090   ins_encode %{
 5091     int Idisp = $mem$$disp + frame_slots_bias($mem$$base, ra_);
 5092     __ lwa($dst$$Register, Idisp, $mem$$base$$Register);
 5093   %}
 5094   ins_pipe(pipe_class_memory);
 5095 %}
 5096 
 5097 // Match loading integer and casting it to long - acquire.
 5098 instruct loadI2L_ac(iRegLdst dst, memoryAlg4 mem) %{
 5099   match(Set dst (ConvI2L (LoadI mem)));
 5100   ins_cost(3*MEMORY_REF_COST);
 5101 
 5102   format %{ "LWA     $dst, $mem \t// loadI2L acquire"
 5103             "TWI     $dst\n\t"
 5104             "ISYNC" %}
 5105   size(12);
 5106   ins_encode %{
 5107     int Idisp = $mem$$disp + frame_slots_bias($mem$$base, ra_);
 5108     __ lwa($dst$$Register, Idisp, $mem$$base$$Register);
 5109     __ twi_0($dst$$Register);
 5110     __ isync();
 5111   %}
 5112   ins_pipe(pipe_class_memory);
 5113 %}
 5114 
 5115 // Load Long - aligned
 5116 instruct loadL(iRegLdst dst, memoryAlg4 mem) %{
 5117   match(Set dst (LoadL mem));
 5118   predicate(n->as_Load()->is_unordered() || followed_by_acquire(n));
 5119   ins_cost(MEMORY_REF_COST);
 5120 
 5121   format %{ "LD      $dst, $mem \t// long" %}
 5122   size(4);
 5123   ins_encode( enc_ld(dst, mem) );
 5124   ins_pipe(pipe_class_memory);
 5125 %}
 5126 
 5127 // Load Long - aligned acquire.
 5128 instruct loadL_ac(iRegLdst dst, memoryAlg4 mem) %{
 5129   match(Set dst (LoadL mem));
 5130   ins_cost(3*MEMORY_REF_COST);
 5131 
 5132   format %{ "LD      $dst, $mem \t// long acquire\n\t"
 5133             "TWI     $dst\n\t"
 5134             "ISYNC" %}
 5135   size(12);
 5136   ins_encode( enc_ld_ac(dst, mem) );
 5137   ins_pipe(pipe_class_memory);
 5138 %}
 5139 
 5140 // Load Long - UNaligned
 5141 instruct loadL_unaligned(iRegLdst dst, memoryAlg4 mem) %{
 5142   match(Set dst (LoadL_unaligned mem));
 5143   // predicate(...) // Unaligned_ac is not needed (and wouldn't make sense).
 5144   ins_cost(MEMORY_REF_COST);
 5145 
 5146   format %{ "LD      $dst, $mem \t// unaligned long" %}
 5147   size(4);
 5148   ins_encode( enc_ld(dst, mem) );
 5149   ins_pipe(pipe_class_memory);
 5150 %}
 5151 
 5152 // Load nodes for superwords
 5153 
 5154 // Load Aligned Packed Byte
 5155 instruct loadV8(iRegLdst dst, memoryAlg4 mem) %{
 5156   predicate(n->as_LoadVector()->memory_size() == 8);
 5157   match(Set dst (LoadVector mem));
 5158   ins_cost(MEMORY_REF_COST);
 5159 
 5160   format %{ "LD      $dst, $mem \t// load 8-byte Vector" %}
 5161   size(4);
 5162   ins_encode( enc_ld(dst, mem) );
 5163   ins_pipe(pipe_class_memory);
 5164 %}
 5165 
 5166 
 5167 instruct loadV16(vecX dst, memoryAlg16 mem) %{
 5168   predicate(n->as_LoadVector()->memory_size() == 16);
 5169   match(Set dst (LoadVector mem));
 5170   ins_cost(MEMORY_REF_COST);
 5171 
 5172   format %{ "LXV      $dst, $mem \t// load 16-byte Vector" %}
 5173   size(4);
 5174   ins_encode %{
 5175     __ lxv($dst$$VectorRegister.to_vsr(), $mem$$disp, $mem$$Register);
 5176   %}
 5177   ins_pipe(pipe_class_default);
 5178 %}
 5179 
 5180 // Load Range, range = array length (=jint)
 5181 instruct loadRange(iRegIdst dst, memory mem) %{
 5182   match(Set dst (LoadRange mem));
 5183   ins_cost(MEMORY_REF_COST);
 5184 
 5185   format %{ "LWZ     $dst, $mem \t// range" %}
 5186   size(4);
 5187   ins_encode( enc_lwz(dst, mem) );
 5188   ins_pipe(pipe_class_memory);
 5189 %}
 5190 
 5191 // Load Compressed Pointer
 5192 instruct loadN(iRegNdst dst, memory mem) %{
 5193   match(Set dst (LoadN mem));
 5194   predicate((n->as_Load()->is_unordered() || followed_by_acquire(n)) && n->as_Load()->barrier_data() == 0);
 5195   ins_cost(MEMORY_REF_COST);
 5196 
 5197   format %{ "LWZ     $dst, $mem \t// load compressed ptr" %}
 5198   size(4);
 5199   ins_encode( enc_lwz(dst, mem) );
 5200   ins_pipe(pipe_class_memory);
 5201 %}
 5202 
 5203 // Load Compressed Pointer acquire.
 5204 instruct loadN_ac(iRegNdst dst, memory mem) %{
 5205   match(Set dst (LoadN mem));
 5206   predicate(n->as_Load()->barrier_data() == 0);
 5207   ins_cost(3*MEMORY_REF_COST);
 5208 
 5209   format %{ "LWZ     $dst, $mem \t// load acquire compressed ptr\n\t"
 5210             "TWI     $dst\n\t"
 5211             "ISYNC" %}
 5212   size(12);
 5213   ins_encode( enc_lwz_ac(dst, mem) );
 5214   ins_pipe(pipe_class_memory);
 5215 %}
 5216 
 5217 // Load Compressed Pointer and decode it if narrow_oop_shift == 0.
 5218 instruct loadN2P_unscaled(iRegPdst dst, memory mem) %{
 5219   match(Set dst (DecodeN (LoadN mem)));
 5220   predicate(_kids[0]->_leaf->as_Load()->is_unordered() && CompressedOops::shift() == 0 && _kids[0]->_leaf->as_Load()->barrier_data() == 0);
 5221   ins_cost(MEMORY_REF_COST);
 5222 
 5223   format %{ "LWZ     $dst, $mem \t// DecodeN (unscaled)" %}
 5224   size(4);
 5225   ins_encode( enc_lwz(dst, mem) );
 5226   ins_pipe(pipe_class_memory);
 5227 %}
 5228 
 5229 instruct loadN2P_klass_unscaled(iRegPdst dst, memory mem) %{
 5230   match(Set dst (DecodeNKlass (LoadNKlass mem)));
 5231   predicate(CompressedKlassPointers::base() == nullptr && CompressedKlassPointers::shift() == 0 &&
 5232             _kids[0]->_leaf->as_Load()->is_unordered());
 5233   ins_cost(MEMORY_REF_COST);
 5234 
 5235   format %{ "LWZ     $dst, $mem \t// DecodeN (unscaled)" %}
 5236   size(4);
 5237   ins_encode( enc_lwz(dst, mem) );
 5238   ins_pipe(pipe_class_memory);
 5239 %}
 5240 
 5241 // Load Pointer
 5242 instruct loadP(iRegPdst dst, memoryAlg4 mem) %{
 5243   match(Set dst (LoadP mem));
 5244   predicate((n->as_Load()->is_unordered() || followed_by_acquire(n)) && n->as_Load()->barrier_data() == 0);
 5245   ins_cost(MEMORY_REF_COST);
 5246 
 5247   format %{ "LD      $dst, $mem \t// ptr" %}
 5248   size(4);
 5249   ins_encode( enc_ld(dst, mem) );
 5250   ins_pipe(pipe_class_memory);
 5251 %}
 5252 
 5253 // Load Pointer acquire.
 5254 instruct loadP_ac(iRegPdst dst, memoryAlg4 mem) %{
 5255   match(Set dst (LoadP mem));
 5256   ins_cost(3*MEMORY_REF_COST);
 5257 
 5258   predicate(n->as_Load()->barrier_data() == 0);
 5259 
 5260   format %{ "LD      $dst, $mem \t// ptr acquire\n\t"
 5261             "TWI     $dst\n\t"
 5262             "ISYNC" %}
 5263   size(12);
 5264   ins_encode( enc_ld_ac(dst, mem) );
 5265   ins_pipe(pipe_class_memory);
 5266 %}
 5267 
 5268 // LoadP + CastP2L
 5269 instruct loadP2X(iRegLdst dst, memoryAlg4 mem) %{
 5270   match(Set dst (CastP2X (LoadP mem)));
 5271   predicate(_kids[0]->_leaf->as_Load()->is_unordered() && _kids[0]->_leaf->as_Load()->barrier_data() == 0);
 5272   ins_cost(MEMORY_REF_COST);
 5273 
 5274   format %{ "LD      $dst, $mem \t// ptr + p2x" %}
 5275   size(4);
 5276   ins_encode( enc_ld(dst, mem) );
 5277   ins_pipe(pipe_class_memory);
 5278 %}
 5279 
 5280 // Load compressed klass pointer.
 5281 instruct loadNKlass(iRegNdst dst, memory mem) %{
 5282   match(Set dst (LoadNKlass mem));
 5283   predicate(!UseCompactObjectHeaders);
 5284   ins_cost(MEMORY_REF_COST);
 5285 
 5286   format %{ "LWZ     $dst, $mem \t// compressed klass ptr" %}
 5287   size(4);
 5288   ins_encode( enc_lwz(dst, mem) );
 5289   ins_pipe(pipe_class_memory);
 5290 %}
 5291 
 5292 instruct loadNKlassCompactHeaders(iRegNdst dst, memory mem) %{
 5293   match(Set dst (LoadNKlass mem));
 5294   predicate(UseCompactObjectHeaders);
 5295   ins_cost(MEMORY_REF_COST);
 5296 
 5297   format %{ "load_narrow_klass_compact $dst, $mem \t// compressed class ptr" %}
 5298   size(8);
 5299   ins_encode %{
 5300     assert($mem$$index$$Register == R0, "must not have indexed address: %s[%s]", $mem$$base$$Register.name(), $mem$$index$$Register.name());
 5301     __ load_narrow_klass_compact_c2($dst$$Register, $mem$$base$$Register, $mem$$disp);
 5302   %}
 5303   ins_pipe(pipe_class_memory);
 5304 %}
 5305 
 5306 // Load Klass Pointer
 5307 instruct loadKlass(iRegPdst dst, memoryAlg4 mem) %{
 5308   match(Set dst (LoadKlass mem));
 5309   ins_cost(MEMORY_REF_COST);
 5310 
 5311   format %{ "LD      $dst, $mem \t// klass ptr" %}
 5312   size(4);
 5313   ins_encode( enc_ld(dst, mem) );
 5314   ins_pipe(pipe_class_memory);
 5315 %}
 5316 
 5317 // Load Float
 5318 instruct loadF(regF dst, memory mem) %{
 5319   match(Set dst (LoadF mem));
 5320   predicate(n->as_Load()->is_unordered() || followed_by_acquire(n));
 5321   ins_cost(MEMORY_REF_COST);
 5322 
 5323   format %{ "LFS     $dst, $mem" %}
 5324   size(4);
 5325   ins_encode %{
 5326     int Idisp = $mem$$disp + frame_slots_bias($mem$$base, ra_);
 5327     __ lfs($dst$$FloatRegister, Idisp, $mem$$base$$Register);
 5328   %}
 5329   ins_pipe(pipe_class_memory);
 5330 %}
 5331 
 5332 // Load Float acquire.
 5333 instruct loadF_ac(regF dst, memory mem, flagsRegCR0 cr0) %{
 5334   match(Set dst (LoadF mem));
 5335   effect(TEMP cr0);
 5336   ins_cost(3*MEMORY_REF_COST);
 5337 
 5338   format %{ "LFS     $dst, $mem \t// acquire\n\t"
 5339             "FCMPU   cr0, $dst, $dst\n\t"
 5340             "BNE     cr0, next\n"
 5341             "next:\n\t"
 5342             "ISYNC" %}
 5343   size(16);
 5344   ins_encode %{
 5345     int Idisp = $mem$$disp + frame_slots_bias($mem$$base, ra_);
 5346     Label next;
 5347     __ lfs($dst$$FloatRegister, Idisp, $mem$$base$$Register);
 5348     __ fcmpu(CR0, $dst$$FloatRegister, $dst$$FloatRegister);
 5349     __ bne(CR0, next);
 5350     __ bind(next);
 5351     __ isync();
 5352   %}
 5353   ins_pipe(pipe_class_memory);
 5354 %}
 5355 
 5356 // Load Double - aligned
 5357 instruct loadD(regD dst, memory mem) %{
 5358   match(Set dst (LoadD mem));
 5359   predicate(n->as_Load()->is_unordered() || followed_by_acquire(n));
 5360   ins_cost(MEMORY_REF_COST);
 5361 
 5362   format %{ "LFD     $dst, $mem" %}
 5363   size(4);
 5364   ins_encode( enc_lfd(dst, mem) );
 5365   ins_pipe(pipe_class_memory);
 5366 %}
 5367 
 5368 // Load Double - aligned acquire.
 5369 instruct loadD_ac(regD dst, memory mem, flagsRegCR0 cr0) %{
 5370   match(Set dst (LoadD mem));
 5371   effect(TEMP cr0);
 5372   ins_cost(3*MEMORY_REF_COST);
 5373 
 5374   format %{ "LFD     $dst, $mem \t// acquire\n\t"
 5375             "FCMPU   cr0, $dst, $dst\n\t"
 5376             "BNE     cr0, next\n"
 5377             "next:\n\t"
 5378             "ISYNC" %}
 5379   size(16);
 5380   ins_encode %{
 5381     int Idisp = $mem$$disp + frame_slots_bias($mem$$base, ra_);
 5382     Label next;
 5383     __ lfd($dst$$FloatRegister, Idisp, $mem$$base$$Register);
 5384     __ fcmpu(CR0, $dst$$FloatRegister, $dst$$FloatRegister);
 5385     __ bne(CR0, next);
 5386     __ bind(next);
 5387     __ isync();
 5388   %}
 5389   ins_pipe(pipe_class_memory);
 5390 %}
 5391 
 5392 // Load Double - UNaligned
 5393 instruct loadD_unaligned(regD dst, memory mem) %{
 5394   match(Set dst (LoadD_unaligned mem));
 5395   // predicate(...) // Unaligned_ac is not needed (and wouldn't make sense).
 5396   ins_cost(MEMORY_REF_COST);
 5397 
 5398   format %{ "LFD     $dst, $mem" %}
 5399   size(4);
 5400   ins_encode( enc_lfd(dst, mem) );
 5401   ins_pipe(pipe_class_memory);
 5402 %}
 5403 
 5404 //----------Constants--------------------------------------------------------
 5405 
 5406 // Load MachConstantTableBase: add hi offset to global toc.
 5407 // TODO: Handle hidden register r29 in bundler!
 5408 instruct loadToc_hi(iRegLdst dst) %{
 5409   effect(DEF dst);
 5410   ins_cost(DEFAULT_COST);
 5411 
 5412   format %{ "ADDIS   $dst, R29, DISP.hi \t// load TOC hi" %}
 5413   size(4);
 5414   ins_encode %{
 5415     __ calculate_address_from_global_toc_hi16only($dst$$Register, __ method_toc());
 5416   %}
 5417   ins_pipe(pipe_class_default);
 5418 %}
 5419 
 5420 // Load MachConstantTableBase: add lo offset to global toc.
 5421 instruct loadToc_lo(iRegLdst dst, iRegLdst src) %{
 5422   effect(DEF dst, USE src);
 5423   ins_cost(DEFAULT_COST);
 5424 
 5425   format %{ "ADDI    $dst, $src, DISP.lo \t// load TOC lo" %}
 5426   size(4);
 5427   ins_encode %{
 5428     __ calculate_address_from_global_toc_lo16only($dst$$Register, __ method_toc());
 5429   %}
 5430   ins_pipe(pipe_class_default);
 5431 %}
 5432 
 5433 // Load 16-bit integer constant 0xssss????
 5434 instruct loadConI16(iRegIdst dst, immI16 src) %{
 5435   match(Set dst src);
 5436 
 5437   format %{ "LI      $dst, $src" %}
 5438   size(4);
 5439   ins_encode %{
 5440     __ li($dst$$Register, (int)((short)($src$$constant & 0xFFFF)));
 5441   %}
 5442   ins_pipe(pipe_class_default);
 5443 %}
 5444 
 5445 // Load integer constant 0x????0000
 5446 instruct loadConIhi16(iRegIdst dst, immIhi16 src) %{
 5447   match(Set dst src);
 5448   ins_cost(DEFAULT_COST);
 5449 
 5450   format %{ "LIS     $dst, $src.hi" %}
 5451   size(4);
 5452   ins_encode %{
 5453     // Lis sign extends 16-bit src then shifts it 16 bit to the left.
 5454     __ lis($dst$$Register, (int)((short)(($src$$constant & 0xFFFF0000) >> 16)));
 5455   %}
 5456   ins_pipe(pipe_class_default);
 5457 %}
 5458 
 5459 // Part 2 of loading 32 bit constant: hi16 is is src1 (properly shifted
 5460 // and sign extended), this adds the low 16 bits.
 5461 instruct loadConI32_lo16(iRegIdst dst, iRegIsrc src1, immI16 src2) %{
 5462   // no match-rule, false predicate
 5463   effect(DEF dst, USE src1, USE src2);
 5464   predicate(false);
 5465 
 5466   format %{ "ORI     $dst, $src1.hi, $src2.lo" %}
 5467   size(4);
 5468   ins_encode %{
 5469     __ ori($dst$$Register, $src1$$Register, ($src2$$constant) & 0xFFFF);
 5470   %}
 5471   ins_pipe(pipe_class_default);
 5472 %}
 5473 
 5474 instruct loadConI32(iRegIdst dst, immI32 src) %{
 5475   match(Set dst src);
 5476   // This macro is valid only in Power 10 and up, but adding the following predicate here
 5477   // caused a build error, so we comment it out for now.
 5478   // predicate(PowerArchitecturePPC64 >= 10);
 5479   ins_cost(DEFAULT_COST+1);
 5480 
 5481   format %{ "PLI     $dst, $src" %}
 5482   size(8);
 5483   ins_encode %{
 5484     assert( ((intptr_t)(__ pc()) & 0x3c) != 0x3c, "Bad alignment for prefixed instruction at " INTPTR_FORMAT, (intptr_t)(__ pc()));
 5485     __ pli($dst$$Register, $src$$constant);
 5486   %}
 5487   ins_pipe(pipe_class_default);
 5488   ins_alignment(2);
 5489 %}
 5490 
 5491 instruct loadConI_Ex(iRegIdst dst, immI src) %{
 5492   match(Set dst src);
 5493   ins_cost(DEFAULT_COST*2);
 5494 
 5495   expand %{
 5496     // Would like to use $src$$constant.
 5497     immI16 srcLo %{ _opnds[1]->constant() %}
 5498     // srcHi can be 0000 if srcLo sign-extends to a negative number.
 5499     immIhi16 srcHi %{ _opnds[1]->constant() %}
 5500     iRegIdst tmpI;
 5501     loadConIhi16(tmpI, srcHi);
 5502     loadConI32_lo16(dst, tmpI, srcLo);
 5503   %}
 5504 %}
 5505 
 5506 // No constant pool entries required.
 5507 instruct loadConL16(iRegLdst dst, immL16 src) %{
 5508   match(Set dst src);
 5509 
 5510   format %{ "LI      $dst, $src \t// long" %}
 5511   size(4);
 5512   ins_encode %{
 5513     __ li($dst$$Register, (int)((short) ($src$$constant & 0xFFFF)));
 5514   %}
 5515   ins_pipe(pipe_class_default);
 5516 %}
 5517 
 5518 // Load long constant 0xssssssss????0000
 5519 instruct loadConL32hi16(iRegLdst dst, immL32hi16 src) %{
 5520   match(Set dst src);
 5521   ins_cost(DEFAULT_COST);
 5522 
 5523   format %{ "LIS     $dst, $src.hi \t// long" %}
 5524   size(4);
 5525   ins_encode %{
 5526     __ lis($dst$$Register, (int)((short)(($src$$constant & 0xFFFF0000) >> 16)));
 5527   %}
 5528   ins_pipe(pipe_class_default);
 5529 %}
 5530 
 5531 // To load a 32 bit constant: merge lower 16 bits into already loaded
 5532 // high 16 bits.
 5533 instruct loadConL32_lo16(iRegLdst dst, iRegLsrc src1, immL16 src2) %{
 5534   // no match-rule, false predicate
 5535   effect(DEF dst, USE src1, USE src2);
 5536   predicate(false);
 5537 
 5538   format %{ "ORI     $dst, $src1, $src2.lo" %}
 5539   size(4);
 5540   ins_encode %{
 5541     __ ori($dst$$Register, $src1$$Register, ($src2$$constant) & 0xFFFF);
 5542   %}
 5543   ins_pipe(pipe_class_default);
 5544 %}
 5545 
 5546 // Load 32-bit long constant
 5547 instruct loadConL32_Ex(iRegLdst dst, immL32 src) %{
 5548   match(Set dst src);
 5549   ins_cost(DEFAULT_COST*2);
 5550 
 5551   expand %{
 5552     // Would like to use $src$$constant.
 5553     immL16     srcLo %{ _opnds[1]->constant() /*& 0x0000FFFFL */%}
 5554     // srcHi can be 0000 if srcLo sign-extends to a negative number.
 5555     immL32hi16 srcHi %{ _opnds[1]->constant() /*& 0xFFFF0000L */%}
 5556     iRegLdst tmpL;
 5557     loadConL32hi16(tmpL, srcHi);
 5558     loadConL32_lo16(dst, tmpL, srcLo);
 5559   %}
 5560 %}
 5561 
 5562 // Load 34-bit long constant using prefixed addi. No constant pool entries required.
 5563 instruct loadConL34(iRegLdst dst, immL34 src) %{
 5564   match(Set dst src);
 5565   // This macro is valid only in Power 10 and up, but adding the following predicate here
 5566   // caused a build error, so we comment it out for now.
 5567   // predicate(PowerArchitecturePPC64 >= 10);
 5568   ins_cost(DEFAULT_COST+1);
 5569 
 5570   format %{ "PLI     $dst, $src \t// long" %}
 5571   size(8);
 5572   ins_encode %{
 5573     assert( ((intptr_t)(__ pc()) & 0x3c) != 0x3c, "Bad alignment for prefixed instruction at " INTPTR_FORMAT, (intptr_t)(__ pc()));
 5574     __ pli($dst$$Register, $src$$constant);
 5575   %}
 5576   ins_pipe(pipe_class_default);
 5577   ins_alignment(2);
 5578 %}
 5579 
 5580 // Load long constant 0x????000000000000.
 5581 instruct loadConLhighest16_Ex(iRegLdst dst, immLhighest16 src) %{
 5582   match(Set dst src);
 5583   ins_cost(DEFAULT_COST);
 5584 
 5585   expand %{
 5586     immL32hi16 srcHi %{ _opnds[1]->constant() >> 32 /*& 0xFFFF0000L */%}
 5587     immI shift32 %{ 32 %}
 5588     iRegLdst tmpL;
 5589     loadConL32hi16(tmpL, srcHi);
 5590     lshiftL_regL_immI(dst, tmpL, shift32);
 5591   %}
 5592 %}
 5593 
 5594 // Expand node for constant pool load: small offset.
 5595 instruct loadConL(iRegLdst dst, immL src, iRegLdst toc) %{
 5596   effect(DEF dst, USE src, USE toc);
 5597   ins_cost(MEMORY_REF_COST);
 5598 
 5599   ins_num_consts(1);
 5600   // Needed so that CallDynamicJavaDirect can compute the address of this
 5601   // instruction for relocation.
 5602   ins_field_cbuf_insts_offset(int);
 5603 
 5604   format %{ "LD      $dst, offset, $toc \t// load long $src from TOC" %}
 5605   size(4);
 5606   ins_encode( enc_load_long_constL(dst, src, toc) );
 5607   ins_pipe(pipe_class_memory);
 5608 %}
 5609 
 5610 // Expand node for constant pool load: large offset.
 5611 instruct loadConL_hi(iRegLdst dst, immL src, iRegLdst toc) %{
 5612   effect(DEF dst, USE src, USE toc);
 5613   predicate(false);
 5614 
 5615   ins_num_consts(1);
 5616   ins_field_const_toc_offset(int);
 5617   // Needed so that CallDynamicJavaDirect can compute the address of this
 5618   // instruction for relocation.
 5619   ins_field_cbuf_insts_offset(int);
 5620 
 5621   format %{ "ADDIS   $dst, $toc, offset \t// load long $src from TOC (hi)" %}
 5622   size(4);
 5623   ins_encode( enc_load_long_constL_hi(dst, toc, src) );
 5624   ins_pipe(pipe_class_default);
 5625 %}
 5626 
 5627 // Expand node for constant pool load: large offset.
 5628 // No constant pool entries required.
 5629 instruct loadConL_lo(iRegLdst dst, immL src, iRegLdst base) %{
 5630   effect(DEF dst, USE src, USE base);
 5631   predicate(false);
 5632 
 5633   ins_field_const_toc_offset_hi_node(loadConL_hiNode*);
 5634 
 5635   format %{ "LD      $dst, offset, $base \t// load long $src from TOC (lo)" %}
 5636   size(4);
 5637   ins_encode %{
 5638     int offset = ra_->C->output()->in_scratch_emit_size() ? 0 : _const_toc_offset_hi_node->_const_toc_offset;
 5639     __ ld($dst$$Register, MacroAssembler::largeoffset_si16_si16_lo(offset), $base$$Register);
 5640   %}
 5641   ins_pipe(pipe_class_memory);
 5642 %}
 5643 
 5644 // Load long constant from constant table. Expand in case of
 5645 // offset > 16 bit is needed.
 5646 // Adlc adds toc node MachConstantTableBase.
 5647 instruct loadConL_Ex(iRegLdst dst, immL src) %{
 5648   match(Set dst src);
 5649   ins_cost(MEMORY_REF_COST);
 5650 
 5651   format %{ "LD      $dst, offset, $constanttablebase\t// load long $src from table, postalloc expanded" %}
 5652   // We can not inline the enc_class for the expand as that does not support constanttablebase.
 5653   postalloc_expand( postalloc_expand_load_long_constant(dst, src, constanttablebase) );
 5654 %}
 5655 
 5656 // Load nullptr as compressed oop.
 5657 instruct loadConN0(iRegNdst dst, immN_0 src) %{
 5658   match(Set dst src);
 5659   ins_cost(DEFAULT_COST);
 5660 
 5661   format %{ "LI      $dst, $src \t// compressed ptr" %}
 5662   size(4);
 5663   ins_encode %{
 5664     __ li($dst$$Register, 0);
 5665   %}
 5666   ins_pipe(pipe_class_default);
 5667 %}
 5668 
 5669 // Load hi part of compressed oop constant.
 5670 instruct loadConN_hi(iRegNdst dst, immN src) %{
 5671   effect(DEF dst, USE src);
 5672   ins_cost(DEFAULT_COST);
 5673 
 5674   format %{ "LIS     $dst, $src \t// narrow oop hi" %}
 5675   size(4);
 5676   ins_encode %{
 5677     __ lis($dst$$Register, 0); // Will get patched.
 5678   %}
 5679   ins_pipe(pipe_class_default);
 5680 %}
 5681 
 5682 // Add lo part of compressed oop constant to already loaded hi part.
 5683 instruct loadConN_lo(iRegNdst dst, iRegNsrc src1, immN src2) %{
 5684   effect(DEF dst, USE src1, USE src2);
 5685   ins_cost(DEFAULT_COST);
 5686 
 5687   format %{ "ORI     $dst, $src1, $src2 \t// narrow oop lo" %}
 5688   size(4);
 5689   ins_encode %{
 5690     AddressLiteral addrlit = __ constant_oop_address((jobject)$src2$$constant);
 5691     __ relocate(addrlit.rspec(), /*compressed format*/ 1);
 5692     __ ori($dst$$Register, $src1$$Register, 0); // Will get patched.
 5693   %}
 5694   ins_pipe(pipe_class_default);
 5695 %}
 5696 
 5697 instruct rldicl(iRegLdst dst, iRegLsrc src, immI16 shift, immI16 mask_begin) %{
 5698   effect(DEF dst, USE src, USE shift, USE mask_begin);
 5699 
 5700   size(4);
 5701   ins_encode %{
 5702     __ rldicl($dst$$Register, $src$$Register, $shift$$constant, $mask_begin$$constant);
 5703   %}
 5704   ins_pipe(pipe_class_default);
 5705 %}
 5706 
 5707 // Needed to postalloc expand loadConN: ConN is loaded as ConI
 5708 // leaving the upper 32 bits with sign-extension bits.
 5709 // This clears these bits: dst = src & 0xFFFFFFFF.
 5710 // TODO: Eventually call this maskN_regN_FFFFFFFF.
 5711 instruct clearMs32b(iRegNdst dst, iRegNsrc src) %{
 5712   effect(DEF dst, USE src);
 5713   predicate(false);
 5714 
 5715   format %{ "MASK    $dst, $src, 0xFFFFFFFF" %} // mask
 5716   size(4);
 5717   ins_encode %{
 5718     __ clrldi($dst$$Register, $src$$Register, 0x20);
 5719   %}
 5720   ins_pipe(pipe_class_default);
 5721 %}
 5722 
 5723 // Optimize DecodeN for disjoint base.
 5724 // Load base of compressed oops into a register
 5725 instruct loadBase(iRegLdst dst) %{
 5726   effect(DEF dst);
 5727 
 5728   format %{ "LoadConst $dst, heapbase" %}
 5729   ins_encode %{
 5730     __ load_const_optimized($dst$$Register, CompressedOops::base(), R0);
 5731   %}
 5732   ins_pipe(pipe_class_default);
 5733 %}
 5734 
 5735 // Loading ConN must be postalloc expanded so that edges between
 5736 // the nodes are safe. They may not interfere with a safepoint.
 5737 // GL TODO: This needs three instructions: better put this into the constant pool.
 5738 instruct loadConN_Ex(iRegNdst dst, immN src) %{
 5739   match(Set dst src);
 5740   ins_cost(DEFAULT_COST*2);
 5741 
 5742   format %{ "LoadN   $dst, $src \t// postalloc expanded" %} // mask
 5743   postalloc_expand %{
 5744     MachNode *m1 = new loadConN_hiNode();
 5745     MachNode *m2 = new loadConN_loNode();
 5746     MachNode *m3 = new clearMs32bNode();
 5747     m1->_bottom_type = bottom_type();
 5748     m2->_bottom_type = bottom_type();
 5749     m3->_bottom_type = bottom_type();
 5750     m1->add_req(nullptr);
 5751     m2->add_req(nullptr, m1);
 5752     m3->add_req(nullptr, m2);
 5753     m1->_opnds[0] = op_dst;
 5754     m1->_opnds[1] = op_src;
 5755     m2->_opnds[0] = op_dst;
 5756     m2->_opnds[1] = op_dst;
 5757     m2->_opnds[2] = op_src;
 5758     m3->_opnds[0] = op_dst;
 5759     m3->_opnds[1] = op_dst;
 5760     ra_->set_pair(m1->_idx, ra_->get_reg_second(this), ra_->get_reg_first(this));
 5761     ra_->set_pair(m2->_idx, ra_->get_reg_second(this), ra_->get_reg_first(this));
 5762     ra_->set_pair(m3->_idx, ra_->get_reg_second(this), ra_->get_reg_first(this));
 5763     nodes->push(m1);
 5764     nodes->push(m2);
 5765     nodes->push(m3);
 5766   %}
 5767 %}
 5768 
 5769 // We have seen a safepoint between the hi and lo parts, and this node was handled
 5770 // as an oop. Therefore this needs a match rule so that build_oop_map knows this is
 5771 // not a narrow oop.
 5772 instruct loadConNKlass_hi(iRegNdst dst, immNKlass_NM src) %{
 5773   match(Set dst src);
 5774   effect(DEF dst, USE src);
 5775   ins_cost(DEFAULT_COST);
 5776 
 5777   format %{ "LIS     $dst, $src \t// narrow klass hi" %}
 5778   size(4);
 5779   ins_encode %{
 5780     intptr_t Csrc = CompressedKlassPointers::encode((Klass *)$src$$constant);
 5781     __ lis($dst$$Register, (int)(short)((Csrc >> 16) & 0xffff));
 5782   %}
 5783   ins_pipe(pipe_class_default);
 5784 %}
 5785 
 5786 // As loadConNKlass_hi this must be recognized as narrow klass, not oop!
 5787 instruct loadConNKlass_mask(iRegNdst dst, immNKlass_NM src1, iRegNsrc src2) %{
 5788   match(Set dst src1);
 5789   effect(TEMP src2);
 5790   ins_cost(DEFAULT_COST);
 5791 
 5792   format %{ "MASK    $dst, $src2, 0xFFFFFFFF" %} // mask
 5793   size(4);
 5794   ins_encode %{
 5795     __ clrldi($dst$$Register, $src2$$Register, 0x20);
 5796   %}
 5797   ins_pipe(pipe_class_default);
 5798 %}
 5799 
 5800 // This needs a match rule so that build_oop_map knows this is
 5801 // not a narrow oop.
 5802 instruct loadConNKlass_lo(iRegNdst dst, immNKlass_NM src1, iRegNsrc src2) %{
 5803   match(Set dst src1);
 5804   effect(TEMP src2);
 5805   ins_cost(DEFAULT_COST);
 5806 
 5807   format %{ "ORI     $dst, $src1, $src2 \t// narrow klass lo" %}
 5808   size(4);
 5809   ins_encode %{
 5810     // Notify OOP recorder (don't need the relocation)
 5811     AddressLiteral md = __ constant_metadata_address((Klass*)$src1$$constant);
 5812     intptr_t Csrc = CompressedKlassPointers::encode((Klass*)md.value());
 5813     __ ori($dst$$Register, $src2$$Register, Csrc & 0xffff);
 5814   %}
 5815   ins_pipe(pipe_class_default);
 5816 %}
 5817 
 5818 // Loading ConNKlass must be postalloc expanded so that edges between
 5819 // the nodes are safe. They may not interfere with a safepoint.
 5820 instruct loadConNKlass_Ex(iRegNdst dst, immNKlass src) %{
 5821   match(Set dst src);
 5822   ins_cost(DEFAULT_COST*2);
 5823 
 5824   format %{ "LoadN   $dst, $src \t// postalloc expanded" %} // mask
 5825   postalloc_expand %{
 5826     // Load high bits into register. Sign extended.
 5827     MachNode *m1 = new loadConNKlass_hiNode();
 5828     m1->_bottom_type = bottom_type();
 5829     m1->add_req(nullptr);
 5830     m1->_opnds[0] = op_dst;
 5831     m1->_opnds[1] = op_src;
 5832     ra_->set_pair(m1->_idx, ra_->get_reg_second(this), ra_->get_reg_first(this));
 5833     nodes->push(m1);
 5834 
 5835     MachNode *m2 = m1;
 5836     if (!Assembler::is_uimm((jlong)CompressedKlassPointers::encode((Klass *)op_src->constant()), 31)) {
 5837       // Value might be 1-extended. Mask out these bits.
 5838       m2 = new loadConNKlass_maskNode();
 5839       m2->_bottom_type = bottom_type();
 5840       m2->add_req(nullptr, m1);
 5841       m2->_opnds[0] = op_dst;
 5842       m2->_opnds[1] = op_src;
 5843       m2->_opnds[2] = op_dst;
 5844       ra_->set_pair(m2->_idx, ra_->get_reg_second(this), ra_->get_reg_first(this));
 5845       nodes->push(m2);
 5846     }
 5847 
 5848     MachNode *m3 = new loadConNKlass_loNode();
 5849     m3->_bottom_type = bottom_type();
 5850     m3->add_req(nullptr, m2);
 5851     m3->_opnds[0] = op_dst;
 5852     m3->_opnds[1] = op_src;
 5853     m3->_opnds[2] = op_dst;
 5854     ra_->set_pair(m3->_idx, ra_->get_reg_second(this), ra_->get_reg_first(this));
 5855     nodes->push(m3);
 5856   %}
 5857 %}
 5858 
 5859 // 0x1 is used in object initialization (initial object header).
 5860 // No constant pool entries required.
 5861 instruct loadConP0or1(iRegPdst dst, immP_0or1 src) %{
 5862   match(Set dst src);
 5863 
 5864   format %{ "LI      $dst, $src \t// ptr" %}
 5865   size(4);
 5866   ins_encode %{
 5867     __ li($dst$$Register, (int)((short)($src$$constant & 0xFFFF)));
 5868   %}
 5869   ins_pipe(pipe_class_default);
 5870 %}
 5871 
 5872 // Expand node for constant pool load: small offset.
 5873 // The match rule is needed to generate the correct bottom_type(),
 5874 // however this node should never match. The use of predicate is not
 5875 // possible since ADLC forbids predicates for chain rules. The higher
 5876 // costs do not prevent matching in this case. For that reason the
 5877 // operand immP_NM with predicate(false) is used.
 5878 instruct loadConP(iRegPdst dst, immP_NM src, iRegLdst toc) %{
 5879   match(Set dst src);
 5880   effect(TEMP toc);
 5881 
 5882   ins_num_consts(1);
 5883 
 5884   format %{ "LD      $dst, offset, $toc \t// load ptr $src from TOC" %}
 5885   size(4);
 5886   ins_encode( enc_load_long_constP(dst, src, toc) );
 5887   ins_pipe(pipe_class_memory);
 5888 %}
 5889 
 5890 // Expand node for constant pool load: large offset.
 5891 instruct loadConP_hi(iRegPdst dst, immP_NM src, iRegLdst toc) %{
 5892   effect(DEF dst, USE src, USE toc);
 5893   predicate(false);
 5894 
 5895   ins_num_consts(1);
 5896   ins_field_const_toc_offset(int);
 5897 
 5898   format %{ "ADDIS   $dst, $toc, offset \t// load ptr $src from TOC (hi)" %}
 5899   size(4);
 5900   ins_encode( enc_load_long_constP_hi(dst, src, toc) );
 5901   ins_pipe(pipe_class_default);
 5902 %}
 5903 
 5904 // Expand node for constant pool load: large offset.
 5905 instruct loadConP_lo(iRegPdst dst, immP_NM src, iRegLdst base) %{
 5906   match(Set dst src);
 5907   effect(TEMP base);
 5908 
 5909   ins_field_const_toc_offset_hi_node(loadConP_hiNode*);
 5910 
 5911   format %{ "LD      $dst, offset, $base \t// load ptr $src from TOC (lo)" %}
 5912   size(4);
 5913   ins_encode %{
 5914     int offset = ra_->C->output()->in_scratch_emit_size() ? 0 : _const_toc_offset_hi_node->_const_toc_offset;
 5915     __ ld($dst$$Register, MacroAssembler::largeoffset_si16_si16_lo(offset), $base$$Register);
 5916   %}
 5917   ins_pipe(pipe_class_memory);
 5918 %}
 5919 
 5920 // Load pointer constant from constant table. Expand in case an
 5921 // offset > 16 bit is needed.
 5922 // Adlc adds toc node MachConstantTableBase.
 5923 instruct loadConP_Ex(iRegPdst dst, immP src) %{
        // Matcher-visible rule for loading a pointer constant (immP) into
        // $dst at MEMORY_REF_COST. It has no encoding of its own: it is
        // replaced after register allocation by
        // postalloc_expand_load_ptr_constant, which receives the constant
        // table base in addition to dst and src.
 5924   match(Set dst src);
 5925   ins_cost(MEMORY_REF_COST);
 5926 
 5927   // This rule does not use "expand" because then
 5928   // the result type is not known to be an Oop.  An ADLC
 5929   // enhancement will be needed to make that work - not worth it!
 5930 
 5931   // If this instruction rematerializes, it prolongs the live range
 5932   // of the toc node, causing illegal graphs.
 5933   // assert(edge_from_to(_reg_node[reg_lo],def)) fails in verify_good_schedule().
 5934   ins_cannot_rematerialize(true);
 5935 
 5936   format %{ "LD    $dst, offset, $constanttablebase \t//  load ptr $src from table, postalloc expanded" %}
 5937   postalloc_expand( postalloc_expand_load_ptr_constant(dst, src, constanttablebase) );
 5938 %}
 5939 
 5940 // Expand node for constant pool load: small offset.
 5941 instruct loadConF(regF dst, immF src, iRegLdst toc) %{
        // Small-offset float constant load: one LFS of the constant's TOC
        // offset relative to $toc into $dst. Has effects only (no match rule).
 5942   effect(DEF dst, USE src, USE toc);
 5943   ins_cost(MEMORY_REF_COST);
 5944 
 5945   ins_num_consts(1);
 5946 
 5947   format %{ "LFS     $dst, offset, $toc \t// load float $src from TOC" %}
 5948   size(4);
 5949   ins_encode %{
          // Obtain the address of the float constant; a null result is
          // reported as an out-of-memory compilation failure and nothing
          // is emitted.
 5950     address float_address = __ float_constant($src$$constant);
 5951     if (float_address == nullptr) {
 5952       ciEnv::current()->record_out_of_memory_failure();
 5953       return;
 5954     }
 5955     __ lfs($dst$$FloatRegister, __ offset_to_method_toc(float_address), $toc$$Register);
 5956   %}
 5957   ins_pipe(pipe_class_memory);
 5958 %}
 5959 
 5960 // Expand node for constant pool load: large offset.
 5961 instruct loadConFComp(regF dst, immF src, iRegLdst toc) %{
        // Large-offset float constant load as a fixed 12-byte sequence:
        // ADDIS adjusts $toc by the high part of the offset, LFS loads with
        // the low part, and a second ADDIS with the negated high part undoes
        // the adjustment of $toc.
 5962   effect(DEF dst, USE src, USE toc);
 5963   ins_cost(MEMORY_REF_COST);
 5964 
 5965   ins_num_consts(1);
 5966 
 5967   format %{ "ADDIS   $toc, $toc, offset_hi\n\t"
 5968             "LFS     $dst, offset_lo, $toc \t// load float $src from TOC (hi/lo)\n\t"
 5969             "ADDIS   $toc, $toc, -offset_hi"%}
 5970   size(12);
 5971   ins_encode %{
 5972     FloatRegister Rdst    = $dst$$FloatRegister;
 5973     Register Rtoc         = $toc$$Register;
 5974     address float_address = __ float_constant($src$$constant);
          // A null constant address is recorded as an out-of-memory failure
          // and no code is emitted.
 5975     if (float_address == nullptr) {
 5976       ciEnv::current()->record_out_of_memory_failure();
 5977       return;
 5978     }
 5979     int offset            = __ offset_to_method_toc(float_address);
          // Split offset so that offset == hi * 65536 + lo; adding 1<<15 before
          // the shift rounds hi so that lo lands in [-32768, 32767].
 5980     int hi = (offset + (1<<15))>>16;
 5981     int lo = offset - hi * (1<<16);
 5982 
 5983     __ addis(Rtoc, Rtoc, hi);
 5984     __ lfs(Rdst, lo, Rtoc);
 5985     __ addis(Rtoc, Rtoc, -hi);
 5986   %}
 5987   ins_pipe(pipe_class_memory);
 5988 %}
 5989 
 5990 // Adlc adds toc node MachConstantTableBase.
 5991 instruct loadConF_Ex(regF dst, immF src) %{
        // Matcher-visible rule for loading a float constant (immF) into $dst
        // at MEMORY_REF_COST. It is marked non-rematerializable and is
        // replaced after register allocation by
        // postalloc_expand_load_float_constant, which also receives the
        // constant table base.
 5992   match(Set dst src);
 5993   ins_cost(MEMORY_REF_COST);
 5994 
 5995   // See loadConP.
 5996   ins_cannot_rematerialize(true);
 5997 
 5998   format %{ "LFS     $dst, offset, $constanttablebase \t// load $src from table, postalloc expanded" %}
 5999   postalloc_expand( postalloc_expand_load_float_constant(dst, src, constanttablebase) );
 6000 %}
 6001 
 6002 // Expand node for constant pool load: small offset.
 6003 instruct loadConD(regD dst, immD src, iRegLdst toc) %{
        // Small-offset double constant load: one LFD of the constant's TOC
        // offset relative to $toc into $dst. Has effects only (no match rule).
 6004   effect(DEF dst, USE src, USE toc);
 6005   ins_cost(MEMORY_REF_COST);
 6006 
 6007   ins_num_consts(1);
 6008 
 6009   format %{ "LFD     $dst, offset, $toc \t// load double $src from TOC" %}
 6010   size(4);
 6011   ins_encode %{
          // Obtain the address of the double constant; a null result is
          // reported as an out-of-memory compilation failure and nothing
          // is emitted.
 6012     address float_address = __ double_constant($src$$constant);
 6013     if (float_address == nullptr) {
 6014       ciEnv::current()->record_out_of_memory_failure();
 6015       return;
 6016     }
 6017     int offset =  __ offset_to_method_toc(float_address);
 6018     __ lfd($dst$$FloatRegister, offset, $toc$$Register);
 6019   %}
 6020   ins_pipe(pipe_class_memory);
 6021 %}
 6022 
 6023 // Expand node for constant pool load: large offset.
 6024 instruct loadConDComp(regD dst, immD src, iRegLdst toc) %{
        // Large-offset double constant load as a fixed 12-byte sequence:
        // ADDIS adjusts $toc by the high part of the offset, LFD loads with
        // the low part, and a second ADDIS with the negated high part undoes
        // the adjustment of $toc.
 6025   effect(DEF dst, USE src, USE toc);
 6026   ins_cost(MEMORY_REF_COST);
 6027 
 6028   ins_num_consts(1);
 6029 
 6030   format %{ "ADDIS   $toc, $toc, offset_hi\n\t"
 6031             "LFD     $dst, offset_lo, $toc \t// load double $src from TOC (hi/lo)\n\t"
 6032             "ADDIS   $toc, $toc, -offset_hi" %}
 6033   size(12);
 6034   ins_encode %{
 6035     FloatRegister Rdst    = $dst$$FloatRegister;
 6036     Register      Rtoc    = $toc$$Register;
 6037     address float_address = __ double_constant($src$$constant);
          // A null constant address is recorded as an out-of-memory failure
          // and no code is emitted.
 6038     if (float_address == nullptr) {
 6039       ciEnv::current()->record_out_of_memory_failure();
 6040       return;
 6041     }
 6042     int offset = __ offset_to_method_toc(float_address);
          // Split offset so that offset == hi * 65536 + lo; adding 1<<15 before
          // the shift rounds hi so that lo lands in [-32768, 32767].
 6043     int hi = (offset + (1<<15))>>16;
 6044     int lo = offset - hi * (1<<16);
 6045 
 6046     __ addis(Rtoc, Rtoc, hi);
 6047     __ lfd(Rdst, lo, Rtoc);
 6048     __ addis(Rtoc, Rtoc, -hi);
 6049   %}
 6050   ins_pipe(pipe_class_memory);
 6051 %}
 6052 
 6053 // Adlc adds toc node MachConstantTableBase.
 6054 instruct loadConD_Ex(regD dst, immD src) %{
        // Matcher-visible rule for loading a double constant (immD) into $dst
        // at MEMORY_REF_COST. It is marked non-rematerializable and is
        // replaced after register allocation by
        // postalloc_expand_load_double_constant, which also receives the
        // constant table base.
 6055   match(Set dst src);
 6056   ins_cost(MEMORY_REF_COST);
 6057 
 6058   // See loadConP.
 6059   ins_cannot_rematerialize(true);
 6060 
 6061   format %{ "ConD    $dst, offset, $constanttablebase \t// load $src from table, postalloc expanded" %}
 6062   postalloc_expand( postalloc_expand_load_double_constant(dst, src, constanttablebase) );
 6063 %}
 6064 
 6065 // Prefetch instructions.
 6066 // Must be safe to execute with invalid address (cannot fault).
 6067 
 6068 instruct prefetch_alloc(indirectMemory mem, iRegLsrc src) %{
        // Allocation prefetch with a register offset: matches
        // PrefetchAllocation of (AddP mem src) and emits one DCBTST using
        // $src together with the base register of $mem.
 6069   match(PrefetchAllocation (AddP mem src));
 6070   ins_cost(MEMORY_REF_COST);
 6071 
 6072   format %{ "PREFETCH $mem, 2, $src \t// Prefetch write-many" %}
 6073   size(4);
 6074   ins_encode %{
 6075     __ dcbtst($src$$Register, $mem$$base$$Register);
 6076   %}
 6077   ins_pipe(pipe_class_memory);
 6078 %}
 6079 
 6080 instruct prefetch_alloc_no_offset(indirectMemory mem) %{
        // Allocation prefetch without an additional offset: matches
        // PrefetchAllocation of $mem and emits one DCBTST on the base
        // register of $mem.
 6081   match(PrefetchAllocation mem);
 6082   ins_cost(MEMORY_REF_COST);
 6083 
 6084   format %{ "PREFETCH $mem, 2 \t// Prefetch write-many" %}
 6085   size(4);
 6086   ins_encode %{
 6087     __ dcbtst($mem$$base$$Register);
 6088   %}
 6089   ins_pipe(pipe_class_memory);
 6090 %}
 6091 
 6092 //----------Store Instructions-------------------------------------------------
 6093 
 6094 // Store Byte
 6095 instruct storeB(memory mem, iRegIsrc src) %{
        // Byte store: matches StoreB and emits a single STB of $src to
        // base($mem) + displacement.
 6096   match(Set mem (StoreB mem src));
 6097   ins_cost(MEMORY_REF_COST);
 6098 
 6099   format %{ "STB     $src, $mem \t// byte" %}
 6100   size(4);
 6101   ins_encode %{
          // The displacement is corrected by frame_slots_bias for this base
          // register before it is used.
 6102     int Idisp = $mem$$disp + frame_slots_bias($mem$$base, ra_);
 6103     __ stb($src$$Register, Idisp, $mem$$base$$Register);
 6104   %}
 6105   ins_pipe(pipe_class_memory);
 6106 %}
 6107 
 6108 // Store Char/Short
 6109 instruct storeC(memory mem, iRegIsrc src) %{
        // Char/short store: matches StoreC and emits a single STH of $src to
        // base($mem) + displacement.
 6110   match(Set mem (StoreC mem src));
 6111   ins_cost(MEMORY_REF_COST);
 6112 
 6113   format %{ "STH     $src, $mem \t// short" %}
 6114   size(4);
 6115   ins_encode %{
          // The displacement is corrected by frame_slots_bias for this base
          // register before it is used.
 6116     int Idisp = $mem$$disp + frame_slots_bias($mem$$base, ra_);
 6117     __ sth($src$$Register, Idisp, $mem$$base$$Register);
 6118   %}
 6119   ins_pipe(pipe_class_memory);
 6120 %}
 6121 
 6122 // Store Integer
 6123 instruct storeI(memory mem, iRegIsrc src) %{
        // Int store: matches StoreI; the 4-byte STW is produced by the shared
        // enc_stw encoding.
 6124   match(Set mem (StoreI mem src));
 6125   ins_cost(MEMORY_REF_COST);
 6126 
 6127   format %{ "STW     $src, $mem" %}
 6128   size(4);
 6129   ins_encode( enc_stw(src, mem) );
 6130   ins_pipe(pipe_class_memory);
 6131 %}
 6132 
 6133 // ConvL2I + StoreI.
 6134 instruct storeI_convL2I(memory mem, iRegLsrc src) %{
        // Folds a ConvL2I into the following StoreI: the long register $src
        // is passed directly to the same enc_stw encoding used by storeI, so
        // the whole pattern is one 4-byte STW.
 6135   match(Set mem (StoreI mem (ConvL2I src)));
 6136   ins_cost(MEMORY_REF_COST);
 6137 
 6138   format %{ "STW     l2i($src), $mem" %}
 6139   size(4);
 6140   ins_encode( enc_stw(src, mem) );
 6141   ins_pipe(pipe_class_memory);
 6142 %}
 6143 
 6144 // Store Long
 6145 instruct storeL(memoryAlg4 mem, iRegLsrc src) %{
        // Long store: matches StoreL on a memoryAlg4 operand; the 4-byte STD
        // is produced by the shared enc_std encoding.
 6146   match(Set mem (StoreL mem src));
 6147   ins_cost(MEMORY_REF_COST);
 6148 
 6149   format %{ "STD     $src, $mem \t// long" %}
 6150   size(4);
 6151   ins_encode( enc_std(src, mem) );
 6152   ins_pipe(pipe_class_memory);
 6153 %}
 6154 
 6155 // Store super word nodes.
 6156 
 6157 // Store Aligned Packed Byte long register to memory
 6158 instruct storeA8B(memoryAlg4 mem, iRegLsrc src) %{
        // StoreVector whose memory size is exactly 8 bytes (see predicate),
        // with the vector held in a long register; encoded as a single STD
        // through enc_std.
 6159   predicate(n->as_StoreVector()->memory_size() == 8);
 6160   match(Set mem (StoreVector mem src));
 6161   ins_cost(MEMORY_REF_COST);
 6162 
 6163   format %{ "STD     $mem, $src \t// packed8B" %}
 6164   size(4);
 6165   ins_encode( enc_std(src, mem) );
 6166   ins_pipe(pipe_class_memory);
 6167 %}
 6168 
 6169 
 6170 instruct storeV16(memoryAlg16 mem, vecX src) %{
        // StoreVector whose memory size is exactly 16 bytes (see predicate),
        // with the vector held in a vecX register; encoded as a single STXV.
 6171   predicate(n->as_StoreVector()->memory_size() == 16);
 6172   match(Set mem (StoreVector mem src));
 6173   ins_cost(MEMORY_REF_COST);
 6174 
 6175   format %{ "STXV     $mem, $src \t// store 16-byte Vector" %}
 6176   size(4);
 6177   ins_encode %{
          // The source VectorRegister is converted with to_vsr() before being
          // handed to stxv, together with the operand's displacement and
          // register.
 6178     __ stxv($src$$VectorRegister.to_vsr(), $mem$$disp, $mem$$Register);
 6179   %}
 6180   ins_pipe(pipe_class_default);
 6181 %}
 6182 
 6183 // Reinterpret: only one vector size used: either L or X
 6184 instruct reinterpretL(iRegLdst dst) %{
        // VectorReinterpret on a long register, in place ($dst is both input
        // and result): zero cost, zero size, empty encoding.
 6185   match(Set dst (VectorReinterpret dst));
 6186   ins_cost(0);
 6187   format %{ "reinterpret $dst" %}
 6188   size(0);
 6189   ins_encode( /*empty*/ );
 6190   ins_pipe(pipe_class_empty);
 6191 %}
 6192 
 6193 instruct reinterpretX(vecX dst) %{
        // VectorReinterpret on a vecX register, in place ($dst is both input
        // and result): zero cost, zero size, empty encoding.
 6194   match(Set dst (VectorReinterpret dst));
 6195   ins_cost(0);
 6196   format %{ "reinterpret $dst" %}
 6197   size(0);
 6198   ins_encode( /*empty*/ );
 6199   ins_pipe(pipe_class_empty);
 6200 %}
 6201 
 6202 // Store Compressed Oop
 6203 instruct storeN(memory dst, iRegN_P2N src) %{
        // Compressed-oop store: matches StoreN only when the store node's
        // barrier_data() is 0 (see predicate); encoded as one STW via enc_stw.
 6204   match(Set dst (StoreN dst src));
 6205   predicate(n->as_Store()->barrier_data() == 0);
 6206   ins_cost(MEMORY_REF_COST);
 6207 
 6208   format %{ "STW     $src, $dst \t// compressed oop" %}
 6209   size(4);
 6210   ins_encode( enc_stw(src, dst) );
 6211   ins_pipe(pipe_class_memory);
 6212 %}
 6213 
 6214 // Store Compressed KLass
 6215 instruct storeNKlass(memory dst, iRegN_P2N src) %{
        // Compressed-klass store: matches StoreNKlass (no predicate);
        // encoded as one STW via enc_stw.
 6216   match(Set dst (StoreNKlass dst src));
 6217   ins_cost(MEMORY_REF_COST);
 6218 
 6219   format %{ "STW     $src, $dst \t// compressed klass" %}
 6220   size(4);
 6221   ins_encode( enc_stw(src, dst) );
 6222   ins_pipe(pipe_class_memory);
 6223 %}
 6224 
 6225 // Store Pointer
 6226 instruct storeP(memoryAlg4 dst, iRegPsrc src) %{
        // Pointer store: matches StoreP on a memoryAlg4 operand only when the
        // store node's barrier_data() is 0 (see predicate); encoded as one
        // STD via enc_std.
 6227   match(Set dst (StoreP dst src));
 6228   predicate(n->as_Store()->barrier_data() == 0);
 6229   ins_cost(MEMORY_REF_COST);
 6230 
 6231   format %{ "STD     $src, $dst \t// ptr" %}
 6232   size(4);
 6233   ins_encode( enc_std(src, dst) );
 6234   ins_pipe(pipe_class_memory);
 6235 %}
 6236 
 6237 // Store Float
 6238 instruct storeF(memory mem, regF src) %{
        // Float store: matches StoreF; the 4-byte STFS is produced by the
        // shared enc_stfs encoding.
 6239   match(Set mem (StoreF mem src));
 6240   ins_cost(MEMORY_REF_COST);
 6241 
 6242   format %{ "STFS    $src, $mem" %}
 6243   size(4);
 6244   ins_encode( enc_stfs(src, mem) );
 6245   ins_pipe(pipe_class_memory);
 6246 %}
 6247 
 6248 // Store Double
 6249 instruct storeD(memory mem, regD src) %{
        // Double store: matches StoreD; the 4-byte STFD is produced by the
        // shared enc_stfd encoding.
 6250   match(Set mem (StoreD mem src));
 6251   ins_cost(MEMORY_REF_COST);
 6252 
 6253   format %{ "STFD    $src, $mem" %}
 6254   size(4);
 6255   ins_encode( enc_stfd(src, mem) );
 6256   ins_pipe(pipe_class_memory);
 6257 %}
 6258 
 6259 // Convert oop pointer into compressed form.
 6260 
 6261 // Nodes for postalloc expand.
 6262 
 6263 // Shift node for expand.
 6264 instruct encodeP_shift(iRegNdst dst, iRegNsrc src) %{
        // Helper node for the EncodeP expansion: one SRDI that shifts $src
        // right into $dst. The predicate is constant false, so the matcher
        // does not select this rule.
 6265   // The match rule is needed to make it a 'MachTypeNode'!
 6266   match(Set dst (EncodeP src));
 6267   predicate(false);
 6268 
 6269   format %{ "SRDI    $dst, $src, 3 \t// encode" %}
 6270   size(4);
 6271   ins_encode %{
          // The emitted shift amount is CompressedOops::shift() masked to six
          // bits; the "3" in the format string is only display text.
 6272     __ srdi($dst$$Register, $src$$Register, CompressedOops::shift() & 0x3f);
 6273   %}
 6274   ins_pipe(pipe_class_default);
 6275 %}
 6276 
 6277 // Sub node for expand.
 6278 instruct encodeP_sub(iRegPdst dst, iRegPdst src) %{
        // Helper node for the EncodeP expansion: subtracts
        // CompressedOops::base() from $src into $dst. No size() is declared
        // for this rule. The predicate is constant false, so the matcher does
        // not select this rule.
 6279   // The match rule is needed to make it a 'MachTypeNode'!
 6280   match(Set dst (EncodeP src));
 6281   predicate(false);
 6282 
 6283   format %{ "SUB     $dst, $src, oop_base \t// encode" %}
 6284   ins_encode %{
          // R0 is handed to sub_const_optimized as an extra register argument
          // (presumably a scratch register - TODO confirm).
 6285     __ sub_const_optimized($dst$$Register, $src$$Register, CompressedOops::base(), R0);
 6286   %}
 6287   ins_pipe(pipe_class_default);
 6288 %}
 6289 
 6290 // Conditional sub base.
 6291 instruct cond_sub_base(iRegNdst dst, flagsRegSrc crx, iRegPsrc src1) %{
        // Helper node for the EncodeP expansion: subtracts
        // CompressedOops::base() from $src1 into $dst unless $crx signals
        // "equal", in which case the subtraction is branched over. No size()
        // is declared. The predicate is constant false, so the matcher does
        // not select this rule.
 6292   // The match rule is needed to make it a 'MachTypeNode'!
 6293   match(Set dst (EncodeP (Binary crx src1)));
 6294   predicate(false);
 6295 
 6296   format %{ "BEQ     $crx, done\n\t"
 6297             "SUB     $dst, $src1, heapbase \t// encode: subtract base if != nullptr\n"
 6298             "done:" %}
 6299   ins_encode %{
 6300     Label done;
          // Skip the subtraction when the condition register holds "equal".
 6301     __ beq($crx$$CondRegister, done);
 6302     __ sub_const_optimized($dst$$Register, $src1$$Register, CompressedOops::base(), R0);
 6303     __ bind(done);
 6304   %}
 6305   ins_pipe(pipe_class_default);
 6306 %}
 6307 
 6308 instruct cond_set_0_oop(iRegNdst dst, flagsRegSrc crx, iRegPsrc src1) %{
        // Helper node for the EncodeP expansion: a single isel_0 on $crx with
        // condition "equal". Per the format string this yields 0 when $crx is
        // "eq" and $src1 otherwise, so a zero input stays zero. The predicate
        // is constant false, so the matcher does not select this rule.
 6309   // The match rule is needed to make it a 'MachTypeNode'!
 6310   match(Set dst (EncodeP (Binary crx src1)));
 6311   predicate(false);
 6312 
 6313   format %{ "CMOVE   $dst, $crx eq, 0, $src1 \t// encode: preserve 0" %}
 6314   size(4);
 6315   ins_encode %{
 6316     __ isel_0($dst$$Register, $crx$$CondRegister, Assembler::equal, $src1$$Register);
 6317   %}
 6318   ins_pipe(pipe_class_default);
 6319 %}
 6320 
 6321 // Disjoint narrow oop base.
 6322 instruct encodeP_Disjoint(iRegNdst dst, iRegPsrc src) %{
        // EncodeP when CompressedOops::base_disjoint() holds: a single RLDICL
        // does the whole encoding, no subtraction of the base is emitted.
 6323   match(Set dst (EncodeP src));
 6324   predicate(CompressedOops::base_disjoint());
 6325 
 6326   format %{ "EXTRDI  $dst, $src, #32, #3 \t// encode with disjoint base" %}
 6327   size(4);
 6328   ins_encode %{
          // Rotate left by (64 - shift), i.e. right by shift, and clear the
          // upper 32 bits of the result.
 6329     __ rldicl($dst$$Register, $src$$Register, 64-CompressedOops::shift(), 32);
 6330   %}
 6331   ins_pipe(pipe_class_default);
 6332 %}
 6333 
 6334 // shift != 0, base != 0
 6335 instruct encodeP_Ex(iRegNdst dst, flagsReg crx, iRegPsrc src) %{
        // EncodeP for values whose pointer type is not TypePtr::NotNull, when
        // the shift is non-zero and CompressedOops::base_overlaps() holds.
        // Needs a temporary condition register and is replaced after register
        // allocation by postalloc_expand_encode_oop.
 6336   match(Set dst (EncodeP src));
 6337   effect(TEMP crx);
 6338   predicate(n->bottom_type()->make_ptr()->ptr() != TypePtr::NotNull &&
 6339             CompressedOops::shift() != 0 &&
 6340             CompressedOops::base_overlaps());
 6341 
 6342   format %{ "EncodeP $dst, $crx, $src \t// postalloc expanded" %}
 6343   postalloc_expand( postalloc_expand_encode_oop(dst, src, crx));
 6344 %}
 6345 
 6346 // shift != 0, base != 0
 6347 instruct encodeP_not_null_Ex(iRegNdst dst, iRegPsrc src) %{
        // EncodeP for values whose pointer type is TypePtr::NotNull, when the
        // shift is non-zero and CompressedOops::base_overlaps() holds. No
        // condition register is involved; the rule is replaced after register
        // allocation by postalloc_expand_encode_oop_not_null.
 6348   match(Set dst (EncodeP src));
 6349   predicate(n->bottom_type()->make_ptr()->ptr() == TypePtr::NotNull &&
 6350             CompressedOops::shift() != 0 &&
 6351             CompressedOops::base_overlaps());
 6352 
 6353   format %{ "EncodeP $dst, $src\t// $src != Null, postalloc expanded" %}
 6354   postalloc_expand( postalloc_expand_encode_oop_not_null(dst, src) );
 6355 %}
 6356 
 6357 // shift != 0, base == 0
 6358 // TODO: This is the same as encodeP_shift. Merge!
 6359 instruct encodeP_not_null_base_null(iRegNdst dst, iRegPsrc src) %{
        // EncodeP when the shift is non-zero and CompressedOops::base() is
        // null: the encoding is a single SRDI. The predicate does not inspect
        // the node's type, only the compressed-oops configuration.
 6360   match(Set dst (EncodeP src));
 6361   predicate(CompressedOops::shift() != 0 &&
 6362             CompressedOops::base() == nullptr);
 6363 
 6364   format %{ "SRDI    $dst, $src, #3 \t// encodeP, $src != nullptr" %}
 6365   size(4);
 6366   ins_encode %{
          // The emitted shift amount is CompressedOops::shift() masked to six
          // bits; the "#3" in the format string is only display text.
 6367     __ srdi($dst$$Register, $src$$Register, CompressedOops::shift() & 0x3f);
 6368   %}
 6369   ins_pipe(pipe_class_default);
 6370 %}
 6371 
 6372 // Compressed OOPs with narrow_oop_shift == 0.
 6373 // shift == 0, base == 0
 6374 instruct encodeP_narrow_oop_shift_0(iRegNdst dst, iRegPsrc src) %{
        // EncodeP when CompressedOops::shift() is 0: the value is only moved
        // from $src to $dst, via mr_if_needed. No size() is declared because
        // the emitted length varies (0 or 4 bytes, see the comment below).
 6375   match(Set dst (EncodeP src));
 6376   predicate(CompressedOops::shift() == 0);
 6377 
 6378   format %{ "MR      $dst, $src \t// Ptr->Narrow" %}
 6379   // variable size, 0 or 4.
 6380   ins_encode %{
 6381     __ mr_if_needed($dst$$Register, $src$$Register);
 6382   %}
 6383   ins_pipe(pipe_class_default);
 6384 %}
 6385 
 6386 // Decode nodes.
 6387 
 6388 // Shift node for expand.
 6389 instruct decodeN_shift(iRegPdst dst, iRegPsrc src) %{
        // Helper node for the DecodeN expansion: one SLDI that shifts $src
        // left by CompressedOops::shift() into $dst (the "#3" in the format
        // string is only display text). The predicate is constant false, so
        // the matcher does not select this rule.
 6390   // The match rule is needed to make it a 'MachTypeNode'!
 6391   match(Set dst (DecodeN src));
 6392   predicate(false);
 6393 
 6394   format %{ "SLDI    $dst, $src, #3 \t// DecodeN" %}
 6395   size(4);
 6396   ins_encode %{
 6397     __ sldi($dst$$Register, $src$$Register, CompressedOops::shift());
 6398   %}
 6399   ins_pipe(pipe_class_default);
 6400 %}
 6401 
 6402 // Add node for expand.
 6403 instruct decodeN_add(iRegPdst dst, iRegPdst src) %{
        // Helper node for the DecodeN expansion: adds CompressedOops::base()
        // to $src into $dst. No size() is declared for this rule. The
        // predicate is constant false, so the matcher does not select it.
 6404   // The match rule is needed to make it a 'MachTypeNode'!
 6405   match(Set dst (DecodeN src));
 6406   predicate(false);
 6407 
 6408   format %{ "ADD     $dst, $src, heapbase \t// DecodeN, add oop base" %}
 6409   ins_encode %{
          // R0 is handed to add_const_optimized as an extra register argument
          // (presumably a scratch register - TODO confirm).
 6410     __ add_const_optimized($dst$$Register, $src$$Register, CompressedOops::base(), R0);
 6411   %}
 6412   ins_pipe(pipe_class_default);
 6413 %}
 6414 
 6415 // Conditional add base for expand.
 6416 instruct cond_add_base(iRegPdst dst, flagsRegSrc crx, iRegPsrc src) %{
        // Helper node for the DecodeN expansion: adds CompressedOops::base()
        // to $src into $dst unless $crx signals "equal", in which case the
        // addition is branched over. No size() is declared. The predicate is
        // constant false, so the matcher does not select this rule.
 6417   // The match rule is needed to make it a 'MachTypeNode'!
 6418   // NOTICE that the rule is nonsense - we just have to make sure that:
 6419   //  - _matrule->_rChild->_opType == "DecodeN" (see InstructForm::captures_bottom_type() in formssel.cpp)
 6420   //  - we have to match 'crx' to avoid an "illegal USE of non-input: flagsReg crx" error in ADLC.
 6421   match(Set dst (DecodeN (Binary crx src)));
 6422   predicate(false);
 6423 
 6424   format %{ "BEQ     $crx, done\n\t"
 6425             "ADD     $dst, $src, heapbase \t// DecodeN: add oop base if $src != nullptr\n"
 6426             "done:" %}
 6427   ins_encode %{
 6428     Label done;
          // Skip the addition when the condition register holds "equal".
 6429     __ beq($crx$$CondRegister, done);
 6430     __ add_const_optimized($dst$$Register, $src$$Register, CompressedOops::base(), R0);
 6431     __ bind(done);
 6432   %}
 6433   ins_pipe(pipe_class_default);
 6434 %}
 6435 
 6436 instruct cond_set_0_ptr(iRegPdst dst, flagsRegSrc crx, iRegPsrc src1) %{
        // Helper node for the DecodeN expansion (instantiated by
        // decodeN_Disjoint_isel_Ex): a single isel_0 on $crx with condition
        // "equal". Per the format string this yields 0 when $crx is "eq" and
        // $src1 otherwise. The predicate is constant false, so the matcher
        // does not select this rule.
 6437   // The match rule is needed to make it a 'MachTypeNode'!
 6438   // NOTICE that the rule is nonsense - we just have to make sure that:
 6439   //  - _matrule->_rChild->_opType == "DecodeN" (see InstructForm::captures_bottom_type() in formssel.cpp)
 6440   //  - we have to match 'crx' to avoid an "illegal USE of non-input: flagsReg crx" error in ADLC.
 6441   match(Set dst (DecodeN (Binary crx src1)));
 6442   predicate(false);
 6443 
 6444   format %{ "CMOVE   $dst, $crx eq, 0, $src1 \t// decode: preserve 0" %}
 6445   size(4);
 6446   ins_encode %{
 6447     __ isel_0($dst$$Register, $crx$$CondRegister, Assembler::equal, $src1$$Register);
 6448   %}
 6449   ins_pipe(pipe_class_default);
 6450 %}
 6451 
 6452 //  shift != 0, base != 0
 6453 instruct decodeN_Ex(iRegPdst dst, iRegNsrc src, flagsReg crx) %{
        // DecodeN for values whose oop pointer type is neither NotNull nor
        // Constant, when the shift is non-zero and the base is non-null.
        // Needs a temporary condition register and is replaced after register
        // allocation by postalloc_expand_decode_oop.
 6454   match(Set dst (DecodeN src));
 6455   predicate((n->bottom_type()->is_oopptr()->ptr() != TypePtr::NotNull &&
 6456              n->bottom_type()->is_oopptr()->ptr() != TypePtr::Constant) &&
 6457             CompressedOops::shift() != 0 &&
 6458             CompressedOops::base() != nullptr);
 6459   ins_cost(4 * DEFAULT_COST); // Should be more expensive than decodeN_Disjoint_isel_Ex.
 6460   effect(TEMP crx);
 6461 
 6462   format %{ "DecodeN $dst, $src \t// Kills $crx, postalloc expanded" %}
 6463   postalloc_expand( postalloc_expand_decode_oop(dst, src, crx) );
 6464 %}
 6465 
 6466 // shift != 0, base == 0
 6467 instruct decodeN_nullBase(iRegPdst dst, iRegNsrc src) %{
        // DecodeN when the shift is non-zero and CompressedOops::base() is
        // null: decoding is a single SLDI by CompressedOops::shift() (the
        // "#3" in the format string is only display text).
 6468   match(Set dst (DecodeN src));
 6469   predicate(CompressedOops::shift() != 0 &&
 6470             CompressedOops::base() == nullptr);
 6471 
 6472   format %{ "SLDI    $dst, $src, #3 \t// DecodeN (zerobased)" %}
 6473   size(4);
 6474   ins_encode %{
 6475     __ sldi($dst$$Register, $src$$Register, CompressedOops::shift());
 6476   %}
 6477   ins_pipe(pipe_class_default);
 6478 %}
 6479 
 6480 // Optimize DecodeN for disjoint base.
 6481 // Shift narrow oop and or it into register that already contains the heap base.
 6482 // Base == dst must hold, and is assured by construction in postalloc_expand.
 6483 instruct decodeN_mergeDisjoint(iRegPdst dst, iRegNsrc src, iRegLsrc base) %{
        // Helper node instantiated by the disjoint-base DecodeN expansions:
        // one RLDIMI that inserts $src, rotated left by the shift, into $dst.
        // The encoding never references $base; the expansions pass the dst
        // operand in its place. The predicate is constant false, so the
        // matcher does not select this rule.
 6484   match(Set dst (DecodeN src));
 6485   effect(TEMP base);
 6486   predicate(false);
 6487 
 6488   format %{ "RLDIMI  $dst, $src, shift, 32-shift \t// DecodeN (disjoint base)" %}
 6489   size(4);
 6490   ins_encode %{
 6491     __ rldimi($dst$$Register, $src$$Register, CompressedOops::shift(), 32-CompressedOops::shift());
 6492   %}
 6493   ins_pipe(pipe_class_default);
 6494 %}
 6495 
 6496 // Optimize DecodeN for disjoint base.
 6497 // This node requires only one cycle on the critical path.
 6498 // We must postalloc_expand as we can not express use_def effects where
 6499 // the used register is L and the def'ed register P.
 6500 instruct decodeN_Disjoint_notNull_Ex(iRegPdst dst, iRegNsrc src) %{
        // DecodeN for values whose oop pointer type is NotNull or Constant,
        // when CompressedOops::base_disjoint() holds. After register
        // allocation it is replaced by two nodes: a loadBase node that
        // defines $dst, followed by a decodeN_mergeDisjoint node that merges
        // $src into that same register.
 6501   match(Set dst (DecodeN src));
 6502   effect(TEMP_DEF dst);
 6503   predicate((n->bottom_type()->is_oopptr()->ptr() == TypePtr::NotNull ||
 6504              n->bottom_type()->is_oopptr()->ptr() == TypePtr::Constant) &&
 6505             CompressedOops::base_disjoint());
 6506   ins_cost(DEFAULT_COST);
 6507 
 6508   format %{ "MOV     $dst, heapbase \t\n"
 6509             "RLDIMI  $dst, $src, shift, 32-shift \t// decode with disjoint base" %}
 6510   postalloc_expand %{
          // n1: loadBase node with the dst operand as its result; its only
          // required input is a null edge.
 6511     loadBaseNode *n1 = new loadBaseNode();
 6512     n1->add_req(nullptr);
 6513     n1->_opnds[0] = op_dst;
 6514 
          // n2: merge node taking src and n1 as inputs. The dst operand is
          // used both as result and as the "base" operand, so base == dst.
 6515     decodeN_mergeDisjointNode *n2 = new decodeN_mergeDisjointNode();
 6516     n2->add_req(n_region, n_src, n1);
 6517     n2->_opnds[0] = op_dst;
 6518     n2->_opnds[1] = op_src;
 6519     n2->_opnds[2] = op_dst;
 6520     n2->_bottom_type = _bottom_type;
 6521 
          // The final node of the expansion is registered as producing an oop.
 6522     assert(ra_->is_oop(this) == true, "A decodeN node must produce an oop!");
 6523     ra_->set_oop(n2, true);
 6524 
          // Both new nodes get the register pair assigned to this node.
 6525     ra_->set_pair(n1->_idx, ra_->get_reg_second(this), ra_->get_reg_first(this));
 6526     ra_->set_pair(n2->_idx, ra_->get_reg_second(this), ra_->get_reg_first(this));
 6527 
 6528     nodes->push(n1);
 6529     nodes->push(n2);
 6530   %}
 6531 %}
 6532 
 6533 instruct decodeN_Disjoint_isel_Ex(iRegPdst dst, iRegNsrc src, flagsReg crx) %{
        // DecodeN for values whose oop pointer type is neither NotNull nor
        // Constant, when CompressedOops::base_disjoint() holds. After
        // register allocation it is replaced by four nodes: loadBase, a
        // compare of $src against narrow null, decodeN_mergeDisjoint, and
        // cond_set_0_ptr on the compare result.
 6534   match(Set dst (DecodeN src));
 6535   effect(TEMP_DEF dst, TEMP crx);
 6536   predicate((n->bottom_type()->is_oopptr()->ptr() != TypePtr::NotNull &&
 6537              n->bottom_type()->is_oopptr()->ptr() != TypePtr::Constant) &&
 6538             CompressedOops::base_disjoint());
 6539   ins_cost(3 * DEFAULT_COST);
 6540 
 6541   format %{ "DecodeN  $dst, $src \t// decode with disjoint base using isel" %}
 6542   postalloc_expand %{
          // n1: loadBase node with the dst operand as its result; its only
          // required input is a null edge.
 6543     loadBaseNode *n1 = new loadBaseNode();
 6544     n1->add_req(nullptr);
 6545     n1->_opnds[0] = op_dst;
 6546 
          // n_compare: compares src with the narrow-oop null immediate and
          // writes the result to crx.
 6547     cmpN_reg_imm0Node *n_compare  = new cmpN_reg_imm0Node();
 6548     n_compare->add_req(n_region, n_src);
 6549     n_compare->_opnds[0] = op_crx;
 6550     n_compare->_opnds[1] = op_src;
 6551     n_compare->_opnds[2] = new immN_0Oper(TypeNarrowOop::NULL_PTR);
 6552 
          // n2: merge node taking src and n1 as inputs. The dst operand is
          // used both as result and as the "base" operand, so base == dst.
 6553     decodeN_mergeDisjointNode *n2 = new decodeN_mergeDisjointNode();
 6554     n2->add_req(n_region, n_src, n1);
 6555     n2->_opnds[0] = op_dst;
 6556     n2->_opnds[1] = op_src;
 6557     n2->_opnds[2] = op_dst;
 6558     n2->_bottom_type = _bottom_type;
 6559 
          // n_cond_set: conditional select on the compare result, with the
          // merged value in dst as both source and destination.
 6560     cond_set_0_ptrNode *n_cond_set = new cond_set_0_ptrNode();
 6561     n_cond_set->add_req(n_region, n_compare, n2);
 6562     n_cond_set->_opnds[0] = op_dst;
 6563     n_cond_set->_opnds[1] = op_crx;
 6564     n_cond_set->_opnds[2] = op_dst;
 6565     n_cond_set->_bottom_type = _bottom_type;
 6566 
          // The final node of the expansion is registered as producing an oop.
 6567     assert(ra_->is_oop(this) == true, "A decodeN node must produce an oop!");
 6568     ra_->set_oop(n_cond_set, true);
 6569 
          // The compare gets the register pair of n_crx; every other new node
          // gets the pair assigned to this node.
 6570     ra_->set_pair(n1->_idx, ra_->get_reg_second(this), ra_->get_reg_first(this));
 6571     ra_->set_pair(n_compare->_idx, ra_->get_reg_second(n_crx), ra_->get_reg_first(n_crx));
 6572     ra_->set_pair(n2->_idx, ra_->get_reg_second(this), ra_->get_reg_first(this));
 6573     ra_->set_pair(n_cond_set->_idx, ra_->get_reg_second(this), ra_->get_reg_first(this));
 6574 
 6575     nodes->push(n1);
 6576     nodes->push(n_compare);
 6577     nodes->push(n2);
 6578     nodes->push(n_cond_set);
 6579   %}
 6580 %}
 6581 
 6582 // src != 0, shift != 0, base != 0
 6583 instruct decodeN_notNull_addBase_Ex(iRegPdst dst, iRegNsrc src) %{
        // DecodeN for values whose oop pointer type is NotNull or Constant,
        // when the shift is non-zero and the base is non-null. No condition
        // register is involved; the rule is replaced after register
        // allocation by postalloc_expand_decode_oop_not_null.
 6584   match(Set dst (DecodeN src));
 6585   predicate((n->bottom_type()->is_oopptr()->ptr() == TypePtr::NotNull ||
 6586              n->bottom_type()->is_oopptr()->ptr() == TypePtr::Constant) &&
 6587             CompressedOops::shift() != 0 &&
 6588             CompressedOops::base() != nullptr);
 6589   ins_cost(2 * DEFAULT_COST);
 6590 
 6591   format %{ "DecodeN $dst, $src \t// $src != nullptr, postalloc expanded" %}
 6592   postalloc_expand( postalloc_expand_decode_oop_not_null(dst, src));
 6593 %}
 6594 
 6595 // Compressed OOPs with narrow_oop_shift == 0.
 6596 instruct decodeN_unscaled(iRegPdst dst, iRegNsrc src) %{
        // DecodeN when CompressedOops::shift() is 0: the value is only moved
        // from $src to $dst, via mr_if_needed. No size() is declared because
        // the emitted length varies (0 or 4 bytes, see the comment below).
 6597   match(Set dst (DecodeN src));
 6598   predicate(CompressedOops::shift() == 0);
 6599   ins_cost(DEFAULT_COST);
 6600 
 6601   format %{ "MR      $dst, $src \t// DecodeN (unscaled)" %}
 6602   // variable size, 0 or 4.
 6603   ins_encode %{
 6604     __ mr_if_needed($dst$$Register, $src$$Register);
 6605   %}
 6606   ins_pipe(pipe_class_default);
 6607 %}
 6608 
 6609 // Convert compressed oop into int for vectors alignment masking.
 6610 instruct decodeN2I_unscaled(iRegIdst dst, iRegNsrc src) %{
        // Collapses the chain ConvL2I(CastP2X(DecodeN src)) into one move
        // when CompressedOops::shift() is 0: $src is moved to the int
        // register $dst via mr_if_needed. No size() is declared because the
        // emitted length varies (0 or 4 bytes, see the comment below).
 6611   match(Set dst (ConvL2I (CastP2X (DecodeN src))));
 6612   predicate(CompressedOops::shift() == 0);
 6613   ins_cost(DEFAULT_COST);
 6614 
 6615   format %{ "MR      $dst, $src \t// (int)DecodeN (unscaled)" %}
 6616   // variable size, 0 or 4.
 6617   ins_encode %{
 6618     __ mr_if_needed($dst$$Register, $src$$Register);
 6619   %}
 6620   ins_pipe(pipe_class_default);
 6621 %}
 6622 
 6623 // Convert klass pointer into compressed form.
 6624 
 6625 // Nodes for postalloc expand.
 6626 
 6627 // Shift node for expand.
 6628 instruct encodePKlass_shift(iRegNdst dst, iRegNsrc src) %{
        // Helper node instantiated by encodePKlass_not_null_Ex: one SRDI
        // that shifts $src right by CompressedKlassPointers::shift() into
        // $dst (the "3" in the format string is only display text). The
        // predicate is constant false, so the matcher does not select it.
 6629   // The match rule is needed to make it a 'MachTypeNode'!
 6630   match(Set dst (EncodePKlass src));
 6631   predicate(false);
 6632 
 6633   format %{ "SRDI    $dst, $src, 3 \t// encode" %}
 6634   size(4);
 6635   ins_encode %{
 6636     __ srdi($dst$$Register, $src$$Register, CompressedKlassPointers::shift());
 6637   %}
 6638   ins_pipe(pipe_class_default);
 6639 %}
 6640 
 6641 // Sub node for expand.
 6642 instruct encodePKlass_sub_base(iRegPdst dst, iRegLsrc base, iRegPdst src) %{
        // Helper node instantiated by encodePKlass_not_null_Ex: one SUBF on
        // the registers $dst, $base and $src, in that argument order. The
        // predicate is constant false, so the matcher does not select it.
 6643   // The match rule is needed to make it a 'MachTypeNode'!
 6644   match(Set dst (EncodePKlass (Binary base src)));
 6645   predicate(false);
 6646 
 6647   format %{ "SUB     $dst, $base, $src \t// encode" %}
 6648   size(4);
 6649   ins_encode %{
          // NOTE(review): SUBF is "subtract from", so this is expected to
          // compute $src - $base; confirm against the assembler definition.
 6650     __ subf($dst$$Register, $base$$Register, $src$$Register);
 6651   %}
 6652   ins_pipe(pipe_class_default);
 6653 %}
 6654 
 6655 // Disjoint narrow klass base.
 6656 instruct encodePKlass_Disjoint(iRegNdst dst, iRegPsrc src) %{
        // EncodePKlass as a single RLDICL. Currently disabled: the predicate
        // is the constant false, with a TODO to use a klass-pointer
        // "base_disjoint" check instead.
 6657   match(Set dst (EncodePKlass src));
 6658   predicate(false /* TODO: PPC port CompressedKlassPointers::base_disjoint()*/);
 6659 
 6660   format %{ "EXTRDI  $dst, $src, #32, #3 \t// encode with disjoint base" %}
 6661   size(4);
 6662   ins_encode %{
          // Rotate left by (64 - shift), i.e. right by shift, and clear the
          // upper 32 bits of the result.
 6663     __ rldicl($dst$$Register, $src$$Register, 64-CompressedKlassPointers::shift(), 32);
 6664   %}
 6665   ins_pipe(pipe_class_default);
 6666 %}
 6667 
 6668 // shift != 0, base != 0
 6669 instruct encodePKlass_not_null_Ex(iRegNdst dst, iRegLsrc base, iRegPsrc src) %{
        // EncodePKlass with the klass base supplied in a register. It is
        // used by the expand rule encodePKlass_not_null_ExEx; its own
        // predicate is constant false. After register allocation it is
        // replaced by a subtract-base node followed by a shift node, both
        // working in the register assigned to this node.
 6670   match(Set dst (EncodePKlass (Binary base src)));
 6671   predicate(false);
 6672 
 6673   format %{ "EncodePKlass $dst, $src\t// $src != Null, postalloc expanded" %}
 6674   postalloc_expand %{
          // n1: subtract node with inputs base and src, result in dst.
 6675     encodePKlass_sub_baseNode *n1 = new encodePKlass_sub_baseNode();
 6676     n1->add_req(n_region, n_base, n_src);
 6677     n1->_opnds[0] = op_dst;
 6678     n1->_opnds[1] = op_base;
 6679     n1->_opnds[2] = op_src;
 6680     n1->_bottom_type = _bottom_type;
 6681 
          // n2: shift node consuming n1; dst is both source and destination.
 6682     encodePKlass_shiftNode *n2 = new encodePKlass_shiftNode();
 6683     n2->add_req(n_region, n1);
 6684     n2->_opnds[0] = op_dst;
 6685     n2->_opnds[1] = op_dst;
 6686     n2->_bottom_type = _bottom_type;
          // Both new nodes get the register pair assigned to this node.
 6687     ra_->set_pair(n1->_idx, ra_->get_reg_second(this), ra_->get_reg_first(this));
 6688     ra_->set_pair(n2->_idx, ra_->get_reg_second(this), ra_->get_reg_first(this));
 6689 
 6690     nodes->push(n1);
 6691     nodes->push(n2);
 6692   %}
 6693 %}
 6694 
 6695 // shift != 0, base != 0
 6696 instruct encodePKlass_not_null_ExEx(iRegNdst dst, iRegPsrc src) %{
        // Matcher-visible EncodePKlass rule with no active predicate (the
        // predicate and format lines below are commented out). It expands
        // into a load of CompressedKlassPointers::base() as a long constant
        // into a fresh register, followed by encodePKlass_not_null_Ex on
        // that register.
 6697   match(Set dst (EncodePKlass src));
 6698   //predicate(CompressedKlassPointers::shift() != 0 &&
 6699   //          true /* TODO: PPC port CompressedKlassPointers::base_overlaps()*/);
 6700 
 6701   //format %{ "EncodePKlass $dst, $src\t// $src != Null, postalloc expanded" %}
 6702   ins_cost(DEFAULT_COST*2);  // Don't count constant.
 6703   expand %{
 6704     immL baseImm %{ (jlong)(intptr_t)CompressedKlassPointers::base() %}
 6705     iRegLdst base;
 6706     loadConL_Ex(base, baseImm);
 6707     encodePKlass_not_null_Ex(dst, base, src);
 6708   %}
 6709 %}
 6710 
 6711 // Decode nodes.
 6712 
 6713 // Shift node for expand.
 6714 instruct decodeNKlass_shift(iRegPdst dst, iRegPsrc src) %{
        // Helper node instantiated by decodeNKlass_notNull_addBase_Ex: one
        // SLDI that shifts $src left by CompressedKlassPointers::shift() into
        // $dst (the "#3" in the format string is only display text). The
        // predicate is constant false, so the matcher does not select it.
 6715   // The match rule is needed to make it a 'MachTypeNode'!
 6716   match(Set dst (DecodeNKlass src));
 6717   predicate(false);
 6718 
 6719   format %{ "SLDI    $dst, $src, #3 \t// DecodeNKlass" %}
 6720   size(4);
 6721   ins_encode %{
 6722     __ sldi($dst$$Register, $src$$Register, CompressedKlassPointers::shift());
 6723   %}
 6724   ins_pipe(pipe_class_default);
 6725 %}
 6726 
 6727 // Add node for expand.
 6728 
 6729 instruct decodeNKlass_add_base(iRegPdst dst, iRegLsrc base, iRegPdst src) %{
        // Helper node instantiated by decodeNKlass_notNull_addBase_Ex: one
        // ADD of $base and $src into $dst. The predicate is constant false,
        // so the matcher does not select this rule.
 6730   // The match rule is needed to make it a 'MachTypeNode'!
 6731   match(Set dst (DecodeNKlass (Binary base src)));
 6732   predicate(false);
 6733 
 6734   format %{ "ADD     $dst, $base, $src \t// DecodeNKlass, add klass base" %}
 6735   size(4);
 6736   ins_encode %{
 6737     __ add($dst$$Register, $base$$Register, $src$$Register);
 6738   %}
 6739   ins_pipe(pipe_class_default);
 6740 %}
 6741 
 6742 // src != 0, shift != 0, base != 0
 6743 instruct decodeNKlass_notNull_addBase_Ex(iRegPdst dst, iRegLsrc base, iRegNsrc src) %{
        // DecodeNKlass with the klass base supplied in a register. It is
        // used by the expand rule decodeNKlass_notNull_addBase_ExEx; its own
        // predicate is constant false. After register allocation it is
        // replaced by an add-base node followed by a shift node, both
        // working in the register assigned to this node. Because the add
        // comes before the shift, the caller supplies a pre-shifted base.
 6744   match(Set dst (DecodeNKlass (Binary base src)));
 6745   //effect(kill src); // We need a register for the immediate result after shifting.
 6746   predicate(false);
 6747 
 6748   format %{ "DecodeNKlass $dst =  $base + ($src << 3) \t// $src != nullptr, postalloc expanded" %}
 6749   postalloc_expand %{
          // n1: add node with inputs base and src, result in dst.
 6750     decodeNKlass_add_baseNode *n1 = new decodeNKlass_add_baseNode();
 6751     n1->add_req(n_region, n_base, n_src);
 6752     n1->_opnds[0] = op_dst;
 6753     n1->_opnds[1] = op_base;
 6754     n1->_opnds[2] = op_src;
 6755     n1->_bottom_type = _bottom_type;
 6756 
          // n2: shift node consuming n1; dst is both source and destination.
 6757     decodeNKlass_shiftNode *n2 = new decodeNKlass_shiftNode();
 6758     n2->add_req(n_region, n1);
 6759     n2->_opnds[0] = op_dst;
 6760     n2->_opnds[1] = op_dst;
 6761     n2->_bottom_type = _bottom_type;
 6762 
          // Both new nodes get the register pair assigned to this node.
 6763     ra_->set_pair(n1->_idx, ra_->get_reg_second(this), ra_->get_reg_first(this));
 6764     ra_->set_pair(n2->_idx, ra_->get_reg_second(this), ra_->get_reg_first(this));
 6765 
 6766     nodes->push(n1);
 6767     nodes->push(n2);
 6768   %}
 6769 %}
 6770 
 6771 // src != 0, shift != 0, base != 0
 6772 instruct decodeNKlass_notNull_addBase_ExEx(iRegPdst dst, iRegNsrc src) %{
        // Matcher-visible DecodeNKlass rule with no active predicate (the
        // predicate and format lines below are commented out). It expands
        // into a load of the klass base, pre-shifted right by
        // CompressedKlassPointers::shift(), as a long constant into a fresh
        // register, followed by decodeNKlass_notNull_addBase_Ex on that
        // register.
 6773   match(Set dst (DecodeNKlass src));
 6774   // predicate(CompressedKlassPointers::shift() != 0 &&
 6775   //           CompressedKlassPointers::base() != 0);
 6776 
 6777   //format %{ "DecodeNKlass $dst, $src \t// $src != nullptr, expanded" %}
 6778 
 6779   ins_cost(DEFAULT_COST*2);  // Don't count constant.
 6780   expand %{
 6781     // We add first, then we shift. Like this, we can get along with one register less.
 6782     // But we have to load the base pre-shifted.
 6783     immL baseImm %{ (jlong)((intptr_t)CompressedKlassPointers::base() >> CompressedKlassPointers::shift()) %}
 6784     iRegLdst base;
 6785     loadConL_Ex(base, baseImm);
 6786     decodeNKlass_notNull_addBase_Ex(dst, base, src);
 6787   %}
 6788 %}
 6789 
 6790 //----------MemBar Instructions-----------------------------------------------
 6791 // Memory barrier flavors
 6792 
 6793 instruct membar_acquire() %{
        // LoadFence: emits the assembler's acquire() barrier; size(4) declares 4 bytes of code.
 6794   match(LoadFence);
 6795   ins_cost(4*MEMORY_REF_COST);
 6796
 6797   format %{ "MEMBAR-acquire" %}
 6798   size(4);
 6799   ins_encode %{
 6800     __ acquire();
 6801   %}
 6802   ins_pipe(pipe_class_default);
 6803 %}
 6804 
 6805 instruct unnecessary_membar_acquire() %{
        // MemBarAcquire is matched but generates no code (empty encoding, size 0, cost 0);
        // the format text labels the barrier as redundant.
 6806   match(MemBarAcquire);
 6807   ins_cost(0);
 6808
 6809   format %{ " -- \t// redundant MEMBAR-acquire - empty" %}
 6810   size(0);
 6811   ins_encode( /*empty*/ );
 6812   ins_pipe(pipe_class_default);
 6813 %}
 6814 
 6815 instruct membar_acquire_lock() %{
        // MemBarAcquireLock generates no code (empty encoding, size 0, cost 0). Per the format
        // text, the acquire is already part of the CAS in the preceding FastLock.
 6816   match(MemBarAcquireLock);
 6817   ins_cost(0);
 6818
 6819   format %{ " -- \t// redundant MEMBAR-acquire - empty (acquire as part of CAS in prior FastLock)" %}
 6820   size(0);
 6821   ins_encode( /*empty*/ );
 6822   ins_pipe(pipe_class_default);
 6823 %}
 6824 
 6825 instruct membar_release() %{
        // Two match rules share this encoding: MemBarRelease and StoreFence both emit the
        // assembler's release() barrier; size(4) declares 4 bytes of code.
 6826   match(MemBarRelease);
 6827   match(StoreFence);
 6828   ins_cost(4*MEMORY_REF_COST);
 6829
 6830   format %{ "MEMBAR-release" %}
 6831   size(4);
 6832   ins_encode %{
 6833     __ release();
 6834   %}
 6835   ins_pipe(pipe_class_default);
 6836 %}
 6837 
 6838 instruct membar_storestore() %{
        // MemBarStoreStore and StoreStoreFence both emit membar(Assembler::StoreStore);
        // size(4) declares 4 bytes of code.
 6839   match(MemBarStoreStore);
 6840   match(StoreStoreFence);
 6841   ins_cost(4*MEMORY_REF_COST);
 6842
 6843   format %{ "MEMBAR-store-store" %}
 6844   size(4);
 6845   ins_encode %{
 6846     __ membar(Assembler::StoreStore);
 6847   %}
 6848   ins_pipe(pipe_class_default);
 6849 %}
 6850 
 6851 instruct membar_release_lock() %{
        // MemBarReleaseLock generates no code (empty encoding, size 0, cost 0). Per the format
        // text, the release is performed in FastUnlock.
 6852   match(MemBarReleaseLock);
 6853   ins_cost(0);
 6854
 6855   format %{ " -- \t// redundant MEMBAR-release - empty (release in FastUnlock)" %}
 6856   size(0);
 6857   ins_encode( /*empty*/ );
 6858   ins_pipe(pipe_class_default);
 6859 %}
 6860 
 6861 instruct membar_storeload() %{
        // MemBarStoreLoad: emits the assembler's fence(); size(4) declares 4 bytes of code.
 6862   match(MemBarStoreLoad);
 6863   ins_cost(4*MEMORY_REF_COST);
 6864
 6865   format %{ "MEMBAR-store-load" %}
 6866   size(4);
 6867   ins_encode %{
 6868     __ fence();
 6869   %}
 6870   ins_pipe(pipe_class_default);
 6871 %}
 6872 
 6873 instruct membar_volatile() %{
        // MemBarVolatile: emits the assembler's fence(); size(4) declares 4 bytes of code.
        // This rule carries no predicate; the rule that would elide redundant MemBarVolatile
        // nodes (unnecessary_membar_volatile) is commented out in this file.
 6874   match(MemBarVolatile);
 6875   ins_cost(4*MEMORY_REF_COST);
 6876
 6877   format %{ "MEMBAR-volatile" %}
 6878   size(4);
 6879   ins_encode %{
 6880     __ fence();
 6881   %}
 6882   ins_pipe(pipe_class_default);
 6883 %}
 6884 
 6885 // This optimization is wrong on PPC. The following pattern is not supported:
 6886 //  MemBarVolatile
 6887 //   ^        ^
 6888 //   |        |
 6889 //  CtrlProj MemProj
 6890 //   ^        ^
 6891 //   |        |
 6892 //   |       Load
 6893 //   |
 6894 //  MemBarVolatile
 6895 //
 6896 //  The first MemBarVolatile could get optimized out! According to
 6897 //  Vladimir, this pattern can not occur on Oracle platforms.
 6898 //  However, it does occur on PPC64 (because of membars in
 6899 //  inline_unsafe_load_store).
 6900 //
 6901 // Add this node again once we have found a good solution for inline_unsafe_load_store().
 6902 // Don't forget to look at the implementation of post_store_load_barrier again,
 6903 // we did other fixes in that method.
 6904 //instruct unnecessary_membar_volatile() %{
 6905 //  match(MemBarVolatile);
 6906 //  predicate(Matcher::post_store_load_barrier(n));
 6907 //  ins_cost(0);
 6908 //
 6909 //  format %{ " -- \t// redundant MEMBAR-volatile - empty" %}
 6910 //  size(0);
 6911 //  ins_encode( /*empty*/ );
 6912 //  ins_pipe(pipe_class_default);
 6913 //%}
 6914 
 6915 instruct membar_full() %{
        // MemBarFull: emits the assembler's fence(); size(4) declares 4 bytes of code.
 6916   match(MemBarFull);
 6917   ins_cost(4*MEMORY_REF_COST);
 6918
 6919   format %{ "MEMBAR-full" %}
 6920   size(4);
 6921   ins_encode %{
 6922     __ fence();
 6923   %}
 6924   ins_pipe(pipe_class_default);
 6925 %}
 6926 
 6927 instruct membar_CPUOrder() %{
        // MemBarCPUOrder generates no code (empty encoding, size 0, cost 0); the format text
        // gives the rationale.
 6928   match(MemBarCPUOrder);
 6929   ins_cost(0);
 6930
 6931   format %{ " -- \t// MEMBAR-CPUOrder - empty: PPC64 processors are self-consistent." %}
 6932   size(0);
 6933   ins_encode( /*empty*/ );
 6934   ins_pipe(pipe_class_default);
 6935 %}
 6936 
 6937 instruct onspinwait() %{
        // OnSpinWait: emits smt_prio_low() followed by smt_prio_medium(), bracketed by
        // block comments.
        // NOTE(review): size(8) presumably assumes each smt_prio_* call emits one 4-byte
        // instruction and block_comment() emits no code - confirm.
 6938   match(OnSpinWait);
 6939   ins_cost(DEFAULT_COST);
 6940
 6941   format %{ "OnSpinWait (smt_prio_low ; smt_prio_medium)" %}
 6942   size(8);
 6943   ins_encode %{
 6944     __ block_comment("spin_wait {");
 6945     __ smt_prio_low();
 6946     __ smt_prio_medium();
 6947     __ block_comment("}");
 6948   %}
 6949   ins_pipe(pipe_class_default);
 6950 %}
 6951 
 6952 //----------Conditional Move---------------------------------------------------
 6953 
 6954 // Cmove using isel.
 6955 instruct cmovI_reg_isel(cmpOp cmp, flagsRegSrc crx, iRegIdst dst, iRegIsrc src) %{
        // Int conditional move done with one isel (size(4)). $dst is both an input and the
        // result: it appears inside (Binary dst src) of the match rule.
 6956   match(Set dst (CMoveI (Binary cmp crx) (Binary dst src)));
 6957   ins_cost(DEFAULT_COST);
 6958
 6959   format %{ "CMOVE   $cmp, $crx, $dst, $src\n\t" %}
 6960   size(4);
 6961   ins_encode %{
          // The low two bits of the cmpcode select the condition; the invert argument is
          // non-zero exactly when the bit with value 8 is clear in the cmpcode.
 6962     int cc        = $cmp$$cmpcode;
 6963     __ isel($dst$$Register, $crx$$CondRegister,
 6964             (Assembler::Condition)(cc & 3), /*invert*/((~cc) & 8), $src$$Register);
 6965   %}
 6966   ins_pipe(pipe_class_default);
 6967 %}
 6968 
 6969 // Cmove using isel.
 6970 instruct cmovL_reg_isel(cmpOp cmp, flagsRegSrc crx, iRegLdst dst, iRegLsrc src) %{
        // Long conditional move done with one isel (size(4)). $dst is both an input and the
        // result: it appears inside (Binary dst src) of the match rule.
 6971   match(Set dst (CMoveL (Binary cmp crx) (Binary dst src)));
 6972   ins_cost(DEFAULT_COST);
 6973
 6974   format %{ "CMOVE   $cmp, $crx, $dst, $src\n\t" %}
 6975   size(4);
 6976   ins_encode %{
          // The low two bits of the cmpcode select the condition; the invert argument is
          // non-zero exactly when the bit with value 8 is clear in the cmpcode.
 6977     int cc        = $cmp$$cmpcode;
 6978     __ isel($dst$$Register, $crx$$CondRegister,
 6979             (Assembler::Condition)(cc & 3), /*invert*/((~cc) & 8), $src$$Register);
 6980   %}
 6981   ins_pipe(pipe_class_default);
 6982 %}
 6983 
 6984 // Cmove using isel.
 6985 instruct cmovN_reg_isel(cmpOp cmp, flagsRegSrc crx, iRegNdst dst, iRegNsrc src) %{
        // Narrow-oop (CMoveN) conditional move done with one isel (size(4)). $dst is both an
        // input and the result: it appears inside (Binary dst src) of the match rule.
 6986   match(Set dst (CMoveN (Binary cmp crx) (Binary dst src)));
 6987   ins_cost(DEFAULT_COST);
 6988
 6989   format %{ "CMOVE   $cmp, $crx, $dst, $src\n\t" %}
 6990   size(4);
 6991   ins_encode %{
          // The low two bits of the cmpcode select the condition; the invert argument is
          // non-zero exactly when the bit with value 8 is clear in the cmpcode.
 6992     int cc        = $cmp$$cmpcode;
 6993     __ isel($dst$$Register, $crx$$CondRegister,
 6994             (Assembler::Condition)(cc & 3), /*invert*/((~cc) & 8), $src$$Register);
 6995   %}
 6996   ins_pipe(pipe_class_default);
 6997 %}
 6998 
 6999 // Cmove using isel.
 7000 instruct cmovP_reg_isel(cmpOp cmp, flagsRegSrc crx, iRegPdst dst, iRegPsrc src) %{
        // Pointer conditional move done with one isel (size(4)). $dst is both an input and the
        // result: it appears inside (Binary dst src) of the match rule.
 7001   match(Set dst (CMoveP (Binary cmp crx) (Binary dst src)));
 7002   ins_cost(DEFAULT_COST);
 7003
 7004   format %{ "CMOVE   $cmp, $crx, $dst, $src\n\t" %}
 7005   size(4);
 7006   ins_encode %{
          // The low two bits of the cmpcode select the condition; the invert argument is
          // non-zero exactly when the bit with value 8 is clear in the cmpcode.
 7007     int cc        = $cmp$$cmpcode;
 7008     __ isel($dst$$Register, $crx$$CondRegister,
 7009             (Assembler::Condition)(cc & 3), /*invert*/((~cc) & 8), $src$$Register);
 7010   %}
 7011   ins_pipe(pipe_class_default);
 7012 %}
 7013 
 7014 instruct cmovF_reg(cmpOp cmp, flagsRegSrc crx, regF dst, regF src) %{
        // Float conditional move implemented as a conditional branch around an fmr, hence the
        // added BRANCH_COST. size(8) covers the bc plus the fmr.
 7015   match(Set dst (CMoveF (Binary cmp crx) (Binary dst src)));
 7016   ins_cost(DEFAULT_COST+BRANCH_COST);
 7017
 7018   ins_variable_size_depending_on_alignment(true);
 7019
 7020   format %{ "CMOVEF  $cmp, $crx, $dst, $src\n\t" %}
 7021   size(8);
 7022   ins_encode %{
 7023     Label done;
          // Sanity check on the branch-option encoding: the bits set in bcondCRbiIs1 but not
          // in bcondCRbiIs0 must be exactly 8 (presumably relied on by cc_to_inverse_boint).
 7024     assert((Assembler::bcondCRbiIs1 & ~Assembler::bcondCRbiIs0) == 8, "check encoding");
 7025     // Branch if not (cmp crx).
 7026     __ bc(cc_to_inverse_boint($cmp$$cmpcode), cc_to_biint($cmp$$cmpcode, $crx$$reg), done);
          // Fall-through: the condition holds, so copy $src into $dst.
 7027     __ fmr($dst$$FloatRegister, $src$$FloatRegister);
 7028     __ bind(done);
 7029   %}
 7030   ins_pipe(pipe_class_default);
 7031 %}
 7032 
 7033 instruct cmovD_reg(cmpOp cmp, flagsRegSrc crx, regD dst, regD src) %{
        // Double conditional move implemented as a conditional branch around an fmr, hence the
        // added BRANCH_COST. size(8) covers the bc plus the fmr.
 7034   match(Set dst (CMoveD (Binary cmp crx) (Binary dst src)));
 7035   ins_cost(DEFAULT_COST+BRANCH_COST);
 7036
 7037   ins_variable_size_depending_on_alignment(true);
 7038
        // Printed mnemonic is CMOVED so that the double rule can be told apart from cmovF_reg
        // in disassembly output.
 7039   format %{ "CMOVED  $cmp, $crx, $dst, $src\n\t" %}
 7040   size(8);
 7041   ins_encode %{
 7042     Label done;
          // Sanity check on the branch-option encoding: the bits set in bcondCRbiIs1 but not
          // in bcondCRbiIs0 must be exactly 8 (presumably relied on by cc_to_inverse_boint).
 7043     assert((Assembler::bcondCRbiIs1 & ~Assembler::bcondCRbiIs0) == 8, "check encoding");
 7044     // Branch if not (cmp crx).
 7045     __ bc(cc_to_inverse_boint($cmp$$cmpcode), cc_to_biint($cmp$$cmpcode, $crx$$reg), done);
          // Fall-through: the condition holds, so copy $src into $dst.
 7046     __ fmr($dst$$FloatRegister, $src$$FloatRegister);
 7047     __ bind(done);
 7048   %}
 7049   ins_pipe(pipe_class_default);
 7050 %}
 7051 
 7052 instruct cmovF_cmpF(cmpOp cop, regF op1, regF op2, regF dst, regF false_result, regF true_result, regD tmp) %{
        // Fused float compare + float select, only when PowerArchitecturePPC64 >= 9.
        // $tmp is a scratch register (TEMP). The match rule lists (false_result true_result),
        // whereas cmovF() is called with true_result before false_result.
 7053   match(Set dst (CMoveF (Binary cop (CmpF op1 op2)) (Binary false_result true_result)));
 7054   predicate(PowerArchitecturePPC64 >= 9);
 7055   effect(TEMP tmp);
 7056   ins_cost(2*DEFAULT_COST);
 7057   format %{ "cmovF_cmpF  $dst = ($op1 $cop $op2) ? $true_result : $false_result\n\t" %}
 7058   size(8);
 7059   ins_encode %{
 7060     __ cmovF($cop$$cmpcode, $dst$$FloatRegister->to_vsr(),
 7061              $op1$$FloatRegister->to_vsr(), $op2$$FloatRegister->to_vsr(),
 7062              $true_result$$FloatRegister->to_vsr(), $false_result$$FloatRegister->to_vsr(),
 7063              $tmp$$FloatRegister->to_vsr());
 7064   %}
 7065   ins_pipe(pipe_class_default);
 7066 %}
 7067 
 7068 instruct cmovF_cmpD(cmpOp cop, regD op1, regD op2, regF dst, regF false_result, regF true_result, regD tmp) %{
        // Fused double compare + float select, only when PowerArchitecturePPC64 >= 9.
        // $tmp is a scratch register (TEMP). The match rule lists (false_result true_result),
        // whereas cmovF() is called with true_result before false_result.
 7069   match(Set dst (CMoveF (Binary cop (CmpD op1 op2)) (Binary false_result true_result)));
 7070   predicate(PowerArchitecturePPC64 >= 9);
 7071   effect(TEMP tmp);
 7072   ins_cost(2*DEFAULT_COST);
 7073   format %{ "cmovF_cmpD  $dst = ($op1 $cop $op2) ? $true_result : $false_result\n\t" %}
 7074   size(8);
 7075   ins_encode %{
 7076     __ cmovF($cop$$cmpcode, $dst$$FloatRegister->to_vsr(),
 7077              $op1$$FloatRegister->to_vsr(), $op2$$FloatRegister->to_vsr(),
 7078              $true_result$$FloatRegister->to_vsr(), $false_result$$FloatRegister->to_vsr(),
 7079              $tmp$$FloatRegister->to_vsr());
 7080   %}
 7081   ins_pipe(pipe_class_default);
 7082 %}
 7083 
 7084 instruct cmovD_cmpD(cmpOp cop, regD op1, regD op2, regD dst, regD false_result, regD true_result, regD tmp) %{
        // Fused double compare + double select, only when PowerArchitecturePPC64 >= 9.
        // Uses the same cmovF() macro as the float-result rules. $tmp is a scratch register
        // (TEMP). The match rule lists (false_result true_result), whereas cmovF() is called
        // with true_result before false_result.
 7085   match(Set dst (CMoveD (Binary cop (CmpD op1 op2)) (Binary false_result true_result)));
 7086   predicate(PowerArchitecturePPC64 >= 9);
 7087   effect(TEMP tmp);
 7088   ins_cost(2*DEFAULT_COST);
 7089   format %{ "cmovD_cmpD  $dst = ($op1 $cop $op2) ? $true_result : $false_result\n\t" %}
 7090   size(8);
 7091   ins_encode %{
 7092     __ cmovF($cop$$cmpcode, $dst$$FloatRegister->to_vsr(),
 7093              $op1$$FloatRegister->to_vsr(), $op2$$FloatRegister->to_vsr(),
 7094              $true_result$$FloatRegister->to_vsr(), $false_result$$FloatRegister->to_vsr(),
 7095              $tmp$$FloatRegister->to_vsr());
 7096   %}
 7097   ins_pipe(pipe_class_default);
 7098 %}
 7099 
 7100 instruct cmovD_cmpF(cmpOp cop, regF op1, regF op2, regD dst, regD false_result, regD true_result, regD tmp) %{
        // Fused float compare + double select, only when PowerArchitecturePPC64 >= 9.
        // Uses the same cmovF() macro as the float-result rules. $tmp is a scratch register
        // (TEMP). The match rule lists (false_result true_result), whereas cmovF() is called
        // with true_result before false_result.
 7101   match(Set dst (CMoveD (Binary cop (CmpF op1 op2)) (Binary false_result true_result)));
 7102   predicate(PowerArchitecturePPC64 >= 9);
 7103   effect(TEMP tmp);
 7104   ins_cost(2*DEFAULT_COST);
 7105   format %{ "cmovD_cmpF  $dst = ($op1 $cop $op2) ? $true_result : $false_result\n\t" %}
 7106   size(8);
 7107   ins_encode %{
 7108     __ cmovF($cop$$cmpcode, $dst$$FloatRegister->to_vsr(),
 7109              $op1$$FloatRegister->to_vsr(), $op2$$FloatRegister->to_vsr(),
 7110              $true_result$$FloatRegister->to_vsr(), $false_result$$FloatRegister->to_vsr(),
 7111              $tmp$$FloatRegister->to_vsr());
 7112   %}
 7113   ins_pipe(pipe_class_default);
 7114 %}
 7115 
 7116 //----------Compare-And-Swap---------------------------------------------------
 7117 
 7118 // CompareAndSwap{P,I,L} have more than one output, therefore "CmpI
 7119 // (CompareAndSwap ...)" or "If (CmpI (CompareAndSwap ..))"  cannot be
 7120 // matched.
 7121 
 7122 // Strong versions:
 7123 
 7124 instruct compareAndSwapB_regP_regI_regI(iRegIdst res, iRegPdst mem_ptr, iRegIsrc src1, iRegIsrc src2, flagsRegCR0 cr0) %{
        // Strong byte CAS; $res receives the boolean outcome. CR0 is clobbered (TEMP cr0) and
        // R0 is passed to the macro as second argument.
        // The macro is called with MemBarNone; the trailing barrier is emitted explicitly.
 7125   match(Set res (CompareAndSwapB mem_ptr (Binary src1 src2)));
 7126   effect(TEMP_DEF res, TEMP cr0); // TEMP_DEF to avoid jump
 7127   format %{ "CMPXCHGB $res, $mem_ptr, $src1, $src2; as bool" %}
 7128   ins_encode %{
 7129     // CmpxchgX sets CR0 to cmpX(src1, src2) and Rres to 'true'/'false'.
 7130     __ cmpxchgb(CR0, R0, $src1$$Register, $src2$$Register, $mem_ptr$$Register,
 7131                 MacroAssembler::MemBarNone, MacroAssembler::cmpxchgx_hint_atomic_update(),
 7132                 $res$$Register, nullptr, true);
          // Trailing barrier: isync if support_IRIW_for_not_multiple_copy_atomic_cpu, else sync.
 7133     if (support_IRIW_for_not_multiple_copy_atomic_cpu) {
 7134       __ isync();
 7135     } else {
 7136       __ sync();
 7137     }
 7138   %}
 7139   ins_pipe(pipe_class_default);
 7140 %}
 7141 
 7142 instruct compareAndSwapS_regP_regI_regI(iRegIdst res, iRegPdst mem_ptr, iRegIsrc src1, iRegIsrc src2, flagsRegCR0 cr0) %{
        // Strong short (halfword) CAS; $res receives the boolean outcome. CR0 is clobbered
        // (TEMP cr0) and R0 is passed to the macro as second argument.
        // The macro is called with MemBarNone; the trailing barrier is emitted explicitly.
 7143   match(Set res (CompareAndSwapS mem_ptr (Binary src1 src2)));
 7144   effect(TEMP_DEF res, TEMP cr0); // TEMP_DEF to avoid jump
 7145   format %{ "CMPXCHGH $res, $mem_ptr, $src1, $src2; as bool" %}
 7146   ins_encode %{
 7147     // CmpxchgX sets CR0 to cmpX(src1, src2) and Rres to 'true'/'false'.
 7148     __ cmpxchgh(CR0, R0, $src1$$Register, $src2$$Register, $mem_ptr$$Register,
 7149                 MacroAssembler::MemBarNone, MacroAssembler::cmpxchgx_hint_atomic_update(),
 7150                 $res$$Register, nullptr, true);
          // Trailing barrier: isync if support_IRIW_for_not_multiple_copy_atomic_cpu, else sync.
 7151     if (support_IRIW_for_not_multiple_copy_atomic_cpu) {
 7152       __ isync();
 7153     } else {
 7154       __ sync();
 7155     }
 7156   %}
 7157   ins_pipe(pipe_class_default);
 7158 %}
 7159 
 7160 instruct compareAndSwapI_regP_regI_regI(iRegIdst res, iRegPdst mem_ptr, iRegIsrc src1, iRegIsrc src2, flagsRegCR0 cr0) %{
        // Strong int (word) CAS; $res receives the boolean outcome. CR0 is clobbered
        // (TEMP cr0) and R0 is passed to the macro as second argument.
        // The macro is called with MemBarNone; the trailing barrier is emitted explicitly.
 7161   match(Set res (CompareAndSwapI mem_ptr (Binary src1 src2)));
 7162   effect(TEMP_DEF res, TEMP cr0); // TEMP_DEF to avoid jump
 7163   format %{ "CMPXCHGW $res, $mem_ptr, $src1, $src2; as bool" %}
 7164   ins_encode %{
 7165     // CmpxchgX sets CR0 to cmpX(src1, src2) and Rres to 'true'/'false'.
 7166     __ cmpxchgw(CR0, R0, $src1$$Register, $src2$$Register, $mem_ptr$$Register,
 7167                 MacroAssembler::MemBarNone, MacroAssembler::cmpxchgx_hint_atomic_update(),
 7168                 $res$$Register, nullptr, true);
          // Trailing barrier: isync if support_IRIW_for_not_multiple_copy_atomic_cpu, else sync.
 7169     if (support_IRIW_for_not_multiple_copy_atomic_cpu) {
 7170       __ isync();
 7171     } else {
 7172       __ sync();
 7173     }
 7174   %}
 7175   ins_pipe(pipe_class_default);
 7176 %}
 7177 
 7178 instruct compareAndSwapN_regP_regN_regN(iRegIdst res, iRegPdst mem_ptr, iRegNsrc src1, iRegNsrc src2, flagsRegCR0 cr0) %{
        // Strong narrow-oop CAS using the word-sized cmpxchgw; $res receives the boolean
        // outcome. Only matches nodes whose barrier_data() is 0.
        // The macro is called with MemBarNone; the trailing barrier is emitted explicitly.
 7179   match(Set res (CompareAndSwapN mem_ptr (Binary src1 src2)));
 7180   predicate(n->as_LoadStore()->barrier_data() == 0);
 7181   effect(TEMP_DEF res, TEMP cr0); // TEMP_DEF to avoid jump
 7182   format %{ "CMPXCHGW $res, $mem_ptr, $src1, $src2; as bool" %}
 7183   ins_encode %{
 7184     // CmpxchgX sets CR0 to cmpX(src1, src2) and Rres to 'true'/'false'.
 7185     __ cmpxchgw(CR0, R0, $src1$$Register, $src2$$Register, $mem_ptr$$Register,
 7186                 MacroAssembler::MemBarNone, MacroAssembler::cmpxchgx_hint_atomic_update(),
 7187                 $res$$Register, nullptr, true);
          // Trailing barrier: isync if support_IRIW_for_not_multiple_copy_atomic_cpu, else sync.
 7188     if (support_IRIW_for_not_multiple_copy_atomic_cpu) {
 7189       __ isync();
 7190     } else {
 7191       __ sync();
 7192     }
 7193   %}
 7194   ins_pipe(pipe_class_default);
 7195 %}
 7196 
 7197 instruct compareAndSwapL_regP_regL_regL(iRegIdst res, iRegPdst mem_ptr, iRegLsrc src1, iRegLsrc src2, flagsRegCR0 cr0) %{
        // Strong long (doubleword) CAS; $res receives the boolean outcome. CR0 is clobbered
        // (TEMP cr0) and R0 is passed to the macro as second argument.
        // The macro is called with MemBarNone; the trailing barrier is emitted explicitly.
 7198   match(Set res (CompareAndSwapL mem_ptr (Binary src1 src2)));
 7199   effect(TEMP_DEF res, TEMP cr0); // TEMP_DEF to avoid jump
 7200   format %{ "CMPXCHGD $res, $mem_ptr, $src1, $src2; as bool" %}
 7201   ins_encode %{
 7202     // CmpxchgX sets CR0 to cmpX(src1, src2) and Rres to 'true'/'false'.
 7203     __ cmpxchgd(CR0, R0, $src1$$Register, $src2$$Register, $mem_ptr$$Register,
 7204                 MacroAssembler::MemBarNone, MacroAssembler::cmpxchgx_hint_atomic_update(),
 7205                 $res$$Register, nullptr, true);
          // Trailing barrier: isync if support_IRIW_for_not_multiple_copy_atomic_cpu, else sync.
 7206     if (support_IRIW_for_not_multiple_copy_atomic_cpu) {
 7207       __ isync();
 7208     } else {
 7209       __ sync();
 7210     }
 7211   %}
 7212   ins_pipe(pipe_class_default);
 7213 %}
 7214 
 7215 instruct compareAndSwapP_regP_regP_regP(iRegIdst res, iRegPdst mem_ptr, iRegPsrc src1, iRegPsrc src2, flagsRegCR0 cr0) %{
        // Strong pointer CAS using the doubleword-sized cmpxchgd; $res receives the boolean
        // outcome. Only matches nodes whose barrier_data() is 0.
        // The macro is called with MemBarNone; the trailing barrier is emitted explicitly.
 7216   match(Set res (CompareAndSwapP mem_ptr (Binary src1 src2)));
 7217   effect(TEMP_DEF res, TEMP cr0); // TEMP_DEF to avoid jump
 7218   predicate(n->as_LoadStore()->barrier_data() == 0);
 7219   format %{ "CMPXCHGD $res, $mem_ptr, $src1, $src2; as bool; ptr" %}
 7220   ins_encode %{
 7221     // CmpxchgX sets CR0 to cmpX(src1, src2) and Rres to 'true'/'false'.
 7222     __ cmpxchgd(CR0, R0, $src1$$Register, $src2$$Register, $mem_ptr$$Register,
 7223                 MacroAssembler::MemBarNone, MacroAssembler::cmpxchgx_hint_atomic_update(),
 7224                 $res$$Register, nullptr, true);
          // Trailing barrier: isync if support_IRIW_for_not_multiple_copy_atomic_cpu, else sync.
 7225     if (support_IRIW_for_not_multiple_copy_atomic_cpu) {
 7226       __ isync();
 7227     } else {
 7228       __ sync();
 7229     }
 7230   %}
 7231   ins_pipe(pipe_class_default);
 7232 %}
 7233 
 7234 // Weak versions:
 7235 
 7236 instruct weakCompareAndSwapB_regP_regI_regI(iRegIdst res, iRegPdst mem_ptr, iRegIsrc src1, iRegIsrc src2, flagsRegCR0 cr0) %{
        // Weak byte CAS for memory orders other than acquire/seqcst: no barrier is requested
        // (MemBarNone) and the macro is called with /*weak*/ true. $res gets the boolean result.
 7237   match(Set res (WeakCompareAndSwapB mem_ptr (Binary src1 src2)));
 7238   predicate(((CompareAndSwapNode*)n)->order() != MemNode::acquire && ((CompareAndSwapNode*)n)->order() != MemNode::seqcst);
 7239   effect(TEMP_DEF res, TEMP cr0); // TEMP_DEF to avoid jump
 7240   format %{ "weak CMPXCHGB $res, $mem_ptr, $src1, $src2; as bool" %}
 7241   ins_encode %{
 7242     // CmpxchgX sets CR0 to cmpX(src1, src2) and Rres to 'true'/'false'.
 7243     __ cmpxchgb(CR0, R0, $src1$$Register, $src2$$Register, $mem_ptr$$Register,
 7244                 MacroAssembler::MemBarNone,
 7245                 MacroAssembler::cmpxchgx_hint_atomic_update(), $res$$Register, nullptr, true, /*weak*/ true);
 7246   %}
 7247   ins_pipe(pipe_class_default);
 7248 %}
 7249 
 7250 instruct weakCompareAndSwapB_acq_regP_regI_regI(iRegIdst res, iRegPdst mem_ptr, iRegIsrc src1, iRegIsrc src2, flagsRegCR0 cr0) %{
        // Weak byte CAS for acquire/seqcst order. The barrier is requested from the macro:
        // MemBarAcq if support_IRIW_for_not_multiple_copy_atomic_cpu, else MemBarFenceAfter.
 7251   match(Set res (WeakCompareAndSwapB mem_ptr (Binary src1 src2)));
 7252   predicate((((CompareAndSwapNode*)n)->order() == MemNode::acquire || ((CompareAndSwapNode*)n)->order() == MemNode::seqcst) );
 7253   effect(TEMP_DEF res, TEMP cr0); // TEMP_DEF to avoid jump
 7254   format %{ "weak CMPXCHGB acq $res, $mem_ptr, $src1, $src2; as bool" %}
 7255   ins_encode %{
 7256     // CmpxchgX sets CR0 to cmpX(src1, src2) and Rres to 'true'/'false'.
 7257     __ cmpxchgb(CR0, R0, $src1$$Register, $src2$$Register, $mem_ptr$$Register,
 7258                 support_IRIW_for_not_multiple_copy_atomic_cpu ? MacroAssembler::MemBarAcq : MacroAssembler::MemBarFenceAfter,
 7259                 MacroAssembler::cmpxchgx_hint_atomic_update(), $res$$Register, nullptr, true, /*weak*/ true);
 7260   %}
 7261   ins_pipe(pipe_class_default);
 7262 %}
 7263 
 7264 instruct weakCompareAndSwapS_regP_regI_regI(iRegIdst res, iRegPdst mem_ptr, iRegIsrc src1, iRegIsrc src2, flagsRegCR0 cr0) %{
        // Weak short CAS for memory orders other than acquire/seqcst: no barrier is requested
        // (MemBarNone) and the macro is called with /*weak*/ true. $res gets the boolean result.
 7265   match(Set res (WeakCompareAndSwapS mem_ptr (Binary src1 src2)));
 7266   predicate(((CompareAndSwapNode*)n)->order() != MemNode::acquire && ((CompareAndSwapNode*)n)->order() != MemNode::seqcst);
 7267   effect(TEMP_DEF res, TEMP cr0); // TEMP_DEF to avoid jump
 7268   format %{ "weak CMPXCHGH $res, $mem_ptr, $src1, $src2; as bool" %}
 7269   ins_encode %{
 7270     // CmpxchgX sets CR0 to cmpX(src1, src2) and Rres to 'true'/'false'.
 7271     __ cmpxchgh(CR0, R0, $src1$$Register, $src2$$Register, $mem_ptr$$Register,
 7272                 MacroAssembler::MemBarNone,
 7273                 MacroAssembler::cmpxchgx_hint_atomic_update(), $res$$Register, nullptr, true, /*weak*/ true);
 7274   %}
 7275   ins_pipe(pipe_class_default);
 7276 %}
 7277 
 7278 instruct weakCompareAndSwapS_acq_regP_regI_regI(iRegIdst res, iRegPdst mem_ptr, iRegIsrc src1, iRegIsrc src2, flagsRegCR0 cr0) %{
        // Weak short CAS for acquire/seqcst order. The barrier is requested from the macro:
        // MemBarAcq if support_IRIW_for_not_multiple_copy_atomic_cpu, else MemBarFenceAfter.
 7279   match(Set res (WeakCompareAndSwapS mem_ptr (Binary src1 src2)));
 7280   predicate((((CompareAndSwapNode*)n)->order() == MemNode::acquire || ((CompareAndSwapNode*)n)->order() == MemNode::seqcst));
 7281   effect(TEMP_DEF res, TEMP cr0); // TEMP_DEF to avoid jump
 7282   format %{ "weak CMPXCHGH acq $res, $mem_ptr, $src1, $src2; as bool" %}
 7283   ins_encode %{
 7284     // CmpxchgX sets CR0 to cmpX(src1, src2) and Rres to 'true'/'false'.
 7285     __ cmpxchgh(CR0, R0, $src1$$Register, $src2$$Register, $mem_ptr$$Register,
 7286                 support_IRIW_for_not_multiple_copy_atomic_cpu ? MacroAssembler::MemBarAcq : MacroAssembler::MemBarFenceAfter,
 7287                 MacroAssembler::cmpxchgx_hint_atomic_update(), $res$$Register, nullptr, true, /*weak*/ true);
 7288   %}
 7289   ins_pipe(pipe_class_default);
 7290 %}
 7291 
 7292 instruct weakCompareAndSwapI_regP_regI_regI(iRegIdst res, iRegPdst mem_ptr, iRegIsrc src1, iRegIsrc src2, flagsRegCR0 cr0) %{
        // Weak int CAS for memory orders other than acquire/seqcst: no barrier is requested
        // (MemBarNone) and the macro is called with /*weak*/ true. $res gets the boolean result.
 7293   match(Set res (WeakCompareAndSwapI mem_ptr (Binary src1 src2)));
 7294   predicate(((CompareAndSwapNode*)n)->order() != MemNode::acquire && ((CompareAndSwapNode*)n)->order() != MemNode::seqcst);
 7295   effect(TEMP_DEF res, TEMP cr0); // TEMP_DEF to avoid jump
 7296   format %{ "weak CMPXCHGW $res, $mem_ptr, $src1, $src2; as bool" %}
 7297   ins_encode %{
 7298     // CmpxchgX sets CR0 to cmpX(src1, src2) and Rres to 'true'/'false'.
 7299     __ cmpxchgw(CR0, R0, $src1$$Register, $src2$$Register, $mem_ptr$$Register,
 7300                 MacroAssembler::MemBarNone,
 7301                 MacroAssembler::cmpxchgx_hint_atomic_update(), $res$$Register, nullptr, true, /*weak*/ true);
 7302   %}
 7303   ins_pipe(pipe_class_default);
 7304 %}
 7305 
 7306 instruct weakCompareAndSwapI_acq_regP_regI_regI(iRegIdst res, iRegPdst mem_ptr, iRegIsrc src1, iRegIsrc src2, flagsRegCR0 cr0) %{
        // Weak int CAS for acquire/seqcst order. The barrier is requested from the macro:
        // MemBarAcq if support_IRIW_for_not_multiple_copy_atomic_cpu, else MemBarFenceAfter.
 7307   match(Set res (WeakCompareAndSwapI mem_ptr (Binary src1 src2)));
 7308   predicate(((CompareAndSwapNode*)n)->order() == MemNode::acquire || ((CompareAndSwapNode*)n)->order() == MemNode::seqcst);
 7309   effect(TEMP_DEF res, TEMP cr0); // TEMP_DEF to avoid jump
 7310   format %{ "weak CMPXCHGW acq $res, $mem_ptr, $src1, $src2; as bool" %}
 7311   ins_encode %{
 7312     // CmpxchgX sets CR0 to cmpX(src1, src2) and Rres to 'true'/'false'.
 7313     // Acquire only needed in successful case. Weak node is allowed to report unsuccessful in additional rare cases and
 7314     // value is never passed to caller.
 7315     __ cmpxchgw(CR0, R0, $src1$$Register, $src2$$Register, $mem_ptr$$Register,
 7316                 support_IRIW_for_not_multiple_copy_atomic_cpu ? MacroAssembler::MemBarAcq : MacroAssembler::MemBarFenceAfter,
 7317                 MacroAssembler::cmpxchgx_hint_atomic_update(), $res$$Register, nullptr, true, /*weak*/ true);
 7318   %}
 7319   ins_pipe(pipe_class_default);
 7320 %}
 7321 
 7322 instruct weakCompareAndSwapN_regP_regN_regN(iRegIdst res, iRegPdst mem_ptr, iRegNsrc src1, iRegNsrc src2, flagsRegCR0 cr0) %{
        // Weak narrow-oop CAS (word-sized cmpxchgw) for orders other than acquire/seqcst, and
        // only for nodes whose barrier_data() is 0. No barrier is requested (MemBarNone).
 7323   match(Set res (WeakCompareAndSwapN mem_ptr (Binary src1 src2)));
 7324   predicate(((CompareAndSwapNode*)n)->order() != MemNode::acquire && ((CompareAndSwapNode*)n)->order() != MemNode::seqcst && n->as_LoadStore()->barrier_data() == 0);
 7325   effect(TEMP_DEF res, TEMP cr0); // TEMP_DEF to avoid jump
 7326   format %{ "weak CMPXCHGW $res, $mem_ptr, $src1, $src2; as bool" %}
 7327   ins_encode %{
 7328     // CmpxchgX sets CR0 to cmpX(src1, src2) and Rres to 'true'/'false'.
 7329     __ cmpxchgw(CR0, R0, $src1$$Register, $src2$$Register, $mem_ptr$$Register,
 7330                 MacroAssembler::MemBarNone,
 7331                 MacroAssembler::cmpxchgx_hint_atomic_update(), $res$$Register, nullptr, true, /*weak*/ true);
 7332   %}
 7333   ins_pipe(pipe_class_default);
 7334 %}
 7335 
 7336 instruct weakCompareAndSwapN_acq_regP_regN_regN(iRegIdst res, iRegPdst mem_ptr, iRegNsrc src1, iRegNsrc src2, flagsRegCR0 cr0) %{
        // Weak narrow-oop CAS (word-sized cmpxchgw) for acquire/seqcst order, only for nodes
        // whose barrier_data() is 0. The barrier is requested from the macro:
        // MemBarAcq if support_IRIW_for_not_multiple_copy_atomic_cpu, else MemBarFenceAfter.
 7337   match(Set res (WeakCompareAndSwapN mem_ptr (Binary src1 src2)));
 7338   predicate((((CompareAndSwapNode*)n)->order() == MemNode::acquire || ((CompareAndSwapNode*)n)->order() == MemNode::seqcst) && n->as_LoadStore()->barrier_data() == 0);
 7339   effect(TEMP_DEF res, TEMP cr0); // TEMP_DEF to avoid jump
 7340   format %{ "weak CMPXCHGW acq $res, $mem_ptr, $src1, $src2; as bool" %}
 7341   ins_encode %{
 7342     // CmpxchgX sets CR0 to cmpX(src1, src2) and Rres to 'true'/'false'.
 7343     // Acquire only needed in successful case. Weak node is allowed to report unsuccessful in additional rare cases and
 7344     // value is never passed to caller.
 7345     __ cmpxchgw(CR0, R0, $src1$$Register, $src2$$Register, $mem_ptr$$Register,
 7346                 support_IRIW_for_not_multiple_copy_atomic_cpu ? MacroAssembler::MemBarAcq : MacroAssembler::MemBarFenceAfter,
 7347                 MacroAssembler::cmpxchgx_hint_atomic_update(), $res$$Register, nullptr, true, /*weak*/ true);
 7348   %}
 7349   ins_pipe(pipe_class_default);
 7350 %}
 7351 
 7352 instruct weakCompareAndSwapL_regP_regL_regL(iRegIdst res, iRegPdst mem_ptr, iRegLsrc src1, iRegLsrc src2, flagsRegCR0 cr0) %{
        // Weak long CAS for memory orders other than acquire/seqcst: no barrier is requested
        // (MemBarNone) and the macro is called with /*weak*/ true. $res gets the boolean result.
 7353   match(Set res (WeakCompareAndSwapL mem_ptr (Binary src1 src2)));
 7354   predicate(((CompareAndSwapNode*)n)->order() != MemNode::acquire && ((CompareAndSwapNode*)n)->order() != MemNode::seqcst);
 7355   effect(TEMP_DEF res, TEMP cr0); // TEMP_DEF to avoid jump
 7356   format %{ "weak CMPXCHGD $res, $mem_ptr, $src1, $src2; as bool" %}
 7357   ins_encode %{
 7358     // CmpxchgX sets CR0 to cmpX(src1, src2) and Rres to 'true'/'false'.
 7359     // Only the boolean is produced; the loaded value is never passed to the caller.
 7360     __ cmpxchgd(CR0, R0, $src1$$Register, $src2$$Register, $mem_ptr$$Register,
 7361                 MacroAssembler::MemBarNone,
 7362                 MacroAssembler::cmpxchgx_hint_atomic_update(), $res$$Register, nullptr, true, /*weak*/ true);
 7363   %}
 7364   ins_pipe(pipe_class_default);
 7365 %}
 7366 
 7367 instruct weakCompareAndSwapL_acq_regP_regL_regL(iRegIdst res, iRegPdst mem_ptr, iRegLsrc src1, iRegLsrc src2, flagsRegCR0 cr0) %{
        // Weak long CAS for acquire/seqcst order. The barrier is requested from the macro:
        // MemBarAcq if support_IRIW_for_not_multiple_copy_atomic_cpu, else MemBarFenceAfter.
 7368   match(Set res (WeakCompareAndSwapL mem_ptr (Binary src1 src2)));
 7369   predicate(((CompareAndSwapNode*)n)->order() == MemNode::acquire || ((CompareAndSwapNode*)n)->order() == MemNode::seqcst);
 7370   effect(TEMP_DEF res, TEMP cr0); // TEMP_DEF to avoid jump
 7371   format %{ "weak CMPXCHGD acq $res, $mem_ptr, $src1, $src2; as bool" %}
 7372   ins_encode %{
 7373     // CmpxchgX sets CR0 to cmpX(src1, src2) and Rres to 'true'/'false'.
 7374     // Acquire only needed in successful case. Weak node is allowed to report unsuccessful in additional rare cases and
 7375     // value is never passed to caller.
 7376     __ cmpxchgd(CR0, R0, $src1$$Register, $src2$$Register, $mem_ptr$$Register,
 7377                 support_IRIW_for_not_multiple_copy_atomic_cpu ? MacroAssembler::MemBarAcq : MacroAssembler::MemBarFenceAfter,
 7378                 MacroAssembler::cmpxchgx_hint_atomic_update(), $res$$Register, nullptr, true, /*weak*/ true);
 7379   %}
 7380   ins_pipe(pipe_class_default);
 7381 %}
 7382 
 7383 instruct weakCompareAndSwapP_regP_regP_regP(iRegIdst res, iRegPdst mem_ptr, iRegPsrc src1, iRegPsrc src2, flagsRegCR0 cr0) %{
        // Weak pointer CAS (doubleword cmpxchgd) for orders other than acquire/seqcst, and
        // only for nodes whose barrier_data() is 0. No barrier is requested (MemBarNone).
 7384   match(Set res (WeakCompareAndSwapP mem_ptr (Binary src1 src2)));
 7385   predicate((((CompareAndSwapNode*)n)->order() != MemNode::acquire && ((CompareAndSwapNode*)n)->order() != MemNode::seqcst) && n->as_LoadStore()->barrier_data() == 0);
 7386   effect(TEMP_DEF res, TEMP cr0); // TEMP_DEF to avoid jump
 7387   format %{ "weak CMPXCHGD $res, $mem_ptr, $src1, $src2; as bool; ptr" %}
 7388   ins_encode %{
 7389     // CmpxchgX sets CR0 to cmpX(src1, src2) and Rres to 'true'/'false'.
 7390     __ cmpxchgd(CR0, R0, $src1$$Register, $src2$$Register, $mem_ptr$$Register,
 7391                 MacroAssembler::MemBarNone,
 7392                 MacroAssembler::cmpxchgx_hint_atomic_update(), $res$$Register, nullptr, true, /*weak*/ true);
 7393   %}
 7394   ins_pipe(pipe_class_default);
 7395 %}
 7396 
 7397 instruct weakCompareAndSwapP_acq_regP_regP_regP(iRegIdst res, iRegPdst mem_ptr, iRegPsrc src1, iRegPsrc src2, flagsRegCR0 cr0) %{
        // Weak pointer CAS (doubleword cmpxchgd) for acquire/seqcst order, only for nodes
        // whose barrier_data() is 0. The barrier is requested from the macro:
        // MemBarAcq if support_IRIW_for_not_multiple_copy_atomic_cpu, else MemBarFenceAfter.
 7398   match(Set res (WeakCompareAndSwapP mem_ptr (Binary src1 src2)));
 7399   predicate((((CompareAndSwapNode*)n)->order() == MemNode::acquire || ((CompareAndSwapNode*)n)->order() == MemNode::seqcst) && n->as_LoadStore()->barrier_data() == 0);
 7400   effect(TEMP_DEF res, TEMP cr0); // TEMP_DEF to avoid jump
 7401   format %{ "weak CMPXCHGD acq $res, $mem_ptr, $src1, $src2; as bool; ptr" %}
 7402   ins_encode %{
 7403     // CmpxchgX sets CR0 to cmpX(src1, src2) and Rres to 'true'/'false'.
 7404     // Acquire only needed in successful case. Weak node is allowed to report unsuccessful in additional rare cases and
 7405     // value is never passed to caller.
 7406     __ cmpxchgd(CR0, R0, $src1$$Register, $src2$$Register, $mem_ptr$$Register,
 7407                 support_IRIW_for_not_multiple_copy_atomic_cpu ? MacroAssembler::MemBarAcq : MacroAssembler::MemBarFenceAfter,
 7408                 MacroAssembler::cmpxchgx_hint_atomic_update(), $res$$Register, nullptr, true, /*weak*/ true);
 7409   %}
 7410   ins_pipe(pipe_class_default);
 7411 %}
 7412 
 7413 // CompareAndExchange
 7414 
 7415 instruct compareAndExchangeB_regP_regI_regI(iRegIdst res, iRegPdst mem_ptr, iRegIsrc src1, iRegIsrc src2, flagsRegCR0 cr0) %{
        // Byte compare-and-exchange for memory orders other than acquire/seqcst; the result is
        // produced "as int" (see format) and no barrier is requested or emitted.
 7416   match(Set res (CompareAndExchangeB mem_ptr (Binary src1 src2)));
 7417   predicate(((CompareAndSwapNode*)n)->order() != MemNode::acquire && ((CompareAndSwapNode*)n)->order() != MemNode::seqcst);
 7418   effect(TEMP_DEF res, TEMP cr0);
 7419   format %{ "CMPXCHGB $res, $mem_ptr, $src1, $src2; as int" %}
 7420   ins_encode %{
 7421     // CmpxchgX sets CR0 to cmpX(src1, src2). Unlike the CompareAndSwap rules, $res is passed
          // as the second argument (where those rules pass R0) and noreg is passed where they
          // pass $res, so no 'true'/'false' flag is produced here.
          // NOTE(review): presumably $res receives the value found in memory - confirm
          // against the cmpxchgb declaration.
 7422     __ cmpxchgb(CR0, $res$$Register, $src1$$Register, $src2$$Register, $mem_ptr$$Register,
 7423                 MacroAssembler::MemBarNone, MacroAssembler::cmpxchgx_hint_atomic_update(),
 7424                 noreg, nullptr, true);
 7425   %}
 7426   ins_pipe(pipe_class_default);
 7427 %}
 7428 
 7429 instruct compareAndExchangeB_acq_regP_regI_regI(iRegIdst res, iRegPdst mem_ptr, iRegIsrc src1, iRegIsrc src2, flagsRegCR0 cr0) %{
        // Byte compare-and-exchange for acquire/seqcst order; the result is produced "as int"
        // (see format). The macro is called with MemBarNone and the trailing barrier is
        // emitted explicitly afterwards.
 7430   match(Set res (CompareAndExchangeB mem_ptr (Binary src1 src2)));
 7431   predicate((((CompareAndSwapNode*)n)->order() == MemNode::acquire || ((CompareAndSwapNode*)n)->order() == MemNode::seqcst));
 7432   effect(TEMP_DEF res, TEMP cr0);
 7433   format %{ "CMPXCHGB acq $res, $mem_ptr, $src1, $src2; as int" %}
 7434   ins_encode %{
 7435     // CmpxchgX sets CR0 to cmpX(src1, src2). Unlike the CompareAndSwap rules, $res is passed
          // as the second argument (where those rules pass R0) and noreg is passed where they
          // pass $res, so no 'true'/'false' flag is produced here.
          // NOTE(review): presumably $res receives the value found in memory - confirm
          // against the cmpxchgb declaration.
 7436     __ cmpxchgb(CR0, $res$$Register, $src1$$Register, $src2$$Register, $mem_ptr$$Register,
 7437                 MacroAssembler::MemBarNone, MacroAssembler::cmpxchgx_hint_atomic_update(),
 7438                 noreg, nullptr, true);
 7439     if (support_IRIW_for_not_multiple_copy_atomic_cpu) {
 7440       __ isync();
 7441     } else {
 7442       // isync would be sufficient in case of CompareAndExchangeAcquire, but we currently don't optimize for that.
 7443       __ sync();
 7444     }
 7445   %}
 7446   ins_pipe(pipe_class_default);
 7447 %}
 7448 
 7449 
 7450 instruct compareAndExchangeS_regP_regI_regI(iRegIdst res, iRegPdst mem_ptr, iRegIsrc src1, iRegIsrc src2, flagsRegCR0 cr0) %{
        // Short-sized CompareAndExchange. Per the predicate this rule applies when the node's memory
        // order is neither acquire nor seqcst; no barrier instruction is emitted after the cmpxchg.
 7451   match(Set res (CompareAndExchangeS mem_ptr (Binary src1 src2)));
 7452   predicate(((CompareAndSwapNode*)n)->order() != MemNode::acquire && ((CompareAndSwapNode*)n)->order() != MemNode::seqcst);
 7453   effect(TEMP_DEF res, TEMP cr0);
 7454   format %{ "CMPXCHGH $res, $mem_ptr, $src1, $src2; as int" %}
 7455   ins_encode %{
 7456     // CR0 is handed to cmpxchgh as the condition register; noreg is passed after the hint argument.
          // NOTE(review): for an exchange $res presumably receives the value found at $mem_ptr rather
          // than a 'true'/'false' flag -- confirm against MacroAssembler::cmpxchgh.
 7457     __ cmpxchgh(CR0, $res$$Register, $src1$$Register, $src2$$Register, $mem_ptr$$Register,
 7458                 MacroAssembler::MemBarNone, MacroAssembler::cmpxchgx_hint_atomic_update(),
 7459                 noreg, nullptr, true);
 7460   %}
 7461   ins_pipe(pipe_class_default);
 7462 %}
 7463 
 7464 instruct compareAndExchangeS_acq_regP_regI_regI(iRegIdst res, iRegPdst mem_ptr, iRegIsrc src1, iRegIsrc src2, flagsRegCR0 cr0) %{
        // Short-sized CompareAndExchange for nodes whose memory order is acquire or seqcst
        // (see predicate); the cmpxchg is followed by an isync or sync.
 7465   match(Set res (CompareAndExchangeS mem_ptr (Binary src1 src2)));
 7466   predicate((((CompareAndSwapNode*)n)->order() == MemNode::acquire || ((CompareAndSwapNode*)n)->order() == MemNode::seqcst));
 7467   effect(TEMP_DEF res, TEMP cr0);
 7468   format %{ "CMPXCHGH acq $res, $mem_ptr, $src1, $src2; as int" %}
 7469   ins_encode %{
 7470     // CR0 is handed to cmpxchgh as the condition register; noreg is passed after the hint argument.
          // NOTE(review): for an exchange $res presumably receives the value found at $mem_ptr rather
          // than a 'true'/'false' flag -- confirm against MacroAssembler::cmpxchgh.
 7471     __ cmpxchgh(CR0, $res$$Register, $src1$$Register, $src2$$Register, $mem_ptr$$Register,
 7472                 MacroAssembler::MemBarNone, MacroAssembler::cmpxchgx_hint_atomic_update(),
 7473                 noreg, nullptr, true);
          // Trailing barrier: isync when support_IRIW_for_not_multiple_copy_atomic_cpu is set, sync otherwise.
 7474     if (support_IRIW_for_not_multiple_copy_atomic_cpu) {
 7475       __ isync();
 7476     } else {
 7477       // isync would be sufficient in case of CompareAndExchangeAcquire, but we currently don't optimize for that.
 7478       __ sync();
 7479     }
 7480   %}
 7481   ins_pipe(pipe_class_default);
 7482 %}
 7483 
 7484 instruct compareAndExchangeI_regP_regI_regI(iRegIdst res, iRegPdst mem_ptr, iRegIsrc src1, iRegIsrc src2, flagsRegCR0 cr0) %{
        // Int-sized CompareAndExchange. Per the predicate this rule applies when the node's memory
        // order is neither acquire nor seqcst; no barrier instruction is emitted after the cmpxchg.
 7485   match(Set res (CompareAndExchangeI mem_ptr (Binary src1 src2)));
 7486   predicate(((CompareAndSwapNode*)n)->order() != MemNode::acquire && ((CompareAndSwapNode*)n)->order() != MemNode::seqcst);
 7487   effect(TEMP_DEF res, TEMP cr0);
 7488   format %{ "CMPXCHGW $res, $mem_ptr, $src1, $src2; as int" %}
 7489   ins_encode %{
 7490     // CR0 is handed to cmpxchgw as the condition register; noreg is passed after the hint argument.
          // NOTE(review): for an exchange $res presumably receives the value found at $mem_ptr rather
          // than a 'true'/'false' flag -- confirm against MacroAssembler::cmpxchgw.
 7491     __ cmpxchgw(CR0, $res$$Register, $src1$$Register, $src2$$Register, $mem_ptr$$Register,
 7492                 MacroAssembler::MemBarNone, MacroAssembler::cmpxchgx_hint_atomic_update(),
 7493                 noreg, nullptr, true);
 7494   %}
 7495   ins_pipe(pipe_class_default);
 7496 %}
 7497 
 7498 instruct compareAndExchangeI_acq_regP_regI_regI(iRegIdst res, iRegPdst mem_ptr, iRegIsrc src1, iRegIsrc src2, flagsRegCR0 cr0) %{
        // Int-sized CompareAndExchange for nodes whose memory order is acquire or seqcst
        // (see predicate); the cmpxchg is followed by an isync or sync.
 7499   match(Set res (CompareAndExchangeI mem_ptr (Binary src1 src2)));
 7500   predicate(((CompareAndSwapNode*)n)->order() == MemNode::acquire || ((CompareAndSwapNode*)n)->order() == MemNode::seqcst);
 7501   effect(TEMP_DEF res, TEMP cr0);
 7502   format %{ "CMPXCHGW acq $res, $mem_ptr, $src1, $src2; as int" %}
 7503   ins_encode %{
 7504     // CR0 is handed to cmpxchgw as the condition register; noreg is passed after the hint argument.
          // NOTE(review): for an exchange $res presumably receives the value found at $mem_ptr rather
          // than a 'true'/'false' flag -- confirm against MacroAssembler::cmpxchgw.
 7505     __ cmpxchgw(CR0, $res$$Register, $src1$$Register, $src2$$Register, $mem_ptr$$Register,
 7506                 MacroAssembler::MemBarNone, MacroAssembler::cmpxchgx_hint_atomic_update(),
 7507                 noreg, nullptr, true);
          // Trailing barrier: isync when support_IRIW_for_not_multiple_copy_atomic_cpu is set, sync otherwise.
 7508     if (support_IRIW_for_not_multiple_copy_atomic_cpu) {
 7509       __ isync();
 7510     } else {
 7511       // isync would be sufficient in case of CompareAndExchangeAcquire, but we currently don't optimize for that.
 7512       __ sync();
 7513     }
 7514   %}
 7515   ins_pipe(pipe_class_default);
 7516 %}
 7517 
 7518 instruct compareAndExchangeN_regP_regN_regN(iRegNdst res, iRegPdst mem_ptr, iRegNsrc src1, iRegNsrc src2, flagsRegCR0 cr0) %{
        // Narrow-oop CompareAndExchange, encoded with the word-sized cmpxchgw. The predicate selects it
        // when the memory order is neither acquire nor seqcst and the node's barrier_data() is 0;
        // no barrier instruction is emitted after the cmpxchg.
 7519   match(Set res (CompareAndExchangeN mem_ptr (Binary src1 src2)));
 7520   predicate(((CompareAndSwapNode*)n)->order() != MemNode::acquire && ((CompareAndSwapNode*)n)->order() != MemNode::seqcst && n->as_LoadStore()->barrier_data() == 0);
 7521   effect(TEMP_DEF res, TEMP cr0);
 7522   format %{ "CMPXCHGW $res, $mem_ptr, $src1, $src2; as narrow oop" %}
 7523   ins_encode %{
 7524     // CR0 is handed to cmpxchgw as the condition register; noreg is passed after the hint argument.
          // NOTE(review): for an exchange $res presumably receives the value found at $mem_ptr rather
          // than a 'true'/'false' flag -- confirm against MacroAssembler::cmpxchgw.
 7525     __ cmpxchgw(CR0, $res$$Register, $src1$$Register, $src2$$Register, $mem_ptr$$Register,
 7526                 MacroAssembler::MemBarNone, MacroAssembler::cmpxchgx_hint_atomic_update(),
 7527                 noreg, nullptr, true);
 7528   %}
 7529   ins_pipe(pipe_class_default);
 7530 %}
 7531 
 7532 instruct compareAndExchangeN_acq_regP_regN_regN(iRegNdst res, iRegPdst mem_ptr, iRegNsrc src1, iRegNsrc src2, flagsRegCR0 cr0) %{
        // Narrow-oop CompareAndExchange, encoded with the word-sized cmpxchgw. The predicate selects it
        // when the memory order is acquire or seqcst and the node's barrier_data() is 0;
        // the cmpxchg is followed by an isync or sync.
 7533   match(Set res (CompareAndExchangeN mem_ptr (Binary src1 src2)));
 7534   predicate((((CompareAndSwapNode*)n)->order() == MemNode::acquire || ((CompareAndSwapNode*)n)->order() == MemNode::seqcst) && n->as_LoadStore()->barrier_data() == 0);
 7535   effect(TEMP_DEF res, TEMP cr0);
 7536   format %{ "CMPXCHGW acq $res, $mem_ptr, $src1, $src2; as narrow oop" %}
 7537   ins_encode %{
 7538     // CR0 is handed to cmpxchgw as the condition register; noreg is passed after the hint argument.
          // NOTE(review): for an exchange $res presumably receives the value found at $mem_ptr rather
          // than a 'true'/'false' flag -- confirm against MacroAssembler::cmpxchgw.
 7539     __ cmpxchgw(CR0, $res$$Register, $src1$$Register, $src2$$Register, $mem_ptr$$Register,
 7540                 MacroAssembler::MemBarNone, MacroAssembler::cmpxchgx_hint_atomic_update(),
 7541                 noreg, nullptr, true);
          // Trailing barrier: isync when support_IRIW_for_not_multiple_copy_atomic_cpu is set, sync otherwise.
 7542     if (support_IRIW_for_not_multiple_copy_atomic_cpu) {
 7543       __ isync();
 7544     } else {
 7545       // isync would be sufficient in case of CompareAndExchangeAcquire, but we currently don't optimize for that.
 7546       __ sync();
 7547     }
 7548   %}
 7549   ins_pipe(pipe_class_default);
 7550 %}
 7551 
 7552 instruct compareAndExchangeL_regP_regL_regL(iRegLdst res, iRegPdst mem_ptr, iRegLsrc src1, iRegLsrc src2, flagsRegCR0 cr0) %{
        // Long-sized CompareAndExchange. Per the predicate this rule applies when the node's memory
        // order is neither acquire nor seqcst; no barrier instruction is emitted after the cmpxchg.
 7553   match(Set res (CompareAndExchangeL mem_ptr (Binary src1 src2)));
 7554   predicate(((CompareAndSwapNode*)n)->order() != MemNode::acquire && ((CompareAndSwapNode*)n)->order() != MemNode::seqcst);
 7555   effect(TEMP_DEF res, TEMP cr0);
 7556   format %{ "CMPXCHGD $res, $mem_ptr, $src1, $src2; as long" %}
 7557   ins_encode %{
 7558     // CR0 is handed to cmpxchgd as the condition register; noreg is passed after the hint argument.
          // NOTE(review): for an exchange $res presumably receives the value found at $mem_ptr rather
          // than a 'true'/'false' flag -- confirm against MacroAssembler::cmpxchgd.
 7559     __ cmpxchgd(CR0, $res$$Register, $src1$$Register, $src2$$Register, $mem_ptr$$Register,
 7560                 MacroAssembler::MemBarNone, MacroAssembler::cmpxchgx_hint_atomic_update(),
 7561                 noreg, nullptr, true);
 7562   %}
 7563   ins_pipe(pipe_class_default);
 7564 %}
 7565 
 7566 instruct compareAndExchangeL_acq_regP_regL_regL(iRegLdst res, iRegPdst mem_ptr, iRegLsrc src1, iRegLsrc src2, flagsRegCR0 cr0) %{
        // Long-sized CompareAndExchange for nodes whose memory order is acquire or seqcst
        // (see predicate); the cmpxchg is followed by an isync or sync.
 7567   match(Set res (CompareAndExchangeL mem_ptr (Binary src1 src2)));
 7568   predicate(((CompareAndSwapNode*)n)->order() == MemNode::acquire || ((CompareAndSwapNode*)n)->order() == MemNode::seqcst);
 7569   effect(TEMP_DEF res, TEMP cr0);
 7570   format %{ "CMPXCHGD acq $res, $mem_ptr, $src1, $src2; as long" %}
 7571   ins_encode %{
 7572     // CR0 is handed to cmpxchgd as the condition register; noreg is passed after the hint argument.
          // NOTE(review): for an exchange $res presumably receives the value found at $mem_ptr rather
          // than a 'true'/'false' flag -- confirm against MacroAssembler::cmpxchgd.
 7573     __ cmpxchgd(CR0, $res$$Register, $src1$$Register, $src2$$Register, $mem_ptr$$Register,
 7574                 MacroAssembler::MemBarNone, MacroAssembler::cmpxchgx_hint_atomic_update(),
 7575                 noreg, nullptr, true);
          // Trailing barrier: isync when support_IRIW_for_not_multiple_copy_atomic_cpu is set, sync otherwise.
 7576     if (support_IRIW_for_not_multiple_copy_atomic_cpu) {
 7577       __ isync();
 7578     } else {
 7579       // isync would be sufficient in case of CompareAndExchangeAcquire, but we currently don't optimize for that.
 7580       __ sync();
 7581     }
 7582   %}
 7583   ins_pipe(pipe_class_default);
 7584 %}
 7585 
 7586 instruct compareAndExchangeP_regP_regP_regP(iRegPdst res, iRegPdst mem_ptr, iRegPsrc src1, iRegPsrc src2, flagsRegCR0 cr0) %{
        // Pointer CompareAndExchange, encoded with the doubleword cmpxchgd. The predicate selects it
        // when the memory order is neither acquire nor seqcst and the node's barrier_data() is 0;
        // no barrier instruction is emitted after the cmpxchg.
 7587   match(Set res (CompareAndExchangeP mem_ptr (Binary src1 src2)));
 7588   predicate((((CompareAndSwapNode*)n)->order() != MemNode::acquire && ((CompareAndSwapNode*)n)->order() != MemNode::seqcst)
 7589             && n->as_LoadStore()->barrier_data() == 0);
 7590   effect(TEMP_DEF res, TEMP cr0);
 7591   format %{ "CMPXCHGD $res, $mem_ptr, $src1, $src2; as ptr; ptr" %}
 7592   ins_encode %{
 7593     // CR0 is handed to cmpxchgd as the condition register; noreg is passed after the hint argument.
          // NOTE(review): for an exchange $res presumably receives the value found at $mem_ptr rather
          // than a 'true'/'false' flag -- confirm against MacroAssembler::cmpxchgd.
 7594     __ cmpxchgd(CR0, $res$$Register, $src1$$Register, $src2$$Register, $mem_ptr$$Register,
 7595                 MacroAssembler::MemBarNone, MacroAssembler::cmpxchgx_hint_atomic_update(),
 7596                 noreg, nullptr, true);
 7597   %}
 7598   ins_pipe(pipe_class_default);
 7599 %}
 7600 
 7601 instruct compareAndExchangeP_acq_regP_regP_regP(iRegPdst res, iRegPdst mem_ptr, iRegPsrc src1, iRegPsrc src2, flagsRegCR0 cr0) %{
        // Pointer CompareAndExchange, encoded with the doubleword cmpxchgd. The predicate selects it
        // when the memory order is acquire or seqcst and the node's barrier_data() is 0;
        // the cmpxchg is followed by an isync or sync.
 7602   match(Set res (CompareAndExchangeP mem_ptr (Binary src1 src2)));
 7603   predicate((((CompareAndSwapNode*)n)->order() == MemNode::acquire || ((CompareAndSwapNode*)n)->order() == MemNode::seqcst)
 7604             && n->as_LoadStore()->barrier_data() == 0);
 7605   effect(TEMP_DEF res, TEMP cr0);
 7606   format %{ "CMPXCHGD acq $res, $mem_ptr, $src1, $src2; as ptr; ptr" %}
 7607   ins_encode %{
 7608     // CR0 is handed to cmpxchgd as the condition register; noreg is passed after the hint argument.
          // NOTE(review): for an exchange $res presumably receives the value found at $mem_ptr rather
          // than a 'true'/'false' flag -- confirm against MacroAssembler::cmpxchgd.
 7609     __ cmpxchgd(CR0, $res$$Register, $src1$$Register, $src2$$Register, $mem_ptr$$Register,
 7610                 MacroAssembler::MemBarNone, MacroAssembler::cmpxchgx_hint_atomic_update(),
 7611                 noreg, nullptr, true);
          // Trailing barrier: isync when support_IRIW_for_not_multiple_copy_atomic_cpu is set, sync otherwise.
 7612     if (support_IRIW_for_not_multiple_copy_atomic_cpu) {
 7613       __ isync();
 7614     } else {
 7615       // isync would be sufficient in case of CompareAndExchangeAcquire, but we currently don't optimize for that.
 7616       __ sync();
 7617     }
 7618   %}
 7619   ins_pipe(pipe_class_default);
 7620 %}
 7621 
 7622 // Special RMW
 7623 
 7624 instruct getAndAddB(iRegIdst res, iRegPdst mem_ptr, iRegIsrc src, flagsRegCR0 cr0) %{
        // Implements the GetAndAddB node through MacroAssembler::getandaddb, always followed by
        // isync or sync. There is no memory-order predicate on this rule.
 7625   match(Set res (GetAndAddB mem_ptr src));
 7626   effect(TEMP_DEF res, TEMP cr0);
 7627   format %{ "GetAndAddB $res, $mem_ptr, $src" %}
 7628   ins_encode %{
          // R0 and two noreg placeholders are passed as additional register arguments.
          // NOTE(review): presumably scratch registers for the sub-word variant -- confirm in MacroAssembler.
 7629     __ getandaddb($res$$Register, $src$$Register, $mem_ptr$$Register,
 7630                   R0, noreg, noreg, MacroAssembler::cmpxchgx_hint_atomic_update());
          // Trailing barrier: isync when support_IRIW_for_not_multiple_copy_atomic_cpu is set, sync otherwise.
 7631     if (support_IRIW_for_not_multiple_copy_atomic_cpu) {
 7632       __ isync();
 7633     } else {
 7634       __ sync();
 7635     }
 7636   %}
 7637   ins_pipe(pipe_class_default);
 7638 %}
 7639 
 7640 instruct getAndAddS(iRegIdst res, iRegPdst mem_ptr, iRegIsrc src, flagsRegCR0 cr0) %{
        // Implements the GetAndAddS node through MacroAssembler::getandaddh, always followed by
        // isync or sync. There is no memory-order predicate on this rule.
 7641   match(Set res (GetAndAddS mem_ptr src));
 7642   effect(TEMP_DEF res, TEMP cr0);
 7643   format %{ "GetAndAddS $res, $mem_ptr, $src" %}
 7644   ins_encode %{
          // R0 and two noreg placeholders are passed as additional register arguments.
          // NOTE(review): presumably scratch registers for the sub-word variant -- confirm in MacroAssembler.
 7645     __ getandaddh($res$$Register, $src$$Register, $mem_ptr$$Register,
 7646                   R0, noreg, noreg, MacroAssembler::cmpxchgx_hint_atomic_update());
          // Trailing barrier: isync when support_IRIW_for_not_multiple_copy_atomic_cpu is set, sync otherwise.
 7647     if (support_IRIW_for_not_multiple_copy_atomic_cpu) {
 7648       __ isync();
 7649     } else {
 7650       __ sync();
 7651     }
 7652   %}
 7653   ins_pipe(pipe_class_default);
 7654 %}
 7655 
 7656 
 7657 instruct getAndAddI(iRegIdst res, iRegPdst mem_ptr, iRegIsrc src, flagsRegCR0 cr0) %{
        // Implements the GetAndAddI node through MacroAssembler::getandaddw (R0 is passed as an
        // extra register argument), always followed by isync or sync.
 7658   match(Set res (GetAndAddI mem_ptr src));
 7659   effect(TEMP_DEF res, TEMP cr0);
 7660   format %{ "GetAndAddI $res, $mem_ptr, $src" %}
 7661   ins_encode %{
 7662     __ getandaddw($res$$Register, $src$$Register, $mem_ptr$$Register,
 7663                   R0, MacroAssembler::cmpxchgx_hint_atomic_update());
          // Trailing barrier: isync when support_IRIW_for_not_multiple_copy_atomic_cpu is set, sync otherwise.
 7664     if (support_IRIW_for_not_multiple_copy_atomic_cpu) {
 7665       __ isync();
 7666     } else {
 7667       __ sync();
 7668     }
 7669   %}
 7670   ins_pipe(pipe_class_default);
 7671 %}
 7672 
 7673 instruct getAndAddL(iRegLdst res, iRegPdst mem_ptr, iRegLsrc src, flagsRegCR0 cr0) %{
        // Implements the GetAndAddL node through MacroAssembler::getandaddd (R0 is passed as an
        // extra register argument), always followed by isync or sync.
 7674   match(Set res (GetAndAddL mem_ptr src));
 7675   effect(TEMP_DEF res, TEMP cr0);
 7676   format %{ "GetAndAddL $res, $mem_ptr, $src" %}
 7677   ins_encode %{
 7678     __ getandaddd($res$$Register, $src$$Register, $mem_ptr$$Register,
 7679                   R0, MacroAssembler::cmpxchgx_hint_atomic_update());
          // Trailing barrier: isync when support_IRIW_for_not_multiple_copy_atomic_cpu is set, sync otherwise.
 7680     if (support_IRIW_for_not_multiple_copy_atomic_cpu) {
 7681       __ isync();
 7682     } else {
 7683       __ sync();
 7684     }
 7685   %}
 7686   ins_pipe(pipe_class_default);
 7687 %}
 7688 
 7689 instruct getAndSetB(iRegIdst res, iRegPdst mem_ptr, iRegIsrc src, flagsRegCR0 cr0) %{
        // Implements the GetAndSetB node through MacroAssembler::getandsetb, always followed by
        // isync or sync. There is no memory-order predicate on this rule.
 7690   match(Set res (GetAndSetB mem_ptr src));
 7691   effect(TEMP_DEF res, TEMP cr0);
 7692   format %{ "GetAndSetB $res, $mem_ptr, $src" %}
 7693   ins_encode %{
          // Three noreg placeholders are passed as additional register arguments.
          // NOTE(review): presumably optional scratch registers for the sub-word variant -- confirm in MacroAssembler.
 7694     __ getandsetb($res$$Register, $src$$Register, $mem_ptr$$Register,
 7695                   noreg, noreg, noreg, MacroAssembler::cmpxchgx_hint_atomic_update());
          // Trailing barrier: isync when support_IRIW_for_not_multiple_copy_atomic_cpu is set, sync otherwise.
 7696     if (support_IRIW_for_not_multiple_copy_atomic_cpu) {
 7697       __ isync();
 7698     } else {
 7699       __ sync();
 7700     }
 7701   %}
 7702   ins_pipe(pipe_class_default);
 7703 %}
 7704 
 7705 instruct getAndSetS(iRegIdst res, iRegPdst mem_ptr, iRegIsrc src, flagsRegCR0 cr0) %{
        // Implements the GetAndSetS node through MacroAssembler::getandseth, always followed by
        // isync or sync. There is no memory-order predicate on this rule.
 7706   match(Set res (GetAndSetS mem_ptr src));
 7707   effect(TEMP_DEF res, TEMP cr0);
 7708   format %{ "GetAndSetS $res, $mem_ptr, $src" %}
 7709   ins_encode %{
          // Three noreg placeholders are passed as additional register arguments.
          // NOTE(review): presumably optional scratch registers for the sub-word variant -- confirm in MacroAssembler.
 7710     __ getandseth($res$$Register, $src$$Register, $mem_ptr$$Register,
 7711                   noreg, noreg, noreg, MacroAssembler::cmpxchgx_hint_atomic_update());
          // Trailing barrier: isync when support_IRIW_for_not_multiple_copy_atomic_cpu is set, sync otherwise.
 7712     if (support_IRIW_for_not_multiple_copy_atomic_cpu) {
 7713       __ isync();
 7714     } else {
 7715       __ sync();
 7716     }
 7717   %}
 7718   ins_pipe(pipe_class_default);
 7719 %}
 7720 
 7721 
 7722 instruct getAndSetI(iRegIdst res, iRegPdst mem_ptr, iRegIsrc src, flagsRegCR0 cr0) %{
        // Implements the GetAndSetI node through MacroAssembler::getandsetw, always followed by
        // isync or sync. There is no memory-order predicate on this rule.
 7723   match(Set res (GetAndSetI mem_ptr src));
 7724   effect(TEMP_DEF res, TEMP cr0);
 7725   format %{ "GetAndSetI $res, $mem_ptr, $src" %}
 7726   ins_encode %{
 7727     __ getandsetw($res$$Register, $src$$Register, $mem_ptr$$Register,
 7728                   MacroAssembler::cmpxchgx_hint_atomic_update());
          // Trailing barrier: isync when support_IRIW_for_not_multiple_copy_atomic_cpu is set, sync otherwise.
 7729     if (support_IRIW_for_not_multiple_copy_atomic_cpu) {
 7730       __ isync();
 7731     } else {
 7732       __ sync();
 7733     }
 7734   %}
 7735   ins_pipe(pipe_class_default);
 7736 %}
 7737 
 7738 instruct getAndSetL(iRegLdst res, iRegPdst mem_ptr, iRegLsrc src, flagsRegCR0 cr0) %{
        // Implements the GetAndSetL node through MacroAssembler::getandsetd, always followed by
        // isync or sync. There is no memory-order predicate on this rule.
 7739   match(Set res (GetAndSetL mem_ptr src));
 7740   effect(TEMP_DEF res, TEMP cr0);
 7741   format %{ "GetAndSetL $res, $mem_ptr, $src" %}
 7742   ins_encode %{
 7743     __ getandsetd($res$$Register, $src$$Register, $mem_ptr$$Register,
 7744                   MacroAssembler::cmpxchgx_hint_atomic_update());
          // Trailing barrier: isync when support_IRIW_for_not_multiple_copy_atomic_cpu is set, sync otherwise.
 7745     if (support_IRIW_for_not_multiple_copy_atomic_cpu) {
 7746       __ isync();
 7747     } else {
 7748       __ sync();
 7749     }
 7750   %}
 7751   ins_pipe(pipe_class_default);
 7752 %}
 7753 
 7754 instruct getAndSetP(iRegPdst res, iRegPdst mem_ptr, iRegPsrc src, flagsRegCR0 cr0) %{
        // Implements the GetAndSetP node with the doubleword getandsetd, always followed by isync or
        // sync. Only selected when the node's barrier_data() is 0 (see predicate).
 7755   match(Set res (GetAndSetP mem_ptr src));
 7756   predicate(n->as_LoadStore()->barrier_data() == 0);
 7757   effect(TEMP_DEF res, TEMP cr0);
 7758   format %{ "GetAndSetP $res, $mem_ptr, $src" %}
 7759   ins_encode %{
 7760     __ getandsetd($res$$Register, $src$$Register, $mem_ptr$$Register,
 7761                   MacroAssembler::cmpxchgx_hint_atomic_update());
          // Trailing barrier: isync when support_IRIW_for_not_multiple_copy_atomic_cpu is set, sync otherwise.
 7762     if (support_IRIW_for_not_multiple_copy_atomic_cpu) {
 7763       __ isync();
 7764     } else {
 7765       __ sync();
 7766     }
 7767   %}
 7768   ins_pipe(pipe_class_default);
 7769 %}
 7770 
 7771 instruct getAndSetN(iRegNdst res, iRegPdst mem_ptr, iRegNsrc src, flagsRegCR0 cr0) %{
        // Implements the GetAndSetN node with the word-sized getandsetw, always followed by isync or
        // sync. Only selected when the node's barrier_data() is 0 (see predicate).
 7772   match(Set res (GetAndSetN mem_ptr src));
 7773   predicate(n->as_LoadStore()->barrier_data() == 0);
 7774   effect(TEMP_DEF res, TEMP cr0);
 7775   format %{ "GetAndSetN $res, $mem_ptr, $src" %}
 7776   ins_encode %{
 7777     __ getandsetw($res$$Register, $src$$Register, $mem_ptr$$Register,
 7778                   MacroAssembler::cmpxchgx_hint_atomic_update());
          // Trailing barrier: isync when support_IRIW_for_not_multiple_copy_atomic_cpu is set, sync otherwise.
 7779     if (support_IRIW_for_not_multiple_copy_atomic_cpu) {
 7780       __ isync();
 7781     } else {
 7782       __ sync();
 7783     }
 7784   %}
 7785   ins_pipe(pipe_class_default);
 7786 %}
 7787 
 7788 //----------Arithmetic Instructions--------------------------------------------
 7789 // Addition Instructions
 7790 
 7791 // Register Addition
 7792 instruct addI_reg_reg(iRegIdst dst, iRegIsrc_iRegL2Isrc src1, iRegIsrc_iRegL2Isrc src2) %{
        // Matches AddI of two register operands and emits a single 4-byte ADD: dst = src1 + src2.
 7793   match(Set dst (AddI src1 src2));
 7794   format %{ "ADD     $dst, $src1, $src2" %}
 7795   size(4);
 7796   ins_encode %{
 7797     __ add($dst$$Register, $src1$$Register, $src2$$Register);
 7798   %}
 7799   ins_pipe(pipe_class_default);
 7800 %}
 7801 
 7802 // Expand does not work with above instruct. (??)
 7803 instruct addI_reg_reg_2(iRegIdst dst, iRegIsrc src1, iRegIsrc src2) %{
        // Same ADD encoding as addI_reg_reg but with plain iRegIsrc sources and no match rule,
        // so the matcher never selects it directly; tree_addI_addI_addI_reg_reg_Ex uses it in its expand.
 7804   // no match-rule
 7805   effect(DEF dst, USE src1, USE src2);
 7806   format %{ "ADD     $dst, $src1, $src2" %}
 7807   size(4);
 7808   ins_encode %{
 7809     __ add($dst$$Register, $src1$$Register, $src2$$Register);
 7810   %}
 7811   ins_pipe(pipe_class_default);
 7812 %}
 7813 
 7814 instruct tree_addI_addI_addI_reg_reg_Ex(iRegIdst dst, iRegIsrc src1, iRegIsrc src2, iRegIsrc src3, iRegIsrc src4) %{
        // Matches the left-leaning chain ((src1 + src2) + src3) + src4 and expands it into the
        // balanced form (src1 + src2) + (src3 + src4), whose two inner adds do not depend on each other.
 7815   match(Set dst (AddI (AddI (AddI src1 src2) src3) src4));
 7816   ins_cost(DEFAULT_COST*3);
 7817 
 7818   expand %{
 7819     // FIXME: we should do this in the ideal world.
 7820     iRegIdst tmp1;
 7821     iRegIdst tmp2;
 7822     addI_reg_reg(tmp1, src1, src2);
 7823     addI_reg_reg_2(tmp2, src3, src4); // Adlc complains about addI_reg_reg.
 7824     addI_reg_reg(dst, tmp1, tmp2);
 7825   %}
 7826 %}
 7827 
 7828 // Immediate Addition
 7829 instruct addI_reg_imm16(iRegIdst dst, iRegIsrc src1, immI16 src2) %{
        // AddI of a register and a constant of operand class immI16; the constant is used directly
        // as the immediate of a 4-byte ADDI.
 7830   match(Set dst (AddI src1 src2));
 7831   format %{ "ADDI    $dst, $src1, $src2" %}
 7832   size(4);
 7833   ins_encode %{
 7834     __ addi($dst$$Register, $src1$$Register, $src2$$constant);
 7835   %}
 7836   ins_pipe(pipe_class_default);
 7837 %}
 7838 
 7839 // Immediate Addition with 16-bit shifted operand
 7840 instruct addI_reg_immhi16(iRegIdst dst, iRegIsrc src1, immIhi16 src2) %{
        // AddI of a register and a constant of operand class immIhi16; ADDIS receives the constant
        // shifted right by 16 as its immediate.
 7841   match(Set dst (AddI src1 src2));
 7842   format %{ "ADDIS   $dst, $src1, $src2" %}
 7843   size(4);
 7844   ins_encode %{
 7845     __ addis($dst$$Register, $src1$$Register, ($src2$$constant)>>16);
 7846   %}
 7847   ins_pipe(pipe_class_default);
 7848 %}
 7849 
 7850 // Immediate Addition using prefixed addi
 7851 instruct addI_reg_imm32(iRegIdst dst, iRegIsrc src1, immI32 src2) %{
        // AddI with an immI32 constant via the 8-byte PADDI. Enabled only when
        // PowerArchitecturePPC64 >= 10 and costed one above DEFAULT_COST.
 7852   match(Set dst (AddI src1 src2));
 7853   predicate(PowerArchitecturePPC64 >= 10);
 7854   ins_cost(DEFAULT_COST+1);
 7855   format %{ "PADDI   $dst, $src1, $src2" %}
 7856   size(8);
 7857   ins_encode %{
          // (pc & 0x3c) == 0x3c means pc is 60 bytes into a 64-byte block, so the 8-byte
          // instruction would straddle the 64-byte boundary; the assert rejects that placement.
 7858     assert( ((intptr_t)(__ pc()) & 0x3c) != 0x3c, "Bad alignment for prefixed instruction at " INTPTR_FORMAT, (intptr_t)(__ pc()));
 7859     __ paddi($dst$$Register, $src1$$Register, $src2$$constant);
 7860   %}
 7861   ins_pipe(pipe_class_default);
        // NOTE(review): ins_alignment(2) presumably requests the placement the assert above relies on -- confirm.
 7862   ins_alignment(2);
 7863 %}
 7864 
 7865 // Long Addition
 7866 instruct addL_reg_reg(iRegLdst dst, iRegLsrc src1, iRegLsrc src2) %{
        // Matches AddL of two long registers and emits a single 4-byte ADD: dst = src1 + src2.
 7867   match(Set dst (AddL src1 src2));
 7868   format %{ "ADD     $dst, $src1, $src2 \t// long" %}
 7869   size(4);
 7870   ins_encode %{
 7871     __ add($dst$$Register, $src1$$Register, $src2$$Register);
 7872   %}
 7873   ins_pipe(pipe_class_default);
 7874 %}
 7875 
 7876 // Expand does not work with above instruct. (??)
 7877 instruct addL_reg_reg_2(iRegLdst dst, iRegLsrc src1, iRegLsrc src2) %{
        // Same ADD encoding as addL_reg_reg but without a match rule, so the matcher never selects
        // it directly; tree_addL_addL_addL_reg_reg_Ex uses it in its expand.
 7878   // no match-rule
 7879   effect(DEF dst, USE src1, USE src2);
 7880   format %{ "ADD     $dst, $src1, $src2 \t// long" %}
 7881   size(4);
 7882   ins_encode %{
 7883     __ add($dst$$Register, $src1$$Register, $src2$$Register);
 7884   %}
 7885   ins_pipe(pipe_class_default);
 7886 %}
 7887 
 7888 instruct tree_addL_addL_addL_reg_reg_Ex(iRegLdst dst, iRegLsrc src1, iRegLsrc src2, iRegLsrc src3, iRegLsrc src4) %{
        // Matches the left-leaning chain ((src1 + src2) + src3) + src4 and expands it into the
        // balanced form (src1 + src2) + (src3 + src4), whose two inner adds do not depend on each other.
 7889   match(Set dst (AddL (AddL (AddL src1 src2) src3) src4));
 7890   ins_cost(DEFAULT_COST*3);
 7891 
 7892   expand %{
 7893     // FIXME: we should do this in the ideal world.
 7894     iRegLdst tmp1;
 7895     iRegLdst tmp2;
 7896     addL_reg_reg(tmp1, src1, src2);
 7897     addL_reg_reg_2(tmp2, src3, src4); // Adlc complains about addL_reg_reg.
 7898     addL_reg_reg(dst, tmp1, tmp2);
 7899   %}
 7900 %}
 7901 
 7902 // AddL + ConvL2I.
 7903 instruct addI_regL_regL(iRegIdst dst, iRegLsrc src1, iRegLsrc src2) %{
        // Folds ConvL2I(AddL src1 src2) into one ADD that writes an int destination register;
        // no separate conversion instruction is emitted.
 7904   match(Set dst (ConvL2I (AddL src1 src2)));
 7905 
 7906   format %{ "ADD     $dst, $src1, $src2 \t// long + l2i" %}
 7907   size(4);
 7908   ins_encode %{
 7909     __ add($dst$$Register, $src1$$Register, $src2$$Register);
 7910   %}
 7911   ins_pipe(pipe_class_default);
 7912 %}
 7913 
 7914 // No constant pool entries required.
 7915 instruct addL_reg_imm16(iRegLdst dst, iRegLsrc src1, immL16 src2) %{
        // AddL of a long register and a constant of operand class immL16; the constant is used
        // directly as the immediate of a 4-byte ADDI.
 7916   match(Set dst (AddL src1 src2));
 7917 
 7918   format %{ "ADDI    $dst, $src1, $src2" %}
 7919   size(4);
 7920   ins_encode %{
 7921     __ addi($dst$$Register, $src1$$Register, $src2$$constant);
 7922   %}
 7923   ins_pipe(pipe_class_default);
 7924 %}
 7925 
 7926 // Long Immediate Addition with 16-bit shifted operand.
 7927 // No constant pool entries required.
 7928 instruct addL_reg_immhi16(iRegLdst dst, iRegLsrc src1, immL32hi16 src2) %{
        // AddL of a long register and a constant of operand class immL32hi16; ADDIS receives the
        // constant shifted right by 16 as its immediate.
 7929   match(Set dst (AddL src1 src2));
 7930 
 7931   format %{ "ADDIS   $dst, $src1, $src2" %}
 7932   size(4);
 7933   ins_encode %{
 7934     __ addis($dst$$Register, $src1$$Register, ($src2$$constant)>>16);
 7935   %}
 7936   ins_pipe(pipe_class_default);
 7937 %}
 7938 
 7939 // Long Immediate Addition using prefixed addi
 7940 // No constant pool entries required.
 7941 instruct addL_reg_imm34(iRegLdst dst, iRegLsrc src1, immL34 src2) %{
        // AddL with an immL34 constant via the 8-byte PADDI. Enabled only when
        // PowerArchitecturePPC64 >= 10 and costed one above DEFAULT_COST.
 7942   match(Set dst (AddL src1 src2));
 7943   predicate(PowerArchitecturePPC64 >= 10);
 7944   ins_cost(DEFAULT_COST+1);
 7945 
 7946   format %{ "PADDI   $dst, $src1, $src2" %}
 7947   size(8);
 7948   ins_encode %{
          // (pc & 0x3c) == 0x3c means pc is 60 bytes into a 64-byte block, so the 8-byte
          // instruction would straddle the 64-byte boundary; the assert rejects that placement.
 7949     assert( ((intptr_t)(__ pc()) & 0x3c) != 0x3c, "Bad alignment for prefixed instruction at " INTPTR_FORMAT, (intptr_t)(__ pc()));
 7950     __ paddi($dst$$Register, $src1$$Register, $src2$$constant);
 7951   %}
 7952   ins_pipe(pipe_class_default);
        // NOTE(review): ins_alignment(2) presumably requests the placement the assert above relies on -- confirm.
 7953   ins_alignment(2);
 7954 %}
 7955 
 7956 // Pointer Register Addition
 7957 instruct addP_reg_reg(iRegPdst dst, iRegP_N2P src1, iRegLsrc src2) %{
        // Matches AddP of a pointer-class operand and a long register offset; emits one 4-byte ADD.
 7958   match(Set dst (AddP src1 src2));
 7959   format %{ "ADD     $dst, $src1, $src2" %}
 7960   size(4);
 7961   ins_encode %{
 7962     __ add($dst$$Register, $src1$$Register, $src2$$Register);
 7963   %}
 7964   ins_pipe(pipe_class_default);
 7965 %}
 7966 
 7967 // Pointer Immediate Addition
 7968 // No constant pool entries required.
 7969 instruct addP_reg_imm16(iRegPdst dst, iRegP_N2P src1, immL16 src2) %{
        // AddP of a pointer-class operand and a constant of operand class immL16; the constant is
        // used directly as the immediate of a 4-byte ADDI.
 7970   match(Set dst (AddP src1 src2));
 7971 
 7972   format %{ "ADDI    $dst, $src1, $src2" %}
 7973   size(4);
 7974   ins_encode %{
 7975     __ addi($dst$$Register, $src1$$Register, $src2$$constant);
 7976   %}
 7977   ins_pipe(pipe_class_default);
 7978 %}
 7979 
 7980 // Pointer Immediate Addition with 16-bit shifted operand.
 7981 // No constant pool entries required.
 7982 instruct addP_reg_immhi16(iRegPdst dst, iRegP_N2P src1, immL32hi16 src2) %{
        // AddP of a pointer-class operand and a constant of operand class immL32hi16; ADDIS
        // receives the constant shifted right by 16 as its immediate.
 7983   match(Set dst (AddP src1 src2));
 7984 
 7985   format %{ "ADDIS   $dst, $src1, $src2" %}
 7986   size(4);
 7987   ins_encode %{
 7988     __ addis($dst$$Register, $src1$$Register, ($src2$$constant)>>16);
 7989   %}
 7990   ins_pipe(pipe_class_default);
 7991 %}
 7992 
 7993 // Pointer Immediate Addition using prefixed addi
 7994 // No constant pool entries required.
 7995 instruct addP_reg_imm34(iRegPdst dst, iRegP_N2P src1, immL34 src2) %{
        // AddP with an immL34 constant via the 8-byte PADDI. Enabled only when
        // PowerArchitecturePPC64 >= 10 and costed one above DEFAULT_COST.
 7996   match(Set dst (AddP src1 src2));
 7997   predicate(PowerArchitecturePPC64 >= 10);
 7998   ins_cost(DEFAULT_COST+1);
 7999 
 8000   format %{ "PADDI    $dst, $src1, $src2" %}
 8001   size(8);
 8002   ins_encode %{
          // (pc & 0x3c) == 0x3c means pc is 60 bytes into a 64-byte block, so the 8-byte
          // instruction would straddle the 64-byte boundary; the assert rejects that placement.
 8003     assert( ((intptr_t)(__ pc()) & 0x3c) != 0x3c, "Bad alignment for prefixed instruction at " INTPTR_FORMAT, (intptr_t)(__ pc()));
 8004     __ paddi($dst$$Register, $src1$$Register, $src2$$constant);
 8005   %}
 8006   ins_pipe(pipe_class_default);
        // NOTE(review): ins_alignment(2) presumably requests the placement the assert above relies on -- confirm.
 8007   ins_alignment(2);
 8008 %}
 8009 
 8010 //---------------------
 8011 // Subtraction Instructions
 8012 
 8013 // Register Subtraction
 8014 instruct subI_reg_reg(iRegIdst dst, iRegIsrc src1, iRegIsrc src2) %{
        // Matches SubI src1 src2. SUBF ("subtract from") takes its operands in swapped order,
        // so src2 is passed before src1 to obtain dst = src1 - src2.
 8015   match(Set dst (SubI src1 src2));
 8016   format %{ "SUBF    $dst, $src2, $src1" %}
 8017   size(4);
 8018   ins_encode %{
 8019     __ subf($dst$$Register, $src2$$Register, $src1$$Register);
 8020   %}
 8021   ins_pipe(pipe_class_default);
 8022 %}
 8023 
 8024 // Immediate Subtraction
 8025 // Immediate Subtraction: The compiler converts "x-c0" into "x+ -c0" (see SubLNode::Ideal),
 8026 // Don't try to use addi with - $src2$$constant since it can overflow when $src2$$constant == minI16.
 8027 
 8028 // SubI from constant (using subfic).
 8029 instruct subI_imm16_reg(iRegIdst dst, immI16 src1, iRegIsrc src2) %{
        // Matches SubI with a constant minuend of operand class immI16: dst = src1 - src2.
        // Encoded as SUBFIC, which takes the register first and the immediate second; the format
        // string names the instruction that is actually emitted, in the same operand order.
 8030   match(Set dst (SubI src1 src2));
 8031   format %{ "SUBFIC  $dst, $src2, $src1" %}
 8032 
 8033   size(4);
 8034   ins_encode %{
 8035     __ subfic($dst$$Register, $src2$$Register, $src1$$constant);
 8036   %}
 8037   ins_pipe(pipe_class_default);
 8038 %}
 8039 
 8040 // Turn the sign-bit of an integer into a 32-bit mask, 0x0...0 for
 8041 // positive integers and 0xF...F for negative ones.
 8042 instruct signmask32I_regI(iRegIdst dst, iRegIsrc src) %{
        // Helper without a match rule and with predicate(false): it is only instantiated from
        // expand blocks (absI_reg_Ex uses it). Emits SRAWI by 31 (0x1f).
 8043   // no match-rule, false predicate
 8044   effect(DEF dst, USE src);
 8045   predicate(false);
 8046 
 8047   format %{ "SRAWI   $dst, $src, #31" %}
 8048   size(4);
 8049   ins_encode %{
 8050     __ srawi($dst$$Register, $src$$Register, 0x1f);
 8051   %}
 8052   ins_pipe(pipe_class_default);
 8053 %}
 8054 
 8055 instruct absI_reg_Ex(iRegIdst dst, iRegIsrc src) %{
        // AbsI expanded into three instructions: tmp1 = sign mask of src, tmp2 = tmp1 xor src,
        // dst = tmp2 - tmp1. xorI_reg_reg is defined elsewhere in this file.
 8056   match(Set dst (AbsI src));
 8057   ins_cost(DEFAULT_COST*3);
 8058 
 8059   expand %{
 8060     iRegIdst tmp1;
 8061     iRegIdst tmp2;
 8062     signmask32I_regI(tmp1, src);
 8063     xorI_reg_reg(tmp2, tmp1, src);
 8064     subI_reg_reg(dst, tmp2, tmp1);
 8065   %}
 8066 %}
 8067 
 8068 instruct negI_regI(iRegIdst dst, immI_0 zero, iRegIsrc src2) %{
        // Matches SubI with an immI_0 constant as minuend (0 - src2) and emits a single NEG.
        // The 'zero' operand only takes part in matching; it is not used by the encoding.
 8069   match(Set dst (SubI zero src2));
 8070   format %{ "NEG     $dst, $src2" %}
 8071   size(4);
 8072   ins_encode %{
 8073     __ neg($dst$$Register, $src2$$Register);
 8074   %}
 8075   ins_pipe(pipe_class_default);
 8076 %}
 8077 
 8078 // Long subtraction
 8079 instruct subL_reg_reg(iRegLdst dst, iRegLsrc src1, iRegLsrc src2) %{
        // Matches SubL src1 src2. SUBF ("subtract from") takes its operands in swapped order,
        // so src2 is passed before src1 to obtain dst = src1 - src2.
 8080   match(Set dst (SubL src1 src2));
 8081   format %{ "SUBF    $dst, $src2, $src1 \t// long" %}
 8082   size(4);
 8083   ins_encode %{
 8084     __ subf($dst$$Register, $src2$$Register, $src1$$Register);
 8085   %}
 8086   ins_pipe(pipe_class_default);
 8087 %}
 8088 
 8089 // SubL + convL2I.
 8090 instruct subI_regL_regL(iRegIdst dst, iRegLsrc src1, iRegLsrc src2) %{
        // Folds ConvL2I(SubL src1 src2) into one SUBF that writes an int destination register;
        // no separate conversion instruction is emitted.
 8091   match(Set dst (ConvL2I (SubL src1 src2)));
 8092 
 8093   format %{ "SUBF    $dst, $src2, $src1 \t// long + l2i" %}
 8094   size(4);
 8095   ins_encode %{
 8096     __ subf($dst$$Register, $src2$$Register, $src1$$Register);
 8097   %}
 8098   ins_pipe(pipe_class_default);
 8099 %}
 8100 
 8101 // Turn the sign-bit of a long into a 64-bit mask, 0x0...0 for
 8102 // positive longs and 0xF...F for negative ones.
 8103 instruct signmask64I_regL(iRegIdst dst, iRegLsrc src) %{
        // Helper without a match rule and with predicate(false), so it can only be instantiated
        // from expand blocks. Emits SRADI by 63 (0x3f); the destination is an int register here,
        // unlike signmask64L_regL.
 8104   // no match-rule, false predicate
 8105   effect(DEF dst, USE src);
 8106   predicate(false);
 8107 
 8108   format %{ "SRADI   $dst, $src, #63" %}
 8109   size(4);
 8110   ins_encode %{
 8111     __ sradi($dst$$Register, $src$$Register, 0x3f);
 8112   %}
 8113   ins_pipe(pipe_class_default);
 8114 %}
 8115 
 8116 // Turn the sign-bit of a long into a 64-bit mask, 0x0...0 for
 8117 // positive longs and 0xF...F for negative ones.
 8118 instruct signmask64L_regL(iRegLdst dst, iRegLsrc src) %{
        // Helper without a match rule and with predicate(false): it is only instantiated from
        // expand blocks (absL_reg_Ex uses it). Emits SRADI by 63 (0x3f) into a long register.
 8119   // no match-rule, false predicate
 8120   effect(DEF dst, USE src);
 8121   predicate(false);
 8122 
 8123   format %{ "SRADI   $dst, $src, #63" %}
 8124   size(4);
 8125   ins_encode %{
 8126     __ sradi($dst$$Register, $src$$Register, 0x3f);
 8127   %}
 8128   ins_pipe(pipe_class_default);
 8129 %}
 8130 
 8131 instruct absL_reg_Ex(iRegLdst dst, iRegLsrc src) %{
        // AbsL expanded into three instructions: tmp1 = sign mask of src, tmp2 = tmp1 xor src,
        // dst = tmp2 - tmp1. xorL_reg_reg is defined elsewhere in this file.
 8132   match(Set dst (AbsL src));
 8133   ins_cost(DEFAULT_COST*3);
 8134 
 8135   expand %{
 8136     iRegLdst tmp1;
 8137     iRegLdst tmp2;
 8138     signmask64L_regL(tmp1, src);
 8139     xorL_reg_reg(tmp2, tmp1, src);
 8140     subL_reg_reg(dst, tmp2, tmp1);
 8141   %}
 8142 %}
 8143 
 8144 // Long negation
 8145 instruct negL_reg_reg(iRegLdst dst, immL_0 zero, iRegLsrc src2) %{
        // Matches SubL with an immL_0 constant as minuend (0 - src2) and emits a single NEG.
        // The 'zero' operand only takes part in matching; it is not used by the encoding.
 8146   match(Set dst (SubL zero src2));
 8147   format %{ "NEG     $dst, $src2 \t// long" %}
 8148   size(4);
 8149   ins_encode %{
 8150     __ neg($dst$$Register, $src2$$Register);
 8151   %}
 8152   ins_pipe(pipe_class_default);
 8153 %}
 8154 
 8155 // NegL + ConvL2I.
 8156 instruct negI_con0_regL(iRegIdst dst, immL_0 zero, iRegLsrc src2) %{
        // Folds ConvL2I(SubL 0 src2) into one NEG that writes an int destination register;
        // no separate conversion instruction is emitted.
 8157   match(Set dst (ConvL2I (SubL zero src2)));
 8158 
 8159   format %{ "NEG     $dst, $src2 \t// long + l2i" %}
 8160   size(4);
 8161   ins_encode %{
 8162     __ neg($dst$$Register, $src2$$Register);
 8163   %}
 8164   ins_pipe(pipe_class_default);
 8165 %}
 8166 
 8167 // Multiplication Instructions
 8168 // Integer Multiplication
 8169 
 8170 // Register Multiplication
 8171 instruct mulI_reg_reg(iRegIdst dst, iRegIsrc src1, iRegIsrc src2) %{
        // Matches MulI of two int registers and emits a single 4-byte MULLW.
 8172   match(Set dst (MulI src1 src2));
 8173   ins_cost(DEFAULT_COST);
 8174 
 8175   format %{ "MULLW   $dst, $src1, $src2" %}
 8176   size(4);
 8177   ins_encode %{
 8178     __ mullw($dst$$Register, $src1$$Register, $src2$$Register);
 8179   %}
 8180   ins_pipe(pipe_class_default);
 8181 %}
 8182 
 8183 // Immediate Multiplication
 8184 instruct mulI_reg_imm16(iRegIdst dst, iRegIsrc src1, immI16 src2) %{
        // Matches MulI of an int register and a constant of operand class immI16; the constant
        // is used directly as the immediate of a 4-byte MULLI.
 8185   match(Set dst (MulI src1 src2));
 8186   ins_cost(DEFAULT_COST);
 8187 
 8188   format %{ "MULLI   $dst, $src1, $src2" %}
 8189   size(4);
 8190   ins_encode %{
 8191     __ mulli($dst$$Register, $src1$$Register, $src2$$constant);
 8192   %}
 8193   ins_pipe(pipe_class_default);
 8194 %}
 8195 
 8196 instruct mulL_reg_reg(iRegLdst dst, iRegLsrc src1, iRegLsrc src2) %{
        // Matches MulL of two long registers and emits a single 4-byte MULLD.
        // The format string separates all three operands with commas, like the other rules here.
 8197   match(Set dst (MulL src1 src2));
 8198   ins_cost(DEFAULT_COST);
 8199 
 8200   format %{ "MULLD   $dst, $src1, $src2 \t// long" %}
 8201   size(4);
 8202   ins_encode %{
 8203     __ mulld($dst$$Register, $src1$$Register, $src2$$Register);
 8204   %}
 8205   ins_pipe(pipe_class_default);
 8206 %}
 8207 
 8208 // Multiply high for optimized long division by constant.
 8209 instruct mulHighL_reg_reg(iRegLdst dst, iRegLsrc src1, iRegLsrc src2) %{
        // Matches MulHiL of two long registers and emits a single 4-byte MULHD.
        // The format string separates all three operands with commas, like the other rules here.
 8210   match(Set dst (MulHiL src1 src2));
 8211   ins_cost(DEFAULT_COST);
 8212 
 8213   format %{ "MULHD   $dst, $src1, $src2 \t// long" %}
 8214   size(4);
 8215   ins_encode %{
 8216     __ mulhd($dst$$Register, $src1$$Register, $src2$$Register);
 8217   %}
 8218   ins_pipe(pipe_class_default);
 8219 %}
 8220 
 8221 instruct uMulHighL_reg_reg(iRegLdst dst, iRegLsrc src1, iRegLsrc src2) %{
        // Matches UMulHiL of two long registers and emits a single 4-byte MULHDU.
        // The format string separates all three operands with commas, like the other rules here.
 8222   match(Set dst (UMulHiL src1 src2));
 8223   ins_cost(DEFAULT_COST);
 8224 
 8225   format %{ "MULHDU  $dst, $src1, $src2 \t// unsigned long" %}
 8226   size(4);
 8227   ins_encode %{
 8228     __ mulhdu($dst$$Register, $src1$$Register, $src2$$Register);
 8229   %}
 8230   ins_pipe(pipe_class_default);
 8231 %}
 8232 
 8233 // Immediate Multiplication
 8234 instruct mulL_reg_imm16(iRegLdst dst, iRegLsrc src1, immL16 src2) %{
        // Matches MulL of a long register and a constant of operand class immL16; the constant
        // is used directly as the immediate of a 4-byte MULLI.
 8235   match(Set dst (MulL src1 src2));
 8236   ins_cost(DEFAULT_COST);
 8237 
 8238   format %{ "MULLI   $dst, $src1, $src2" %}
 8239   size(4);
 8240   ins_encode %{
 8241     __ mulli($dst$$Register, $src1$$Register, $src2$$constant);
 8242   %}
 8243   ins_pipe(pipe_class_default);
 8244 %}
 8245 
 8246 // Integer Division with Immediate -1: Negate.
 8247 instruct divI_reg_immIvalueMinus1(iRegIdst dst, iRegIsrc src1, immI_minus1 src2) %{
 8248   match(Set dst (DivI src1 src2));
 8249   ins_cost(DEFAULT_COST);
 8250   // src2 is only matched (operand class immI_minus1); the encoding never reads it.
 8251   format %{ "NEG     $dst, $src1 \t// /-1" %}
 8252   size(4);
 8253   ins_encode %{
 8254     __ neg($dst$$Register, $src1$$Register);
 8255   %}
 8256   ins_pipe(pipe_class_default);
 8257 %}
 8258 
 8259 // Integer Division with constant, but not -1: a bare DIVW without any -1 fix-up.
 8260 // We should be able to improve this by checking the type of src2.
 8261 // It might well be that src2 is known to be positive.
 8262 instruct divI_reg_regnotMinus1(iRegIdst dst, iRegIsrc src1, iRegIsrc src2) %{
 8263   match(Set dst (DivI src1 src2));
 8264   predicate(n->in(2)->find_int_con(-1) != -1); // src2 is a constant, but not -1
 8265   ins_cost(2*DEFAULT_COST);
 8266   // Also used by name from expand rules, which append a conditional negate for src2 == -1.
 8267   format %{ "DIVW    $dst, $src1, $src2 \t// /not-1" %}
 8268   size(4);
 8269   ins_encode %{
 8270     __ divw($dst$$Register, $src1$$Register, $src2$$Register);
 8271   %}
 8272   ins_pipe(pipe_class_default);
 8273 %}
 8274 
 8275 instruct cmovI_bne_negI_reg(iRegIdst dst, flagsRegSrc crx, iRegIsrc src1) %{
 8276   effect(USE_DEF dst, USE src1, USE crx);
 8277   predicate(false);
 8278   // No match rule and a false predicate: this node is created from expand rules only.
 8279   ins_variable_size_depending_on_alignment(true);
 8280   // Conditional negate: dst = -src1 unless crx says "not equal"; dst is USE_DEF because it may keep its old value.
 8281   format %{ "CMOVE   $dst, neg($src1), $crx" %}
 8282   size(8);
 8283   ins_encode %{
 8284     Label done;
 8285     __ bne($crx$$CondRegister, done);  // not equal: skip the negate, dst stays as it was
 8286     __ neg($dst$$Register, $src1$$Register);
 8287     __ bind(done);
 8288   %}
 8289   ins_pipe(pipe_class_default);
 8290 %}
 8291 
 8292 // Integer Division with Registers not containing constants.
 8293 instruct divI_reg_reg_Ex(iRegIdst dst, iRegIsrc src1, iRegIsrc src2) %{
 8294   match(Set dst (DivI src1 src2));
 8295   ins_cost(10*DEFAULT_COST);
 8296   // Three-node expansion: compare src2 with -1, divide, then overwrite the quotient with -src1 if src2 was -1.
 8297   expand %{
 8298     immI16 imm %{ (int)-1 %}
 8299     flagsReg tmp1;
 8300     cmpI_reg_imm16(tmp1, src2, imm);          // check src2 == -1
 8301     divI_reg_regnotMinus1(dst, src1, src2);   // dst = src1 / src2
 8302     cmovI_bne_negI_reg(dst, tmp1, src1);      // cmove dst = neg(src1) if src2 == -1
 8303   %}
 8304 %}
 8305 
 8306 // Long Division with Immediate -1: Negate.
 8307 instruct divL_reg_immLvalueMinus1(iRegLdst dst, iRegLsrc src1, immL_minus1 src2) %{
 8308   match(Set dst (DivL src1 src2));
 8309   ins_cost(DEFAULT_COST);
 8310   // src2 is only matched (operand class immL_minus1); the encoding never reads it.
 8311   format %{ "NEG     $dst, $src1 \t// /-1, long" %}
 8312   size(4);
 8313   ins_encode %{
 8314     __ neg($dst$$Register, $src1$$Register);
 8315   %}
 8316   ins_pipe(pipe_class_default);
 8317 %}
 8318 
 8319 // Long Division with constant, but not -1: a bare DIVD without any -1 fix-up.
 8320 instruct divL_reg_regnotMinus1(iRegLdst dst, iRegLsrc src1, iRegLsrc src2) %{
 8321   match(Set dst (DivL src1 src2));
 8322   predicate(n->in(2)->find_long_con(-1L) != -1L); // Src2 is a constant, but not -1.
 8323   ins_cost(2*DEFAULT_COST);
 8324   // Also used by name from expand rules, which append a conditional negate for src2 == -1.
 8325   format %{ "DIVD    $dst, $src1, $src2 \t// /not-1, long" %}
 8326   size(4);
 8327   ins_encode %{
 8328     __ divd($dst$$Register, $src1$$Register, $src2$$Register);
 8329   %}
 8330   ins_pipe(pipe_class_default);
 8331 %}
 8332 
 8333 instruct cmovL_bne_negL_reg(iRegLdst dst, flagsRegSrc crx, iRegLsrc src1) %{
 8334   effect(USE_DEF dst, USE src1, USE crx);
 8335   predicate(false);
 8336   // No match rule and a false predicate: this node is created from expand rules only.
 8337   ins_variable_size_depending_on_alignment(true);
 8338   // Conditional negate (long): dst = -src1 unless crx says "not equal"; otherwise dst keeps its old value.
 8339   format %{ "CMOVE   $dst, neg($src1), $crx" %}
 8340   size(8);
 8341   ins_encode %{
 8342     Label done;
 8343     __ bne($crx$$CondRegister, done);  // not equal: skip the negate, dst stays as it was
 8344     __ neg($dst$$Register, $src1$$Register);
 8345     __ bind(done);
 8346   %}
 8347   ins_pipe(pipe_class_default);
 8348 %}
 8349 
 8350 // Long Division with Registers not containing constants.
 8351 instruct divL_reg_reg_Ex(iRegLdst dst, iRegLsrc src1, iRegLsrc src2) %{
 8352   match(Set dst (DivL src1 src2));
 8353   ins_cost(10*DEFAULT_COST);
 8354   // Three-node expansion: compare src2 with -1, divide, then overwrite the quotient with -src1 if src2 was -1.
 8355   expand %{
 8356     immL16 imm %{ (int)-1 %}
 8357     flagsReg tmp1;
 8358     cmpL_reg_imm16(tmp1, src2, imm);          // check src2 == -1
 8359     divL_reg_regnotMinus1(dst, src1, src2);   // dst = src1 / src2
 8360     cmovL_bne_negL_reg(dst, tmp1, src1);      // cmove dst = neg(src1) if src2 == -1
 8361   %}
 8362 %}
 8363 
 8364 // Integer Remainder with registers.
 8365 instruct modI_reg_reg_Ex(iRegIdst dst, iRegIsrc src1, iRegIsrc src2) %{
 8366   match(Set dst (ModI src1 src2));
 8367   ins_cost(10*DEFAULT_COST);
 8368   // Remainder as src1 - src2 * (src1 / src2); the quotient gets the same -1 fix-up as in the division expansion.
 8369   expand %{
 8370     immI16 imm %{ (int)-1 %}
 8371     flagsReg tmp1;
 8372     iRegIdst tmp2;
 8373     iRegIdst tmp3;
 8374     cmpI_reg_imm16(tmp1, src2, imm);           // check src2 == -1
 8375     divI_reg_regnotMinus1(tmp2, src1, src2);   // tmp2 = src1 / src2
 8376     cmovI_bne_negI_reg(tmp2, tmp1, src1);      // cmove tmp2 = neg(src1) if src2 == -1
 8377     mulI_reg_reg(tmp3, src2, tmp2);            // tmp3 = src2 * tmp2
 8378     subI_reg_reg(dst, src1, tmp3);             // dst = src1 - tmp3
 8379   %}
 8380 %}
 8381 
 8382 // Long Remainder with registers
 8383 instruct modL_reg_reg_Ex(iRegLdst dst, iRegLsrc src1, iRegLsrc src2) %{
 8384   match(Set dst (ModL src1 src2));
 8385   ins_cost(10*DEFAULT_COST);
 8386   // Remainder as src1 - src2 * (src1 / src2); the quotient gets the same -1 fix-up as in the division expansion.
 8387   expand %{
 8388     immL16 imm %{ (int)-1 %}
 8389     flagsReg tmp1;
 8390     iRegLdst tmp2;
 8391     iRegLdst tmp3;
 8392     cmpL_reg_imm16(tmp1, src2, imm);             // check src2 == -1
 8393     divL_reg_regnotMinus1(tmp2, src1, src2);     // tmp2 = src1 / src2
 8394     cmovL_bne_negL_reg(tmp2, tmp1, src1);        // cmove tmp2 = neg(src1) if src2 == -1
 8395     mulL_reg_reg(tmp3, src2, tmp2);              // tmp3 = src2 * tmp2
 8396     subL_reg_reg(dst, src1, tmp3);               // dst = src1 - tmp3
 8397   %}
 8398 %}
 8399 
 8400 instruct udivI_reg_reg(iRegIdst dst, iRegIsrc src1, iRegIsrc src2) %{
 8401   match(Set dst (UDivI src1 src2)); // unsigned int division: one 4-byte DIVWU, no expansion
 8402   format %{ "DIVWU   $dst, $src1, $src2" %}
 8403   size(4);
 8404   ins_encode %{
 8405     __ divwu($dst$$Register, $src1$$Register, $src2$$Register);
 8406   %}
 8407   ins_pipe(pipe_class_default);
 8408 %}
 8409 
 8410 instruct umodI_reg_reg(iRegIdst dst, iRegIsrc src1, iRegIsrc src2) %{
 8411   match(Set dst (UModI src1 src2)); // unsigned int remainder: src1 - src2 * (src1 /u src2)
 8412   expand %{
 8413     iRegIdst tmp1;
 8414     iRegIdst tmp2;
 8415     udivI_reg_reg(tmp1, src1, src2);   // tmp1 = src1 /u src2
 8416     // Compute lower 32 bit result using signed instructions as suggested by ISA.
 8417     // Upper 32 bit will contain garbage.
 8418     mulI_reg_reg(tmp2, src2, tmp1);    // tmp2 = src2 * tmp1
 8419     subI_reg_reg(dst, src1, tmp2);     // dst = src1 - tmp2
 8420   %}
 8421 %}
 8422 
 8423 instruct udivL_reg_reg(iRegLdst dst, iRegLsrc src1, iRegLsrc src2) %{
 8424   match(Set dst (UDivL src1 src2)); // unsigned long division: one 4-byte DIVDU, no expansion
 8425   format %{ "DIVDU   $dst, $src1, $src2" %}
 8426   size(4);
 8427   ins_encode %{
 8428     __ divdu($dst$$Register, $src1$$Register, $src2$$Register);
 8429   %}
 8430   ins_pipe(pipe_class_default);
 8431 %}
 8432 
 8433 instruct umodL_reg_reg(iRegLdst dst, iRegLsrc src1, iRegLsrc src2) %{
 8434   match(Set dst (UModL src1 src2)); // unsigned long remainder: src1 - src2 * (src1 /u src2)
 8435   expand %{
 8436     iRegLdst tmp1;
 8437     iRegLdst tmp2;
 8438     udivL_reg_reg(tmp1, src1, src2);   // tmp1 = src1 /u src2
 8439     mulL_reg_reg(tmp2, src2, tmp1);    // tmp2 = src2 * tmp1
 8440     subL_reg_reg(dst, src1, tmp2);     // dst = src1 - tmp2
 8441   %}
 8442 %}
 8443 
 8444 // Integer Shift Instructions
 8445 
 8446 // Register Shift Left
 8447 
 8448 // Clear all but the lowest bits: CLRLDI zeroes the upper #mask bits of the register.
 8449 // Used to normalize shift amounts in registers.
 8450 instruct maskI_reg_imm(iRegIdst dst, iRegIsrc src, uimmI6 mask) %{
 8451   // no match-rule, false predicate
 8452   effect(DEF dst, USE src, USE mask);
 8453   predicate(false);
 8454   // mask is the number of upper bits to clear, not a bit pattern (see the format text).
 8455   format %{ "MASK    $dst, $src, $mask \t// clear $mask upper bits" %}
 8456   size(4);
 8457   ins_encode %{
 8458     __ clrldi($dst$$Register, $src$$Register, $mask$$constant);
 8459   %}
 8460   ins_pipe(pipe_class_default);
 8461 %}
 8462 
 8463 instruct lShiftI_reg_reg(iRegIdst dst, iRegIsrc src1, iRegIsrc src2) %{
 8464   // no match-rule, false predicate
 8465   effect(DEF dst, USE src1, USE src2);
 8466   predicate(false);
 8467   // Raw SLW by a register amount; the amount is passed to SLW as-is, no masking is done here.
 8468   format %{ "SLW     $dst, $src1, $src2" %}
 8469   size(4);
 8470   ins_encode %{
 8471     __ slw($dst$$Register, $src1$$Register, $src2$$Register);
 8472   %}
 8473   ins_pipe(pipe_class_default);
 8474 %}
 8475 
 8476 instruct lShiftI_reg_reg_Ex(iRegIdst dst, iRegIsrc src1, iRegIsrc src2) %{
 8477   match(Set dst (LShiftI src1 src2)); // int shift left by a register amount
 8478   ins_cost(DEFAULT_COST*2);
 8479   expand %{
 8480     uimmI6 mask %{ 0x3b /* clear 59 bits, keep 5 */ %}
 8481     iRegIdst tmpI;
 8482     maskI_reg_imm(tmpI, src2, mask);      // tmpI = src2 reduced to its low 5 bits
 8483     lShiftI_reg_reg(dst, src1, tmpI);     // dst = src1 << tmpI
 8484   %}
 8485 %}
 8486 
 8487 // Register Shift Left Immediate (int).
 8488 instruct lShiftI_reg_imm(iRegIdst dst, iRegIsrc src1, immI src2) %{
 8489   match(Set dst (LShiftI src1 src2));
 8490   // Any int constant is accepted; only its low 5 bits are used as the shift amount.
 8491   format %{ "SLWI    $dst, $src1, ($src2 & 0x1f)" %}
 8492   size(4);
 8493   ins_encode %{
 8494     __ slwi($dst$$Register, $src1$$Register, ($src2$$constant) & 0x1f);
 8495   %}
 8496   ins_pipe(pipe_class_default);
 8497 %}
 8498 
 8499 // AndI with negpow2-constant + LShiftI, folded into one rotate-and-mask instruction.
 8500 instruct lShiftI_andI_immInegpow2_imm5(iRegIdst dst, iRegIsrc src1, immInegpow2 src2, uimmI5 src3) %{
 8501   match(Set dst (LShiftI (AndI src1 src2) src3));
 8502   predicate(UseRotateAndMaskInstructionsPPC64);
 8503   // Either branch of the encoding emits exactly one instruction, hence size(4).
 8504   format %{ "RLWINM  $dst, lShiftI(AndI($src1, $src2), $src3)" %}
 8505   size(4);
 8506   ins_encode %{
 8507     const long shift        = $src3$$constant;
 8508     const long cleared_bits = shift + log2i_exact(-(juint)$src2$$constant); // shift amount plus log2 of the negated and-constant
 8509     if (cleared_bits < 32) {
 8510       __ rlwinm($dst$$Register, $src1$$Register, shift & 0x1f, 0, (31-cleared_bits) & 0x1f);
 8511     } else {
 8512       __ li($dst$$Register, 0); // addi
 8513     }
 8514   %}
 8515   ins_pipe(pipe_class_default);
 8516 %}
 8517 
 8518 // RShiftI + AndI with negpow2-constant + LShiftI (both shifts by the same src3), folded into one mask instruction.
 8519 instruct lShiftI_andI_immInegpow2_rShiftI_imm5(iRegIdst dst, iRegIsrc src1, immInegpow2 src2, uimmI5 src3) %{
 8520   match(Set dst (LShiftI (AndI (RShiftI src1 src3) src2) src3));
 8521   predicate(UseRotateAndMaskInstructionsPPC64);
 8522   // Either branch of the encoding emits exactly one instruction, hence size(4).
 8523   format %{ "RLWINM  $dst, lShiftI(AndI(RShiftI($src1, $src3), $src2), $src3)" %}
 8524   size(4);
 8525   ins_encode %{
 8526     const long shift        = $src3$$constant;
 8527     const long cleared_bits = shift + log2i_exact(-(juint)$src2$$constant); // shift amount plus log2 of the negated and-constant
 8528     if (cleared_bits < 32) {
 8529       __ rlwinm($dst$$Register, $src1$$Register, 0, 0, (31-cleared_bits) & 0x1f); // rotate by 0: mask only
 8530     } else {
 8531       __ li($dst$$Register, 0); // addi
 8532     }
 8533   %}
 8534   ins_pipe(pipe_class_default);
 8535 %}
 8536 
 8537 instruct lShiftL_regL_regI(iRegLdst dst, iRegLsrc src1, iRegIsrc src2) %{
 8538   // no match-rule, false predicate
 8539   effect(DEF dst, USE src1, USE src2);
 8540   predicate(false);
 8541   // Raw SLD by a register amount; the amount is passed to SLD as-is, no masking is done here.
 8542   format %{ "SLD     $dst, $src1, $src2" %}
 8543   size(4);
 8544   ins_encode %{
 8545     __ sld($dst$$Register, $src1$$Register, $src2$$Register);
 8546   %}
 8547   ins_pipe(pipe_class_default);
 8548 %}
 8549 
 8550 // Register Shift Left (long value, int shift amount in a register).
 8551 instruct lShiftL_regL_regI_Ex(iRegLdst dst, iRegLsrc src1, iRegIsrc src2) %{
 8552   match(Set dst (LShiftL src1 src2));
 8553   ins_cost(DEFAULT_COST*2);
 8554   expand %{
 8555     uimmI6 mask %{ 0x3a /* clear 58 bits, keep 6 */ %}
 8556     iRegIdst tmpI;
 8557     maskI_reg_imm(tmpI, src2, mask);        // tmpI = src2 reduced to its low 6 bits
 8558     lShiftL_regL_regI(dst, src1, tmpI);     // dst = src1 << tmpI
 8559   %}
 8560 %}
 8561 
 8562 // Register Shift Left Immediate (long); only the low 6 bits of the constant are used.
 8563 instruct lshiftL_regL_immI(iRegLdst dst, iRegLsrc src1, immI src2) %{
 8564   match(Set dst (LShiftL src1 src2));
 8565   format %{ "SLDI    $dst, $src1, ($src2 & 0x3f)" %}
 8566   size(4);
 8567   ins_encode %{
 8568     __ sldi($dst$$Register, $src1$$Register, ($src2$$constant) & 0x3f);
 8569   %}
 8570   ins_pipe(pipe_class_default);
 8571 %}
 8572 
 8573 // If we shift more than 32 bits, we need not convert I2L.
 8574 instruct lShiftL_regI_immGE32(iRegLdst dst, iRegIsrc src1, uimmI6_ge32 src2) %{
 8575   match(Set dst (LShiftL (ConvI2L src1) src2));
 8576   ins_cost(DEFAULT_COST);
 8577   // The ConvI2L is absorbed: the int register is shifted directly with SLDI.
 8578   size(4);
 8579   format %{ "SLDI    $dst, i2l($src1), $src2" %}
 8580   ins_encode %{
 8581     __ sldi($dst$$Register, $src1$$Register, ($src2$$constant) & 0x3f);
 8582   %}
 8583   ins_pipe(pipe_class_default);
 8584 %}
 8585 
 8586 // Shift a positive int to the left.
 8587 // Clrlsldi clears the upper 32 bits and shifts.
 8588 instruct scaledPositiveI2L_lShiftL_convI2L_reg_imm6(iRegLdst dst, iRegIsrc src1, uimmI6 src2) %{
 8589   match(Set dst (LShiftL (ConvI2L src1) src2));
 8590   predicate(((ConvI2LNode*)(_kids[0]->_leaf))->type()->is_long()->is_positive_int());
 8591   // Applies only when the ConvI2L result type is a positive int; the format prints SLDI, the encoding emits CLRLSLDI.
 8592   format %{ "SLDI    $dst, i2l(positive_int($src1)), $src2" %}
 8593   size(4);
 8594   ins_encode %{
 8595     __ clrlsldi($dst$$Register, $src1$$Register, 0x20, $src2$$constant);
 8596   %}
 8597   ins_pipe(pipe_class_default);
 8598 %}
 8599 
 8600 instruct arShiftI_reg_reg(iRegIdst dst, iRegIsrc src1, iRegIsrc src2) %{
 8601   // no match-rule, false predicate
 8602   effect(DEF dst, USE src1, USE src2);
 8603   predicate(false);
 8604   // Raw SRAW by a register amount; the amount is passed to SRAW as-is, no masking is done here.
 8605   format %{ "SRAW    $dst, $src1, $src2" %}
 8606   size(4);
 8607   ins_encode %{
 8608     __ sraw($dst$$Register, $src1$$Register, $src2$$Register);
 8609   %}
 8610   ins_pipe(pipe_class_default);
 8611 %}
 8612 
 8613 // Register Arithmetic Shift Right (int, shift amount in a register).
 8614 instruct arShiftI_reg_reg_Ex(iRegIdst dst, iRegIsrc src1, iRegIsrc src2) %{
 8615   match(Set dst (RShiftI src1 src2));
 8616   ins_cost(DEFAULT_COST*2);
 8617   expand %{
 8618     uimmI6 mask %{ 0x3b /* clear 59 bits, keep 5 */ %}
 8619     iRegIdst tmpI;
 8620     maskI_reg_imm(tmpI, src2, mask);       // tmpI = src2 reduced to its low 5 bits
 8621     arShiftI_reg_reg(dst, src1, tmpI);     // dst = src1 >> tmpI (arithmetic)
 8622   %}
 8623 %}
 8624 
 8625 // Register Arithmetic Shift Right Immediate
 8626 instruct arShiftI_reg_imm(iRegIdst dst, iRegIsrc src1, immI src2) %{
 8627   match(Set dst (RShiftI src1 src2));
 8628   // Any int constant is accepted; only its low 5 bits are used as the shift amount.
 8629   format %{ "SRAWI   $dst, $src1, ($src2 & 0x1f)" %}
 8630   size(4);
 8631   ins_encode %{
 8632     __ srawi($dst$$Register, $src1$$Register, ($src2$$constant) & 0x1f);
 8633   %}
 8634   ins_pipe(pipe_class_default);
 8635 %}
 8636 
 8637 instruct arShiftL_regL_regI(iRegLdst dst, iRegLsrc src1, iRegIsrc src2) %{
 8638   // no match-rule, false predicate
 8639   effect(DEF dst, USE src1, USE src2);
 8640   predicate(false);
 8641   // Raw SRAD by a register amount; the amount is passed to SRAD as-is, no masking is done here.
 8642   format %{ "SRAD    $dst, $src1, $src2" %}
 8643   size(4);
 8644   ins_encode %{
 8645     __ srad($dst$$Register, $src1$$Register, $src2$$Register);
 8646   %}
 8647   ins_pipe(pipe_class_default);
 8648 %}
 8649 
 8650 // Register Shift Right Arithmetic Long
 8651 instruct arShiftL_regL_regI_Ex(iRegLdst dst, iRegLsrc src1, iRegIsrc src2) %{
 8652   match(Set dst (RShiftL src1 src2));
 8653   ins_cost(DEFAULT_COST*2);
 8654   // Two-node expansion: reduce the shift amount to 6 bits, then shift.
 8655   expand %{
 8656     uimmI6 mask %{ 0x3a /* clear 58 bits, keep 6 */ %}
 8657     iRegIdst tmpI;
 8658     maskI_reg_imm(tmpI, src2, mask);         // tmpI = src2 reduced to its low 6 bits
 8659     arShiftL_regL_regI(dst, src1, tmpI);     // dst = src1 >> tmpI (arithmetic)
 8660   %}
 8661 %}
 8662 
 8663 // Register Shift Right Immediate (arithmetic, long)
 8664 instruct arShiftL_regL_immI(iRegLdst dst, iRegLsrc src1, immI src2) %{
 8665   match(Set dst (RShiftL src1 src2));
 8666   // Any int constant is accepted; only its low 6 bits are used as the shift amount.
 8667   format %{ "SRADI   $dst, $src1, ($src2 & 0x3f)" %}
 8668   size(4);
 8669   ins_encode %{
 8670     __ sradi($dst$$Register, $src1$$Register, ($src2$$constant) & 0x3f);
 8671   %}
 8672   ins_pipe(pipe_class_default);
 8673 %}
 8674 
 8675 // RShiftL + ConvL2I
 8676 instruct convL2I_arShiftL_regL_immI(iRegIdst dst, iRegLsrc src1, immI src2) %{
 8677   match(Set dst (ConvL2I (RShiftL src1 src2)));
 8678   // The ConvL2I is absorbed: the encoding is the same single SRADI as for the plain long shift.
 8679   format %{ "SRADI   $dst, $src1, ($src2 & 0x3f) \t// long + l2i" %}
 8680   size(4);
 8681   ins_encode %{
 8682     __ sradi($dst$$Register, $src1$$Register, ($src2$$constant) & 0x3f);
 8683   %}
 8684   ins_pipe(pipe_class_default);
 8685 %}
 8686 
 8687 instruct urShiftI_reg_reg(iRegIdst dst, iRegIsrc src1, iRegIsrc src2) %{
 8688   // no match-rule, false predicate
 8689   effect(DEF dst, USE src1, USE src2);
 8690   predicate(false);
 8691   // Raw SRW by a register amount; the amount is passed to SRW as-is, no masking is done here.
 8692   format %{ "SRW     $dst, $src1, $src2" %}
 8693   size(4);
 8694   ins_encode %{
 8695     __ srw($dst$$Register, $src1$$Register, $src2$$Register);
 8696   %}
 8697   ins_pipe(pipe_class_default);
 8698 %}
 8699 
 8700 // Register Shift Right (logical, int, shift amount in a register)
 8701 instruct urShiftI_reg_reg_Ex(iRegIdst dst, iRegIsrc src1, iRegIsrc src2) %{
 8702   match(Set dst (URShiftI src1 src2));
 8703   ins_cost(DEFAULT_COST*2);
 8704   // Two-node expansion: reduce the shift amount to 5 bits, then shift.
 8705   expand %{
 8706     uimmI6 mask %{ 0x3b /* clear 59 bits, keep 5 */ %}
 8707     iRegIdst tmpI;
 8708     maskI_reg_imm(tmpI, src2, mask);       // tmpI = src2 reduced to its low 5 bits
 8709     urShiftI_reg_reg(dst, src1, tmpI);     // dst = src1 >>> tmpI
 8710   %}
 8711 %}
 8712 
 8713 // Register Shift Right Immediate (logical, int)
 8714 instruct urShiftI_reg_imm(iRegIdst dst, iRegIsrc src1, immI src2) %{
 8715   match(Set dst (URShiftI src1 src2));
 8716   // Any int constant is accepted; only its low 5 bits are used as the shift amount.
 8717   format %{ "SRWI    $dst, $src1, ($src2 & 0x1f)" %}
 8718   size(4);
 8719   ins_encode %{
 8720     __ srwi($dst$$Register, $src1$$Register, ($src2$$constant) & 0x1f);
 8721   %}
 8722   ins_pipe(pipe_class_default);
 8723 %}
 8724 
 8725 instruct urShiftL_regL_regI(iRegLdst dst, iRegLsrc src1, iRegIsrc src2) %{
 8726   // no match-rule, false predicate
 8727   effect(DEF dst, USE src1, USE src2);
 8728   predicate(false);
 8729   // Raw SRD by a register amount; the amount is passed to SRD as-is, no masking is done here.
 8730   format %{ "SRD     $dst, $src1, $src2" %}
 8731   size(4);
 8732   ins_encode %{
 8733     __ srd($dst$$Register, $src1$$Register, $src2$$Register);
 8734   %}
 8735   ins_pipe(pipe_class_default);
 8736 %}
 8737 
 8738 // Register Shift Right (logical, long, shift amount in a register)
 8739 instruct urShiftL_regL_regI_Ex(iRegLdst dst, iRegLsrc src1, iRegIsrc src2) %{
 8740   match(Set dst (URShiftL src1 src2));
 8741   ins_cost(DEFAULT_COST*2);
 8742   // Two-node expansion: reduce the shift amount to 6 bits, then shift.
 8743   expand %{
 8744     uimmI6 mask %{ 0x3a /* clear 58 bits, keep 6 */ %}
 8745     iRegIdst tmpI;
 8746     maskI_reg_imm(tmpI, src2, mask);         // tmpI = src2 reduced to its low 6 bits
 8747     urShiftL_regL_regI(dst, src1, tmpI);     // dst = src1 >>> tmpI
 8748   %}
 8749 %}
 8750 
 8751 // Register Shift Right Immediate (logical, long)
 8752 instruct urShiftL_regL_immI(iRegLdst dst, iRegLsrc src1, immI src2) %{
 8753   match(Set dst (URShiftL src1 src2));
 8754   // Any int constant is accepted; only its low 6 bits are used as the shift amount.
 8755   format %{ "SRDI    $dst, $src1, ($src2 & 0x3f)" %}
 8756   size(4);
 8757   ins_encode %{
 8758     __ srdi($dst$$Register, $src1$$Register, ($src2$$constant) & 0x3f);
 8759   %}
 8760   ins_pipe(pipe_class_default);
 8761 %}
 8762 
 8763 // URShiftL + ConvL2I.
 8764 instruct convL2I_urShiftL_regL_immI(iRegIdst dst, iRegLsrc src1, immI src2) %{
 8765   match(Set dst (ConvL2I (URShiftL src1 src2)));
 8766   // The ConvL2I is absorbed: the encoding is the same single SRDI as for the plain long shift.
 8767   format %{ "SRDI    $dst, $src1, ($src2 & 0x3f) \t// long + l2i" %}
 8768   size(4);
 8769   ins_encode %{
 8770     __ srdi($dst$$Register, $src1$$Register, ($src2$$constant) & 0x3f);
 8771   %}
 8772   ins_pipe(pipe_class_default);
 8773 %}
 8774 
 8775 // Register Shift Right Immediate with a CastP2X
 8776 instruct shrP_convP2X_reg_imm6(iRegLdst dst, iRegP_N2P src1, uimmI6 src2) %{
 8777   match(Set dst (URShiftL (CastP2X src1) src2));
 8778   // The CastP2X is absorbed: the pointer register is shifted directly with SRDI.
 8779   format %{ "SRDI    $dst, $src1, $src2 \t// Cast ptr $src1 to long and shift" %}
 8780   size(4);
 8781   ins_encode %{
 8782     __ srdi($dst$$Register, $src1$$Register, ($src2$$constant) & 0x3f);
 8783   %}
 8784   ins_pipe(pipe_class_default);
 8785 %}
 8786 
 8787 // Bitfield Extract: URShiftI + AndI with a 2^k-1 mask, done by a single EXTRDI.
 8788 instruct andI_urShiftI_regI_immI_immIpow2minus1(iRegIdst dst, iRegIsrc src1, immI src2, immIpow2minus1 src3) %{
 8789   match(Set dst (AndI (URShiftI src1 src2) src3));
 8790   // rshift is the shift amount (low 5 bits of src2); masklen is the width k of the and-mask.
 8791   format %{ "EXTRDI  $dst, $src1, shift=$src2, mask=$src3 \t// int bitfield extract" %}
 8792   size(4);
 8793   ins_encode %{
 8794     const int rshift  = ($src2$$constant) & 0x1f;
 8795     const int masklen = log2i_exact((juint)$src3$$constant + 1u);
 8796     // If the field would reach beyond bit 31 of the int, shorten it to the
 8797     // 32 - rshift bits that are left, so that no rotated bits are extracted.
 8798     const bool too_long = (rshift + masklen > 32);
 8799     const int  length   = too_long ? (32 - rshift) : masklen;
 8800     __ extrdi($dst$$Register, $src1$$Register, length, 64 - (rshift + length));
 8801   %}
 8802   ins_pipe(pipe_class_default);
 8803 %}
 8804 
 8805 // Bitfield Extract: URShiftL + AndL with a 2^k-1 mask, done by a single EXTRDI.
 8806 instruct andL_urShiftL_regL_immI_immLpow2minus1(iRegLdst dst, iRegLsrc src1, immI src2, immLpow2minus1 src3) %{
 8807   match(Set dst (AndL (URShiftL src1 src2) src3));
 8808   // rshift is the shift amount (low 6 bits of src2); masklen is the width k of the and-mask.
 8809   format %{ "EXTRDI  $dst, $src1, shift=$src2, mask=$src3 \t// long bitfield extract" %}
 8810   size(4);
 8811   ins_encode %{
 8812     const int rshift  = ($src2$$constant) & 0x3f;
 8813     const int masklen = log2i_exact((julong)$src3$$constant + 1ull);
 8814     // If the field would reach beyond bit 63 of the long, shorten it to the
 8815     // 64 - rshift bits that are left, so that no rotated bits are extracted.
 8816     const bool too_long = (rshift + masklen > 64);
 8817     const int  length   = too_long ? (64 - rshift) : masklen;
 8818     __ extrdi($dst$$Register, $src1$$Register, length, 64 - (rshift + length));
 8819   %}
 8820   ins_pipe(pipe_class_default);
 8821 %}
 8822 
 8823 instruct sxtI_reg(iRegIdst dst, iRegIsrc src) %{
 8824   match(Set dst (ConvL2I (ConvI2L src)));
 8825   // An int -> long -> int round trip collapses into one EXTSW.
 8826   format %{ "EXTSW   $dst, $src \t// int->int" %}
 8827   size(4);
 8828   ins_encode %{
 8829     __ extsw($dst$$Register, $src$$Register);
 8830   %}
 8831   ins_pipe(pipe_class_default);
 8832 %}
 8833 
 8834 //----------Rotate Instructions------------------------------------------------
 8835 
 8836 // Rotate Left by 8-bit immediate
 8837 instruct rotlI_reg_immi8(iRegIdst dst, iRegIsrc src, immI8 lshift, immI8 rshift) %{
 8838   match(Set dst (OrI (LShiftI src lshift) (URShiftI src rshift)));
 8839   predicate(0 == ((n->in(1)->in(2)->get_int() + n->in(2)->in(2)->get_int()) & 0x1f));
 8840   // The predicate admits the pattern only if the two shift constants add up to a multiple of 32.
 8841   format %{ "ROTLWI  $dst, $src, $lshift" %}
 8842   size(4);
 8843   ins_encode %{
 8844     __ rotlwi($dst$$Register, $src$$Register, $lshift$$constant);
 8845   %}
 8846   ins_pipe(pipe_class_default);
 8847 %}
 8848 
 8849 // Rotate Right by 8-bit immediate
 8850 instruct rotrI_reg_immi8(iRegIdst dst, iRegIsrc src, immI8 rshift, immI8 lshift) %{
 8851   match(Set dst (OrI (URShiftI src rshift) (LShiftI src lshift)));
 8852   predicate(0 == ((n->in(1)->in(2)->get_int() + n->in(2)->in(2)->get_int()) & 0x1f));
 8853   // The predicate admits the pattern only if the two shift constants add up to a multiple of 32.
 8854   format %{ "ROTRWI  $dst, $src, $rshift" %}
 8855   size(4);
 8856   ins_encode %{
 8857     __ rotrwi($dst$$Register, $src$$Register, $rshift$$constant);
 8858   %}
 8859   ins_pipe(pipe_class_default);
 8860 %}
 8861 
 8862 //----------Floating Point Arithmetic Instructions-----------------------------
 8863 
 8864 // Add float single precision
 8865 instruct addF_reg_reg(regF dst, regF src1, regF src2) %{
 8866   match(Set dst (AddF src1 src2));
 8867   // One 4-byte FADDS on the float registers.
 8868   format %{ "FADDS   $dst, $src1, $src2" %}
 8869   size(4);
 8870   ins_encode %{
 8871     __ fadds($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister);
 8872   %}
 8873   ins_pipe(pipe_class_default);
 8874 %}
 8875 
 8876 // Add float double precision
 8877 instruct addD_reg_reg(regD dst, regD src1, regD src2) %{
 8878   match(Set dst (AddD src1 src2));
 8879   // One 4-byte FADD on the float registers.
 8880   format %{ "FADD    $dst, $src1, $src2" %}
 8881   size(4);
 8882   ins_encode %{
 8883     __ fadd($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister);
 8884   %}
 8885   ins_pipe(pipe_class_default);
 8886 %}
 8887 
 8888 // Sub float single precision
 8889 instruct subF_reg_reg(regF dst, regF src1, regF src2) %{
 8890   match(Set dst (SubF src1 src2));
 8891   // One 4-byte FSUBS: dst = src1 - src2.
 8892   format %{ "FSUBS   $dst, $src1, $src2" %}
 8893   size(4);
 8894   ins_encode %{
 8895     __ fsubs($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister);
 8896   %}
 8897   ins_pipe(pipe_class_default);
 8898 %}
 8899 
 8900 // Sub float double precision: one 4-byte FSUB, dst = src1 - src2.
 8901 instruct subD_reg_reg(regD dst, regD src1, regD src2) %{
 8902   match(Set dst (SubD src1 src2));
 8903   format %{ "FSUB    $dst, $src1, $src2" %}
 8904   size(4);
 8905   ins_encode %{
 8906     __ fsub($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister);
 8907   %}
 8908   ins_pipe(pipe_class_default);
 8909 %}
 8910 
 8911 // Mul float single precision: one 4-byte FMULS.
 8912 instruct mulF_reg_reg(regF dst, regF src1, regF src2) %{
 8913   match(Set dst (MulF src1 src2));
 8914   format %{ "FMULS   $dst, $src1, $src2" %}
 8915   size(4);
 8916   ins_encode %{
 8917     __ fmuls($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister);
 8918   %}
 8919   ins_pipe(pipe_class_default);
 8920 %}
 8921 
 8922 // Mul float double precision: one 4-byte FMUL.
 8923 instruct mulD_reg_reg(regD dst, regD src1, regD src2) %{
 8924   match(Set dst (MulD src1 src2));
 8925   format %{ "FMUL    $dst, $src1, $src2" %}
 8926   size(4);
 8927   ins_encode %{
 8928     __ fmul($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister);
 8929   %}
 8930   ins_pipe(pipe_class_default);
 8931 %}
 8932 
 8933 // Div float single precision: one 4-byte FDIVS, dst = src1 / src2.
 8934 instruct divF_reg_reg(regF dst, regF src1, regF src2) %{
 8935   match(Set dst (DivF src1 src2));
 8936   format %{ "FDIVS   $dst, $src1, $src2" %}
 8937   size(4);
 8938   ins_encode %{
 8939     __ fdivs($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister);
 8940   %}
 8941   ins_pipe(pipe_class_default);
 8942 %}
 8943 
 8944 // Div float double precision: one 4-byte FDIV, dst = src1 / src2.
 8945 instruct divD_reg_reg(regD dst, regD src1, regD src2) %{
 8946   match(Set dst (DivD src1 src2));
 8947   format %{ "FDIV    $dst, $src1, $src2" %}
 8948   size(4);
 8949   ins_encode %{
 8950     __ fdiv($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister);
 8951   %}
 8952   ins_pipe(pipe_class_default);
 8953 %}
 8954 
 8955 // Absolute float single precision: one FABS (the same instruction is used for double).
 8956 instruct absF_reg(regF dst, regF src) %{
 8957   match(Set dst (AbsF src));
 8958   format %{ "FABS    $dst, $src \t// float" %}
 8959   size(4);
 8960   ins_encode %{
 8961     __ fabs($dst$$FloatRegister, $src$$FloatRegister);
 8962   %}
 8963   ins_pipe(pipe_class_default);
 8964 %}
 8965 
 8966 // Absolute float double precision: one FABS (the same instruction is used for single).
 8967 instruct absD_reg(regD dst, regD src) %{
 8968   match(Set dst (AbsD src));
 8969   format %{ "FABS    $dst, $src \t// double" %}
 8970   size(4);
 8971   ins_encode %{
 8972     __ fabs($dst$$FloatRegister, $src$$FloatRegister);
 8973   %}
 8974   ins_pipe(pipe_class_default);
 8975 %}
 8976 
 8977 instruct negF_reg(regF dst, regF src) %{
 8978   match(Set dst (NegF src)); // negate float single precision: one FNEG
 8979   format %{ "FNEG    $dst, $src \t// float" %}
 8980   size(4);
 8981   ins_encode %{
 8982     __ fneg($dst$$FloatRegister, $src$$FloatRegister);
 8983   %}
 8984   ins_pipe(pipe_class_default);
 8985 %}
 8986 
 8987 instruct negD_reg(regD dst, regD src) %{
 8988   match(Set dst (NegD src)); // negate float double precision: one FNEG
 8989   format %{ "FNEG    $dst, $src \t// double" %}
 8990   size(4);
 8991   ins_encode %{
 8992     __ fneg($dst$$FloatRegister, $src$$FloatRegister);
 8993   %}
 8994   ins_pipe(pipe_class_default);
 8995 %}
 8996 
 8997 // AbsF + NegF: the two ideal nodes are matched together and emitted as one FNABS.
 8998 instruct negF_absF_reg(regF dst, regF src) %{
 8999   match(Set dst (NegF (AbsF src)));
 9000   format %{ "FNABS   $dst, $src \t// float" %}
 9001   size(4);
 9002   ins_encode %{
 9003     __ fnabs($dst$$FloatRegister, $src$$FloatRegister);
 9004   %}
 9005   ins_pipe(pipe_class_default);
 9006 %}
 9007 
 9008 // AbsD + NegD: the two ideal nodes are matched together and emitted as one FNABS.
 9009 instruct negD_absD_reg(regD dst, regD src) %{
 9010   match(Set dst (NegD (AbsD src)));
 9011   format %{ "FNABS   $dst, $src \t// double" %}
 9012   size(4);
 9013   ins_encode %{
 9014     __ fnabs($dst$$FloatRegister, $src$$FloatRegister);
 9015   %}
 9016   ins_pipe(pipe_class_default);
 9017 %}
 9018 
 9019 // Double-precision sqrt: one 4-byte FSQRT.
 9020 instruct sqrtD_reg(regD dst, regD src) %{
 9021   match(Set dst (SqrtD src));
 9022   format %{ "FSQRT   $dst, $src" %}
 9023   size(4);
 9024   ins_encode %{
 9025     __ fsqrt($dst$$FloatRegister, $src$$FloatRegister);
 9026   %}
 9027   ins_pipe(pipe_class_default);
 9028 %}
 9029 
 9030 // Single-precision sqrt.
 9031 instruct sqrtF_reg(regF dst, regF src) %{
 9032   match(Set dst (SqrtF src));
 9033   ins_cost(DEFAULT_COST);
 9034   // One 4-byte FSQRTS.
 9035   format %{ "FSQRTS  $dst, $src" %}
 9036   size(4);
 9037   ins_encode %{
 9038     __ fsqrts($dst$$FloatRegister, $src$$FloatRegister);
 9039   %}
 9040   ins_pipe(pipe_class_default);
 9041 %}
 9042 
 9043 
 9044 // Multiply-Accumulate
 9045 // src1 * src2 + src3
 9046 instruct maddF_reg_reg(regF dst, regF src1, regF src2, regF src3) %{
 9047   match(Set dst (FmaF src3 (Binary src1 src2)));
 9048   // The addend src3 is the first FmaF input; the factors are wrapped in a Binary node.
 9049   format %{ "FMADDS  $dst, $src1, $src2, $src3" %}
 9050   size(4);
 9051   ins_encode %{
 9052     assert(UseFMA, "Needs FMA instructions support.");
 9053     __ fmadds($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister, $src3$$FloatRegister);
 9054   %}
 9055   ins_pipe(pipe_class_default);
 9056 %}
 9057 
 9058 // src1 * src2 + src3
 9059 instruct maddD_reg_reg(regD dst, regD src1, regD src2, regD src3) %{
 9060   match(Set dst (FmaD src3 (Binary src1 src2)));
 9061   // The addend src3 is the first FmaD input; the factors are wrapped in a Binary node.
 9062   format %{ "FMADD   $dst, $src1, $src2, $src3" %}
 9063   size(4);
 9064   ins_encode %{
 9065     assert(UseFMA, "Needs FMA instructions support.");
 9066     __ fmadd($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister, $src3$$FloatRegister);
 9067   %}
 9068   ins_pipe(pipe_class_default);
 9069 %}
 9070 
 9071 // src1 * (-src2) + src3 = -(src1*src2-src3)
 9072 // "(-src1) * src2 + src3" has been idealized to "src2 * (-src1) + src3"
 9073 instruct mnsubF_reg_reg(regF dst, regF src1, regF src2, regF src3) %{
 9074   match(Set dst (FmaF src3 (Binary src1 (NegF src2))));
 9075   // The NegF on the second factor is absorbed; FNMSUBS receives the un-negated src2.
 9076   format %{ "FNMSUBS $dst, $src1, $src2, $src3" %}
 9077   size(4);
 9078   ins_encode %{
 9079     assert(UseFMA, "Needs FMA instructions support.");
 9080     __ fnmsubs($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister, $src3$$FloatRegister);
 9081   %}
 9082   ins_pipe(pipe_class_default);
 9083 %}
 9084 
 9085 // src1 * (-src2) + src3 = -(src1*src2-src3)
 9086 // "(-src1) * src2 + src3" has been idealized to "src2 * (-src1) + src3"
 9087 instruct mnsubD_reg_reg(regD dst, regD src1, regD src2, regD src3) %{
 9088   match(Set dst (FmaD src3 (Binary src1 (NegD src2))));
 9089   // The NegD on the second factor is absorbed; FNMSUB receives the un-negated src2.
 9090   format %{ "FNMSUB  $dst, $src1, $src2, $src3" %}
 9091   size(4);
 9092   ins_encode %{
 9093     assert(UseFMA, "Needs FMA instructions support.");
 9094     __ fnmsub($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister, $src3$$FloatRegister);
 9095   %}
 9096   ins_pipe(pipe_class_default);
 9097 %}
 9098 
 9099 // src1 * (-src2) - src3 = -(src1*src2+src3)
 9100 // "(-src1) * src2 - src3" has been idealized to "src2 * (-src1) - src3"
 9101 instruct mnaddF_reg_reg(regF dst, regF src1, regF src2, regF src3) %{
 9102   match(Set dst (FmaF (NegF src3) (Binary src1 (NegF src2))));
 9103   // Both NegF nodes are absorbed; FNMADDS receives the un-negated src2 and src3.
 9104   format %{ "FNMADDS $dst, $src1, $src2, $src3" %}
 9105   size(4);
 9106   ins_encode %{
 9107     assert(UseFMA, "Needs FMA instructions support.");
 9108     __ fnmadds($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister, $src3$$FloatRegister);
 9109   %}
 9110   ins_pipe(pipe_class_default);
 9111 %}
 9112 
 9113 // src1 * (-src2) - src3 = -(src1*src2+src3)
 9114 // "(-src1) * src2 - src3" has been idealized to "src2 * (-src1) - src3"
 9115 instruct mnaddD_reg_reg(regD dst, regD src1, regD src2, regD src3) %{
 9116   match(Set dst (FmaD (NegD src3) (Binary src1 (NegD src2))));
 9117   // Both NegD nodes are absorbed; FNMADD receives the un-negated src2 and src3.
 9118   format %{ "FNMADD  $dst, $src1, $src2, $src3" %}
 9119   size(4);
 9120   ins_encode %{
 9121     assert(UseFMA, "Needs FMA instructions support.");
 9122     __ fnmadd($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister, $src3$$FloatRegister);
 9123   %}
 9124   ins_pipe(pipe_class_default);
 9125 %}
 9126 
 9127 // src1 * src2 - src3
 9128 instruct msubF_reg_reg(regF dst, regF src1, regF src2, regF src3) %{
 9129   match(Set dst (FmaF (NegF src3) (Binary src1 src2)));
 9130   // The NegF on the addend is absorbed; FMSUBS receives the un-negated src3.
 9131   format %{ "FMSUBS  $dst, $src1, $src2, $src3" %}
 9132   size(4);
 9133   ins_encode %{
 9134     assert(UseFMA, "Needs FMA instructions support.");
 9135     __ fmsubs($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister, $src3$$FloatRegister);
 9136   %}
 9137   ins_pipe(pipe_class_default);
 9138 %}
 9139 
 9140 // src1 * src2 - src3
 9141 instruct msubD_reg_reg(regD dst, regD src1, regD src2, regD src3) %{
 9142   match(Set dst (FmaD (NegD src3) (Binary src1 src2)));
 9143   // Double-precision rule: only the addend src3 is negated in the matched tree.
 9144   format %{ "FMSUB   $dst, $src1, $src2, $src3" %}
 9145   size(4);
 9146   ins_encode %{
 9147     assert(UseFMA, "Needs FMA instructions support.");
 9148     __ fmsub($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister, $src3$$FloatRegister);
 9149   %}
 9150   ins_pipe(pipe_class_default);
 9151 %}
 9152 
 9153 
 9154 //----------Logical Instructions-----------------------------------------------
 9155 
 9156 // And Instructions
 9157 
 9158 // Register And: matches AndI of two int registers and emits one andr().
 9159 instruct andI_reg_reg(iRegIdst dst, iRegIsrc src1, iRegIsrc src2) %{
 9160   match(Set dst (AndI src1 src2));
 9161   format %{ "AND     $dst, $src1, $src2" %}
 9162   size(4);
 9163   ins_encode %{
 9164     __ andr($dst$$Register, $src1$$Register, $src2$$Register);
 9165   %}
 9166   ins_pipe(pipe_class_default);
 9167 %}
 9168 
 9169 // Left shifted Immediate And: the mask is an immIhi16 constant and only its upper halfword is encoded.
 9170 instruct andI_reg_immIhi16(iRegIdst dst, iRegIsrc src1, immIhi16  src2, flagsRegCR0 cr0) %{
 9171   match(Set dst (AndI src1 src2));
 9172   effect(KILL cr0); // cr0 is declared clobbered; NOTE(review): presumably because andis. always records into CR0
 9173   format %{ "ANDIS   $dst, $src1, $src2.hi" %}
 9174   size(4);
 9175   ins_encode %{
 9176     __ andis_($dst$$Register, $src1$$Register, (int)((unsigned short)(($src2$$constant & 0xFFFF0000) >> 16))); // bits 31..16 of the constant become the 16-bit immediate
 9177   %}
 9178   ins_pipe(pipe_class_default);
 9179 %}
 9180 
 9181 // Immediate And: int register AND-ed with a uimmI16 constant via andi_().
 9182 instruct andI_reg_uimm16(iRegIdst dst, iRegIsrc src1, uimmI16 src2, flagsRegCR0 cr0) %{
 9183   match(Set dst (AndI src1 src2));
 9184   effect(KILL cr0);
 9185   // cr0 is declared clobbered above; NOTE(review): presumably because andi. always records into CR0.
 9186   format %{ "ANDI    $dst, $src1, $src2" %}
 9187   size(4);
 9188   ins_encode %{
 9189     // FIXME: avoid andi_ ?
 9190     __ andi_($dst$$Register, $src1$$Register, $src2$$constant);
 9191   %}
 9192   ins_pipe(pipe_class_default);
 9193 %}
 9194 
 9195 // Immediate And where the immediate is a negative power of 2 (-2^k): implemented by clearing the low k bits.
 9196 instruct andI_reg_immInegpow2(iRegIdst dst, iRegIsrc src1, immInegpow2 src2) %{
 9197   match(Set dst (AndI src1 src2));
 9198   format %{ "ANDWI   $dst, $src1, $src2" %}
 9199   size(4);
 9200   ins_encode %{
 9201     __ clrrdi($dst$$Register, $src1$$Register, log2i_exact(-(juint)$src2$$constant)); // k = log2 of the negated (unsigned) mask
 9202   %}
 9203   ins_pipe(pipe_class_default);
 9204 %}
 9205 
 9206 instruct andI_reg_immIpow2minus1(iRegIdst dst, iRegIsrc src1, immIpow2minus1 src2) %{
 9207   match(Set dst (AndI src1 src2)); // int AND with a mask of the form 2^k - 1 (operand type immIpow2minus1)
 9208   format %{ "ANDWI   $dst, $src1, $src2" %}
 9209   size(4);
 9210   ins_encode %{
 9211     __ clrldi($dst$$Register, $src1$$Register, 64 - log2i_exact((juint)$src2$$constant + 1u)); // clear the upper 64 - k bits, keeping the low k bits
 9212   %}
 9213   ins_pipe(pipe_class_default);
 9214 %}
 9215 
 9216 instruct andI_reg_immIpowerOf2(iRegIdst dst, iRegIsrc src1, immIpowerOf2 src2) %{
 9217   match(Set dst (AndI src1 src2)); // int AND with a single-bit mask (operand type immIpowerOf2)
 9218   predicate(UseRotateAndMaskInstructionsPPC64); // rule is only available when this flag is set
 9219   format %{ "ANDWI   $dst, $src1, $src2" %}
 9220   size(4);
 9221   ins_encode %{
 9222     int bitpos = 31 - log2i_exact((juint)$src2$$constant); // bit index counted from the most significant bit of the word
 9223     __ rlwinm($dst$$Register, $src1$$Register, 0, bitpos, bitpos); // rotate by 0, mask begin == mask end == bitpos
 9224   %}
 9225   ins_pipe(pipe_class_default);
 9226 %}
 9227 
 9228 // Register And Long
 9229 instruct andL_reg_reg(iRegLdst dst, iRegLsrc src1, iRegLsrc src2) %{
 9230   match(Set dst (AndL src1 src2));
 9231   ins_cost(DEFAULT_COST);
 9232   // Long counterpart of andI_reg_reg: same andr() call, long register operands.
 9233   format %{ "AND     $dst, $src1, $src2 \t// long" %}
 9234   size(4);
 9235   ins_encode %{
 9236     __ andr($dst$$Register, $src1$$Register, $src2$$Register);
 9237   %}
 9238   ins_pipe(pipe_class_default);
 9239 %}
 9240 
 9241 // Immediate And long: long register AND-ed with a uimmL16 constant via andi_().
 9242 instruct andL_reg_uimm16(iRegLdst dst, iRegLsrc src1, uimmL16 src2, flagsRegCR0 cr0) %{
 9243   match(Set dst (AndL src1 src2));
 9244   effect(KILL cr0);
 9245   // cr0 is declared clobbered above; NOTE(review): presumably because andi. always records into CR0.
 9246   format %{ "ANDI    $dst, $src1, $src2 \t// long" %}
 9247   size(4);
 9248   ins_encode %{
 9249     // FIXME: avoid andi_ ?
 9250     __ andi_($dst$$Register, $src1$$Register, $src2$$constant);
 9251   %}
 9252   ins_pipe(pipe_class_default);
 9253 %}
 9254 
 9255 // Immediate And Long where the immediate is a negative power of 2 (-2^k): implemented by clearing the low k bits.
 9256 instruct andL_reg_immLnegpow2(iRegLdst dst, iRegLsrc src1, immLnegpow2 src2) %{
 9257   match(Set dst (AndL src1 src2));
 9258   format %{ "ANDDI   $dst, $src1, $src2" %}
 9259   size(4);
 9260   ins_encode %{
 9261     __ clrrdi($dst$$Register, $src1$$Register, log2i_exact(-(julong)$src2$$constant)); // k = log2 of the negated (unsigned 64-bit) mask
 9262   %}
 9263   ins_pipe(pipe_class_default);
 9264 %}
 9265 
 9266 instruct andL_reg_immLpow2minus1(iRegLdst dst, iRegLsrc src1, immLpow2minus1 src2) %{
 9267   match(Set dst (AndL src1 src2)); // long AND with a mask of the form 2^k - 1 (operand type immLpow2minus1)
 9268   format %{ "ANDDI   $dst, $src1, $src2" %}
 9269   size(4);
 9270   ins_encode %{
 9271     __ clrldi($dst$$Register, $src1$$Register, 64 - log2i_exact((julong)$src2$$constant + 1ull)); // clear the upper 64 - k bits, keeping the low k bits
 9272   %}
 9273   ins_pipe(pipe_class_default);
 9274 %}
 9275 
 9276 // AndL + ConvL2I.
 9277 instruct convL2I_andL_reg_immLpow2minus1(iRegIdst dst, iRegLsrc src1, immLpow2minus1 src2) %{
 9278   match(Set dst (ConvL2I (AndL src1 src2)));
 9279   ins_cost(DEFAULT_COST);
 9280   // Same clrldi encoding as andL_reg_immLpow2minus1; only the destination is an int register.
 9281   format %{ "ANDDI   $dst, $src1, $src2 \t// long + l2i" %}
 9282   size(4);
 9283   ins_encode %{
 9284     __ clrldi($dst$$Register, $src1$$Register, 64 - log2i_exact((julong)$src2$$constant + 1ull));
 9285   %}
 9286   ins_pipe(pipe_class_default);
 9287 %}
 9288 
 9289 // Or Instructions
 9290 
 9291 // Register Or: matches OrI of two int registers and emits one or_unchecked().
 9292 instruct orI_reg_reg(iRegIdst dst, iRegIsrc src1, iRegIsrc src2) %{
 9293   match(Set dst (OrI src1 src2));
 9294   format %{ "OR      $dst, $src1, $src2" %}
 9295   size(4);
 9296   ins_encode %{
 9297     __ or_unchecked($dst$$Register, $src1$$Register, $src2$$Register);
 9298   %}
 9299   ins_pipe(pipe_class_default);
 9300 %}
 9301 
 9302 // Expand does not work with above instruct. (??)
 9303 instruct orI_reg_reg_2(iRegIdst dst, iRegIsrc src1, iRegIsrc src2) %{
 9304   // no match-rule
 9305   effect(DEF dst, USE src1, USE src2); // match-less twin of orI_reg_reg; referenced from the expand of tree_orI_orI_orI_reg_reg_Ex
 9306   format %{ "OR      $dst, $src1, $src2" %}
 9307   size(4);
 9308   ins_encode %{
 9309     __ or_unchecked($dst$$Register, $src1$$Register, $src2$$Register);
 9310   %}
 9311   ins_pipe(pipe_class_default);
 9312 %}
 9313 
 9314 instruct tree_orI_orI_orI_reg_reg_Ex(iRegIdst dst, iRegIsrc src1, iRegIsrc src2, iRegIsrc src3, iRegIsrc src4) %{
 9315   match(Set dst (OrI (OrI (OrI src1 src2) src3) src4));
 9316   ins_cost(DEFAULT_COST*3);
 9317   // Rewrites the chain ((src1|src2)|src3)|src4 as (src1|src2)|(src3|src4).
 9318   expand %{
 9319     // FIXME: we should do this in the ideal world.
 9320     iRegIdst tmp1;
 9321     iRegIdst tmp2;
 9322     orI_reg_reg(tmp1, src1, src2);
 9323     orI_reg_reg_2(tmp2, src3, src4); // Adlc complains about orI_reg_reg.
 9324     orI_reg_reg(dst, tmp1, tmp2);
 9325   %}
 9326 %}
 9327 
 9328 // Immediate Or: int register OR-ed with a uimmI16 constant via ori().
 9329 instruct orI_reg_uimm16(iRegIdst dst, iRegIsrc src1, uimmI16 src2) %{
 9330   match(Set dst (OrI src1 src2));
 9331   format %{ "ORI     $dst, $src1, $src2" %}
 9332   size(4);
 9333   ins_encode %{
 9334     __ ori($dst$$Register, $src1$$Register, ($src2$$constant) & 0xFFFF); // only the low 16 bits of the constant are passed on
 9335   %}
 9336   ins_pipe(pipe_class_default);
 9337 %}
 9338 
 9339 // Register Or Long
 9340 instruct orL_reg_reg(iRegLdst dst, iRegLsrc src1, iRegLsrc src2) %{
 9341   match(Set dst (OrL src1 src2));
 9342   ins_cost(DEFAULT_COST);
 9343   // Long counterpart of orI_reg_reg: same or_unchecked() call, long register operands.
 9344   size(4);
 9345   format %{ "OR      $dst, $src1, $src2 \t// long" %}
 9346   ins_encode %{
 9347     __ or_unchecked($dst$$Register, $src1$$Register, $src2$$Register);
 9348   %}
 9349   ins_pipe(pipe_class_default);
 9350 %}
 9351 
 9352 // OrL + ConvL2I.
 9353 instruct orI_regL_regL(iRegIdst dst, iRegLsrc src1, iRegLsrc src2) %{
 9354   match(Set dst (ConvL2I (OrL src1 src2)));
 9355   ins_cost(DEFAULT_COST);
 9356   // The conversion is folded into the OR: the encoding is the same single or_unchecked() as orL_reg_reg.
 9357   format %{ "OR      $dst, $src1, $src2 \t// long + l2i" %}
 9358   size(4);
 9359   ins_encode %{
 9360     __ or_unchecked($dst$$Register, $src1$$Register, $src2$$Register);
 9361   %}
 9362   ins_pipe(pipe_class_default);
 9363 %}
 9364 
 9365 // Immediate Or long
 9366 instruct orL_reg_uimm16(iRegLdst dst, iRegLsrc src1, uimmL16 con) %{
 9367   match(Set dst (OrL src1 con));
 9368   ins_cost(DEFAULT_COST);
 9369   // Long register OR-ed with a uimmL16 constant via ori().
 9370   format %{ "ORI     $dst, $src1, $con \t// long" %}
 9371   size(4);
 9372   ins_encode %{
 9373     __ ori($dst$$Register, $src1$$Register, ($con$$constant) & 0xFFFF); // only the low 16 bits of the constant are passed on
 9374   %}
 9375   ins_pipe(pipe_class_default);
 9376 %}
 9377 
 9378 // Xor Instructions
 9379 
 9380 // Register Xor: matches XorI of two int registers and emits one xorr().
 9381 instruct xorI_reg_reg(iRegIdst dst, iRegIsrc src1, iRegIsrc src2) %{
 9382   match(Set dst (XorI src1 src2));
 9383   format %{ "XOR     $dst, $src1, $src2" %}
 9384   size(4);
 9385   ins_encode %{
 9386     __ xorr($dst$$Register, $src1$$Register, $src2$$Register);
 9387   %}
 9388   ins_pipe(pipe_class_default);
 9389 %}
 9390 
 9391 // Expand does not work with above instruct. (??)
 9392 instruct xorI_reg_reg_2(iRegIdst dst, iRegIsrc src1, iRegIsrc src2) %{
 9393   // no match-rule
 9394   effect(DEF dst, USE src1, USE src2); // match-less twin of xorI_reg_reg; referenced from the expand of tree_xorI_xorI_xorI_reg_reg_Ex
 9395   format %{ "XOR     $dst, $src1, $src2" %}
 9396   size(4);
 9397   ins_encode %{
 9398     __ xorr($dst$$Register, $src1$$Register, $src2$$Register);
 9399   %}
 9400   ins_pipe(pipe_class_default);
 9401 %}
 9402 
 9403 instruct tree_xorI_xorI_xorI_reg_reg_Ex(iRegIdst dst, iRegIsrc src1, iRegIsrc src2, iRegIsrc src3, iRegIsrc src4) %{
 9404   match(Set dst (XorI (XorI (XorI src1 src2) src3) src4));
 9405   ins_cost(DEFAULT_COST*3);
 9406   // Rewrites the chain ((src1^src2)^src3)^src4 as (src1^src2)^(src3^src4).
 9407   expand %{
 9408     // FIXME: we should do this in the ideal world.
 9409     iRegIdst tmp1;
 9410     iRegIdst tmp2;
 9411     xorI_reg_reg(tmp1, src1, src2);
 9412     xorI_reg_reg_2(tmp2, src3, src4); // Adlc complains about xorI_reg_reg.
 9413     xorI_reg_reg(dst, tmp1, tmp2);
 9414   %}
 9415 %}
 9416 
 9417 // Immediate Xor: int register XOR-ed with a uimmI16 constant via xori().
 9418 instruct xorI_reg_uimm16(iRegIdst dst, iRegIsrc src1, uimmI16 src2) %{
 9419   match(Set dst (XorI src1 src2));
 9420   format %{ "XORI    $dst, $src1, $src2" %}
 9421   size(4);
 9422   ins_encode %{
 9423     __ xori($dst$$Register, $src1$$Register, $src2$$constant);
 9424   %}
 9425   ins_pipe(pipe_class_default);
 9426 %}
 9427 
 9428 // Register Xor Long
 9429 instruct xorL_reg_reg(iRegLdst dst, iRegLsrc src1, iRegLsrc src2) %{
 9430   match(Set dst (XorL src1 src2));
 9431   ins_cost(DEFAULT_COST);
 9432   // Long counterpart of xorI_reg_reg: same xorr() call, long register operands.
 9433   format %{ "XOR     $dst, $src1, $src2 \t// long" %}
 9434   size(4);
 9435   ins_encode %{
 9436     __ xorr($dst$$Register, $src1$$Register, $src2$$Register);
 9437   %}
 9438   ins_pipe(pipe_class_default);
 9439 %}
 9440 
 9441 // XorL + ConvL2I.
 9442 instruct xorI_regL_regL(iRegIdst dst, iRegLsrc src1, iRegLsrc src2) %{
 9443   match(Set dst (ConvL2I (XorL src1 src2)));
 9444   ins_cost(DEFAULT_COST);
 9445   // The conversion is folded into the XOR: the encoding is the same single xorr() as xorL_reg_reg.
 9446   format %{ "XOR     $dst, $src1, $src2 \t// long + l2i" %}
 9447   size(4);
 9448   ins_encode %{
 9449     __ xorr($dst$$Register, $src1$$Register, $src2$$Register);
 9450   %}
 9451   ins_pipe(pipe_class_default);
 9452 %}
 9453 
 9454 // Immediate Xor Long
 9455 instruct xorL_reg_uimm16(iRegLdst dst, iRegLsrc src1, uimmL16 src2) %{
 9456   match(Set dst (XorL src1 src2));
 9457   ins_cost(DEFAULT_COST);
 9458   // Long register XOR-ed with a uimmL16 constant via xori().
 9459   format %{ "XORI    $dst, $src1, $src2 \t// long" %}
 9460   size(4);
 9461   ins_encode %{
 9462     __ xori($dst$$Register, $src1$$Register, $src2$$constant);
 9463   %}
 9464   ins_pipe(pipe_class_default);
 9465 %}
 9466 
 9467 instruct notI_reg(iRegIdst dst, iRegIsrc src1, immI_minus1 src2) %{
 9468   match(Set dst (XorI src1 src2));
 9469   ins_cost(DEFAULT_COST);
 9470   // XorI with the constant -1 is a bitwise NOT; src2 only takes part in matching and is not encoded.
 9471   format %{ "NOT     $dst, $src1 ($src2)" %}
 9472   size(4);
 9473   ins_encode %{
 9474     __ nor($dst$$Register, $src1$$Register, $src1$$Register); // nor of src1 with itself
 9475   %}
 9476   ins_pipe(pipe_class_default);
 9477 %}
 9478 
 9479 instruct notL_reg(iRegLdst dst, iRegLsrc src1, immL_minus1 src2) %{
 9480   match(Set dst (XorL src1 src2));
 9481   ins_cost(DEFAULT_COST);
 9482   // XorL with the constant -1 is a bitwise NOT; src2 only takes part in matching and is not encoded.
 9483   format %{ "NOT     $dst, $src1 ($src2) \t// long" %}
 9484   size(4);
 9485   ins_encode %{
 9486     __ nor($dst$$Register, $src1$$Register, $src1$$Register); // nor of src1 with itself
 9487   %}
 9488   ins_pipe(pipe_class_default);
 9489 %}
 9490 
 9491 // And-complement: matches (src1 ^ -1) & src3, i.e. src3 AND NOT src1.
 9492 instruct andcI_reg_reg(iRegIdst dst, iRegIsrc src1, immI_minus1 src2, iRegIsrc src3) %{
 9493   match(Set dst (AndI (XorI src1 src2) src3));
 9494   ins_cost(DEFAULT_COST);
 9495   // src2 (the -1 constant) is not passed to the encoding; enc_andc receives (dst, src3, src1).
 9496   format %{ "ANDW    $dst, xori($src1, $src2), $src3" %}
 9497   size(4);
 9498   ins_encode( enc_andc(dst, src3, src1) );
 9499   ins_pipe(pipe_class_default);
 9500 %}
 9501 
 9502 // And-complement
 9503 instruct andcL_reg_reg(iRegLdst dst, iRegLsrc src1, iRegLsrc src2) %{
 9504   // no match-rule, false predicate
 9505   effect(DEF dst, USE src1, USE src2);
 9506   predicate(false);
 9507   // Helper rule on long registers; NOTE(review): presumably only instantiated from expand rules outside this section.
 9508   format %{ "ANDC    $dst, $src1, $src2" %}
 9509   size(4);
 9510   ins_encode %{
 9511     __ andc($dst$$Register, $src1$$Register, $src2$$Register);
 9512   %}
 9513   ins_pipe(pipe_class_default);
 9514 %}
 9515 
 9516 //----------Moves between int/long and float/double----------------------------
 9517 //
 9518 // The following rules move values from int/long registers/stack-locations
 9519 // to float/double registers/stack-locations and vice versa, without doing any
 9520 // conversions. These rules are used to implement the bit-conversion methods
 9521 // of java.lang.Float etc., e.g.
 9522 //   int   floatToIntBits(float value)
 9523 //   float intBitsToFloat(int bits)
 9524 
 9525 instruct moveL2D_reg(regD dst, iRegLsrc src) %{
 9526   match(Set dst (MoveL2D src));
 9527   // Register-to-register form: long register to double register with a single mtfprd().
 9528   format %{ "MTFPRD  $dst, $src" %}
 9529   size(4);
 9530   ins_encode %{
 9531     __ mtfprd($dst$$FloatRegister, $src$$Register);
 9532   %}
 9533   ins_pipe(pipe_class_default);
 9534 %}
 9535 
 9536 instruct moveI2D_reg(regD dst, iRegIsrc src) %{
 9537   // no match-rule, false predicate
 9538   effect(DEF dst, USE src);
 9539   predicate(false);
 9540   // Helper rule: moves an int register into a double register with a single mtfprwa().
 9541   format %{ "MTFPRWA $dst, $src" %}
 9542   size(4);
 9543   ins_encode %{
 9544     __ mtfprwa($dst$$FloatRegister, $src$$Register);
 9545   %}
 9546   ins_pipe(pipe_class_default);
 9547 %}
 9548 
 9549 //---------- Chain stack slots between similar types --------
 9550 
 9551 // These are needed so that the rules below can match.
 9552 
 9553 // Load integer from stack slot
 9554 instruct stkI_to_regI(iRegIdst dst, stackSlotI src) %{
 9555   match(Set dst src);
 9556   ins_cost(MEMORY_REF_COST);
 9557   // Chain rule from stackSlotI to an int register; encoded through enc_lwz.
 9558   format %{ "LWZ     $dst, $src" %}
 9559   size(4);
 9560   ins_encode( enc_lwz(dst, src) );
 9561   ins_pipe(pipe_class_memory);
 9562 %}
 9563 
 9564 // Store integer to stack slot
 9565 instruct regI_to_stkI(stackSlotI dst, iRegIsrc src) %{
 9566   match(Set dst src);
 9567   ins_cost(MEMORY_REF_COST);
 9568   // Chain rule from an int register to stackSlotI; encoded through enc_stw.
 9569   format %{ "STW     $src, $dst \t// stk" %}
 9570   size(4);
 9571   ins_encode( enc_stw(src, dst) ); // rs=rt
 9572   ins_pipe(pipe_class_memory);
 9573 %}
 9574 
 9575 // Load long from stack slot
 9576 instruct stkL_to_regL(iRegLdst dst, stackSlotL src) %{
 9577   match(Set dst src);
 9578   ins_cost(MEMORY_REF_COST);
 9579   // Chain rule from stackSlotL to a long register; encoded through enc_ld.
 9580   format %{ "LD      $dst, $src \t// long" %}
 9581   size(4);
 9582   ins_encode( enc_ld(dst, src) );
 9583   ins_pipe(pipe_class_memory);
 9584 %}
 9585 
 9586 // Store long to stack slot
 9587 instruct regL_to_stkL(stackSlotL dst, iRegLsrc src) %{
 9588   match(Set dst src);
 9589   ins_cost(MEMORY_REF_COST);
 9590   // Chain rule from a long register to stackSlotL; encoded through enc_std.
 9591   format %{ "STD     $src, $dst \t// long" %}
 9592   size(4);
 9593   ins_encode( enc_std(src, dst) ); // rs=rt
 9594   ins_pipe(pipe_class_memory);
 9595 %}
 9596 
 9597 //----------Moves between int and float
 9598 
 9599 // Move float value from float stack-location to integer register.
 9600 instruct moveF2I_stack_reg(iRegIdst dst, stackSlotF src) %{
 9601   match(Set dst (MoveF2I src));
 9602   ins_cost(MEMORY_REF_COST);
 9603   // The float slot is read with the integer load encoding (enc_lwz), so the bits are not converted.
 9604   format %{ "LWZ     $dst, $src \t// MoveF2I" %}
 9605   size(4);
 9606   ins_encode( enc_lwz(dst, src) );
 9607   ins_pipe(pipe_class_memory);
 9608 %}
 9609 
 9610 // Move float value from float register to integer stack-location.
 9611 instruct moveF2I_reg_stack(stackSlotI dst, regF src) %{
 9612   match(Set dst (MoveF2I src));
 9613   ins_cost(MEMORY_REF_COST);
 9614   // The float register is written to the int slot with the float store encoding (enc_stfs).
 9615   format %{ "STFS    $src, $dst \t// MoveF2I" %}
 9616   size(4);
 9617   ins_encode( enc_stfs(src, dst) );
 9618   ins_pipe(pipe_class_memory);
 9619 %}
 9620 
 9621 // Move integer value from integer stack-location to float register.
 9622 instruct moveI2F_stack_reg(regF dst, stackSlotI src) %{
 9623   match(Set dst (MoveI2F src));
 9624   ins_cost(MEMORY_REF_COST);
 9625   // Unlike its siblings, this rule spells out the load inline instead of using an enc_* class.
 9626   format %{ "LFS     $dst, $src \t// MoveI2F" %}
 9627   size(4);
 9628   ins_encode %{
 9629     int Idisp = $src$$disp + frame_slots_bias($src$$base, ra_); // slot displacement plus the bias returned for this base register
 9630     __ lfs($dst$$FloatRegister, Idisp, $src$$base$$Register);
 9631   %}
 9632   ins_pipe(pipe_class_memory);
 9633 %}
 9634 
 9635 // Move integer value from integer register to float stack-location.
 9636 instruct moveI2F_reg_stack(stackSlotF dst, iRegIsrc src) %{
 9637   match(Set dst (MoveI2F src));
 9638   ins_cost(MEMORY_REF_COST);
 9639   // The int register is written to the float slot with the integer store encoding (enc_stw).
 9640   format %{ "STW     $src, $dst \t// MoveI2F" %}
 9641   size(4);
 9642   ins_encode( enc_stw(src, dst) );
 9643   ins_pipe(pipe_class_memory);
 9644 %}
 9645 
 9646 
 9647 //----------Moves between long and double
 9648 
 9649 // Move double value from double stack-location to long register.
 9650 instruct moveD2L_stack_reg(iRegLdst dst, stackSlotD src) %{
 9651   match(Set dst (MoveD2L src));
 9652   ins_cost(MEMORY_REF_COST);
 9653   size(4);
 9654   format %{ "LD      $dst, $src \t// MoveD2L" %}
 9655   ins_encode( enc_ld(dst, src) ); // the double slot is read with the 64-bit integer load encoding
 9656   ins_pipe(pipe_class_memory);
 9657 %}
 9658 
 9659 // Move double value from double register to long stack-location.
 9660 instruct moveD2L_reg_stack(stackSlotL dst, regD src) %{
 9661   match(Set dst (MoveD2L src));
 9662   effect(DEF dst, USE src);
 9663   ins_cost(MEMORY_REF_COST);
 9664   // NOTE(review): the effect line is given in addition to the match rule; the sibling moveF2I_reg_stack has none.
 9665   format %{ "STFD    $src, $dst \t// MoveD2L" %}
 9666   size(4);
 9667   ins_encode( enc_stfd(src, dst) );
 9668   ins_pipe(pipe_class_memory);
 9669 %}
 9670 
 9671 
 9672 //----------Register Move Instructions-----------------------------------------
 9673 
 9674 // Replicate for Superword
 9675 
 9676 instruct moveReg(iRegLdst dst, iRegIsrc src) %{
 9677   predicate(false);
 9678   effect(DEF dst, USE src);
 9679   // Match-less helper: copies an int source register into a long destination register.
 9680   format %{ "MR      $dst, $src \t// replicate " %}
 9681   // variable size, 0 or 4.
 9682   ins_encode %{
 9683     __ mr_if_needed($dst$$Register, $src$$Register);
 9684   %}
 9685   ins_pipe(pipe_class_default);
 9686 %}
 9687 
 9688 //----------Cast instructions (Java-level type cast)---------------------------
 9689 
 9690 // Cast Long to Pointer for unsafe natives.
 9691 instruct castX2P(iRegPdst dst, iRegLsrc src) %{
 9692   match(Set dst (CastX2P src));
 9693   // The cast itself needs no computation; the encoding is just mr_if_needed().
 9694   format %{ "MR      $dst, $src \t// Long->Ptr" %}
 9695   // variable size, 0 or 4.
 9696   ins_encode %{
 9697     __ mr_if_needed($dst$$Register, $src$$Register);
 9698   %}
 9699  ins_pipe(pipe_class_default);
 9700 %}
 9701 
 9702 // Cast Pointer to Long for unsafe natives.
 9703 instruct castP2X(iRegLdst dst, iRegP_N2P src) %{
 9704   match(Set dst (CastP2X src));
 9705   // The cast itself needs no computation; the encoding is just mr_if_needed().
 9706   format %{ "MR      $dst, $src \t// Ptr->Long" %}
 9707   // variable size, 0 or 4.
 9708   ins_encode %{
 9709     __ mr_if_needed($dst$$Register, $src$$Register);
 9710   %}
 9711   ins_pipe(pipe_class_default);
 9712 %}
 9713 
 9714 instruct castPP(iRegPdst dst) %{
 9715   match(Set dst (CastPP dst)); // dst is both input and result of the pointer cast
 9716   format %{ " -- \t// castPP of $dst" %}
 9717   size(0); // no code is emitted: size 0 and an empty encoding
 9718   ins_encode( /*empty*/ );
 9719   ins_pipe(pipe_class_default);
 9720 %}
 9721 
 9722 instruct castII(iRegIdst dst) %{
 9723   match(Set dst (CastII dst)); // dst is both input and result of the int cast
 9724   format %{ " -- \t// castII of $dst" %}
 9725   size(0); // no code is emitted: size 0 and an empty encoding
 9726   ins_encode( /*empty*/ );
 9727   ins_pipe(pipe_class_default);
 9728 %}
 9729 
 9730 instruct castLL(iRegLdst dst) %{
 9731   match(Set dst (CastLL dst)); // dst is both input and result of the long cast
 9732   format %{ " -- \t// castLL of $dst" %}
 9733   size(0); // no code is emitted: size 0 and an empty encoding
 9734   ins_encode( /*empty*/ );
 9735   ins_pipe(pipe_class_default);
 9736 %}
 9737 
 9738 instruct castFF(regF dst) %{
 9739   match(Set dst (CastFF dst)); // dst is both input and result of the float cast
 9740   format %{ " -- \t// castFF of $dst" %}
 9741   size(0); // no code is emitted: size 0 and an empty encoding
 9742   ins_encode( /*empty*/ );
 9743   ins_pipe(pipe_class_default);
 9744 %}
 9745 
 9746 instruct castDD(regD dst) %{
 9747   match(Set dst (CastDD dst)); // dst is both input and result of the double cast
 9748   format %{ " -- \t// castDD of $dst" %}
 9749   size(0); // no code is emitted: size 0 and an empty encoding
 9750   ins_encode( /*empty*/ );
 9751   ins_pipe(pipe_class_default);
 9752 %}
 9753 
 9754 instruct castVV8(iRegLdst dst) %{
 9755   match(Set dst (CastVV dst)); // CastVV on a long-register operand; NOTE(review): the "8" presumably means 8-byte vectors - confirm
 9756   format %{ " -- \t// castVV of $dst" %}
 9757   size(0); // no code is emitted: size 0 and an empty encoding
 9758   ins_encode( /*empty*/ );
 9759   ins_pipe(pipe_class_default);
 9760 %}
 9761 
 9762 instruct castVV16(vecX dst) %{
 9763   match(Set dst (CastVV dst)); // CastVV on a vecX operand; NOTE(review): the "16" presumably means 16-byte vectors - confirm
 9764   format %{ " -- \t// castVV of $dst" %}
 9765   size(0); // no code is emitted: size 0 and an empty encoding
 9766   ins_encode( /*empty*/ );
 9767   ins_pipe(pipe_class_default);
 9768 %}
 9769 
 9770 instruct checkCastPP(iRegPdst dst) %{
 9771   match(Set dst (CheckCastPP dst)); // dst is both input and result of the checked pointer cast
 9772   format %{ " -- \t// checkcastPP of $dst" %}
 9773   size(0); // no code is emitted: size 0 and an empty encoding
 9774   ins_encode( /*empty*/ );
 9775   ins_pipe(pipe_class_default);
 9776 %}
 9777 
 9778 //----------Convert instructions-----------------------------------------------
 9779 
 9780 // Convert to boolean.
 9781 
 9782 // int_to_bool(src) : { 1   if src != 0
 9783 //                    { 0   else
 9784 //
 9785 // strategy:
 9786 // 1) Count leading zeros of 32 bit-value src,
 9787 //    this returns 32 (0b10.0000) iff src == 0 and <32 otherwise.
 9788 // 2) Shift 5 bits to the right, result is 0b1 iff src == 0, 0b0 otherwise.
 9789 // 3) Xori the result to get 0b1 if src != 0 and 0b0 if src == 0.
 9790 
 9791 // convI2Bool: count-leading-zeros variant, expanded into three helper rules.
 9792 instruct convI2Bool_reg__cntlz_Ex(iRegIdst dst, iRegIsrc src) %{
 9793   match(Set dst (Conv2B src));
 9794   predicate(UseCountLeadingZerosInstructionsPPC64);
 9795   ins_cost(DEFAULT_COST);
 9796   // tmp1 = cntlz(src); tmp2 = tmp1 >>> 5; dst = tmp2 ^ 1.
 9797   expand %{
 9798     immI shiftAmount %{ 0x5 %}
 9799     uimmI16 mask %{ 0x1 %}
 9800     iRegIdst tmp1;
 9801     iRegIdst tmp2;
 9802     countLeadingZerosI(tmp1, src);
 9803     urShiftI_reg_imm(tmp2, tmp1, shiftAmount);
 9804     xorI_reg_uimm16(dst, tmp2, mask);
 9805   %}
 9806 %}
 9807 
 9808 instruct convI2Bool_reg__cmove(iRegIdst dst, iRegIsrc src, flagsReg crx) %{
 9809   match(Set dst (Conv2B src));
 9810   effect(TEMP crx);
 9811   predicate(!UseCountLeadingZerosInstructionsPPC64);
 9812   ins_cost(DEFAULT_COST);
 9813   // Compare-and-branch fallback used when the cntlz variant is disabled; crx is a temporary condition register.
 9814   format %{ "CMPWI   $crx, $src, #0 \t// convI2B\n\t"
 9815             "LI      $dst, #0\n\t"
 9816             "BEQ     $crx, done\n\t"
 9817             "LI      $dst, #1\n"
 9818             "done:" %}
 9819   size(16);
 9820   ins_encode( enc_convI2B_regI__cmove(dst, src, crx, 0x0, 0x1) ); // 0x0 / 0x1 are the two result constants shown in the format above
 9821   ins_pipe(pipe_class_compare);
 9822 %}
 9823 
 9824 // ConvI2B + XorI
 9825 instruct xorI_convI2Bool_reg_immIvalue1__cntlz_Ex(iRegIdst dst, iRegIsrc src, immI_1 mask) %{
 9826   match(Set dst (XorI (Conv2B src) mask));
 9827   predicate(UseCountLeadingZerosInstructionsPPC64);
 9828   ins_cost(DEFAULT_COST);
 9829   // Inverted boolean: same as convI2Bool_reg__cntlz_Ex without the final xor, i.e. dst = cntlz(src) >>> 5.
 9830   expand %{
 9831     immI shiftAmount %{ 0x5 %}
 9832     iRegIdst tmp1;
 9833     countLeadingZerosI(tmp1, src);
 9834     urShiftI_reg_imm(dst, tmp1, shiftAmount);
 9835   %}
 9836 %}
 9837 
 9838 instruct xorI_convI2Bool_reg_immIvalue1__cmove(iRegIdst dst, iRegIsrc src, flagsReg crx, immI_1 mask) %{
 9839   match(Set dst (XorI (Conv2B src) mask));
 9840   effect(TEMP crx);
 9841   predicate(!UseCountLeadingZerosInstructionsPPC64);
 9842   ins_cost(DEFAULT_COST);
 9843   // Compare-and-branch fallback for the inverted boolean; the result constants are swapped relative to convI2Bool_reg__cmove.
 9844   format %{ "CMPWI   $crx, $src, #0 \t// Xor(convI2B($src), $mask)\n\t"
 9845             "LI      $dst, #1\n\t"
 9846             "BEQ     $crx, done\n\t"
 9847             "LI      $dst, #0\n"
 9848             "done:" %}
 9849   size(16);
 9850   ins_encode( enc_convI2B_regI__cmove(dst, src, crx, 0x1, 0x0) ); // mask is not passed on; the xor is folded into the constants
 9851   ins_pipe(pipe_class_compare);
 9852 %}
 9853 
 9854 // AndI 0b0..010..0 + ConvI2B
 9855 instruct convI2Bool_andI_reg_immIpowerOf2(iRegIdst dst, iRegIsrc src, immIpowerOf2 mask) %{
 9856   match(Set dst (Conv2B (AndI src mask)));
 9857   predicate(UseRotateAndMaskInstructionsPPC64);
 9858   ins_cost(DEFAULT_COST);
 9859   // Testing a single bit: that bit is rotated into the lowest position and everything else is masked away.
 9860   format %{ "RLWINM  $dst, $src, $mask \t// convI2B(AndI($src, $mask))" %}
 9861   size(4);
 9862   ins_encode %{
 9863     __ rlwinm($dst$$Register, $src$$Register, 32 - log2i_exact((juint)($mask$$constant)), 31, 31); // rotate left by 32 - k, keep only bit 31 (the least significant bit)
 9864   %}
 9865   ins_pipe(pipe_class_default);
 9866 %}
 9867 
 9868 // Convert pointer to boolean.
 9869 //
 9870 // ptr_to_bool(src) : { 1   if src != 0
 9871 //                    { 0   else
 9872 //
 9873 // strategy:
 9874 // 1) Count leading zeros of 64 bit-value src,
 9875 //    this returns 64 (0b100.0000) iff src == 0 and <64 otherwise.
 9876 // 2) Shift 6 bits to the right, result is 0b1 iff src == 0, 0b0 otherwise.
 9877 // 3) Xori the result to get 0b1 if src != 0 and 0b0 if src == 0.
 9878 
 9879 // ConvP2B: count-leading-zeros variant, expanded into three helper rules.
 9880 instruct convP2Bool_reg__cntlz_Ex(iRegIdst dst, iRegP_N2P src) %{
 9881   match(Set dst (Conv2B src));
 9882   predicate(UseCountLeadingZerosInstructionsPPC64);
 9883   ins_cost(DEFAULT_COST);
 9884   // tmp1 = cntlz(src); tmp2 = tmp1 >>> 6; dst = tmp2 ^ 1.
 9885   expand %{
 9886     immI shiftAmount %{ 0x6 %}
 9887     uimmI16 mask %{ 0x1 %}
 9888     iRegIdst tmp1;
 9889     iRegIdst tmp2;
 9890     countLeadingZerosP(tmp1, src);
 9891     urShiftI_reg_imm(tmp2, tmp1, shiftAmount);
 9892     xorI_reg_uimm16(dst, tmp2, mask);
 9893   %}
 9894 %}
 9895 
 9896 instruct convP2Bool_reg__cmove(iRegIdst dst, iRegP_N2P src, flagsReg crx) %{
 9897   match(Set dst (Conv2B src));
 9898   effect(TEMP crx);
 9899   predicate(!UseCountLeadingZerosInstructionsPPC64);
 9900   ins_cost(DEFAULT_COST);
 9901   // Compare-and-branch fallback used when the cntlz variant is disabled; crx is a temporary condition register.
 9902   format %{ "CMPDI   $crx, $src, #0 \t// convP2B\n\t"
 9903             "LI      $dst, #0\n\t"
 9904             "BEQ     $crx, done\n\t"
 9905             "LI      $dst, #1\n"
 9906             "done:" %}
 9907   size(16);
 9908   ins_encode( enc_convP2B_regP__cmove(dst, src, crx, 0x0, 0x1) ); // 0x0 / 0x1 are the two result constants shown in the format above
 9909   ins_pipe(pipe_class_compare);
 9910 %}
 9911 
 9912 // ConvP2B + XorI
 9913 instruct xorI_convP2Bool_reg__cntlz_Ex(iRegIdst dst, iRegP_N2P src, immI_1 mask) %{
 9914   match(Set dst (XorI (Conv2B src) mask));
 9915   predicate(UseCountLeadingZerosInstructionsPPC64);
 9916   ins_cost(DEFAULT_COST);
 9917   // Inverted boolean: same as convP2Bool_reg__cntlz_Ex without the final xor, i.e. dst = cntlz(src) >>> 6.
 9918   expand %{
 9919     immI shiftAmount %{ 0x6 %}
 9920     iRegIdst tmp1;
 9921     countLeadingZerosP(tmp1, src);
 9922     urShiftI_reg_imm(dst, tmp1, shiftAmount);
 9923   %}
 9924 %}
 9925 
 9926 instruct xorI_convP2Bool_reg_immIvalue1__cmove(iRegIdst dst, iRegP_N2P src, flagsReg crx, immI_1 mask) %{
 9927   match(Set dst (XorI (Conv2B src) mask));
 9928   effect(TEMP crx);
 9929   predicate(!UseCountLeadingZerosInstructionsPPC64);
 9930   ins_cost(DEFAULT_COST);
 9931   // Compare-and-branch fallback for the inverted boolean; the result constants are swapped relative to convP2Bool_reg__cmove.
 9932   format %{ "CMPDI   $crx, $src, #0 \t// XorI(convP2B($src), $mask)\n\t"
 9933             "LI      $dst, #1\n\t"
 9934             "BEQ     $crx, done\n\t"
 9935             "LI      $dst, #0\n"
 9936             "done:" %}
 9937   size(16);
 9938   ins_encode( enc_convP2B_regP__cmove(dst, src, crx, 0x1, 0x0) ); // mask is not passed on; the xor is folded into the constants
 9939   ins_pipe(pipe_class_compare);
 9940 %}
 9941 
 9942 // if src1 < src2, return -1 else return 0
 9943 instruct cmpLTMask_reg_reg_Ex(iRegIdst dst, iRegIsrc src1, iRegIsrc src2) %{
 9944   match(Set dst (CmpLTMask src1 src2));
 9945   ins_cost(DEFAULT_COST*4);
 9946   // Expanded into four helper rules: two sign extensions, a 64-bit subtract and a sign mask of the difference.
 9947   expand %{
 9948     iRegLdst src1s;
 9949     iRegLdst src2s;
 9950     iRegLdst diff;
 9951     convI2L_reg(src1s, src1); // Ensure proper sign extension.
 9952     convI2L_reg(src2s, src2); // Ensure proper sign extension.
 9953     subL_reg_reg(diff, src1s, src2s);
 9954     // The difference can need more than 32 bits, so the sign mask is taken from the 64-bit value (signmask64I_regL).
 9955     signmask64I_regL(dst, diff);
 9956   %}
 9957 %}
 9958 
 9959 instruct cmpLTMask_reg_immI0(iRegIdst dst, iRegIsrc src1, immI_0 src2) %{
 9960   match(Set dst (CmpLTMask src1 src2)); // if src1 < src2, return -1 else return 0
 9961   format %{ "SRAWI   $dst, $src1, #31 \t// CmpLTMask($src1, $src2)" %}
 9962   size(4);
 9963   ins_encode %{
 9964     __ srawi($dst$$Register, $src1$$Register, 0x1f); // comparing against 0: arithmetic shift by 31 replicates the sign bit of src1
 9965   %}
 9966   ins_pipe(pipe_class_default);
 9967 %}
 9968 
 9969 //----------Arithmetic Conversion Instructions---------------------------------
 9970 
 9971 // Convert to Byte  -- nop
 9972 // Convert to Short -- nop
 9973 
 9974 // Convert to Int
 9975 
 9976 instruct convB2I_reg(iRegIdst dst, iRegIsrc src, immI_24 amount) %{
 9977   match(Set dst (RShiftI (LShiftI src amount) amount)); // shift left then right by 24: sign-extends the low byte
 9978   format %{ "EXTSB   $dst, $src \t// byte->int" %}
 9979   size(4);
 9980   ins_encode %{
 9981     __ extsb($dst$$Register, $src$$Register); // the shift pair is replaced by one extsb; amount is not encoded
 9982   %}
 9983   ins_pipe(pipe_class_default);
 9984 %}
 9985 
 9986 instruct extsh(iRegIdst dst, iRegIsrc src) %{
 9987   effect(DEF dst, USE src); // no match-rule: helper that emits a single extsh()
 9988   format %{ "EXTSH   $dst, $src" %}
 9989   size(4);
 9990   ins_encode %{
 9991     __ extsh($dst$$Register, $src$$Register);
 9992   %}
 9993   ins_pipe(pipe_class_default);
 9994 %}
 9995 
 9996 // LShiftI 16 + RShiftI 16 converts short to int.
 9997 instruct convS2I_reg(iRegIdst dst, iRegIsrc src, immI_16 amount) %{
 9998   match(Set dst (RShiftI (LShiftI src amount) amount));
 9999   format %{ "EXTSH   $dst, $src \t// short->int" %}
10000   size(4);
10001   ins_encode %{
10002     __ extsh($dst$$Register, $src$$Register); // the shift pair is replaced by one extsh; amount is not encoded
10003   %}
10004   ins_pipe(pipe_class_default);
10005 %}
10006 
10007 // ConvL2I + ConvI2L: Sign extend int in long register.
10008 instruct sxtI_L2L_reg(iRegLdst dst, iRegLsrc src) %{
10009   match(Set dst (ConvI2L (ConvL2I src)));
10010   // The truncate-then-widen pair is covered by one extsw() on the long source.
10011   format %{ "EXTSW   $dst, $src \t// long->long" %}
10012   size(4);
10013   ins_encode %{
10014     __ extsw($dst$$Register, $src$$Register);
10015   %}
10016   ins_pipe(pipe_class_default);
10017 %}
10018 
10019 instruct convL2I_reg(iRegIdst dst, iRegLsrc src) %{
10020   match(Set dst (ConvL2I src)); // long to int: no bits are changed, only a register copy may be needed
10021   format %{ "MR      $dst, $src \t// long->int" %}
10022   // variable size, 0 or 4
10023   ins_encode %{
10024     __ mr_if_needed($dst$$Register, $src$$Register);
10025   %}
10026   ins_pipe(pipe_class_default);
10027 %}
10028 
10029 instruct convD2IRaw_regD(regD dst, regD src) %{
10030   // no match-rule, false predicate
10031   effect(DEF dst, USE src);
10032   predicate(false);
10033   // Helper rule: both operands are double registers, so the fctiwz() result stays in a floating-point register.
10034   format %{ "FCTIWZ $dst, $src \t// convD2I, $src != NaN" %}
10035   size(4);
10036   ins_encode %{
10037     __ fctiwz($dst$$FloatRegister, $src$$FloatRegister);
10038   %}
10039   ins_pipe(pipe_class_default);
10040 %}
10041 
10042 instruct cmovI_bso_stackSlotL(iRegIdst dst, flagsRegSrc crx, stackSlotL src) %{
10043   // no match-rule, false predicate
10044   effect(DEF dst, USE crx, USE src);
10045   predicate(false);
10046   // Helper rule: conditional move from a long stack slot into an int register, controlled by crx.
10047   ins_variable_size_depending_on_alignment(true);
10048   // NOTE(review): "bso" presumably refers to a branch on the summary-overflow bit inside enc_cmove_bso_stackSlotL - confirm there.
10049   format %{ "CMOVI   $crx, $dst, $src" %}
10050   size(8);
10051   ins_encode( enc_cmove_bso_stackSlotL(dst, crx, src) );
10052   ins_pipe(pipe_class_default);
10053 %}
10054 
10055 instruct cmovI_bso_reg(iRegIdst dst, flagsRegSrc crx, regD src) %{
10056   // no match-rule, false predicate
10057   effect(DEF dst, USE crx, USE src);
10058   predicate(false);
10059   // Helper rule: conditional move from a double register into an int register, controlled by crx.
10060   ins_variable_size_depending_on_alignment(true);
10061   // Instantiated as cmovI_bso_regNode by the postalloc expand of cmovI_bso_reg_conLvalue0_Ex.
10062   format %{ "CMOVI   $crx, $dst, $src" %}
10063   size(8);
10064   ins_encode( enc_cmove_bso_reg(dst, crx, src) );
10065   ins_pipe(pipe_class_default);
10066 %}
10067 
10068 
// Conditional move into an int register with a default of 0.
// Has no match rule and a false predicate; it is created by the expand rules
// of convD2I_reg_mffprd_ExEx and convF2I_regF_mffprd_ExEx. After register
// allocation it is replaced by two nodes that both define $dst:
// a loadConI16(0) and a cmovI_bso_reg.
instruct cmovI_bso_reg_conLvalue0_Ex(iRegIdst dst, flagsRegSrc crx, regD src) %{
  // no match-rule, false predicate
  effect(DEF dst, USE crx, USE src);
  predicate(false);

  format %{ "CMOVI   $dst, $crx, $src \t// postalloc expanded" %}
  postalloc_expand %{
    // Shape of the expansion:
    //
    //   before:  dst = cmovI_bso_reg_conLvalue0_Ex   inputs: region, crx, src
    //
    //   after:   dst = loadConI16(0)                 inputs: region
    //            dst = cmovI_bso_reg                 inputs: region, crx, src
    //                                                precedence: the loadConI16
    //
    // Both replacement nodes get the register pair that was allocated to
    // this node.

    MachNode *zeroNode = new loadConI16Node();
    MachNode *cmovNode = new cmovI_bso_regNode();

    // Constant load: dst = 0.
    zeroNode->add_req(n_region);
    zeroNode->_opnds[0] = op_dst;
    zeroNode->_opnds[1] = new immI16Oper(0);
    ra_->set_pair(zeroNode->_idx, ra_->get_reg_second(this), ra_->get_reg_first(this)); // dst

    // Conditional move: dst = src, depending on crx.
    cmovNode->add_req(n_region, n_crx, n_src);
    // The precedence edge ties the cmove to the constant load.
    cmovNode->add_prec(zeroNode);
    cmovNode->_opnds[0] = op_dst;
    cmovNode->_opnds[1] = op_crx;
    cmovNode->_opnds[2] = op_src;
    ra_->set_pair(cmovNode->_idx, ra_->get_reg_second(this), ra_->get_reg_first(this)); // dst

    // Hand the replacement nodes back, constant load first.
    nodes->push(zeroNode);
    nodes->push(cmovNode);
  %}
%}
10122 
10123 
// Double to Int conversion, NaN is mapped to 0. Special version for Power8.
// Expands into three nodes: an unordered compare of src with itself (NaN
// check), the raw conversion into a temporary FP register, and a conditional
// move with default 0 that produces dst.
instruct convD2I_reg_mffprd_ExEx(iRegIdst dst, regD src) %{
  match(Set dst (ConvD2I src));
  ins_cost(DEFAULT_COST);

  expand %{
    regD tmpD;
    flagsReg crx;
    cmpDUnordered_reg_reg(crx, src, src);               // Check whether src is NaN.
    convD2IRaw_regD(tmpD, src);                         // Convert double to int (speculated).
    cmovI_bso_reg_conLvalue0_Ex(dst, crx, tmpD);        // Cmove based on NaN check.
  %}
%}
10137 
// Raw float->int conversion step: one FCTIWZ from $src to $dst, both being
// floating-point registers. The format text documents the assumption that
// $src is not NaN. Not matched by itself (false predicate); it is used as a
// building block in the expand rule of convF2I_regF_mffprd_ExEx.
instruct convF2IRaw_regF(regF dst, regF src) %{
  // no match-rule, false predicate
  effect(DEF dst, USE src);
  predicate(false);

  format %{ "FCTIWZ $dst, $src \t// convF2I, $src != NaN" %}
  size(4);
  ins_encode %{
    __ fctiwz($dst$$FloatRegister, $src$$FloatRegister);
  %}
  ins_pipe(pipe_class_default);
%}
10150 
10151 
// Float to Int conversion, NaN is mapped to 0. Special version for Power8.
// Expands into three nodes: an unordered compare of src with itself (NaN
// check), the raw conversion into a temporary FP register, and a conditional
// move with default 0 that produces dst.
// NOTE(review): tmpF is a regF, while cmovI_bso_reg_conLvalue0_Ex declares its
// source operand as regD.
instruct convF2I_regF_mffprd_ExEx(iRegIdst dst, regF src) %{
  match(Set dst (ConvF2I src));
  ins_cost(DEFAULT_COST);

  expand %{
    regF tmpF;
    flagsReg crx;
    cmpFUnordered_reg_reg(crx, src, src);               // Check whether src is NaN.
    convF2IRaw_regF(tmpF, src);                         // Convert float to int (speculated).
    cmovI_bso_reg_conLvalue0_Ex(dst, crx, tmpF);        // Cmove based on NaN check.
  %}
%}
10165 
10166 // Convert to Long
10167 
// ConvI2L: int to long conversion with one 4-byte EXTSW from $src into $dst.
instruct convI2L_reg(iRegLdst dst, iRegIsrc src) %{
  match(Set dst (ConvI2L src));
  format %{ "EXTSW   $dst, $src \t// int->long" %}
  size(4);
  ins_encode %{
    __ extsw($dst$$Register, $src$$Register);
  %}
  ins_pipe(pipe_class_default);
%}
10177 
// Zero-extend: convert unsigned int to long (convUI2L).
// Matches AndL(ConvI2L src, mask) where mask is an immL_32bits operand, and
// replaces conversion plus masking by one CLRLDI that clears the upper 32 bits.
instruct zeroExtendL_regI(iRegLdst dst, iRegIsrc src, immL_32bits mask) %{
  match(Set dst (AndL (ConvI2L src) mask));
  ins_cost(DEFAULT_COST);

  format %{ "CLRLDI  $dst, $src, #32 \t// zero-extend int to long" %}
  size(4);
  ins_encode %{
    __ clrldi($dst$$Register, $src$$Register, 32);
  %}
  ins_pipe(pipe_class_default);
%}
10190 
// Zero-extend: convert unsigned int to long in long register.
// Matches AndL(src, mask) where mask is an immL_32bits operand, and emits one
// CLRLDI that clears the upper 32 bits of the long source.
instruct zeroExtendL_regL(iRegLdst dst, iRegLsrc src, immL_32bits mask) %{
  match(Set dst (AndL src mask));
  ins_cost(DEFAULT_COST);

  format %{ "CLRLDI  $dst, $src, #32 \t// zero-extend int to long" %}
  size(4);
  ins_encode %{
    __ clrldi($dst$$Register, $src$$Register, 32);
  %}
  ins_pipe(pipe_class_default);
%}
10203 
// Raw float->long conversion step: one FCTIDZ from $src to $dst, both being
// floating-point registers. The format text documents the assumption that
// $src is not NaN. Not matched by itself (false predicate); it is used as a
// building block in the expand rule of convF2L_reg_mffprd_ExEx.
instruct convF2LRaw_regF(regF dst, regF src) %{
  // no match-rule, false predicate
  effect(DEF dst, USE src);
  predicate(false);

  format %{ "FCTIDZ $dst, $src \t// convF2L, $src != NaN" %}
  size(4);
  ins_encode %{
    __ fctidz($dst$$FloatRegister, $src$$FloatRegister);
  %}
  ins_pipe(pipe_class_default);
%}
10216 
// Conditional move into a long register from a long stack slot, controlled
// by the condition register $crx. Has no match rule and a false predicate,
// so the matcher never selects it. The code is produced by the
// enc_cmove_bso_stackSlotL encoding class, which is defined outside this section.
instruct cmovL_bso_stackSlotL(iRegLdst dst, flagsRegSrc crx, stackSlotL src) %{
  // no match-rule, false predicate
  effect(DEF dst, USE crx, USE src);
  predicate(false);

  // Going by the attribute name, the real size may depend on code alignment;
  // size(8) below is the declared size.
  ins_variable_size_depending_on_alignment(true);

  format %{ "CMOVL   $crx, $dst, $src" %}
  size(8);
  ins_encode( enc_cmove_bso_stackSlotL(dst, crx, src) );
  ins_pipe(pipe_class_default);
%}
10229 
// Conditional move into a long register from a floating-point register (regD),
// controlled by the condition register $crx. Has no match rule and a false
// predicate; it is created by the postalloc expansion of
// cmovL_bso_reg_conLvalue0_Ex. The code is produced by the enc_cmove_bso_reg
// encoding class, which is defined outside this section.
instruct cmovL_bso_reg(iRegLdst dst, flagsRegSrc crx, regD src) %{
  // no match-rule, false predicate
  effect(DEF dst, USE crx, USE src);
  predicate(false);

  // Going by the attribute name, the real size may depend on code alignment;
  // size(8) below is the declared size.
  ins_variable_size_depending_on_alignment(true);

  format %{ "CMOVL   $crx, $dst, $src" %}
  size(8);
  ins_encode( enc_cmove_bso_reg(dst, crx, src) );
  ins_pipe(pipe_class_default);
%}
10242 
10243 
// Conditional move into a long register with a default of 0.
// Has no match rule and a false predicate; it is created by the expand rules
// of convF2L_reg_mffprd_ExEx and convD2L_reg_mffprd_ExEx. After register
// allocation it is replaced by two nodes that both define $dst:
// a loadConL16(0) and a cmovL_bso_reg.
instruct cmovL_bso_reg_conLvalue0_Ex(iRegLdst dst, flagsRegSrc crx, regD src) %{
  // no match-rule, false predicate
  effect(DEF dst, USE crx, USE src);
  predicate(false);

  format %{ "CMOVL   $dst, $crx, $src \t// postalloc expanded" %}
  postalloc_expand %{
    // Shape of the expansion:
    //
    //   before:  dst = cmovL_bso_reg_conLvalue0_Ex   inputs: region, crx, src
    //
    //   after:   dst = loadConL16(0)                 inputs: region
    //            dst = cmovL_bso_reg                 inputs: region, crx, src
    //                                                precedence: the loadConL16
    //
    // Both replacement nodes get the register pair that was allocated to
    // this node.

    MachNode *zeroNode = new loadConL16Node();
    MachNode *cmovNode = new cmovL_bso_regNode();

    // Constant load: dst = 0.
    zeroNode->add_req(n_region);
    zeroNode->_opnds[0] = op_dst;
    zeroNode->_opnds[1] = new immL16Oper(0);
    ra_->set_pair(zeroNode->_idx, ra_->get_reg_second(this), ra_->get_reg_first(this)); // dst

    // Conditional move: dst = src, depending on crx.
    cmovNode->add_req(n_region, n_crx, n_src);
    // The precedence edge ties the cmove to the constant load.
    cmovNode->add_prec(zeroNode);
    cmovNode->_opnds[0] = op_dst;
    cmovNode->_opnds[1] = op_crx;
    cmovNode->_opnds[2] = op_src;
    ra_->set_pair(cmovNode->_idx, ra_->get_reg_second(this), ra_->get_reg_first(this)); // dst

    // Hand the replacement nodes back, constant load first.
    nodes->push(zeroNode);
    nodes->push(cmovNode);
  %}
%}
10294 
10295 
// Float to Long conversion, NaN is mapped to 0. Special version for Power8.
// Expands into three nodes: an unordered compare of src with itself (NaN
// check), the raw conversion into a temporary FP register, and a conditional
// move with default 0 that produces dst.
// NOTE(review): tmpF is a regF, while cmovL_bso_reg_conLvalue0_Ex declares its
// source operand as regD.
instruct convF2L_reg_mffprd_ExEx(iRegLdst dst, regF src) %{
  match(Set dst (ConvF2L src));
  ins_cost(DEFAULT_COST);

  expand %{
    regF tmpF;
    flagsReg crx;
    cmpFUnordered_reg_reg(crx, src, src);               // Check whether src is NaN.
    convF2LRaw_regF(tmpF, src);                         // Convert float to long (speculated).
    cmovL_bso_reg_conLvalue0_Ex(dst, crx, tmpF);        // Cmove based on NaN check.
  %}
%}
10309 
// Raw double->long conversion step: one FCTIDZ from $src to $dst, both being
// floating-point registers. The format text documents the assumption that
// $src is not NaN. Not matched by itself (false predicate); it is used as a
// building block in the expand rule of convD2L_reg_mffprd_ExEx.
instruct convD2LRaw_regD(regD dst, regD src) %{
  // no match-rule, false predicate
  effect(DEF dst, USE src);
  predicate(false);

  format %{ "FCTIDZ $dst, $src \t// convD2L $src != NaN" %}
  size(4);
  ins_encode %{
    __ fctidz($dst$$FloatRegister, $src$$FloatRegister);
  %}
  ins_pipe(pipe_class_default);
%}
10322 
10323 
// Double to Long conversion, NaN is mapped to 0. Special version for Power8.
// Expands into three nodes: an unordered compare of src with itself (NaN
// check), the raw conversion into a temporary FP register, and a conditional
// move with default 0 that produces dst.
instruct convD2L_reg_mffprd_ExEx(iRegLdst dst, regD src) %{
  match(Set dst (ConvD2L src));
  ins_cost(DEFAULT_COST);

  expand %{
    regD tmpD;
    flagsReg crx;
    cmpDUnordered_reg_reg(crx, src, src);               // Check whether src is NaN.
    convD2LRaw_regD(tmpD, src);                         // Convert double to long (speculated).
    cmovL_bso_reg_conLvalue0_Ex(dst, crx, tmpD);        // Cmove based on NaN check.
  %}
%}
10337 
10338 // Convert to Float
10339 
// Placed here as needed in expand.
// Raw long->double conversion step: one FCFID from $src to $dst. Both operands
// are floating-point registers, so the long value must already have been
// moved into an FP register. Not matched by itself (false predicate); it is
// used by the expand rules of convI2D_reg_mtfprd_Ex and convL2D_reg_mtfprd_Ex.
instruct convL2DRaw_regD(regD dst, regD src) %{
  // no match-rule, false predicate
  effect(DEF dst, USE src);
  predicate(false);

  format %{ "FCFID $dst, $src \t// convL2D" %}
  size(4);
  ins_encode %{
    __ fcfid($dst$$FloatRegister, $src$$FloatRegister);
  %}
  ins_pipe(pipe_class_default);
%}
10353 
// Placed here as needed in expand.
// ConvD2F: double to float conversion with one 4-byte FRSP from $src into $dst.
instruct convD2F_reg(regF dst, regD src) %{
  match(Set dst (ConvD2F src));
  format %{ "FRSP    $dst, $src \t// convD2F" %}
  size(4);
  ins_encode %{
    __ frsp($dst$$FloatRegister, $src$$FloatRegister);
  %}
  ins_pipe(pipe_class_default);
%}
10364 
// Raw long->float conversion step: one FCFIDS from $src (regD) to $dst (regF).
// The source is a floating-point register, so the long value must already
// have been moved into an FP register. Not matched by itself (false
// predicate); it is used by the expand rules of convI2F_ireg_mtfprd_Ex and
// convL2F_ireg_mtfprd_Ex.
instruct convL2FRaw_regF(regF dst, regD src) %{
  // no match-rule, false predicate
  effect(DEF dst, USE src);
  predicate(false);

  format %{ "FCFIDS $dst, $src \t// convL2F" %}
  size(4);
  ins_encode %{
    __ fcfids($dst$$FloatRegister, $src$$FloatRegister);
  %}
  ins_pipe(pipe_class_default);
%}
10377 
10378 
// Integer to Float conversion. Special version for Power8.
// Expands into two nodes: moveI2D_reg (defined outside this section) puts the
// int source into a temporary FP register, then convL2FRaw_regF converts that
// register to float.
instruct convI2F_ireg_mtfprd_Ex(regF dst, iRegIsrc src) %{
  match(Set dst (ConvI2F src));
  ins_cost(DEFAULT_COST);

  expand %{
    regD tmpD;
    moveI2D_reg(tmpD, src);
    convL2FRaw_regF(dst, tmpD);          // Convert to float.
  %}
%}
10390 
10391 
// L2F to avoid runtime call.  Special version for Power8.
// Expands into two nodes: moveL2D_reg (defined outside this section) puts the
// long source into a temporary FP register, then convL2FRaw_regF converts that
// register to float.
instruct convL2F_ireg_mtfprd_Ex(regF dst, iRegLsrc src) %{
  match(Set dst (ConvL2F src));
  ins_cost(DEFAULT_COST);

  expand %{
    regD tmpD;
    moveL2D_reg(tmpD, src);
    convL2FRaw_regF(dst, tmpD);          // Convert to float.
  %}
%}
10403 
// convD2F_reg(regF dst, regD src) is defined further up in this file
// ("Placed here as needed in expand").
10406 
10407 // Convert to Double
10408 
10409 
// Integer to Double conversion. Special version for Power8.
// Expands into two nodes: moveI2D_reg (defined outside this section) puts the
// int source into a temporary FP register, then convL2DRaw_regD converts that
// register to double.
instruct convI2D_reg_mtfprd_Ex(regD dst, iRegIsrc src) %{
  match(Set dst (ConvI2D src));
  ins_cost(DEFAULT_COST);

  expand %{
    regD tmpD;
    moveI2D_reg(tmpD, src);
    convL2DRaw_regD(dst, tmpD);          // Convert to double.
  %}
%}
10421 
10422 
// Long to Double conversion. Special version for Power8.
// Expands into two nodes: moveL2D_reg (defined outside this section) puts the
// long source into a temporary FP register, then convL2DRaw_regD converts that
// register to double.
instruct convL2D_reg_mtfprd_Ex(regD dst, iRegLsrc src) %{
  match(Set dst (ConvL2D src));
  ins_cost(DEFAULT_COST);

  expand %{
    regD tmpD;
    moveL2D_reg(tmpD, src);
    convL2DRaw_regD(dst, tmpD);          // Convert to double.
  %}
%}
10434 
// ConvF2D: float to double conversion, encoded as an FP register move via
// fmr_if_needed. No size() attribute is declared because the emitted code is
// 0 or 4 bytes (presumably nothing is emitted when $dst and $src are the same
// register).
instruct convF2D_reg(regD dst, regF src) %{
  match(Set dst (ConvF2D src));
  format %{ "FMR     $dst, $src \t// float->double" %}
  // variable size, 0 or 4
  ins_encode %{
    __ fmr_if_needed($dst$$FloatRegister, $src$$FloatRegister);
  %}
  ins_pipe(pipe_class_default);
%}
10444 
// ConvF2HF: float to half-precision conversion, result in an int register.
// The macro assembler routine f2hf does the work using the FP temporary $tmp.
// The format text lists the three instructions behind it, which matches the
// declared size of 12 bytes.
instruct convF2HF_reg_reg(iRegIdst dst, regF src, regF tmp) %{
  match(Set dst (ConvF2HF src));
  effect(TEMP tmp);
  ins_cost(3 * DEFAULT_COST);
  size(12);
  format %{ "XSCVDPHP $tmp, $src\t# convert to half precision\n\t"
            "MFFPRD $dst, $tmp\t# move result from $tmp to $dst\n\t"
            "EXTSH $dst, $dst\t# make it a proper short"
  %}
  ins_encode %{
    __ f2hf($dst$$Register, $src$$FloatRegister, $tmp$$FloatRegister);
  %}
  ins_pipe(pipe_class_default);
%}
10459 
// ConvHF2F: half-precision (held in an int register) to float conversion.
// The macro assembler routine hf2f does the work. The format text lists the
// two instructions behind it, which matches the declared size of 8 bytes.
instruct convHF2F_reg_reg(regF dst, iRegIsrc src) %{
  match(Set dst (ConvHF2F src));
  ins_cost(2 * DEFAULT_COST);
  size(8);
  format %{ "MTFPRD $dst, $src\t# move source from $src to $dst\n\t"
            "XSCVHPDP $dst, $dst\t# convert from half precision"
  %}
  ins_encode %{
    __ hf2f($dst$$FloatRegister, $src$$Register);
  %}
  ins_pipe(pipe_class_default);
%}
10472 
10473 //----------Control Flow Instructions------------------------------------------
10474 // Compare Instructions
10475 
10476 // Compare Integers
// CmpI of two int registers: one 4-byte CMPW that sets condition register $crx.
instruct cmpI_reg_reg(flagsReg crx, iRegIsrc src1, iRegIsrc src2) %{
  match(Set crx (CmpI src1 src2));
  size(4);
  format %{ "CMPW    $crx, $src1, $src2" %}
  ins_encode %{
    __ cmpw($crx$$CondRegister, $src1$$Register, $src2$$Register);
  %}
  ins_pipe(pipe_class_compare);
%}
10486 
// CmpI of an int register with an immI16 constant: one 4-byte CMPWI that sets
// condition register $crx.
instruct cmpI_reg_imm16(flagsReg crx, iRegIsrc src1, immI16 src2) %{
  match(Set crx (CmpI src1 src2));
  format %{ "CMPWI   $crx, $src1, $src2" %}
  size(4);
  ins_encode %{
    __ cmpwi($crx$$CondRegister, $src1$$Register, $src2$$constant);
  %}
  ins_pipe(pipe_class_compare);
%}
10496 
// (src1 & src2) == 0?
// Matches CmpI(AndI(src1, src2), 0) with src2 being a uimmI16 constant. The
// result operand is fixed to CR0 (flagsRegCR0). The AND value itself goes to
// R0 and is not an output of this node.
instruct testI_reg_imm(flagsRegCR0 cr0, iRegIsrc src1, uimmI16 src2, immI_0 zero) %{
  match(Set cr0 (CmpI (AndI src1 src2) zero));
  // r0 is killed
  format %{ "ANDI    R0, $src1, $src2 \t// BTST int" %}
  size(4);
  ins_encode %{
    __ andi_(R0, $src1$$Register, $src2$$constant);
  %}
  ins_pipe(pipe_class_compare);
%}
10508 
// CmpL of two long registers: one 4-byte CMPD that sets condition register $crx.
instruct cmpL_reg_reg(flagsReg crx, iRegLsrc src1, iRegLsrc src2) %{
  match(Set crx (CmpL src1 src2));
  format %{ "CMPD    $crx, $src1, $src2" %}
  size(4);
  ins_encode %{
    __ cmpd($crx$$CondRegister, $src1$$Register, $src2$$Register);
  %}
  ins_pipe(pipe_class_compare);
%}
10518 
// CmpL of a long register with an immL16 constant: one 4-byte CMPDI that sets
// condition register $crx.
instruct cmpL_reg_imm16(flagsReg crx, iRegLsrc src1, immL16 src2) %{
  match(Set crx (CmpL src1 src2));
  format %{ "CMPDI   $crx, $src1, $src2" %}
  size(4);
  ins_encode %{
    __ cmpdi($crx$$CondRegister, $src1$$Register, $src2$$constant);
  %}
  ins_pipe(pipe_class_compare);
%}
10528 
// Added CmpUL for LoopPredicate.
// CmpUL of two long registers: one 4-byte CMPLD that sets condition register $crx.
instruct cmpUL_reg_reg(flagsReg crx, iRegLsrc src1, iRegLsrc src2) %{
  match(Set crx (CmpUL src1 src2));
  format %{ "CMPLD   $crx, $src1, $src2" %}
  size(4);
  ins_encode %{
    __ cmpld($crx$$CondRegister, $src1$$Register, $src2$$Register);
  %}
  ins_pipe(pipe_class_compare);
%}
10539 
// CmpUL of a long register with a uimmL16 constant: one 4-byte CMPLDI that
// sets condition register $crx.
instruct cmpUL_reg_imm16(flagsReg crx, iRegLsrc src1, uimmL16 src2) %{
  match(Set crx (CmpUL src1 src2));
  format %{ "CMPLDI  $crx, $src1, $src2" %}
  size(4);
  ins_encode %{
    __ cmpldi($crx$$CondRegister, $src1$$Register, $src2$$constant);
  %}
  ins_pipe(pipe_class_compare);
%}
10549 
// (src1 & src2) == 0? for two long registers.
// Matches CmpL(AndL(src1, src2), 0). The result operand is fixed to CR0
// (flagsRegCR0). The AND value itself goes to R0 and is not an output of
// this node.
instruct testL_reg_reg(flagsRegCR0 cr0, iRegLsrc src1, iRegLsrc src2, immL_0 zero) %{
  match(Set cr0 (CmpL (AndL src1 src2) zero));
  // r0 is killed
  format %{ "AND     R0, $src1, $src2 \t// BTST long" %}
  size(4);
  ins_encode %{
    __ and_(R0, $src1$$Register, $src2$$Register);
  %}
  ins_pipe(pipe_class_compare);
%}
10560 
// (src1 & src2) == 0? for a long register and a uimmL16 constant.
// Matches CmpL(AndL(src1, src2), 0). The result operand is fixed to CR0
// (flagsRegCR0). The AND value itself goes to R0 and is not an output of
// this node.
instruct testL_reg_imm(flagsRegCR0 cr0, iRegLsrc src1, uimmL16 src2, immL_0 zero) %{
  match(Set cr0 (CmpL (AndL src1 src2) zero));
  // r0 is killed
  format %{ "ANDI    R0, $src1, $src2 \t// BTST long" %}
  size(4);
  ins_encode %{
    __ andi_(R0, $src1$$Register, $src2$$constant);
  %}
  ins_pipe(pipe_class_compare);
%}
10571 
// Manifest a CmpL3 result in an integer register.
// Compares the two longs with CMPD into CR0 (hence the KILL of cr0) and then
// lets set_cmp3 turn the CR0 state into the value of $dst.
// The declared size depends on VM_Version::has_brw(): 16 bytes with it,
// 20 bytes without.
instruct cmpL3_reg_reg(iRegIdst dst, iRegLsrc src1, iRegLsrc src2, flagsRegCR0 cr0) %{
  match(Set dst (CmpL3 src1 src2));
  effect(KILL cr0);
  ins_cost(DEFAULT_COST * 5);
  size((VM_Version::has_brw() ? 16 : 20));

  format %{ "cmpL3_reg_reg $dst, $src1, $src2" %}

  ins_encode %{
    __ cmpd(CR0, $src1$$Register, $src2$$Register);
    __ set_cmp3($dst$$Register);
  %}
  ins_pipe(pipe_class_default);
%}
10587 
// Manifest a CmpU3 result in an integer register.
// Compares the two ints with CMPLW into CR0 (hence the KILL of cr0) and then
// lets set_cmp3 turn the CR0 state into the value of $dst.
// The declared size depends on VM_Version::has_brw(): 16 bytes with it,
// 20 bytes without.
instruct cmpU3_reg_reg(iRegIdst dst, iRegIsrc src1, iRegIsrc src2, flagsRegCR0 cr0) %{
  match(Set dst (CmpU3 src1 src2));
  effect(KILL cr0);
  ins_cost(DEFAULT_COST * 5);
  size((VM_Version::has_brw() ? 16 : 20));

  format %{ "cmpU3_reg_reg $dst, $src1, $src2" %}

  ins_encode %{
    __ cmplw(CR0, $src1$$Register, $src2$$Register);
    __ set_cmp3($dst$$Register);
  %}
  ins_pipe(pipe_class_default);
%}
10602 
// Manifest a CmpUL3 result in an integer register.
// Compares the two longs with CMPLD into CR0 (hence the KILL of cr0) and then
// lets set_cmp3 turn the CR0 state into the value of $dst.
// The declared size depends on VM_Version::has_brw(): 16 bytes with it,
// 20 bytes without.
instruct cmpUL3_reg_reg(iRegIdst dst, iRegLsrc src1, iRegLsrc src2, flagsRegCR0 cr0) %{
  match(Set dst (CmpUL3 src1 src2));
  effect(KILL cr0);
  ins_cost(DEFAULT_COST * 5);
  size((VM_Version::has_brw() ? 16 : 20));

  format %{ "cmpUL3_reg_reg $dst, $src1, $src2" %}

  ins_encode %{
    __ cmpld(CR0, $src1$$Register, $src2$$Register);
    __ set_cmp3($dst$$Register);
  %}
  ins_pipe(pipe_class_default);
%}
10617 
10618 // Implicit range checks.
10619 // A range check in the ideal world has one of the following shapes:
10620 //  - (If le (CmpU length index)), (IfTrue  throw exception)
10621 //  - (If lt (CmpU index length)), (IfFalse throw exception)
10622 //
// Match range check 'If le (CmpU length index)'.
// Length is in a register, index is a uimmI15 constant. Selected only when
// TrapBasedRangeChecks is on, the Bool test is 'le', the probability
// condition below holds and the If branches to an uncommon trap. The whole
// If is then encoded as one 4-byte trap instruction.
instruct rangeCheck_iReg_uimm15(cmpOp cmp, iRegIsrc src_length, uimmI15 index, label labl) %{
  match(If cmp (CmpU src_length index));
  effect(USE labl);
  predicate(TrapBasedRangeChecks &&
            _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le &&
            PROB_UNLIKELY(_leaf->as_If()->_prob) >= PROB_ALWAYS &&
            (Matcher::branches_to_uncommon_trap(_leaf)));

  ins_is_TrapBasedCheckNode(true);

  format %{ "TWI     $index $cmp $src_length \t// RangeCheck => trap $labl" %}
  size(4);
  ins_encode %{
    const int cond_code = $cmp$$cmpcode;
    if (cond_code != 0x1 /* less_equal */) {
      // The node was flipped during fixup flow; this happens when both
      // successors are uncommon traps (probability is 0).
      assert(cond_code == 0x9, "must be greater");
      __ trap_range_check_g($src_length$$Register, $index$$constant);
    } else {
      // Regular case: trap on less_equal.
      __ trap_range_check_le($src_length$$Register, $index$$constant);
    }
  %}
  ins_pipe(pipe_class_trap);
%}
10648 
// Match range check 'If lt (CmpU index length)'.
// Index and length are both in registers. Selected only when
// TrapBasedRangeChecks is on, the Bool test is 'lt', the probability
// condition below holds and the If branches to an uncommon trap. The whole
// If is then encoded as one 4-byte trap instruction.
instruct rangeCheck_iReg_iReg(cmpOp cmp, iRegIsrc src_index, iRegIsrc src_length, label labl) %{
  match(If cmp (CmpU src_index src_length));
  effect(USE labl);
  predicate(TrapBasedRangeChecks &&
            _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt &&
            _leaf->as_If()->_prob >= PROB_ALWAYS &&
            (Matcher::branches_to_uncommon_trap(_leaf)));

  ins_is_TrapBasedCheckNode(true);

  format %{ "TW      $src_index $cmp $src_length \t// RangeCheck => trap $labl" %}
  size(4);
  ins_encode %{
    const int cond_code = $cmp$$cmpcode;
    if (cond_code != 0x0 /* greater_equal */) {
      // The node was flipped during fixup flow; this happens when both
      // successors are uncommon traps (probability is 0).
      assert(cond_code == 0x8, "must be less");
      __ trap_range_check_l($src_index$$Register, $src_length$$Register);
    } else {
      // Regular case: trap on greater_equal.
      __ trap_range_check_ge($src_index$$Register, $src_length$$Register);
    }
  %}
  ins_pipe(pipe_class_trap);
%}
10674 
// Match range check 'If lt (CmpU index length)'.
// Index is in a register, length is a uimmI15 constant. Selected only when
// TrapBasedRangeChecks is on, the Bool test is 'lt', the probability
// condition below holds and the If branches to an uncommon trap. The whole
// If is then encoded as one 4-byte trap instruction.
instruct rangeCheck_uimm15_iReg(cmpOp cmp, iRegIsrc src_index, uimmI15 length, label labl) %{
  match(If cmp (CmpU src_index length));
  effect(USE labl);
  predicate(TrapBasedRangeChecks &&
            _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt &&
            _leaf->as_If()->_prob >= PROB_ALWAYS &&
            (Matcher::branches_to_uncommon_trap(_leaf)));

  ins_is_TrapBasedCheckNode(true);

  format %{ "TWI     $src_index $cmp $length \t// RangeCheck => trap $labl" %}
  size(4);
  ins_encode %{
    const int cond_code = $cmp$$cmpcode;
    if (cond_code != 0x0 /* greater_equal */) {
      // The node was flipped during fixup flow; this happens when both
      // successors are uncommon traps (probability is 0).
      assert(cond_code == 0x8, "must be less");
      __ trap_range_check_l($src_index$$Register, $length$$constant);
    } else {
      // Regular case: trap on greater_equal.
      __ trap_range_check_ge($src_index$$Register, $length$$constant);
    }
  %}
  ins_pipe(pipe_class_trap);
%}
10700 
// CmpU of two int registers: one 4-byte CMPLW that sets condition register $crx.
instruct compU_reg_reg(flagsReg crx, iRegIsrc src1, iRegIsrc src2) %{
  match(Set crx (CmpU src1 src2));
  format %{ "CMPLW   $crx, $src1, $src2 \t// unsigned" %}
  size(4);
  ins_encode %{
    __ cmplw($crx$$CondRegister, $src1$$Register, $src2$$Register);
  %}
  ins_pipe(pipe_class_compare);
%}
10710 
// CmpU of an int register with a uimmI16 constant: one 4-byte CMPLWI that
// sets condition register $crx.
instruct compU_reg_uimm16(flagsReg crx, iRegIsrc src1, uimmI16 src2) %{
  match(Set crx (CmpU src1 src2));
  size(4);
  format %{ "CMPLWI  $crx, $src1, $src2" %}
  ins_encode %{
    __ cmplwi($crx$$CondRegister, $src1$$Register, $src2$$constant);
  %}
  ins_pipe(pipe_class_compare);
%}
10720 
// Implicit zero checks (more implicit null checks).
// No constant pool entries required.
// Matches 'If cmp (CmpN value 0)' for a narrow oop. Selected only when
// TrapBasedNullChecks is on, the Bool test is 'ne', the probability
// condition below holds and the If branches to an uncommon trap. The whole
// If is then encoded as one 4-byte trap instruction.
instruct zeroCheckN_iReg_imm0(cmpOp cmp, iRegNsrc value, immN_0 zero, label labl) %{
  match(If cmp (CmpN value zero));
  effect(USE labl);
  predicate(TrapBasedNullChecks &&
            _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne &&
            _leaf->as_If()->_prob >= PROB_LIKELY_MAG(4) &&
            Matcher::branches_to_uncommon_trap(_leaf));
  ins_cost(1);

  ins_is_TrapBasedCheckNode(true);

  format %{ "TDI     $value $cmp $zero \t// ZeroCheckN => trap $labl" %}
  size(4);
  ins_encode %{
    const int cond_code = $cmp$$cmpcode;
    if (cond_code != 0xA) {
      // The node was flipped during fixup flow; this happens when both
      // successors are uncommon traps (probability is 0).
      assert(cond_code == 0x2 , "must be equal(0xA) or notEqual(0x2)");
      __ trap_null_check($value$$Register, Assembler::traptoGreaterThanUnsigned);
    } else {
      // Regular case: default null-check trap.
      __ trap_null_check($value$$Register);
    }
  %}
  ins_pipe(pipe_class_trap);
%}
10748 
// Compare narrow oops.
// CmpN of two narrow-oop registers: one 4-byte CMPLW that sets condition
// register $crx.
instruct cmpN_reg_reg(flagsReg crx, iRegNsrc src1, iRegNsrc src2) %{
  match(Set crx (CmpN src1 src2));

  size(4);
  ins_cost(2);
  format %{ "CMPLW   $crx, $src1, $src2 \t// compressed ptr" %}
  ins_encode %{
    __ cmplw($crx$$CondRegister, $src1$$Register, $src2$$Register);
  %}
  ins_pipe(pipe_class_compare);
%}
10761 
// CmpN of a narrow-oop register with the immN_0 constant: one 4-byte CMPLWI
// that sets condition register $crx. Its cost of 2 is deliberately higher
// than the cost of 1 declared by zeroCheckN_iReg_imm0.
instruct cmpN_reg_imm0(flagsReg crx, iRegNsrc src1, immN_0 src2) %{
  match(Set crx (CmpN src1 src2));
  // Make this more expensive than zeroCheckN_iReg_imm0.
  ins_cost(2);

  format %{ "CMPLWI  $crx, $src1, $src2 \t// compressed ptr" %}
  size(4);
  ins_encode %{
    __ cmplwi($crx$$CondRegister, $src1$$Register, $src2$$constant);
  %}
  ins_pipe(pipe_class_compare);
%}
10774 
// Implicit zero checks (more implicit null checks).
// No constant pool entries required.
// Matches 'If cmp (CmpP value 0)' for a pointer. Selected only when
// TrapBasedNullChecks is on, the Bool test is 'ne', the probability
// condition below holds and the If branches to an uncommon trap. The whole
// If is then encoded as one 4-byte trap instruction.
instruct zeroCheckP_reg_imm0(cmpOp cmp, iRegP_N2P value, immP_0 zero, label labl) %{
  match(If cmp (CmpP value zero));
  effect(USE labl);
  predicate(TrapBasedNullChecks &&
            _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne &&
            _leaf->as_If()->_prob >= PROB_LIKELY_MAG(4) &&
            Matcher::branches_to_uncommon_trap(_leaf));
  ins_cost(1); // Should not be cheaper than zeroCheckN.

  ins_is_TrapBasedCheckNode(true);

  format %{ "TDI     $value $cmp $zero \t// ZeroCheckP => trap $labl" %}
  size(4);
  ins_encode %{
    const int cond_code = $cmp$$cmpcode;
    if (cond_code != 0xA) {
      // The node was flipped during fixup flow; this happens when both
      // successors are uncommon traps (probability is 0).
      assert(cond_code == 0x2 , "must be equal(0xA) or notEqual(0x2)");
      __ trap_null_check($value$$Register, Assembler::traptoGreaterThanUnsigned);
    } else {
      // Regular case: default null-check trap.
      __ trap_null_check($value$$Register);
    }
  %}
  ins_pipe(pipe_class_trap);
%}
10802 
// Compare Pointers
// CmpP of two pointer registers: one 4-byte CMPLD that sets condition
// register $crx.
instruct cmpP_reg_reg(flagsReg crx, iRegP_N2P src1, iRegP_N2P src2) %{
  match(Set crx (CmpP src1 src2));
  format %{ "CMPLD   $crx, $src1, $src2 \t// ptr" %}
  size(4);
  ins_encode %{
    __ cmpld($crx$$CondRegister, $src1$$Register, $src2$$Register);
  %}
  ins_pipe(pipe_class_compare);
%}
10813 
// CmpP of a pointer register with an immP_0or1 constant: one 4-byte CMPLDI
// that sets condition register $crx.
instruct cmpP_reg_null(flagsReg crx, iRegP_N2P src1, immP_0or1 src2) %{
  match(Set crx (CmpP src1 src2));
  format %{ "CMPLDI   $crx, $src1, $src2 \t// ptr" %}
  size(4);
  ins_encode %{
    // The constant is cut down to its low 16 bits and passed on as an int.
    __ cmpldi($crx$$CondRegister, $src1$$Register, (int)((short)($src2$$constant & 0xFFFF)));
  %}
  ins_pipe(pipe_class_compare);
%}
10823 
// Used in postalloc expand.
// Compares a pointer register with an immL16 constant using one 4-byte CMPDI.
// The predicate is false, so the matcher never selects this rule.
instruct cmpP_reg_imm16(flagsReg crx, iRegPsrc src1, immL16 src2) %{
  // This match rule prevents reordering of the node before a safepoint.
  // This only makes sense if this instruction is used exclusively
  // for the expansion of EncodeP!
  match(Set crx (CmpP src1 src2));
  predicate(false);

  format %{ "CMPDI   $crx, $src1, $src2" %}
  size(4);
  ins_encode %{
    __ cmpdi($crx$$CondRegister, $src1$$Register, $src2$$constant);
  %}
  ins_pipe(pipe_class_compare);
%}
10839 
10840 //----------Float Compares----------------------------------------------------
10841 
// Plain float compare: one 4-byte FCMPU of $src1 and $src2 into $crx.
// Never selected by the matcher (false predicate). It is created by the
// postalloc expansion of cmpF_reg_reg_Ex and by the expand rules of the
// float-to-int/long conversions, which compare a value with itself as a
// NaN check.
instruct cmpFUnordered_reg_reg(flagsReg crx, regF src1, regF src2) %{
  // Needs matchrule, see cmpDUnordered.
  match(Set crx (CmpF src1 src2));
  // False predicate, shall not be matched.
  predicate(false);

  format %{ "cmpFUrd $crx, $src1, $src2" %}
  size(4);
  ins_encode %{
    __ fcmpu($crx$$CondRegister, $src1$$FloatRegister, $src2$$FloatRegister);
  %}
  ins_pipe(pipe_class_default);
%}
10855 
// Post-processing step for a floating-point compare result in $crx: if the
// compare was unordered, $crx is overwritten so that it reads as 'less';
// otherwise $crx is left untouched. Has no match rule and a false predicate;
// it is created by the postalloc expansions of cmpF_reg_reg_Ex and
// cmpD_reg_reg_Ex.
instruct cmov_bns_less(flagsReg crx) %{
  // no match-rule, false predicate
  effect(DEF crx);
  predicate(false);

  // Going by the attribute name, the real size may depend on code alignment;
  // size(12) below is the declared size.
  ins_variable_size_depending_on_alignment(true);

  format %{ "CMOV    $crx" %}
  size(12);
  ins_encode %{
    Label done;
    __ bns($crx$$CondRegister, done);        // not unordered -> keep crx
    // Comparing 0 (in R0) with 1 yields 'less' in crx. R0 is used as scratch.
    __ li(R0, 0);
    __ cmpwi($crx$$CondRegister, R0, 1);     // unordered -> set crx to 'less'
    __ bind(done);
  %}
  ins_pipe(pipe_class_default);
%}
10874 
// Compare floating, generate condition code.
// Matched for CmpF. After register allocation the node is replaced by a
// cmpFUnordered_reg_reg followed by a cmov_bns_less, both defining $crx.
instruct cmpF_reg_reg_Ex(flagsReg crx, regF src1, regF src2) %{
  // FIXME: should we match 'If cmp (CmpF src1 src2))' ??
  //
  // Background: mpegaudio contains the following code sequence very often.
  //
  // block BXX:
  // 0: instruct cmpFUnordered_reg_reg (cmpF_reg_reg-0):
  //    cmpFUrd CR6, F11, F9
  // 4: instruct cmov_bns_less (cmpF_reg_reg-1):
  //    cmov CR6
  // 8: instruct branchConSched:
  //    B_FARle CR6, B56  P=0.500000 C=-1.000000
  match(Set crx (CmpF src1 src2));
  ins_cost(DEFAULT_COST+BRANCH_COST);

  format %{ "CMPF    $crx, $src1, $src2 \t// postalloc expanded" %}
  postalloc_expand %{
    // Shape of the expansion:
    //
    //   before:  crx = cmpF_reg_reg_Ex          inputs: region, src1, src2
    //
    //   after:   crx = cmpFUnordered_reg_reg    inputs: region, src1, src2
    //            crx = cmov_bns_less            inputs: region
    //                                           precedence: the compare
    //
    // Both replacement nodes get the register pair that was allocated to
    // this node.

    MachNode *cmpNode = new cmpFUnordered_reg_regNode();
    MachNode *fixNode = new cmov_bns_lessNode();

    // The compare itself.
    cmpNode->add_req(n_region, n_src1, n_src2);
    cmpNode->_opnds[0] = op_crx;
    cmpNode->_opnds[1] = op_src1;
    cmpNode->_opnds[2] = op_src2;
    ra_->set_pair(cmpNode->_idx, ra_->get_reg_second(this), ra_->get_reg_first(this)); // crx

    // Fix-up of crx for the unordered case.
    fixNode->add_req(n_region);
    // The precedence edge ties the fix-up to the compare.
    fixNode->add_prec(cmpNode);
    fixNode->_opnds[0] = op_crx;
    ra_->set_pair(fixNode->_idx, ra_->get_reg_second(this), ra_->get_reg_first(this)); // crx

    // Hand the replacement nodes back, compare first.
    nodes->push(cmpNode);
    nodes->push(fixNode);
  %}
%}
10935 
// Compare float, generate -1,0,1
// Compares the two floats with FCMPU into CR0 (hence the KILL of cr0) and
// then lets set_cmpu3 turn the CR0 state into the value of $dst.
// The declared size depends on VM_Version::has_brw(): 20 bytes with it,
// 24 bytes without.
instruct cmpF3_reg_reg(iRegIdst dst, regF src1, regF src2, flagsRegCR0 cr0) %{
  match(Set dst (CmpF3 src1 src2));
  effect(KILL cr0);
  ins_cost(DEFAULT_COST * 6);
  size((VM_Version::has_brw() ? 20 : 24));

  format %{ "cmpF3_reg_reg $dst, $src1, $src2" %}

  ins_encode %{
    __ fcmpu(CR0, $src1$$FloatRegister, $src2$$FloatRegister);
    __ set_cmpu3($dst$$Register, true); // C2 requires unordered to get treated like less
  %}
  ins_pipe(pipe_class_default);
%}
10951 
// Plain double compare: one 4-byte FCMPU of $src1 and $src2 into $crx.
// Never selected by the matcher (false predicate). It is created by the
// postalloc expansion of cmpD_reg_reg_Ex and by the expand rules of the
// double-to-int/long conversions, which compare a value with itself as a
// NaN check.
instruct cmpDUnordered_reg_reg(flagsReg crx, regD src1, regD src2) %{
  // Needs a match rule so that the ideal opcode is Cmp. This makes gcm place the
  // node right before the conditional move using it.
  // In jck test api/java_awt/geom/QuadCurve2DFloat/index.html#SetCurveTesttestCase7,
  // compilation of java.awt.geom.RectangularShape::getBounds()Ljava/awt/Rectangle
  // crashed in register allocation where the flags register between cmpDUnordered
  // and a conditional move was supposed to be spilled.
  match(Set crx (CmpD src1 src2));
  // False predicate, shall not be matched.
  predicate(false);

  // NOTE(review): the label "cmpFUrd" is the same one cmpFUnordered_reg_reg
  // prints, so the two cannot be told apart in the format output - confirm
  // whether "cmpDUrd" was intended.
  format %{ "cmpFUrd $crx, $src1, $src2" %}
  size(4);
  ins_encode %{
    __ fcmpu($crx$$CondRegister, $src1$$FloatRegister, $src2$$FloatRegister);
  %}
  ins_pipe(pipe_class_default);
%}
10970 
10971 instruct cmpD_reg_reg_Ex(flagsReg crx, regD src1, regD src2) %{
        // Double compare producing condition flags. The rule is matched for CmpD
        // and is replaced after register allocation by two nodes: the raw compare
        // (cmpDUnordered_reg_reg) followed by a cmov_bns_less node that operates
        // on the same condition register.
10972   match(Set crx (CmpD src1 src2));
10973   ins_cost(DEFAULT_COST+BRANCH_COST);
10974
10975   format %{ "CmpD    $crx, $src1, $src2 \t// postalloc expanded" %}
10976   postalloc_expand %{
10977     //
10978     // replaces
10979     //
10980     //   region  src1  src2
10981     //    \       |     |
10982     //     crx=cmpD_reg_reg
10983     //
10984     // with
10985     //
10986     //   region  src1  src2
10987     //    \       |     |
10988     //     crx=cmpDUnordered_reg_reg
10989     //      |
10990     //      ^  region
10991     //      |   \
10992     //      crx=cmov_bns_less
10993     //
10994
10995     // create new nodes
10996     MachNode *m1 = new cmpDUnordered_reg_regNode();
10997     MachNode *m2 = new cmov_bns_lessNode();
10998
10999     // inputs for new nodes
11000     m1->add_req(n_region, n_src1, n_src2);
11001     m2->add_req(n_region);
          // m2 has no data input from m1; it is linked to m1 only by this
          // precedence edge (presumably to keep it ordered after the compare).
11002     m2->add_prec(m1);
11003
11004     // operands for new nodes
11005     m1->_opnds[0] = op_crx;
11006     m1->_opnds[1] = op_src1;
11007     m1->_opnds[2] = op_src2;
11008     m2->_opnds[0] = op_crx;
11009
11010     // registers for new nodes
          // Both new nodes receive the register pair already assigned to this node.
11011     ra_->set_pair(m1->_idx, ra_->get_reg_second(this), ra_->get_reg_first(this)); // crx
11012     ra_->set_pair(m2->_idx, ra_->get_reg_second(this), ra_->get_reg_first(this)); // crx
11013
11014     // Insert new nodes.
          // Pushed in execution order: compare first, then the fix-up node.
11015     nodes->push(m1);
11016     nodes->push(m2);
11017   %}
11018 %}
11019 
11020 // Compare double, generate -1,0,1
      // Matches CmpD3 on two double registers: the encoding compares $src1 with
      // $src2 into CR0 (fcmpu) and then derives the integer result in $dst from
      // CR0 via set_cmpu3.
11021 instruct cmpD3_reg_reg(iRegIdst dst, regD src1, regD src2, flagsRegCR0 cr0) %{
11022   match(Set dst (CmpD3 src1 src2));
        // CR0 is overwritten by the fcmpu in the encoding.
11023   effect(KILL cr0);
11024   ins_cost(DEFAULT_COST * 6);
        // Declared size differs with VM_Version::has_brw(); presumably set_cmpu3
        // emits a shorter sequence when that feature is present - TODO confirm.
11025   size((VM_Version::has_brw() ? 20 : 24));
11026
11027   format %{ "cmpD3_reg_reg $dst, $src1, $src2" %}
11028
11029   ins_encode %{
11030     __ fcmpu(CR0, $src1$$FloatRegister, $src2$$FloatRegister);
11031     __ set_cmpu3($dst$$Register, true); // C2 requires unordered to get treated like less
11032   %}
11033   ins_pipe(pipe_class_default);
11034 %}
11035 
11036 // Compare char
      // Matches the Digit node: tests whether $src1 lies in the byte range
      // 0x30..0x39 using cmprb and converts the resulting condition field into
      // $dst with setb.
11037 instruct cmprb_Digit_reg_reg(iRegIdst dst, iRegIsrc src1, iRegIsrc src2, flagsReg crx) %{
11038   match(Set dst (Digit src1));
        // src2 is not an input of the match rule; it is a scratch register that
        // receives the range constant. crx is scratch for the compare result.
11039   effect(TEMP src2, TEMP crx);
11040   ins_cost(3 * DEFAULT_COST);
11041
11042   format %{ "LI      $src2, 0x3930\n\t"
11043             "CMPRB   $crx, 0, $src1, $src2\n\t"
11044             "SETB    $dst, $crx" %}
11045   size(12);
11046   ins_encode %{
11047     // 0x30: 0, 0x39: 9
          // The constant packs the upper bound (0x39) above the lower bound (0x30).
11048     __ li($src2$$Register, 0x3930);
11049     // compare src1 with ranges 0x30 to 0x39
11050     __ cmprb($crx$$CondRegister, 0, $src1$$Register, $src2$$Register);
11051     __ setb($dst$$Register, $crx$$CondRegister);
11052   %}
11053   ins_pipe(pipe_class_default);
11054 %}
11055 
11056 instruct cmprb_LowerCase_reg_reg(iRegIdst dst, iRegIsrc src1, iRegIsrc src2, flagsReg crx) %{
        // Matches the LowerCase node. The encoding runs up to three checks on
        // $src1 and leaves the outcome in $crx, which setb turns into $dst:
        //   1. range 0x61..0x7A,
        //   2. ranges 0xDF..0xF6 and 0xF8..0xFF,
        //   3. equality with one of 0xAA, 0xB5, 0xBA.
        // After check 1 and check 2 a bgt on $crx jumps straight to the final
        // setb, so later checks only run when the earlier ones did not hit.
11057   match(Set dst (LowerCase src1));
        // src2 is not an input of the match rule; it is a scratch register for
        // the range/byte constants. crx is scratch for the compare results.
11058   effect(TEMP src2, TEMP crx);
11059   ins_cost(12 * DEFAULT_COST);
11060
        // The CMPEQB line lists only $crx, $src1, $src2: unlike CMPRB the
        // instruction has no range-select operand, and the encoding passes none.
11061   format %{ "LI      $src2, 0x7A61\n\t"
11062             "CMPRB   $crx, 0, $src1, $src2\n\t"
11063             "BGT     $crx, done\n\t"
11064             "LIS     $src2, (signed short)0xF6DF\n\t"
11065             "ORI     $src2, $src2, 0xFFF8\n\t"
11066             "CMPRB   $crx, 1, $src1, $src2\n\t"
11067             "BGT     $crx, done\n\t"
11068             "LIS     $src2, (signed short)0xAAB5\n\t"
11069             "ORI     $src2, $src2, 0xBABA\n\t"
11070             "INSRDI  $src2, $src2, 32, 0\n\t"
11071             "CMPEQB  $crx, $src1, $src2\n"
11072             "done:\n\t"
11073             "SETB    $dst, $crx" %}
11074
        // 12 instructions are emitted below; the declared size must stay in sync.
11075   size(48);
11076   ins_encode %{
11077     Label done;
11078     // 0x61: a, 0x7A: z
11079     __ li($src2$$Register, 0x7A61);
11080     // compare src1 with ranges 0x61 to 0x7A
11081     __ cmprb($crx$$CondRegister, 0, $src1$$Register, $src2$$Register);
11082     __ bgt($crx$$CondRegister, done);
11083
11084     // 0xDF: sharp s, 0xFF: y with diaeresis, 0xF7 is not the lower case
11085     __ lis($src2$$Register, (signed short)0xF6DF);
11086     __ ori($src2$$Register, $src2$$Register, 0xFFF8);
11087     // compare src1 with ranges 0xDF to 0xF6 and 0xF8 to 0xFF
11088     __ cmprb($crx$$CondRegister, 1, $src1$$Register, $src2$$Register);
11089     __ bgt($crx$$CondRegister, done);
11090
11091     // 0xAA: feminine ordinal indicator
11092     // 0xB5: micro sign
11093     // 0xBA: masculine ordinal indicator
11094     __ lis($src2$$Register, (signed short)0xAAB5);
11095     __ ori($src2$$Register, $src2$$Register, 0xBABA);
          // Copy the low word into the high word so that every byte of $src2 is
          // one of the three candidate values.
11096     __ insrdi($src2$$Register, $src2$$Register, 32, 0);
11097     // compare src1 with 0xAA, 0xB5, and 0xBA
11098     __ cmpeqb($crx$$CondRegister, $src1$$Register, $src2$$Register);
11099
11100     __ bind(done);
11101     __ setb($dst$$Register, $crx$$CondRegister);
11102   %}
11103   ins_pipe(pipe_class_default);
11104 %}
11105 
11106 instruct cmprb_UpperCase_reg_reg(iRegIdst dst, iRegIsrc src1, iRegIsrc src2, flagsReg crx) %{
        // Matches the UpperCase node. The encoding runs up to two range checks on
        // $src1 and leaves the outcome in $crx, which setb turns into $dst:
        //   1. range 0x41..0x5A,
        //   2. ranges 0xC0..0xD6 and 0xD8..0xDE.
        // A bgt on $crx after check 1 skips check 2.
11107   match(Set dst (UpperCase src1));
        // src2 is not an input of the match rule; it is a scratch register for
        // the range constants. crx is scratch for the compare results.
11108   effect(TEMP src2, TEMP crx);
11109   ins_cost(7 * DEFAULT_COST);
11110
11111   format %{ "LI      $src2, 0x5A41\n\t"
11112             "CMPRB   $crx, 0, $src1, $src2\n\t"
11113             "BGT     $crx, done\n\t"
11114             "LIS     $src2, (signed short)0xD6C0\n\t"
11115             "ORI     $src2, $src2, 0xDED8\n\t"
11116             "CMPRB   $crx, 1, $src1, $src2\n"
11117             "done:\n\t"
11118             "SETB    $dst, $crx" %}
11119
        // 7 instructions are emitted below; the declared size must stay in sync.
11120   size(28);
11121   ins_encode %{
11122     Label done;
11123     // 0x41: A, 0x5A: Z
11124     __ li($src2$$Register, 0x5A41);
11125     // compare src1 with a range 0x41 to 0x5A
11126     __ cmprb($crx$$CondRegister, 0, $src1$$Register, $src2$$Register);
11127     __ bgt($crx$$CondRegister, done);
11128
11129     // 0xC0: a with grave, 0xDE: thorn, 0xD7 is not the upper case
11130     __ lis($src2$$Register, (signed short)0xD6C0);
11131     __ ori($src2$$Register, $src2$$Register, 0xDED8);
11132     // compare src1 with ranges 0xC0 to 0xD6 and 0xD8 to 0xDE
11133     __ cmprb($crx$$CondRegister, 1, $src1$$Register, $src2$$Register);
11134
11135     __ bind(done);
11136     __ setb($dst$$Register, $crx$$CondRegister);
11137   %}
11138   ins_pipe(pipe_class_default);
11139 %}
11140 
11141 instruct cmprb_Whitespace_reg_reg(iRegIdst dst, iRegIsrc src1, iRegIsrc src2, flagsReg crx) %{
        // Matches the Whitespace node when PowerArchitecturePPC64 <= 9. Builds the
        // 32-bit constant 0x201C0D09 in $src2 with li + addis, tests $src1 against
        // the ranges 0x09..0x0D and 0x1C..0x20 with one cmprb, and converts the
        // condition field into $dst with setb.
11142   match(Set dst (Whitespace src1));
11143   predicate(PowerArchitecturePPC64 <= 9);
        // src2 is not an input of the match rule; it is a scratch register for
        // the range constant. crx is scratch for the compare result.
11144   effect(TEMP src2, TEMP crx);
11145   ins_cost(4 * DEFAULT_COST);
11146
        // The ADDIS line shows both register operands, matching the encoding.
11147   format %{ "LI      $src2, 0x0D09\n\t"
11148             "ADDIS   $src2, $src2, 0x201C\n\t"
11149             "CMPRB   $crx, 1, $src1, $src2\n\t"
11150             "SETB    $dst, $crx" %}
11151   size(16);
11152   ins_encode %{
11153     // 0x09 to 0x0D, 0x1C to 0x20
11154     __ li($src2$$Register, 0x0D09);
11155     __ addis($src2$$Register, $src2$$Register, 0x0201C);
11156     // compare src with ranges 0x09 to 0x0D and 0x1C to 0x20
11157     __ cmprb($crx$$CondRegister, 1, $src1$$Register, $src2$$Register);
11158     __ setb($dst$$Register, $crx$$CondRegister);
11159   %}
11160   ins_pipe(pipe_class_default);
11161 %}
11162 
11163 // Power 10 version, using prefixed addi to load 32-bit constant
      // Same check as cmprb_Whitespace_reg_reg, but selected when
      // PowerArchitecturePPC64 >= 10 and loading 0x201C0D09 with a single pli.
11164 instruct cmprb_Whitespace_reg_reg_prefixed(iRegIdst dst, iRegIsrc src1, iRegIsrc src2, flagsReg crx) %{
11165   match(Set dst (Whitespace src1));
11166   predicate(PowerArchitecturePPC64 >= 10);
        // src2 is not an input of the match rule; it is a scratch register for
        // the range constant. crx is scratch for the compare result.
11167   effect(TEMP src2, TEMP crx);
11168   ins_cost(3 * DEFAULT_COST);
11169
11170   format %{ "PLI     $src2, 0x201C0D09\n\t"
11171             "CMPRB   $crx, 1, $src1, $src2\n\t"
11172             "SETB    $dst, $crx" %}
        // Three instructions with a declared size of 16: presumably the prefixed
        // pli accounts for 8 of them - TODO confirm.
11173   size(16);
11174   ins_encode %{
11175     // 0x09 to 0x0D, 0x1C to 0x20
          // Debug check: the pli must not start at offset 60 of a 64-byte block
          // ((pc & 0x3c) == 0x3c); presumably because a prefixed instruction may
          // not straddle such a boundary - confirm against the ISA.
11176     assert( ((intptr_t)(__ pc()) & 0x3c) != 0x3c, "Bad alignment for prefixed instruction at " INTPTR_FORMAT, (intptr_t)(__ pc()));
11177     __ pli($src2$$Register, 0x201C0D09);
11178     // compare src with ranges 0x09 to 0x0D and 0x1C to 0x20
11179     __ cmprb($crx$$CondRegister, 1, $src1$$Register, $src2$$Register);
11180     __ setb($dst$$Register, $crx$$CondRegister);
11181   %}
11182   ins_pipe(pipe_class_default);
        // NOTE(review): presumably this alignment request is what makes the
        // assert above hold - confirm how ins_alignment is interpreted.
11183   ins_alignment(2);
11184 %}
11185 
11186 //----------Branches---------------------------------------------------------
11187 // Jump
11188 
11189 // Direct Branch.
      // Matches Goto and emits a single unconditional `b' to $labl.
11190 instruct branch(label labl) %{
11191   match(Goto);
11192   effect(USE labl);
11193   ins_cost(BRANCH_COST);
11194
11195   format %{ "B       $labl" %}
11196   size(4);
11197   ins_encode %{
           // `d' is bound at the current position and serves as a stand-in target
           // whenever no real label is supplied.
11198      Label d;    // dummy
11199      __ bind(d);
11200      Label* p = $labl$$label;
11201      // `p' is `nullptr' when this encoding class is used only to
11202      // determine the size of the encoded instruction.
11203      Label& l = (nullptr == p)? d : *(p);
11204      __ b(l);
11205   %}
11206   ins_pipe(pipe_class_default);
11207 %}
11208 
11209 // Conditional Near Branch
      // Short form of the conditional branch on $crx; encoded through enc_bc with
      // a declared size of 4. Shares its match rule with `branchConFar'.
11210 instruct branchCon(cmpOp cmp, flagsRegSrc crx, label lbl) %{
11211   // Same match rule as `branchConFar'.
11212   match(If cmp crx);
11213   effect(USE lbl);
11214   ins_cost(BRANCH_COST);
11215
11216   // If set to 1 this indicates that the current instruction is a
11217   // short variant of a long branch. This avoids using this
11218   // instruction in first-pass matching. It will then only be used in
11219   // the `Shorten_branches' pass.
11220   ins_short_branch(1);
11221
11222   format %{ "B$cmp     $crx, $lbl" %}
11223   size(4);
11224   ins_encode( enc_bc(crx, cmp, lbl) );
11225   ins_pipe(pipe_class_default);
11226 %}
11227 
11228 // This is for cases when the ppc64 `bc' instruction does not
11229 // reach far enough. So we emit a far branch here, which is more
11230 // expensive.
11231 //
11232 // Conditional Far Branch
      // Encoded through enc_bc_far with a declared size of 8, twice that of the
      // near variant `branchCon'.
11233 instruct branchConFar(cmpOp cmp, flagsRegSrc crx, label lbl) %{
11234   // Same match rule as `branchCon'.
11235   match(If cmp crx);
11236   effect(USE crx, USE lbl);
11237   // Higher cost than `branchCon'.
11238   ins_cost(5*BRANCH_COST);
11239
11240   // This is not a short variant of a branch, but the long variant.
11241   ins_short_branch(0);
11242
11243   format %{ "B_FAR$cmp $crx, $lbl" %}
11244   size(8);
11245   ins_encode( enc_bc_far(crx, cmp, lbl) );
11246   ins_pipe(pipe_class_default);
11247 %}
11248 
11249 instruct branchLoopEnd(cmpOp cmp, flagsRegSrc crx, label labl) %{
        // Short conditional branch closing a counted loop: same encoding (enc_bc)
        // and declared size as `branchCon', but matched on CountedLoopEnd.
11250   match(CountedLoopEnd cmp crx);
11251   effect(USE labl);
11252   ins_cost(BRANCH_COST);
11253
11254   // short variant.
11255   ins_short_branch(1);
11256
11257   format %{ "B$cmp     $crx, $labl \t// counted loop end" %}
11258   size(4);
11259   ins_encode( enc_bc(crx, cmp, labl) );
11260   ins_pipe(pipe_class_default);
11261 %}
11262 
11263 instruct branchLoopEndFar(cmpOp cmp, flagsRegSrc crx, label labl) %{
        // Long conditional branch closing a counted loop: same encoding
        // (enc_bc_far) and declared size as `branchConFar', but matched on
        // CountedLoopEnd.
11264   match(CountedLoopEnd cmp crx);
11265   effect(USE labl);
        // NOTE(review): same cost as the short variant `branchLoopEnd', whereas
        // `branchConFar' uses 5*BRANCH_COST - confirm this is intended.
11266   ins_cost(BRANCH_COST);
11267
11268   // Long variant.
11269   ins_short_branch(0);
11270
11271   format %{ "B_FAR$cmp $crx, $labl \t// counted loop end" %}
11272   size(8);
11273   ins_encode( enc_bc_far(crx, cmp, labl) );
11274   ins_pipe(pipe_class_default);
11275 %}
11276 
11277 // ============================================================================
11278 // Java runtime operations, intrinsics and other complex operations.
11279 
11280 // The 2nd slow-half of a subtype check. Scan the subklass's 2ndary superklass
11281 // array for an instance of the superklass. Set a hidden internal cache on a
11282 // hit (cache is checked with exposed code in gen_subtype_check()). Return
11283 // not zero for a miss or zero for a hit. The encoding ALSO sets flags.
11284 //
11285 // GL TODO: Improve this.
11286 // - result should not be a TEMP
11287 // - Add match rule as on sparc avoiding additional Cmp.
      //
      // This rule is only eligible when UseSecondarySupersTable is off; the whole
      // check is delegated to check_klass_subtype_slow_path.
11288 instruct partialSubtypeCheck(iRegPdst result, iRegP_N2P subklass, iRegP_N2P superklass,
11289                              iRegPdst tmp_klass, iRegPdst tmp_arrayptr) %{
11290   match(Set result (PartialSubtypeCheck subklass superklass));
11291   predicate(!UseSecondarySupersTable);
11292   effect(TEMP_DEF result, TEMP tmp_klass, TEMP tmp_arrayptr);
11293   ins_cost(DEFAULT_COST*10);
11294
11295   format %{ "PartialSubtypeCheck $result = ($subklass instanceOf $superklass) tmp: $tmp_klass, $tmp_arrayptr" %}
11296   ins_encode %{
          // Note the temp order: tmp_arrayptr is passed before tmp_klass. The
          // nullptr argument is presumably an optional label - confirm against
          // the declaration of check_klass_subtype_slow_path.
11297     __ check_klass_subtype_slow_path($subklass$$Register, $superklass$$Register, $tmp_arrayptr$$Register,
11298                                      $tmp_klass$$Register, nullptr, $result$$Register);
11299   %}
11300   ins_pipe(pipe_class_default);
11301 %}
11302 
11303 // Two versions of partialSubtypeCheck, both used when we need to
11304 // search for a super class in the secondary supers array. The first
11305 // is used when we don't know _a priori_ the class being searched
11306 // for. The second, far more common, is used when we do know: this is
11307 // used for instanceof, checkcast, and any case where C2 can determine
11308 // it by constant propagation.
      //
      // Variable-superclass version: eligible when UseSecondarySupersTable is on;
      // delegates to lookup_secondary_supers_table_var with four temp registers.
11309 instruct partialSubtypeCheckVarSuper(iRegPsrc sub, iRegPsrc super, iRegPdst result,
11310                                      iRegPdst tempR1, iRegPdst tempR2, iRegPdst tempR3, iRegPdst tempR4,
11311                                      flagsRegCR0 cr0, regCTR ctr)
11312 %{
11313   match(Set result (PartialSubtypeCheck sub super));
11314   predicate(UseSecondarySupersTable);
        // CR0 and CTR are declared as clobbered in addition to the four temps.
11315   effect(KILL cr0, KILL ctr, TEMP_DEF result, TEMP tempR1, TEMP tempR2, TEMP tempR3, TEMP tempR4);
11316
11317   ins_cost(DEFAULT_COST * 10);  // slightly larger than the next version
11318   format %{ "partialSubtypeCheck $result, $sub, $super" %}
11319   ins_encode %{
11320     __ lookup_secondary_supers_table_var($sub$$Register, $super$$Register,
11321                                          $tempR1$$Register, $tempR2$$Register, $tempR3$$Register, $tempR4$$Register,
11322                                          $result$$Register);
11323   %}
11324   ins_pipe(pipe_class_memory);
11325 %}
11326 
11327 instruct partialSubtypeCheckConstSuper(rarg3RegP sub, rarg2RegP super_reg, immP super_con, rarg6RegP result,
11328                                        rarg1RegP tempR1, rarg5RegP tempR2, rarg4RegP tempR3, rscratch1RegP tempR4,
11329                                        flagsRegCR0 cr0, regCTR ctr)
11330 %{
        // Constant-superclass version of the secondary-supers lookup: the super
        // class is available both in a register (super_reg) and as a compile-time
        // constant (super_con), whose hash slot selects the lookup code.
        // All register operands use fixed-register operand classes; presumably
        // this matches the register convention of the lookup stub - confirm.
11331   match(Set result (PartialSubtypeCheck sub (Binary super_reg super_con)));
11332   predicate(UseSecondarySupersTable);
11333   effect(KILL cr0, KILL ctr, TEMP tempR1, TEMP tempR2, TEMP tempR3, TEMP tempR4);
11334
11335   ins_cost(DEFAULT_COST*8);  // smaller than the other version
11336   format %{ "partialSubtypeCheck $result, $sub, $super_reg" %}
11337
11338   ins_encode %{
          // Hash slot of the constant super class, known at code-emission time.
11339     u1 super_klass_slot = ((Klass*)$super_con$$constant)->hash_slot();
11340     if (InlineSecondarySupersTest) {
            // Emit the lookup inline.
11341       __ lookup_secondary_supers_table_const($sub$$Register, $super_reg$$Register,
11342                                              $tempR1$$Register, $tempR2$$Register, $tempR3$$Register, $tempR4$$Register,
11343                                              $result$$Register, super_klass_slot);
11344     } else {
            // Call the per-slot stub instead: form its address relative to
            // R29_TOC in tempR1 (R0 is passed as an extra register to
            // add_const_optimized), move it to CTR and branch-and-link through CTR.
11345       address stub = StubRoutines::lookup_secondary_supers_table_stub(super_klass_slot);
11346       Register r_stub_addr = $tempR1$$Register;
11347       __ add_const_optimized(r_stub_addr, R29_TOC, MacroAssembler::offset_to_global_toc(stub), R0);
11348       __ mtctr(r_stub_addr);
11349       __ bctrl();
11350     }
11351   %}
11352
11353   ins_pipe(pipe_class_memory);
11354 %}
11355 
11356 // inlined locking and unlocking
11357 
11358 instruct cmpFastLock(flagsRegCR0 crx, iRegPdst oop, iRegPdst box, iRegPdst tmp1, iRegPdst tmp2) %{
        // Inlined monitor enter for FastLock when UseObjectMonitorTable is off.
        // Delegates to fast_lock with two temps; no third temp is supplied (noreg).
        // The result is reported in CR0 (crx is a flagsRegCR0 operand).
11359   predicate(!UseObjectMonitorTable);
11360   match(Set crx (FastLock oop box));
11361   effect(TEMP tmp1, TEMP tmp2);
11362
11363   format %{ "FASTLOCK  $oop, $box, $tmp1, $tmp2" %}
11364   ins_encode %{
11365     __ fast_lock($crx$$CondRegister, $oop$$Register, $box$$Register,
11366                  $tmp1$$Register, $tmp2$$Register, noreg /*tmp3*/);
11367     // If locking was successful, crx should indicate 'EQ'.
11368     // The compiler generates a branch to the runtime call to
11369     // _complete_monitor_locking_Java for the case where crx is 'NE'.
11370   %}
11371   ins_pipe(pipe_class_compare);
11372 %}
11373 
11374 instruct cmpFastLockMonitorTable(flagsRegCR0 crx, iRegPdst oop, iRegPdst box, iRegPdst tmp1, iRegPdst tmp2, iRegPdst tmp3, flagsRegCR1 cr1) %{
        // Inlined monitor enter for FastLock when UseObjectMonitorTable is on.
        // Differs from cmpFastLock by supplying a third temp register to fast_lock
        // and by declaring CR1 as clobbered.
11375   predicate(UseObjectMonitorTable);
11376   match(Set crx (FastLock oop box));
11377   effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, KILL cr1);
11378
11379   format %{ "FASTLOCK  $oop, $box, $tmp1, $tmp2, $tmp3" %}
11380   ins_encode %{
11381     __ fast_lock($crx$$CondRegister, $oop$$Register, $box$$Register,
11382                  $tmp1$$Register, $tmp2$$Register, $tmp3$$Register);
11383     // If locking was successful, crx should indicate 'EQ'.
11384     // The compiler generates a branch to the runtime call to
11385     // _complete_monitor_locking_Java for the case where crx is 'NE'.
11386   %}
11387   ins_pipe(pipe_class_compare);
11388 %}
11389 
11390 instruct cmpFastUnlock(flagsRegCR0 crx, iRegPdst oop, iRegPdst box, iRegPdst tmp1, iRegPdst tmp2, iRegPdst tmp3) %{
        // Inlined monitor exit for FastUnlock. Delegates to fast_unlock with three
        // temp registers; the result is reported in CR0 (crx is a flagsRegCR0
        // operand).
11391   match(Set crx (FastUnlock oop box));
11392   effect(TEMP tmp1, TEMP tmp2, TEMP tmp3);
11393
        // The format lists all three temps that are handed to fast_unlock.
11394   format %{ "FASTUNLOCK  $oop, $box, $tmp1, $tmp2, $tmp3" %}
11395   ins_encode %{
11396     __ fast_unlock($crx$$CondRegister, $oop$$Register, $box$$Register,
11397                    $tmp1$$Register, $tmp2$$Register, $tmp3$$Register);
11398     // If unlocking was successful, crx should indicate 'EQ'.
11399     // The compiler generates a branch to the runtime call to
11400     // _complete_monitor_unlocking_Java for the case where crx is 'NE'.
11401   %}
11402   ins_pipe(pipe_class_compare);
11403 %}
11404 
11405 // Align address.
      // Folds CastP2X -> AndL with a constant mask -> CastX2P into one clrrdi.
      // The mask operand class is immLnegpow2 (by its name, the negative of a
      // power of two), so -mask is a power of two and log2i_exact yields the
      // bit count handed to clrrdi.
11406 instruct align_addr(iRegPdst dst, iRegPsrc src, immLnegpow2 mask) %{
11407   match(Set dst (CastX2P (AndL (CastP2X src) mask)));
11408
        // The format prints ANDDI although the emitted instruction is clrrdi.
11409   format %{ "ANDDI   $dst, $src, $mask \t// next aligned address" %}
11410   size(4);
11411   ins_encode %{
11412     __ clrrdi($dst$$Register, $src$$Register, log2i_exact(-(julong)$mask$$constant));
11413   %}
11414   ins_pipe(pipe_class_default);
11415 %}
11416 
11417 // Array size computation.
      // Folds the subtraction of two pointers cast to long (end - start) into a
      // single subf.
11418 instruct array_size(iRegLdst dst, iRegPsrc end, iRegPsrc start) %{
11419   match(Set dst (SubL (CastP2X end) (CastP2X start)));
11420
11421   format %{ "SUB     $dst, $end, $start \t// array size in bytes" %}
11422   size(4);
11423   ins_encode %{
          // subf takes the subtrahend first: the operands are (dst, start, end)
          // for the match rule's end - start.
11424     __ subf($dst$$Register, $start$$Register, $end$$Register);
11425   %}
11426   ins_pipe(pipe_class_default);
11427 %}
11428 
11429 // Clear-array with constant short array length. The versions below can use dcbz with cnt > 30.
      // Matches ClearArray when the count is a constant of operand class immLmax30
      // and delegates to clear_memory_constlen.
11430 instruct inlineCallClearArrayShort(immLmax30 cnt, rarg2RegP base, Universe dummy, regCTR ctr) %{
11431   match(Set dummy (ClearArray cnt base));
        // base is consumed and destroyed; CTR is declared as clobbered.
11432   effect(USE_KILL base, KILL ctr);
11433   ins_cost(2 * MEMORY_REF_COST);
11434
11435   format %{ "ClearArray $cnt, $base" %}
11436   ins_encode %{
11437     __ clear_memory_constlen($base$$Register, $cnt$$constant, R0); // kills base, R0
11438   %}
11439   ins_pipe(pipe_class_default);
11440 %}
11441 
11442 // Clear-array with constant large array length.
      // Matches ClearArray for any constant long count (immL) and delegates to
      // clear_memory_doubleword, passing the count as a constant plus one temp
      // register. Costlier than the short variant.
11443 instruct inlineCallClearArrayLarge(immL cnt, rarg2RegP base, Universe dummy, iRegLdst tmp, regCTR ctr) %{
11444   match(Set dummy (ClearArray cnt base));
        // base is consumed and destroyed; tmp is scratch; CTR is declared as
        // clobbered.
11445   effect(USE_KILL base, TEMP tmp, KILL ctr);
11446   ins_cost(3 * MEMORY_REF_COST);
11447
11448   format %{ "ClearArray $cnt, $base \t// KILL $tmp" %}
11449   ins_encode %{
11450     __ clear_memory_doubleword($base$$Register, $tmp$$Register, R0, $cnt$$constant); // kills base, R0
11451   %}
11452   ins_pipe(pipe_class_default);
11453 %}
11454 
11455 // Clear-array with dynamic array length.
      // Matches ClearArray with the count in a register and delegates to
      // clear_memory_doubleword. Costliest of the three ClearArray rules.
11456 instruct inlineCallClearArray(rarg1RegL cnt, rarg2RegP base, Universe dummy, regCTR ctr) %{
11457   match(Set dummy (ClearArray cnt base));
        // Both inputs are consumed and destroyed; CTR is declared as clobbered.
11458   effect(USE_KILL cnt, USE_KILL base, KILL ctr);
11459   ins_cost(4 * MEMORY_REF_COST);
11460
11461   format %{ "ClearArray $cnt, $base" %}
11462   ins_encode %{
11463     __ clear_memory_doubleword($base$$Register, $cnt$$Register, R0); // kills cnt, base, R0
11464   %}
11465   ins_pipe(pipe_class_default);
11466 %}
11467 
11468 instruct string_compareL(rarg1RegP str1, rarg2RegP str2, rarg3RegI cnt1, rarg4RegI cnt2, iRegIdst result,
11469                          iRegIdst tmp, regCTR ctr, flagsRegCR0 cr0) %{
        // StrComp intrinsic for nodes whose encoding() is StrIntrinsicNode::LL.
        // All four inputs sit in fixed argument registers and are destroyed;
        // the work is delegated to string_compare.
11470   predicate(((StrCompNode*)n)->encoding() == StrIntrinsicNode::LL);
11471   match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
11472   effect(TEMP_DEF result, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL ctr, KILL cr0, TEMP tmp);
11473   ins_cost(300);
11474   format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result \t// KILL $tmp" %}
11475   ins_encode %{
11476     __ string_compare($str1$$Register, $str2$$Register,
11477                       $cnt1$$Register, $cnt2$$Register,
11478                       $tmp$$Register,
11479                       $result$$Register, StrIntrinsicNode::LL);
11480   %}
11481   ins_pipe(pipe_class_default);
11482 %}
11483 
11484 instruct string_compareU(rarg1RegP str1, rarg2RegP str2, rarg3RegI cnt1, rarg4RegI cnt2, iRegIdst result,
11485                          iRegIdst tmp, regCTR ctr, flagsRegCR0 cr0) %{
        // StrComp intrinsic for nodes whose encoding() is StrIntrinsicNode::UU.
        // All four inputs sit in fixed argument registers and are destroyed;
        // the work is delegated to string_compare.
11486   predicate(((StrCompNode*)n)->encoding() == StrIntrinsicNode::UU);
11487   match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
11488   effect(TEMP_DEF result, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL ctr, KILL cr0, TEMP tmp);
11489   ins_cost(300);
11490   format %{ "String Compare char[] $str1,$cnt1,$str2,$cnt2 -> $result \t// KILL $tmp" %}
11491   ins_encode %{
11492     __ string_compare($str1$$Register, $str2$$Register,
11493                       $cnt1$$Register, $cnt2$$Register,
11494                       $tmp$$Register,
11495                       $result$$Register, StrIntrinsicNode::UU);
11496   %}
11497   ins_pipe(pipe_class_default);
11498 %}
11499 
11500 instruct string_compareLU(rarg1RegP str1, rarg2RegP str2, rarg3RegI cnt1, rarg4RegI cnt2, iRegIdst result,
11501                           iRegIdst tmp, regCTR ctr, flagsRegCR0 cr0) %{
        // StrComp intrinsic for nodes whose encoding() is StrIntrinsicNode::LU.
        // All four inputs sit in fixed argument registers and are destroyed;
        // the work is delegated to string_compare with operands in match order.
11502   predicate(((StrCompNode*)n)->encoding() == StrIntrinsicNode::LU);
11503   match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
11504   effect(TEMP_DEF result, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL ctr, KILL cr0, TEMP tmp);
11505   ins_cost(300);
11506   format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result \t// KILL $tmp" %}
11507   ins_encode %{
11508     __ string_compare($str1$$Register, $str2$$Register,
11509                       $cnt1$$Register, $cnt2$$Register,
11510                       $tmp$$Register,
11511                       $result$$Register, StrIntrinsicNode::LU);
11512   %}
11513   ins_pipe(pipe_class_default);
11514 %}
11515 
11516 instruct string_compareUL(rarg1RegP str1, rarg2RegP str2, rarg3RegI cnt1, rarg4RegI cnt2, iRegIdst result,
11517                           iRegIdst tmp, regCTR ctr, flagsRegCR0 cr0) %{
        // StrComp intrinsic for nodes whose encoding() is StrIntrinsicNode::UL.
        // All four inputs sit in fixed argument registers and are destroyed;
        // the work is delegated to string_compare.
11518   predicate(((StrCompNode*)n)->encoding() == StrIntrinsicNode::UL);
11519   match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
11520   effect(TEMP_DEF result, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL ctr, KILL cr0, TEMP tmp);
11521   ins_cost(300);
11522   format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result \t// KILL $tmp" %}
11523   ins_encode %{
          // Unlike the other three variants, the operands are passed swapped
          // (str2/cnt2 first). Presumably string_compare handles the UL case by
          // treating it as LU with exchanged inputs - confirm in string_compare.
11524     __ string_compare($str2$$Register, $str1$$Register,
11525                       $cnt2$$Register, $cnt1$$Register,
11526                       $tmp$$Register,
11527                       $result$$Register, StrIntrinsicNode::UL);
11528   %}
11529   ins_pipe(pipe_class_default);
11530 %}
11531 
11532 instruct string_equalsL(rarg1RegP str1, rarg2RegP str2, rarg3RegI cnt, iRegIdst result,
11533                         iRegIdst tmp, regCTR ctr, flagsRegCR0 cr0) %{
        // StrEquals intrinsic for nodes whose encoding() is StrIntrinsicNode::LL.
        // The inputs sit in fixed argument registers and are destroyed; the work
        // is delegated to array_equals with an explicit count register.
11534   predicate(((StrEqualsNode*)n)->encoding() == StrIntrinsicNode::LL);
11535   match(Set result (StrEquals (Binary str1 str2) cnt));
11536   effect(TEMP_DEF result, USE_KILL str1, USE_KILL str2, USE_KILL cnt, TEMP tmp, KILL ctr, KILL cr0);
11537   ins_cost(300);
11538   format %{ "String Equals byte[] $str1,$str2,$cnt -> $result \t// KILL $tmp" %}
11539   ins_encode %{
          // The first argument is false here but true in the AryEq rules, which
          // pass no count; presumably it selects whether the operands are whole
          // array objects - confirm against array_equals.
11540     __ array_equals(false, $str1$$Register, $str2$$Register,
11541                     $cnt$$Register, $tmp$$Register,
11542                     $result$$Register, true /* byte */);
11543   %}
11544   ins_pipe(pipe_class_default);
11545 %}
11546 
11547 instruct array_equalsB(rarg1RegP ary1, rarg2RegP ary2, iRegIdst result,
11548                        iRegIdst tmp1, iRegIdst tmp2, regCTR ctr, flagsRegCR0 cr0, flagsRegCR1 cr1) %{
        // AryEq intrinsic for nodes whose encoding() is StrIntrinsicNode::LL.
        // Both array pointers sit in fixed argument registers and are destroyed;
        // the work is delegated to array_equals with the byte flag set.
11549   predicate(((AryEqNode*)n)->encoding() == StrIntrinsicNode::LL);
11550   match(Set result (AryEq ary1 ary2));
        // Besides CTR and CR0, CR1 is declared as clobbered as well.
11551   effect(TEMP_DEF result, USE_KILL ary1, USE_KILL ary2, TEMP tmp1, TEMP tmp2, KILL ctr, KILL cr0, KILL cr1);
11552   ins_cost(300);
11553   format %{ "Array Equals $ary1,$ary2 -> $result \t// KILL $tmp1,$tmp2" %}
11554   ins_encode %{
11555     __ array_equals(true, $ary1$$Register, $ary2$$Register,
11556                     $tmp1$$Register, $tmp2$$Register,
11557                     $result$$Register, true /* byte */);
11558   %}
11559   ins_pipe(pipe_class_default);
11560 %}
11561 
11562 instruct array_equalsC(rarg1RegP ary1, rarg2RegP ary2, iRegIdst result,
11563                        iRegIdst tmp1, iRegIdst tmp2, regCTR ctr, flagsRegCR0 cr0, flagsRegCR1 cr1) %{
        // AryEq intrinsic for nodes whose encoding() is StrIntrinsicNode::UU.
        // Identical to array_equalsB except that the byte flag passed to
        // array_equals is false.
11564   predicate(((AryEqNode*)n)->encoding() == StrIntrinsicNode::UU);
11565   match(Set result (AryEq ary1 ary2));
        // Besides CTR and CR0, CR1 is declared as clobbered as well.
11566   effect(TEMP_DEF result, USE_KILL ary1, USE_KILL ary2, TEMP tmp1, TEMP tmp2, KILL ctr, KILL cr0, KILL cr1);
11567   ins_cost(300);
11568   format %{ "Array Equals $ary1,$ary2 -> $result \t// KILL $tmp1,$tmp2" %}
11569   ins_encode %{
11570     __ array_equals(true, $ary1$$Register, $ary2$$Register,
11571                     $tmp1$$Register, $tmp2$$Register,
11572                     $result$$Register, false /* byte */);
11573   %}
11574   ins_pipe(pipe_class_default);
11575 %}
11576 
11577 instruct indexOf_imm1_char_U(iRegIdst result, iRegPsrc haystack, iRegIsrc haycnt,
11578                              immP needleImm, immL offsetImm, immI_1 needlecntImm,
11579                              iRegIdst tmp1, iRegIdst tmp2,
11580                              flagsRegCR0 cr0, flagsRegCR1 cr1, regCTR ctr) %{
        // StrIndexOf for UU-encoded nodes where the needle is a constant array
        // address (constant pointer + constant offset) of length 1. The single
        // needle character is read from the constant array at code-emission time
        // and handed to string_indexof_char as an immediate.
11581   match(Set result (StrIndexOf (Binary haystack haycnt) (Binary (AddP needleImm offsetImm) needlecntImm)));
11582   effect(TEMP tmp1, TEMP tmp2, KILL cr0, KILL cr1, KILL ctr);
11583   // Required for EA: check if it is still a type_array.
11584   predicate(((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UU);
11585   ins_cost(150);
11586
11587   format %{ "String IndexOf CSCL1 $haystack[0..$haycnt], $needleImm+$offsetImm[0..$needlecntImm]"
11588             "-> $result \t// KILL $haycnt, $tmp1, $tmp2, $cr0, $cr1" %}
11589
11590   ins_encode %{
11591     immPOper *needleOper = (immPOper *)$needleImm;
11592     const TypeOopPtr *t = needleOper->type()->isa_oopptr();
11593     ciTypeArray* needle_values = t->const_oop()->as_type_array();  // Pointer to live char *
11594     jchar chr;
          // The constant array is read byte-wise; the two bytes of the first
          // UTF-16 character are combined according to the target byte order.
          // Each byte goes through unsigned char so it is not sign-extended.
11595 #ifdef VM_LITTLE_ENDIAN
11596     chr = (((jchar)(unsigned char)needle_values->element_value(1).as_byte()) << 8) |
11597            ((jchar)(unsigned char)needle_values->element_value(0).as_byte());
11598 #else
11599     chr = (((jchar)(unsigned char)needle_values->element_value(0).as_byte()) << 8) |
11600            ((jchar)(unsigned char)needle_values->element_value(1).as_byte());
11601 #endif
          // R0 is passed in the register slot next to the constant chr.
11602     __ string_indexof_char($result$$Register,
11603                            $haystack$$Register, $haycnt$$Register,
11604                            R0, chr,
11605                            $tmp1$$Register, $tmp2$$Register, false /*is_byte*/);
11606   %}
11607   ins_pipe(pipe_class_compare);
11608 %}
11609 
11610 instruct indexOf_imm1_char_L(iRegIdst result, iRegPsrc haystack, iRegIsrc haycnt,
11611                              immP needleImm, immL offsetImm, immI_1 needlecntImm,
11612                              iRegIdst tmp1, iRegIdst tmp2,
11613                              flagsRegCR0 cr0, flagsRegCR1 cr1, regCTR ctr) %{
        // StrIndexOf for LL-encoded nodes where the needle is a constant array
        // address (constant pointer + constant offset) of length 1. The single
        // needle byte is read from the constant array at code-emission time and
        // handed to string_indexof_char as an immediate.
11614   match(Set result (StrIndexOf (Binary haystack haycnt) (Binary (AddP needleImm offsetImm) needlecntImm)));
11615   effect(TEMP tmp1, TEMP tmp2, KILL cr0, KILL cr1, KILL ctr);
11616   // Required for EA: check if it is still a type_array.
11617   predicate(((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::LL);
11618   ins_cost(150);
11619
11620   format %{ "String IndexOf CSCL1 $haystack[0..$haycnt], $needleImm+$offsetImm[0..$needlecntImm]"
11621             "-> $result \t// KILL $haycnt, $tmp1, $tmp2, $cr0, $cr1" %}
11622
11623   ins_encode %{
11624     immPOper *needleOper = (immPOper *)$needleImm;
11625     const TypeOopPtr *t = needleOper->type()->isa_oopptr();
11626     ciTypeArray* needle_values = t->const_oop()->as_type_array();  // Pointer to live char *
          // Go through unsigned char (as the UU variant does) so that a needle
          // byte >= 0x80 is zero-extended. A direct cast from the signed byte
          // would yield 0xFFxx, which can never equal a byte of the haystack.
11627     jchar chr = (jchar)(unsigned char)needle_values->element_value(0).as_byte();
          // R0 is passed in the register slot next to the constant chr.
11628     __ string_indexof_char($result$$Register,
11629                            $haystack$$Register, $haycnt$$Register,
11630                            R0, chr,
11631                            $tmp1$$Register, $tmp2$$Register, true /*is_byte*/);
11632   %}
11633   ins_pipe(pipe_class_compare);
11634 %}
11635 
11636 instruct indexOf_imm1_char_UL(iRegIdst result, iRegPsrc haystack, iRegIsrc haycnt,
11637                               immP needleImm, immL offsetImm, immI_1 needlecntImm,
11638                               iRegIdst tmp1, iRegIdst tmp2,
11639                               flagsRegCR0 cr0, flagsRegCR1 cr1, regCTR ctr) %{
        // StrIndexOf for UL-encoded nodes where the needle is a constant array
        // address (constant pointer + constant offset) of length 1. The needle
        // is read as one byte from the constant array at code-emission time,
        // while the haystack is searched with is_byte == false.
11640   match(Set result (StrIndexOf (Binary haystack haycnt) (Binary (AddP needleImm offsetImm) needlecntImm)));
11641   effect(TEMP tmp1, TEMP tmp2, KILL cr0, KILL cr1, KILL ctr);
11642   // Required for EA: check if it is still a type_array.
11643   predicate(((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UL);
11644   ins_cost(150);
11645
11646   format %{ "String IndexOf CSCL1 $haystack[0..$haycnt], $needleImm+$offsetImm[0..$needlecntImm]"
11647             "-> $result \t// KILL $haycnt, $tmp1, $tmp2, $cr0, $cr1" %}
11648
11649   ins_encode %{
11650     immPOper *needleOper = (immPOper *)$needleImm;
11651     const TypeOopPtr *t = needleOper->type()->isa_oopptr();
11652     ciTypeArray* needle_values = t->const_oop()->as_type_array();  // Pointer to live char *
          // Go through unsigned char (as the UU variant does) so that a needle
          // byte >= 0x80 is zero-extended to 0x00xx. A direct cast from the
          // signed byte would yield 0xFFxx and the search would look for the
          // wrong character.
11653     jchar chr = (jchar)(unsigned char)needle_values->element_value(0).as_byte();
          // R0 is passed in the register slot next to the constant chr.
11654     __ string_indexof_char($result$$Register,
11655                            $haystack$$Register, $haycnt$$Register,
11656                            R0, chr,
11657                            $tmp1$$Register, $tmp2$$Register, false /*is_byte*/);
11658   %}
11659   ins_pipe(pipe_class_compare);
11660 %}
11661 
11662 instruct indexOf_imm1_U(iRegIdst result, iRegPsrc haystack, iRegIsrc haycnt,
11663                         rscratch2RegP needle, immI_1 needlecntImm,
11664                         iRegIdst tmp1, iRegIdst tmp2,
11665                         flagsRegCR0 cr0, flagsRegCR1 cr1, regCTR ctr) %{
        // StrIndexOf for UU-encoded nodes where the needle has length 1 and
        // arrives in a register, but its defining node is typed as a constant
        // type array. The needle character is read from that constant at
        // code-emission time; the needle register itself is not passed to
        // string_indexof_char.
11666   match(Set result (StrIndexOf (Binary haystack haycnt) (Binary needle needlecntImm)));
11667   effect(USE_KILL needle, TEMP tmp1, TEMP tmp2, KILL cr0, KILL cr1, KILL ctr);
11668   // Required for EA: check if it is still a type_array.
        // The last two conditions ensure that the needle input has a constant
        // oop and that this oop is a type array, which the encoding relies on.
11669   predicate(((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UU &&
11670             n->in(3)->in(1)->bottom_type()->is_aryptr()->const_oop() &&
11671             n->in(3)->in(1)->bottom_type()->is_aryptr()->const_oop()->is_type_array());
11672   ins_cost(180);
11673
11674   format %{ "String IndexOf SCL1 $haystack[0..$haycnt], $needle[0..$needlecntImm]"
11675             " -> $result \t// KILL $haycnt, $needle, $tmp1, $tmp2, $cr0, $cr1" %}
11676   ins_encode %{
11677     Node *ndl = in(operand_index($needle));  // The node that defines needle.
11678     ciTypeArray* needle_values = ndl->bottom_type()->is_aryptr()->const_oop()->as_type_array();
11679     guarantee(needle_values, "sanity");
11680     jchar chr;
          // The constant array is read byte-wise; the two bytes of the first
          // UTF-16 character are combined according to the target byte order.
          // Each byte goes through unsigned char so it is not sign-extended.
11681 #ifdef VM_LITTLE_ENDIAN
11682     chr = (((jchar)(unsigned char)needle_values->element_value(1).as_byte()) << 8) |
11683            ((jchar)(unsigned char)needle_values->element_value(0).as_byte());
11684 #else
11685     chr = (((jchar)(unsigned char)needle_values->element_value(0).as_byte()) << 8) |
11686            ((jchar)(unsigned char)needle_values->element_value(1).as_byte());
11687 #endif
          // R0 is passed in the register slot next to the constant chr.
11688     __ string_indexof_char($result$$Register,
11689                            $haystack$$Register, $haycnt$$Register,
11690                            R0, chr,
11691                            $tmp1$$Register, $tmp2$$Register, false /*is_byte*/);
11692   %}
11693   ins_pipe(pipe_class_compare);
11694 %}
11695 
// StrIndexOf, LL encoding, needle count given by the immI_1 operand.
// The predicate additionally requires the needle input to be a constant type
// array, so the single needle byte can be read at compile time and handed to
// string_indexof_char as an immediate (is_byte == true); R0 is passed in the
// needle-register position.
instruct indexOf_imm1_L(iRegIdst result, iRegPsrc haystack, iRegIsrc haycnt,
                        rscratch2RegP needle, immI_1 needlecntImm,
                        iRegIdst tmp1, iRegIdst tmp2,
                        flagsRegCR0 cr0, flagsRegCR1 cr1, regCTR ctr) %{
  match(Set result (StrIndexOf (Binary haystack haycnt) (Binary needle needlecntImm)));
  effect(USE_KILL needle, TEMP tmp1, TEMP tmp2, KILL cr0, KILL cr1, KILL ctr);
  // Required for EA: check if it is still a type_array.
  predicate(((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::LL &&
            n->in(3)->in(1)->bottom_type()->is_aryptr()->const_oop() &&
            n->in(3)->in(1)->bottom_type()->is_aryptr()->const_oop()->is_type_array());
  ins_cost(180);

  format %{ "String IndexOf SCL1 $haystack[0..$haycnt], $needle[0..$needlecntImm]"
            " -> $result \t// KILL $haycnt, $needle, $tmp1, $tmp2, $cr0, $cr1" %}
  ins_encode %{
    Node *ndl = in(operand_index($needle));  // The node that defines needle.
    ciTypeArray* needle_values = ndl->bottom_type()->is_aryptr()->const_oop()->as_type_array();
    guarantee(needle_values, "sanity");
    // as_byte() delivers a signed byte. Widen it through unsigned char so that
    // needle bytes >= 0x80 become 0x0080..0x00FF instead of being sign-extended
    // to 0xFF80..0xFFFF (same idiom as in indexOf_imm1_U).
    jchar chr = (jchar)(unsigned char)needle_values->element_value(0).as_byte();
    __ string_indexof_char($result$$Register,
                           $haystack$$Register, $haycnt$$Register,
                           R0, chr,
                           $tmp1$$Register, $tmp2$$Register, true /*is_byte*/);
  %}
  ins_pipe(pipe_class_compare);
%}
11722 
// StrIndexOf, UL encoding, needle count given by the immI_1 operand.
// The predicate additionally requires the needle input to be a constant type
// array, so the single needle byte can be read at compile time and handed to
// string_indexof_char as an immediate (is_byte == false); R0 is passed in the
// needle-register position.
instruct indexOf_imm1_UL(iRegIdst result, iRegPsrc haystack, iRegIsrc haycnt,
                         rscratch2RegP needle, immI_1 needlecntImm,
                         iRegIdst tmp1, iRegIdst tmp2,
                         flagsRegCR0 cr0, flagsRegCR1 cr1, regCTR ctr) %{
  match(Set result (StrIndexOf (Binary haystack haycnt) (Binary needle needlecntImm)));
  effect(USE_KILL needle, TEMP tmp1, TEMP tmp2, KILL cr0, KILL cr1, KILL ctr);
  // Required for EA: check if it is still a type_array.
  predicate(((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UL &&
            n->in(3)->in(1)->bottom_type()->is_aryptr()->const_oop() &&
            n->in(3)->in(1)->bottom_type()->is_aryptr()->const_oop()->is_type_array());
  ins_cost(180);

  format %{ "String IndexOf SCL1 $haystack[0..$haycnt], $needle[0..$needlecntImm]"
            " -> $result \t// KILL $haycnt, $needle, $tmp1, $tmp2, $cr0, $cr1" %}
  ins_encode %{
    Node *ndl = in(operand_index($needle));  // The node that defines needle.
    ciTypeArray* needle_values = ndl->bottom_type()->is_aryptr()->const_oop()->as_type_array();
    guarantee(needle_values, "sanity");
    // as_byte() delivers a signed byte. Widen it through unsigned char so that
    // needle bytes >= 0x80 become 0x0080..0x00FF instead of being sign-extended
    // to 0xFF80..0xFFFF (same idiom as in indexOf_imm1_U).
    jchar chr = (jchar)(unsigned char)needle_values->element_value(0).as_byte();
    __ string_indexof_char($result$$Register,
                           $haystack$$Register, $haycnt$$Register,
                           R0, chr,
                           $tmp1$$Register, $tmp2$$Register, false /*is_byte*/);
  %}
  ins_pipe(pipe_class_compare);
%}
11749 
// StrIndexOfChar for encoding U: the character to search for is supplied in
// the register $ch, and string_indexof_char is emitted with is_byte == false.
instruct indexOfChar_U(iRegIdst result, iRegPsrc haystack, iRegIsrc haycnt,
                       iRegIsrc ch, iRegIdst tmp1, iRegIdst tmp2,
                       flagsRegCR0 cr0, flagsRegCR1 cr1, regCTR ctr) %{
  match(Set result (StrIndexOfChar (Binary haystack haycnt) ch));
  effect(TEMP tmp1, TEMP tmp2, KILL cr0, KILL cr1, KILL ctr);
  // Selects this rule for the U encoding only; indexOfChar_L covers L.
  predicate(((StrIndexOfCharNode*)n)->encoding() == StrIntrinsicNode::U);
  ins_cost(180);

  format %{ "StringUTF16 IndexOfChar $haystack[0..$haycnt], $ch"
            " -> $result \t// KILL $haycnt, $tmp1, $tmp2, $cr0, $cr1" %}
  ins_encode %{
    __ string_indexof_char($result$$Register,
                           $haystack$$Register, $haycnt$$Register,
                           $ch$$Register, 0 /* this is not used if the character is already in a register */,
                           $tmp1$$Register, $tmp2$$Register, false /*is_byte*/);
  %}
  ins_pipe(pipe_class_compare);
%}
11768 
// StrIndexOfChar for encoding L: the character to search for is supplied in
// the register $ch, and string_indexof_char is emitted with is_byte == true.
instruct indexOfChar_L(iRegIdst result, iRegPsrc haystack, iRegIsrc haycnt,
                       iRegIsrc ch, iRegIdst tmp1, iRegIdst tmp2,
                       flagsRegCR0 cr0, flagsRegCR1 cr1, regCTR ctr) %{
  match(Set result (StrIndexOfChar (Binary haystack haycnt) ch));
  effect(TEMP tmp1, TEMP tmp2, KILL cr0, KILL cr1, KILL ctr);
  // Selects this rule for the L encoding only; indexOfChar_U covers U.
  predicate(((StrIndexOfCharNode*)n)->encoding() == StrIntrinsicNode::L);
  ins_cost(180);

  format %{ "StringLatin1 IndexOfChar $haystack[0..$haycnt], $ch"
            " -> $result \t// KILL $haycnt, $tmp1, $tmp2, $cr0, $cr1" %}
  ins_encode %{
    __ string_indexof_char($result$$Register,
                           $haystack$$Register, $haycnt$$Register,
                           $ch$$Register, 0 /* this is not used if the character is already in a register */,
                           $tmp1$$Register, $tmp2$$Register, true /*is_byte*/);
  %}
  ins_pipe(pipe_class_compare);
%}
11787 
// StrIndexOf, UU encoding, with a constant needle count (uimmI15 operand) and,
// per the predicate, a needle that is a constant type array. The constant array
// and the constant count are both handed to string_indexof.
// haycnt is pinned to rscratch1RegI and declared USE_KILL.
instruct indexOf_imm_U(iRegIdst result, iRegPsrc haystack, rscratch1RegI haycnt,
                       iRegPsrc needle, uimmI15 needlecntImm,
                       iRegIdst tmp1, iRegIdst tmp2, iRegIdst tmp3, iRegIdst tmp4, iRegIdst tmp5,
                       flagsRegCR0 cr0, flagsRegCR1 cr1, flagsRegCR6 cr6, regCTR ctr) %{
  match(Set result (StrIndexOf (Binary haystack haycnt) (Binary needle needlecntImm)));
  effect(USE_KILL haycnt, /* better: TDEF haycnt, */ TEMP_DEF result,
         TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, TEMP tmp5, KILL cr0, KILL cr1, KILL cr6, KILL ctr);
  // Required for EA: check if it is still a type_array.
  predicate(((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UU &&
            n->in(3)->in(1)->bottom_type()->is_aryptr()->const_oop() &&
            n->in(3)->in(1)->bottom_type()->is_aryptr()->const_oop()->is_type_array());
  ins_cost(250);

  format %{ "String IndexOf SCL $haystack[0..$haycnt], $needle[0..$needlecntImm]"
            " -> $result \t// KILL $haycnt, $tmp1, $tmp2, $tmp3, $tmp4, $tmp5, $cr0, $cr1" %}
  ins_encode %{
    Node *ndl = in(operand_index($needle));  // The node that defines needle.
    ciTypeArray* needle_values = ndl->bottom_type()->is_aryptr()->const_oop()->as_type_array();

    // tmp5 occupies the argument slot in which indexOf_U passes its needle
    // count register; here the count itself is given as a constant.
    __ string_indexof($result$$Register,
                      $haystack$$Register, $haycnt$$Register,
                      $needle$$Register, needle_values, $tmp5$$Register, $needlecntImm$$constant,
                      $tmp1$$Register, $tmp2$$Register, $tmp3$$Register, $tmp4$$Register, StrIntrinsicNode::UU);
  %}
  ins_pipe(pipe_class_compare);
%}
11814 
// StrIndexOf, LL encoding, with a constant needle count (uimmI15 operand) and,
// per the predicate, a needle that is a constant type array. The constant array
// and the constant count are both handed to string_indexof.
// haycnt is pinned to rscratch1RegI and declared USE_KILL.
instruct indexOf_imm_L(iRegIdst result, iRegPsrc haystack, rscratch1RegI haycnt,
                       iRegPsrc needle, uimmI15 needlecntImm,
                       iRegIdst tmp1, iRegIdst tmp2, iRegIdst tmp3, iRegIdst tmp4, iRegIdst tmp5,
                       flagsRegCR0 cr0, flagsRegCR1 cr1, flagsRegCR6 cr6, regCTR ctr) %{
  match(Set result (StrIndexOf (Binary haystack haycnt) (Binary needle needlecntImm)));
  effect(USE_KILL haycnt, /* better: TDEF haycnt, */ TEMP_DEF result,
         TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, TEMP tmp5, KILL cr0, KILL cr1, KILL cr6, KILL ctr);
  // Required for EA: check if it is still a type_array.
  predicate(((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::LL &&
            n->in(3)->in(1)->bottom_type()->is_aryptr()->const_oop() &&
            n->in(3)->in(1)->bottom_type()->is_aryptr()->const_oop()->is_type_array());
  ins_cost(250);

  format %{ "String IndexOf SCL $haystack[0..$haycnt], $needle[0..$needlecntImm]"
            " -> $result \t// KILL $haycnt, $tmp1, $tmp2, $tmp3, $tmp4, $tmp5, $cr0, $cr1" %}
  ins_encode %{
    Node *ndl = in(operand_index($needle));  // The node that defines needle.
    ciTypeArray* needle_values = ndl->bottom_type()->is_aryptr()->const_oop()->as_type_array();

    // tmp5 occupies the argument slot in which indexOf_L passes its needle
    // count register; here the count itself is given as a constant.
    __ string_indexof($result$$Register,
                      $haystack$$Register, $haycnt$$Register,
                      $needle$$Register, needle_values, $tmp5$$Register, $needlecntImm$$constant,
                      $tmp1$$Register, $tmp2$$Register, $tmp3$$Register, $tmp4$$Register, StrIntrinsicNode::LL);
  %}
  ins_pipe(pipe_class_compare);
%}
11841 
// StrIndexOf, UL encoding, with a constant needle count (uimmI15 operand) and,
// per the predicate, a needle that is a constant type array. The constant array
// and the constant count are both handed to string_indexof.
// haycnt is pinned to rscratch1RegI and declared USE_KILL.
instruct indexOf_imm_UL(iRegIdst result, iRegPsrc haystack, rscratch1RegI haycnt,
                        iRegPsrc needle, uimmI15 needlecntImm,
                        iRegIdst tmp1, iRegIdst tmp2, iRegIdst tmp3, iRegIdst tmp4, iRegIdst tmp5,
                        flagsRegCR0 cr0, flagsRegCR1 cr1, flagsRegCR6 cr6, regCTR ctr) %{
  match(Set result (StrIndexOf (Binary haystack haycnt) (Binary needle needlecntImm)));
  effect(USE_KILL haycnt, /* better: TDEF haycnt, */ TEMP_DEF result,
         TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, TEMP tmp5, KILL cr0, KILL cr1, KILL cr6, KILL ctr);
  // Required for EA: check if it is still a type_array.
  predicate(((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UL &&
            n->in(3)->in(1)->bottom_type()->is_aryptr()->const_oop() &&
            n->in(3)->in(1)->bottom_type()->is_aryptr()->const_oop()->is_type_array());
  ins_cost(250);

  format %{ "String IndexOf SCL $haystack[0..$haycnt], $needle[0..$needlecntImm]"
            " -> $result \t// KILL $haycnt, $tmp1, $tmp2, $tmp3, $tmp4, $tmp5, $cr0, $cr1" %}
  ins_encode %{
    Node *ndl = in(operand_index($needle));  // The node that defines needle.
    ciTypeArray* needle_values = ndl->bottom_type()->is_aryptr()->const_oop()->as_type_array();

    // tmp5 occupies the argument slot in which indexOf_UL passes its needle
    // count register; here the count itself is given as a constant.
    __ string_indexof($result$$Register,
                      $haystack$$Register, $haycnt$$Register,
                      $needle$$Register, needle_values, $tmp5$$Register, $needlecntImm$$constant,
                      $tmp1$$Register, $tmp2$$Register, $tmp3$$Register, $tmp4$$Register, StrIntrinsicNode::UL);
  %}
  ins_pipe(pipe_class_compare);
%}
11868 
// General StrIndexOf for UU encoding: both counts are in registers and no
// constant needle array is available (nullptr is passed for it).
// haycnt and needlecnt are pinned to rscratch1RegI / rscratch2RegI and declared
// USE_KILL. With ins_cost(300) this is the most expensive UU rule in this
// group (the constant-needle variants use 180 and 250).
instruct indexOf_U(iRegIdst result, iRegPsrc haystack, rscratch1RegI haycnt, iRegPsrc needle, rscratch2RegI needlecnt,
                   iRegLdst tmp1, iRegLdst tmp2, iRegLdst tmp3, iRegLdst tmp4,
                   flagsRegCR0 cr0, flagsRegCR1 cr1, flagsRegCR6 cr6, regCTR ctr) %{
  match(Set result (StrIndexOf (Binary haystack haycnt) (Binary needle needlecnt)));
  effect(USE_KILL haycnt, USE_KILL needlecnt, /*better: TDEF haycnt, TDEF needlecnt,*/
         TEMP_DEF result,
         TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, KILL cr0, KILL cr1, KILL cr6, KILL ctr);
  predicate(((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UU);
  ins_cost(300);

  format %{ "String IndexOf $haystack[0..$haycnt], $needle[0..$needlecnt]"
             " -> $result \t// KILL $haycnt, $needlecnt, $tmp1, $tmp2, $tmp3, $tmp4, $cr0, $cr1" %}
  ins_encode %{
    __ string_indexof($result$$Register,
                      $haystack$$Register, $haycnt$$Register,
                      $needle$$Register, nullptr, $needlecnt$$Register, 0,  // needlecnt not constant.
                      $tmp1$$Register, $tmp2$$Register, $tmp3$$Register, $tmp4$$Register, StrIntrinsicNode::UU);
  %}
  ins_pipe(pipe_class_compare);
%}
11889 
// General StrIndexOf for LL encoding: both counts are in registers and no
// constant needle array is available (nullptr is passed for it).
// haycnt and needlecnt are pinned to rscratch1RegI / rscratch2RegI and declared
// USE_KILL. With ins_cost(300) this is the most expensive LL rule in this
// group (the constant-needle variants use 180 and 250).
instruct indexOf_L(iRegIdst result, iRegPsrc haystack, rscratch1RegI haycnt, iRegPsrc needle, rscratch2RegI needlecnt,
                   iRegLdst tmp1, iRegLdst tmp2, iRegLdst tmp3, iRegLdst tmp4,
                   flagsRegCR0 cr0, flagsRegCR1 cr1, flagsRegCR6 cr6, regCTR ctr) %{
  match(Set result (StrIndexOf (Binary haystack haycnt) (Binary needle needlecnt)));
  effect(USE_KILL haycnt, USE_KILL needlecnt, /*better: TDEF haycnt, TDEF needlecnt,*/
         TEMP_DEF result,
         TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, KILL cr0, KILL cr1, KILL cr6, KILL ctr);
  predicate(((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::LL);
  ins_cost(300);

  format %{ "String IndexOf $haystack[0..$haycnt], $needle[0..$needlecnt]"
             " -> $result \t// KILL $haycnt, $needlecnt, $tmp1, $tmp2, $tmp3, $tmp4, $cr0, $cr1" %}
  ins_encode %{
    __ string_indexof($result$$Register,
                      $haystack$$Register, $haycnt$$Register,
                      $needle$$Register, nullptr, $needlecnt$$Register, 0,  // needlecnt not constant.
                      $tmp1$$Register, $tmp2$$Register, $tmp3$$Register, $tmp4$$Register, StrIntrinsicNode::LL);
  %}
  ins_pipe(pipe_class_compare);
%}
11910 
// General StrIndexOf for UL encoding: both counts are in registers and no
// constant needle array is available (nullptr is passed for it).
// haycnt and needlecnt are pinned to rscratch1RegI / rscratch2RegI and declared
// USE_KILL. With ins_cost(300) this is the most expensive UL rule in this
// group (the constant-needle variants use 150, 180 and 250).
instruct indexOf_UL(iRegIdst result, iRegPsrc haystack, rscratch1RegI haycnt, iRegPsrc needle, rscratch2RegI needlecnt,
                    iRegLdst tmp1, iRegLdst tmp2, iRegLdst tmp3, iRegLdst tmp4,
                    flagsRegCR0 cr0, flagsRegCR1 cr1, flagsRegCR6 cr6, regCTR ctr) %{
  match(Set result (StrIndexOf (Binary haystack haycnt) (Binary needle needlecnt)));
  effect(USE_KILL haycnt, USE_KILL needlecnt, /*better: TDEF haycnt, TDEF needlecnt,*/
         TEMP_DEF result,
         TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, KILL cr0, KILL cr1, KILL cr6, KILL ctr);
  predicate(((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UL);
  ins_cost(300);

  format %{ "String IndexOf $haystack[0..$haycnt], $needle[0..$needlecnt]"
             " -> $result \t// KILL $haycnt, $needlecnt, $tmp1, $tmp2, $tmp3, $tmp4, $cr0, $cr1" %}
  ins_encode %{
    __ string_indexof($result$$Register,
                      $haystack$$Register, $haycnt$$Register,
                      $needle$$Register, nullptr, $needlecnt$$Register, 0,  // needlecnt not constant.
                      $tmp1$$Register, $tmp2$$Register, $tmp3$$Register, $tmp4$$Register, StrIntrinsicNode::UL);
  %}
  ins_pipe(pipe_class_compare);
%}
11931 
// char[] to byte[] compression
// Matches StrCompressedCopy and emits the same encode_iso_array macro that the
// encode_iso_array rule uses, with the trailing flag set to false.
// src and dst are pinned to rarg1RegP / rarg2RegP and declared USE_KILL.
instruct string_compress(rarg1RegP src, rarg2RegP dst, iRegIsrc len, iRegIdst result, iRegLdst tmp1,
                         iRegLdst tmp2, iRegLdst tmp3, iRegLdst tmp4, iRegLdst tmp5, regCTR ctr, flagsRegCR0 cr0) %{
  match(Set result (StrCompressedCopy src (Binary dst len)));
  effect(TEMP_DEF result, TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, TEMP tmp5,
         USE_KILL src, USE_KILL dst, KILL ctr, KILL cr0);
  ins_cost(300);
  format %{ "String Compress $src,$dst,$len -> $result \t// KILL $tmp1, $tmp2, $tmp3, $tmp4, $tmp5" %}
  ins_encode %{
    // NOTE(review): the final 'false' presumably selects ISO-8859-1 rather than
    // ASCII mode (encode_ascii_array passes 'true') -- confirm in the macro assembler.
    __ encode_iso_array($src$$Register, $dst$$Register, $len$$Register, $tmp1$$Register, $tmp2$$Register,
                        $tmp3$$Register, $tmp4$$Register, $tmp5$$Register, $result$$Register, false);
  %}
  ins_pipe(pipe_class_default);
%}
11946 
// byte[] to char[] inflation
// Matches StrInflatedCopy (result operand is a Universe dummy). Emits
// string_inflate_16 followed by a conditional string_inflate call for the
// remaining len & 7 characters.
// src and dst are pinned to rarg1RegP / rarg2RegP and declared USE_KILL.
instruct string_inflate(Universe dummy, rarg1RegP src, rarg2RegP dst, iRegIsrc len, iRegLdst tmp1,
                        iRegLdst tmp2, iRegLdst tmp3, iRegLdst tmp4, iRegLdst tmp5, regCTR ctr, flagsRegCR0 cr0) %{
  match(Set dummy (StrInflatedCopy src (Binary dst len)));
  effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, TEMP tmp5, USE_KILL src, USE_KILL dst, KILL ctr, KILL cr0);
  ins_cost(300);
  format %{ "String Inflate $src,$dst,$len \t// KILL $tmp1, $tmp2, $tmp3, $tmp4, $tmp5" %}
  ins_encode %{
    Label Ldone;
    // NOTE(review): string_inflate_16 presumably handles the bulk in groups of 8
    // characters and leaves src/dst advanced for the tail -- confirm in the
    // macro assembler.
    __ string_inflate_16($src$$Register, $dst$$Register, $len$$Register, $tmp1$$Register,
                         $tmp2$$Register, $tmp3$$Register, $tmp4$$Register, $tmp5$$Register);
    // tmp1 = len & 7 (rotate by 0, keep the low 3 bits); the record form sets
    // CR0 so that the tail call is skipped when nothing remains.
    __ rldicl_($tmp1$$Register, $len$$Register, 0, 64-3); // Remaining characters.
    __ beq(CR0, Ldone);
    __ string_inflate($src$$Register, $dst$$Register, $tmp1$$Register, $tmp2$$Register);
    __ bind(Ldone);
  %}
  ins_pipe(pipe_class_default);
%}
11965 
// StringCoding.java intrinsics
// Matches CountPositives on a byte array and length and forwards to the
// count_positives macro. result is TEMP_DEF; CTR and CR0 are killed.
instruct count_positives(iRegPsrc ary1, iRegIsrc len, iRegIdst result, iRegLdst tmp1, iRegLdst tmp2,
                         regCTR ctr, flagsRegCR0 cr0)
%{
  match(Set result (CountPositives ary1 len));
  effect(TEMP_DEF result, TEMP tmp1, TEMP tmp2, KILL ctr, KILL cr0);
  ins_cost(300);
  format %{ "count positives byte[] $ary1,$len -> $result \t// KILL $tmp1, $tmp2" %}
  ins_encode %{
    __ count_positives($ary1$$Register, $len$$Register, $result$$Register,
                       $tmp1$$Register, $tmp2$$Register);
  %}
  ins_pipe(pipe_class_default);
%}
11980 
// encode char[] to byte[] in ISO_8859_1
// Matches EncodeISOArray nodes for which is_ascii() is false; the ASCII case is
// handled by encode_ascii_array. Emits the encode_iso_array macro with the
// trailing flag set to false.
// src and dst are pinned to rarg1RegP / rarg2RegP and declared USE_KILL.
instruct encode_iso_array(rarg1RegP src, rarg2RegP dst, iRegIsrc len, iRegIdst result, iRegLdst tmp1,
                          iRegLdst tmp2, iRegLdst tmp3, iRegLdst tmp4, iRegLdst tmp5, regCTR ctr, flagsRegCR0 cr0) %{
  predicate(!((EncodeISOArrayNode*)n)->is_ascii());
  match(Set result (EncodeISOArray src (Binary dst len)));
  effect(TEMP_DEF result, TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, TEMP tmp5,
         USE_KILL src, USE_KILL dst, KILL ctr, KILL cr0);
  ins_cost(300);
  format %{ "Encode iso array $src,$dst,$len -> $result \t// KILL $tmp1, $tmp2, $tmp3, $tmp4, $tmp5" %}
  ins_encode %{
    __ encode_iso_array($src$$Register, $dst$$Register, $len$$Register, $tmp1$$Register, $tmp2$$Register,
                        $tmp3$$Register, $tmp4$$Register, $tmp5$$Register, $result$$Register, false);
  %}
  ins_pipe(pipe_class_default);
%}
11996 
// encode char[] to byte[] in ASCII
// Matches EncodeISOArray nodes for which is_ascii() is true; the non-ASCII case
// is handled by encode_iso_array. Emits the encode_iso_array macro with the
// trailing flag set to true.
// src and dst are pinned to rarg1RegP / rarg2RegP and declared USE_KILL.
instruct encode_ascii_array(rarg1RegP src, rarg2RegP dst, iRegIsrc len, iRegIdst result, iRegLdst tmp1,
                          iRegLdst tmp2, iRegLdst tmp3, iRegLdst tmp4, iRegLdst tmp5, regCTR ctr, flagsRegCR0 cr0) %{
  predicate(((EncodeISOArrayNode*)n)->is_ascii());
  match(Set result (EncodeISOArray src (Binary dst len)));
  effect(TEMP_DEF result, TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, TEMP tmp5,
         USE_KILL src, USE_KILL dst, KILL ctr, KILL cr0);
  ins_cost(300);
  format %{ "Encode ascii array $src,$dst,$len -> $result \t// KILL $tmp1, $tmp2, $tmp3, $tmp4, $tmp5" %}
  ins_encode %{
    __ encode_iso_array($src$$Register, $dst$$Register, $len$$Register, $tmp1$$Register, $tmp2$$Register,
                        $tmp3$$Register, $tmp4$$Register, $tmp5$$Register, $result$$Register, true);
  %}
  ins_pipe(pipe_class_default);
%}
12012 
12013 
12014 //---------- Min/Max Instructions ---------------------------------------------
12015 
12016 
// MinI of two int registers without a branch: a word compare into CR0
// followed by an isel on the 'less' condition. CR0 is killed.
instruct minI_reg_reg_isel(iRegIdst dst, iRegIsrc src1, iRegIsrc src2, flagsRegCR0 cr0) %{
  match(Set dst (MinI src1 src2));
  effect(KILL cr0);
  ins_cost(DEFAULT_COST*2);

  // Two instructions (cmpw + isel) for the declared size of 8.
  size(8);
  ins_encode %{
    __ cmpw(CR0, $src1$$Register, $src2$$Register);
    // dst = (src1 < src2) ? src1 : src2
    __ isel($dst$$Register, CR0, Assembler::less, /*invert*/false, $src1$$Register, $src2$$Register);
  %}
  ins_pipe(pipe_class_default);
%}
12029 
12030 
// MaxI of two int registers without a branch: a word compare into CR0
// followed by an isel on the 'greater' condition. CR0 is killed.
instruct maxI_reg_reg_isel(iRegIdst dst, iRegIsrc src1, iRegIsrc src2, flagsRegCR0 cr0) %{
  match(Set dst (MaxI src1 src2));
  effect(KILL cr0);
  ins_cost(DEFAULT_COST*2);

  // Two instructions (cmpw + isel) for the declared size of 8.
  size(8);
  ins_encode %{
    __ cmpw(CR0, $src1$$Register, $src2$$Register);
    // dst = (src1 > src2) ? src1 : src2
    __ isel($dst$$Register, CR0, Assembler::greater, /*invert*/false, $src1$$Register, $src2$$Register);
  %}
  ins_pipe(pipe_class_default);
%}
12043 
// MinF as a single xsminjdp, enabled only when PowerArchitecturePPC64 >= 9.
// The float registers are passed in their VSR view (to_vsr()); note that the
// same instruction is used for MinD.
instruct minF(regF dst, regF src1, regF src2) %{
  match(Set dst (MinF src1 src2));
  predicate(PowerArchitecturePPC64 >= 9);
  ins_cost(DEFAULT_COST);

  format %{ "XSMINJDP $dst, $src1, $src2\t// MinF" %}
  size(4);
  ins_encode %{
    __ xsminjdp($dst$$FloatRegister->to_vsr(), $src1$$FloatRegister->to_vsr(), $src2$$FloatRegister->to_vsr());
  %}
  ins_pipe(pipe_class_default);
%}
12056 
// MinD as a single xsminjdp, enabled only when PowerArchitecturePPC64 >= 9.
// The double registers are passed in their VSR view (to_vsr()).
instruct minD(regD dst, regD src1, regD src2) %{
  match(Set dst (MinD src1 src2));
  predicate(PowerArchitecturePPC64 >= 9);
  ins_cost(DEFAULT_COST);

  format %{ "XSMINJDP $dst, $src1, $src2\t// MinD" %}
  size(4);
  ins_encode %{
    __ xsminjdp($dst$$FloatRegister->to_vsr(), $src1$$FloatRegister->to_vsr(), $src2$$FloatRegister->to_vsr());
  %}
  ins_pipe(pipe_class_default);
%}
12069 
// MaxF as a single xsmaxjdp, enabled only when PowerArchitecturePPC64 >= 9.
// The float registers are passed in their VSR view (to_vsr()); note that the
// same instruction is used for MaxD.
instruct maxF(regF dst, regF src1, regF src2) %{
  match(Set dst (MaxF src1 src2));
  predicate(PowerArchitecturePPC64 >= 9);
  ins_cost(DEFAULT_COST);

  format %{ "XSMAXJDP $dst, $src1, $src2\t// MaxF" %}
  size(4);
  ins_encode %{
    __ xsmaxjdp($dst$$FloatRegister->to_vsr(), $src1$$FloatRegister->to_vsr(), $src2$$FloatRegister->to_vsr());
  %}
  ins_pipe(pipe_class_default);
%}
12082 
// MaxD as a single xsmaxjdp, enabled only when PowerArchitecturePPC64 >= 9.
// The double registers are passed in their VSR view (to_vsr()).
instruct maxD(regD dst, regD src1, regD src2) %{
  match(Set dst (MaxD src1 src2));
  predicate(PowerArchitecturePPC64 >= 9);
  ins_cost(DEFAULT_COST);

  format %{ "XSMAXJDP $dst, $src1, $src2\t// MaxD" %}
  size(4);
  ins_encode %{
    __ xsmaxjdp($dst$$FloatRegister->to_vsr(), $src1$$FloatRegister->to_vsr(), $src2$$FloatRegister->to_vsr());
  %}
  ins_pipe(pipe_class_default);
%}
12095 
12096 //---------- Population Count Instructions ------------------------------------
12097 
// PopCountI as a single popcntw; only matched when UsePopCountInstruction is set.
instruct popCountI(iRegIdst dst, iRegIsrc src) %{
  match(Set dst (PopCountI src));
  predicate(UsePopCountInstruction);
  ins_cost(DEFAULT_COST);

  format %{ "POPCNTW $dst, $src" %}
  size(4);
  ins_encode %{
    __ popcntw($dst$$Register, $src$$Register);
  %}
  ins_pipe(pipe_class_default);
%}
12110 
// PopCountL as a single popcntd; only matched when UsePopCountInstruction is set.
// The source is a long register, the result an int register.
instruct popCountL(iRegIdst dst, iRegLsrc src) %{
  predicate(UsePopCountInstruction);
  match(Set dst (PopCountL src));
  ins_cost(DEFAULT_COST);

  format %{ "POPCNTD $dst, $src" %}
  size(4);
  ins_encode %{
    __ popcntd($dst$$Register, $src$$Register);
  %}
  ins_pipe(pipe_class_default);
%}
12123 
// CountLeadingZerosI as a single cntlzw, guarded by
// UseCountLeadingZerosInstructionsPPC64. Also used as a building block by the
// countTrailingZerosI_Ex expansion.
instruct countLeadingZerosI(iRegIdst dst, iRegIsrc src) %{
  match(Set dst (CountLeadingZerosI src));
  predicate(UseCountLeadingZerosInstructionsPPC64);  // See Matcher::match_rule_supported.
  ins_cost(DEFAULT_COST);

  format %{ "CNTLZW  $dst, $src" %}
  size(4);
  ins_encode %{
    __ cntlzw($dst$$Register, $src$$Register);
  %}
  ins_pipe(pipe_class_default);
%}
12136 
// CountLeadingZerosL as a single cntlzd, guarded by
// UseCountLeadingZerosInstructionsPPC64. Also used as a building block by the
// countTrailingZerosL_Ex expansion.
instruct countLeadingZerosL(iRegIdst dst, iRegLsrc src) %{
  match(Set dst (CountLeadingZerosL src));
  predicate(UseCountLeadingZerosInstructionsPPC64);  // See Matcher::match_rule_supported.
  ins_cost(DEFAULT_COST);

  format %{ "CNTLZD  $dst, $src" %}
  size(4);
  ins_encode %{
    __ cntlzd($dst$$Register, $src$$Register);
  %}
  ins_pipe(pipe_class_default);
%}
12149 
// cntlzd on a pointer register. This rule has no match clause and a false
// predicate, so the matcher never selects it directly.
// NOTE(review): presumably instantiated only from expand rules elsewhere in
// the file -- confirm.
instruct countLeadingZerosP(iRegIdst dst, iRegPsrc src) %{
  // no match-rule, false predicate
  effect(DEF dst, USE src);
  predicate(false);

  format %{ "CNTLZD  $dst, $src" %}
  size(4);
  ins_encode %{
    __ cntlzd($dst$$Register, $src$$Register);
  %}
  ins_pipe(pipe_class_default);
%}
12162 
// CountTrailingZerosI built from count-leading-zeros, selected when cntlzw is
// enabled but the count-trailing-zeros instructions are not (see predicate).
// The expansion computes dst = 32 - cntlzw(andc(src, src - 1)).
instruct countTrailingZerosI_Ex(iRegIdst dst, iRegIsrc src) %{
  match(Set dst (CountTrailingZerosI src));
  predicate(UseCountLeadingZerosInstructionsPPC64 && !UseCountTrailingZerosInstructionsPPC64);
  ins_cost(DEFAULT_COST);

  expand %{
    immI16 imm1 %{ (int)-1 %}
    immI16 imm2 %{ (int)32 %}
    immI_minus1 m1 %{ -1 %}
    iRegIdst tmpI1;
    iRegIdst tmpI2;
    iRegIdst tmpI3;
    // tmpI1 = src - 1
    addI_reg_imm16(tmpI1, src, imm1);
    // NOTE(review): andcI_reg_reg is defined elsewhere; presumably it yields
    // (src ^ -1) & tmpI1, i.e. a mask of the trailing zero bits -- confirm.
    andcI_reg_reg(tmpI2, src, m1, tmpI1);
    // tmpI3 = number of leading zeros of that mask
    countLeadingZerosI(tmpI3, tmpI2);
    // dst = 32 - tmpI3
    subI_imm16_reg(dst, imm2, tmpI3);
  %}
%}
12181 
// CountTrailingZerosI as a single cnttzw, selected when
// UseCountTrailingZerosInstructionsPPC64 is set.
instruct countTrailingZerosI_cnttzw(iRegIdst dst, iRegIsrc src) %{
  match(Set dst (CountTrailingZerosI src));
  predicate(UseCountTrailingZerosInstructionsPPC64);
  ins_cost(DEFAULT_COST);

  format %{ "CNTTZW  $dst, $src" %}
  size(4);
  ins_encode %{
    __ cnttzw($dst$$Register, $src$$Register);
  %}
  ins_pipe(pipe_class_default);
%}
12194 
// CountTrailingZerosL built from count-leading-zeros, selected when cntlzd is
// enabled but the count-trailing-zeros instructions are not (see predicate).
// The expansion computes dst = 64 - cntlzd(andc(src - 1, src)).
instruct countTrailingZerosL_Ex(iRegIdst dst, iRegLsrc src) %{
  match(Set dst (CountTrailingZerosL src));
  predicate(UseCountLeadingZerosInstructionsPPC64 && !UseCountTrailingZerosInstructionsPPC64);
  ins_cost(DEFAULT_COST);

  expand %{
    immL16 imm1 %{ (long)-1 %}
    immI16 imm2 %{ (int)64 %}
    iRegLdst tmpL1;
    iRegLdst tmpL2;
    iRegIdst tmpL3;
    // tmpL1 = src - 1
    addL_reg_imm16(tmpL1, src, imm1);
    // NOTE(review): andcL_reg_reg is defined elsewhere; presumably it yields
    // tmpL1 & ~src, i.e. a mask of the trailing zero bits -- confirm.
    andcL_reg_reg(tmpL2, tmpL1, src);
    // tmpL3 = number of leading zeros of that mask (int result)
    countLeadingZerosL(tmpL3, tmpL2);
    // dst = 64 - tmpL3
    subI_imm16_reg(dst, imm2, tmpL3);
 %}
%}
12212 
// CountTrailingZerosL as a single cnttzd, selected when
// UseCountTrailingZerosInstructionsPPC64 is set.
instruct countTrailingZerosL_cnttzd(iRegIdst dst, iRegLsrc src) %{
  match(Set dst (CountTrailingZerosL src));
  predicate(UseCountTrailingZerosInstructionsPPC64);
  ins_cost(DEFAULT_COST);

  format %{ "CNTTZD  $dst, $src" %}
  size(4);
  ins_encode %{
    __ cnttzd($dst$$Register, $src$$Register);
  %}
  ins_pipe(pipe_class_default);
%}
12225 
// Expand nodes for byte_reverse_int/ushort/short.
// Plain rlwinm with constant shift, mask-begin and mask-end operands. It has no
// match clause and a false predicate, so it is only usable from expand rules.
instruct rlwinm(iRegIdst dst, iRegIsrc src, immI16 shift, immI16 mb, immI16 me) %{
  effect(DEF dst, USE src, USE shift, USE mb, USE me);
  predicate(false);

  format %{ "RLWINM  $dst, $src, $shift, $mb, $me" %}
  size(4);
  ins_encode %{
    __ rlwinm($dst$$Register, $src$$Register, $shift$$constant, $mb$$constant, $me$$constant);
  %}
  ins_pipe(pipe_class_default);
%}
12238 
// Expand nodes for byte_reverse_int.
// insrwi with constant operands n and b, passed to the assembler in that order.
// dst is declared DEF only (not USE_DEF), which makes this the variant used for
// the first insertion into a fresh dst in bytes_reverse_int_Ex.
// No match clause, false predicate: only usable from expand rules.
instruct insrwi_a(iRegIdst dst, iRegIsrc src, immI16 n, immI16 b) %{
  effect(DEF dst, USE src, USE n, USE b);
  predicate(false);

  format %{ "INSRWI  $dst, $src, $n, $b" %}
  size(4);
  ins_encode %{
    __ insrwi($dst$$Register, $src$$Register, $n$$constant, $b$$constant);
  %}
  ins_pipe(pipe_class_default);
%}
12251 
// As insrwi_a, but with USE_DEF.
// dst is both read and written, so earlier insertions into dst are preserved.
// No match clause, false predicate: only usable from expand rules.
instruct insrwi(iRegIdst dst, iRegIsrc src, immI16 n, immI16 b) %{
  effect(USE_DEF dst, USE src, USE n, USE b);
  predicate(false);

  format %{ "INSRWI  $dst, $src, $n, $b" %}
  size(4);
  ins_encode %{
    __ insrwi($dst$$Register, $src$$Register, $n$$constant, $b$$constant);
  %}
  ins_pipe(pipe_class_default);
%}
12264 
// Just slightly faster than java implementation.
// ReverseBytesI without byte-reverse instructions (see predicate): expands to
// three logical right shifts and four insrwi steps that assemble the reversed
// word in dst. The byte diagrams below assume the Power ISA meaning of
// insrwi(n, b): insert the low n bits of the source at bit b (bit 0 = MSB of
// the 32-bit word).
instruct bytes_reverse_int_Ex(iRegIdst dst, iRegIsrc src) %{
  match(Set dst (ReverseBytesI src));
  predicate(!UseByteReverseInstructions);
  ins_cost(7*DEFAULT_COST);

  expand %{
    immI16 imm24 %{ (int) 24 %}
    immI16 imm16 %{ (int) 16 %}
    immI16  imm8 %{ (int)  8 %}
    // imm4 is not referenced by the expansion below.
    immI16  imm4 %{ (int)  4 %}
    immI16  imm0 %{ (int)  0 %}
    iRegLdst tmpI1;
    iRegLdst tmpI2;
    iRegLdst tmpI3;

                                          // src   : |a|b|c|d|
    urShiftI_reg_imm(tmpI1, src, imm24);  // tmpI1 : | | | |a|
    insrwi_a(dst, tmpI1, imm8, imm24);    // dst   : |?|?|?|a|
    urShiftI_reg_imm(tmpI2, src, imm16);  // tmpI2 : | | |a|b|
    insrwi(dst, tmpI2, imm16, imm8);      // dst   : |?|a|b|a|
    urShiftI_reg_imm(tmpI3, src, imm8);   // tmpI3 : | |a|b|c|
    insrwi(dst, tmpI3, imm8, imm8);       // dst   : |?|c|b|a|
    insrwi(dst, src, imm8, imm0);         // dst   : |d|c|b|a|
  %}
%}
12290 
// ReverseBytesI through a vector-scalar register, selected when
// UseVectorByteReverseInstructionsPPC64 is set: move the int into the VSR view
// of the temporary vector register, byte-reverse the word there (xxbrw) and
// move it back. Three instructions for the declared size of 12.
instruct bytes_reverse_int_vec(iRegIdst dst, iRegIsrc src, vecX tmpV) %{
  match(Set dst (ReverseBytesI src));
  predicate(UseVectorByteReverseInstructionsPPC64);
  effect(TEMP tmpV);
  ins_cost(DEFAULT_COST*3);
  size(12);
  format %{ "MTVSRWZ $tmpV, $src\n"
            "\tXXBRW   $tmpV, $tmpV\n"
            "\tMFVSRWZ $dst, $tmpV" %}

  ins_encode %{
    // Use '->to_vsr()' uniformly, as every other register-to-VSR conversion in
    // this file does; the same operand was previously accessed with both '.'
    // and '->'.
    __ mtvsrwz($tmpV$$VectorRegister->to_vsr(), $src$$Register);
    __ xxbrw($tmpV$$VectorRegister->to_vsr(), $tmpV$$VectorRegister->to_vsr());
    __ mfvsrwz($dst$$Register, $tmpV$$VectorRegister->to_vsr());
  %}
  ins_pipe(pipe_class_default);
%}
12308 
// ReverseBytesI as a single brw, selected when UseByteReverseInstructions is
// set (the complement of the bytes_reverse_int_Ex predicate).
instruct bytes_reverse_int(iRegIdst dst, iRegIsrc src) %{
  match(Set dst (ReverseBytesI src));
  predicate(UseByteReverseInstructions);
  ins_cost(DEFAULT_COST);
  size(4);

  format %{ "BRW  $dst, $src" %}

  ins_encode %{
    __ brw($dst$$Register, $src$$Register);
  %}
  ins_pipe(pipe_class_default);
%}
12322 
// ReverseBytesL without byte-reverse instructions (see predicate): expands to
// twelve rldicl steps and three ORs. Each group of three rldicl steps isolates
// one pair of bytes and rotates it into its final position; the per-line
// diagrams show the register contents byte by byte.
// tmpL1..tmpL4 are deliberately reused across groups, so statement order matters.
instruct bytes_reverse_long_Ex(iRegLdst dst, iRegLsrc src) %{
  match(Set dst (ReverseBytesL src));
  predicate(!UseByteReverseInstructions);
  ins_cost(15*DEFAULT_COST);

  expand %{
    immI16 imm56 %{ (int) 56 %}
    immI16 imm48 %{ (int) 48 %}
    immI16 imm40 %{ (int) 40 %}
    immI16 imm32 %{ (int) 32 %}
    immI16 imm24 %{ (int) 24 %}
    immI16 imm16 %{ (int) 16 %}
    immI16  imm8 %{ (int)  8 %}
    immI16  imm0 %{ (int)  0 %}
    iRegLdst tmpL1;
    iRegLdst tmpL2;
    iRegLdst tmpL3;
    iRegLdst tmpL4;
    iRegLdst tmpL5;
    iRegLdst tmpL6;

                                        // src   : |a|b|c|d|e|f|g|h|
    // Pair (a, e) -> final byte positions 7 and 3.
    rldicl(tmpL1, src, imm8, imm24);    // tmpL1 : | | | |e|f|g|h|a|
    rldicl(tmpL2, tmpL1, imm32, imm24); // tmpL2 : | | | |a| | | |e|
    rldicl(tmpL3, tmpL2, imm32, imm0);  // tmpL3 : | | | |e| | | |a|
    // Pair (b, f) -> final byte positions 6 and 2.
    rldicl(tmpL1, src, imm16, imm24);   // tmpL1 : | | | |f|g|h|a|b|
    rldicl(tmpL2, tmpL1, imm32, imm24); // tmpL2 : | | | |b| | | |f|
    rldicl(tmpL4, tmpL2, imm40, imm0);  // tmpL4 : | | |f| | | |b| |
    orL_reg_reg(tmpL5, tmpL3, tmpL4);   // tmpL5 : | | |f|e| | |b|a|
    // Pair (c, g) -> final byte positions 5 and 1.
    rldicl(tmpL1, src, imm24, imm24);   // tmpL1 : | | | |g|h|a|b|c|
    rldicl(tmpL2, tmpL1, imm32, imm24); // tmpL2 : | | | |c| | | |g|
    rldicl(tmpL3, tmpL2, imm48, imm0);  // tmpL3 : | |g| | | |c| | |
    // Pair (d, h) -> final byte positions 4 and 0.
    rldicl(tmpL1, src, imm32, imm24);   // tmpL1 : | | | |h|a|b|c|d|
    rldicl(tmpL2, tmpL1, imm32, imm24); // tmpL2 : | | | |d| | | |h|
    rldicl(tmpL4, tmpL2, imm56, imm0);  // tmpL4 : |h| | | |d| | | |
    orL_reg_reg(tmpL6, tmpL3, tmpL4);   // tmpL6 : |h|g| | |d|c| | |
    // Merge both halves into the fully reversed value.
    orL_reg_reg(dst, tmpL5, tmpL6);     // dst   : |h|g|f|e|d|c|b|a|
  %}
%}
12362 
// Byte-reverse a 64-bit long through a vector-scalar register.
// Selected when UseVectorByteReverseInstructionsPPC64 is set; uses the TEMP
// vector register as scratch: MTVSRD, XXBRD, MFVSRD (12 bytes).
instruct bytes_reverse_long_vec(iRegLdst dst, iRegLsrc src, vecX tmpV) %{
  match(Set dst (ReverseBytesL src));
  predicate(UseVectorByteReverseInstructionsPPC64);
  effect(TEMP tmpV);
  ins_cost(DEFAULT_COST*3);
  size(12);
  format %{ "MTVSRD  $tmpV, $src\n"
            "\tXXBRD   $tmpV, $tmpV\n"
            "\tMFVSRD  $dst, $tmpV" %}

  ins_encode %{
    const VectorSRegister vsScratch = $tmpV$$VectorRegister->to_vsr();
    __ mtvsrd(vsScratch, $src$$Register);   // GPR -> VSR
    __ xxbrd(vsScratch, vsScratch);         // swap bytes in place
    __ mfvsrd($dst$$Register, vsScratch);   // VSR -> GPR
  %}
  ins_pipe(pipe_class_default);
%}
12380 
// Byte-reverse a 64-bit long with a single BRD instruction.
// Only selected when UseByteReverseInstructions is set.
instruct bytes_reverse_long(iRegLdst dst, iRegLsrc src) %{
  match(Set dst (ReverseBytesL src));
  predicate(UseByteReverseInstructions);
  ins_cost(DEFAULT_COST);
  size(4);

  format %{ "BRD  $dst, $src" %}

  ins_encode %{
    const Register Rresult = $dst$$Register;
    const Register Rvalue  = $src$$Register;
    __ brd(Rresult, Rvalue);
  %}
  ins_pipe(pipe_class_default);
%}
12394 
// Byte-reverse an unsigned short (char).
// Need zero extend. Must not use brh only.
// This rule has no predicate, so it is used regardless of
// UseByteReverseInstructions. It expands into one rlwinm node followed by one
// insrwi node (cost 2*DEFAULT_COST).
instruct bytes_reverse_ushort_Ex(iRegIdst dst, iRegIsrc src) %{
  match(Set dst (ReverseBytesUS src));
  ins_cost(2*DEFAULT_COST);

  expand %{
    immI16  imm31 %{ (int) 31 %}
    immI16  imm24 %{ (int) 24 %}
    immI16  imm16 %{ (int) 16 %}
    immI16   imm8 %{ (int)  8 %}

    // Presumably: rotate/mask so that only the upper byte of the halfword
    // remains, in the lowest byte of dst (all other bits zero) ...
    rlwinm(dst, src, imm24, imm24, imm31);
    // ... then insert the lower byte of src right above it. The operand order
    // of the rlwinm/insrwi helper nodes is defined elsewhere in this file.
    insrwi(dst, src, imm8, imm16);
  %}
%}
12410 
// Byte-reverse a signed short without byte-reverse instructions.
// Selected only when UseByteReverseInstructions is off. Expands into an
// unsigned shift, an insrwi and a final extsh (cost 3*DEFAULT_COST).
instruct bytes_reverse_short_Ex(iRegIdst dst, iRegIsrc src) %{
  match(Set dst (ReverseBytesS src));
  predicate(!UseByteReverseInstructions);
  ins_cost(3*DEFAULT_COST);

  expand %{
    immI16  imm16 %{ (int) 16 %}
    immI16   imm8 %{ (int)  8 %}
    iRegLdst tmpI1;

    // Shift src right by 8 into the temporary.
    urShiftI_reg_imm(tmpI1, src, imm8);
    // Presumably inserts the low byte of src above it; the operand order of
    // insrwi is defined elsewhere in this file.
    insrwi(tmpI1, src, imm8, imm16);
    // Sign-extend the swapped halfword into dst.
    extsh(dst, tmpI1);
  %}
%}
12426 
// Byte-reverse a signed short using BRH followed by a sign extension of the
// halfword (EXTSH). Only selected when UseByteReverseInstructions is set.
instruct bytes_reverse_short(iRegIdst dst, iRegIsrc src) %{
  match(Set dst (ReverseBytesS src));
  predicate(UseByteReverseInstructions);
  ins_cost(DEFAULT_COST);
  size(8);

  format %{ "BRH   $dst, $src\n\t"
            "EXTSH $dst, $dst" %}

  ins_encode %{
    const Register Rresult = $dst$$Register;
    __ brh(Rresult, $src$$Register);   // swap the two bytes
    __ extsh(Rresult, Rresult);        // sign-extend in place
  %}
  ins_pipe(pipe_class_default);
%}
12442 
// Load int in reversed byte order: folds ReverseBytesI(LoadI) into one LWBRX.
// Only matches when the load is unordered or is followed by an acquire.
instruct loadI_reversed(iRegIdst dst, indirect mem) %{
  match(Set dst (ReverseBytesI (LoadI mem)));
  predicate(n->in(1)->as_Load()->is_unordered() || followed_by_acquire(n->in(1)));
  ins_cost(MEMORY_REF_COST);

  size(4);
  ins_encode %{
    const Register Rresult = $dst$$Register;
    const Register Raddr   = $mem$$Register;
    __ lwbrx(Rresult, Raddr);
  %}
  ins_pipe(pipe_class_default);
%}
12455 
// Load int in reversed byte order, variant without predicate and with higher
// cost: LWBRX followed by TWI_0 on the loaded value and ISYNC (12 bytes).
// Presumably the ordered (acquire) counterpart of loadI_reversed.
instruct loadI_reversed_acquire(iRegIdst dst, indirect mem) %{
  match(Set dst (ReverseBytesI (LoadI mem)));
  ins_cost(2 * MEMORY_REF_COST);

  size(12);
  ins_encode %{
    const Register Rresult = $dst$$Register;
    __ lwbrx(Rresult, $mem$$Register);
    __ twi_0(Rresult);
    __ isync();
  %}
  ins_pipe(pipe_class_default);
%}
12468 
// Load long in reversed byte order: folds ReverseBytesL(LoadL) into one LDBRX.
// Only matches when the load is unordered or is followed by an acquire.
instruct loadL_reversed(iRegLdst dst, indirect mem) %{
  match(Set dst (ReverseBytesL (LoadL mem)));
  predicate((n->in(1)->as_Load()->is_unordered() || followed_by_acquire(n->in(1))));
  ins_cost(MEMORY_REF_COST);

  size(4);
  ins_encode %{
    const Register Rresult = $dst$$Register;
    const Register Raddr   = $mem$$Register;
    __ ldbrx(Rresult, Raddr);
  %}
  ins_pipe(pipe_class_default);
%}
12481 
// Load long in reversed byte order, variant without predicate and with higher
// cost: LDBRX followed by TWI_0 on the loaded value and ISYNC (12 bytes).
// Presumably the ordered (acquire) counterpart of loadL_reversed.
instruct loadL_reversed_acquire(iRegLdst dst, indirect mem) %{
  match(Set dst (ReverseBytesL (LoadL mem)));
  ins_cost(2 * MEMORY_REF_COST);

  size(12);
  ins_encode %{
    const Register Rresult = $dst$$Register;
    __ ldbrx(Rresult, $mem$$Register);
    __ twi_0(Rresult);
    __ isync();
  %}
  ins_pipe(pipe_class_default);
%}
12494 
// Load unsigned short / char in reversed byte order: folds
// ReverseBytesUS(LoadUS) into one LHBRX.
// Only matches when the load is unordered or is followed by an acquire.
instruct loadUS_reversed(iRegIdst dst, indirect mem) %{
  match(Set dst (ReverseBytesUS (LoadUS mem)));
  predicate(n->in(1)->as_Load()->is_unordered() || followed_by_acquire(n->in(1)));
  ins_cost(MEMORY_REF_COST);

  size(4);
  ins_encode %{
    const Register Rresult = $dst$$Register;
    const Register Raddr   = $mem$$Register;
    __ lhbrx(Rresult, Raddr);
  %}
  ins_pipe(pipe_class_default);
%}
12507 
// Load unsigned short / char in reversed byte order, variant without predicate
// and with higher cost: LHBRX followed by TWI_0 on the loaded value and ISYNC
// (12 bytes). Presumably the ordered (acquire) counterpart of loadUS_reversed.
instruct loadUS_reversed_acquire(iRegIdst dst, indirect mem) %{
  match(Set dst (ReverseBytesUS (LoadUS mem)));
  ins_cost(2 * MEMORY_REF_COST);

  size(12);
  ins_encode %{
    const Register Rresult = $dst$$Register;
    __ lhbrx(Rresult, $mem$$Register);
    __ twi_0(Rresult);
    __ isync();
  %}
  ins_pipe(pipe_class_default);
%}
12520 
// Load signed short in reversed byte order: LHBRX plus EXTSH to sign-extend
// the loaded halfword (8 bytes).
// Only matches when the load is unordered or is followed by an acquire.
instruct loadS_reversed(iRegIdst dst, indirect mem) %{
  match(Set dst (ReverseBytesS (LoadS mem)));
  predicate(n->in(1)->as_Load()->is_unordered() || followed_by_acquire(n->in(1)));
  ins_cost(MEMORY_REF_COST + DEFAULT_COST);

  size(8);
  ins_encode %{
    const Register Rresult = $dst$$Register;
    __ lhbrx(Rresult, $mem$$Register);
    __ extsh(Rresult, Rresult);
  %}
  ins_pipe(pipe_class_default);
%}
12534 
// Load signed short in reversed byte order, variant without predicate and
// with higher cost: LHBRX, TWI_0 on the loaded value, EXTSH, ISYNC (16 bytes).
// Presumably the ordered (acquire) counterpart of loadS_reversed.
instruct loadS_reversed_acquire(iRegIdst dst, indirect mem) %{
  match(Set dst (ReverseBytesS (LoadS mem)));
  ins_cost(2 * MEMORY_REF_COST + DEFAULT_COST);

  size(16);
  ins_encode %{
    const Register Rresult = $dst$$Register;
    __ lhbrx(Rresult, $mem$$Register);
    __ twi_0(Rresult);
    __ extsh(Rresult, Rresult);
    __ isync();
  %}
  ins_pipe(pipe_class_default);
%}
12548 
// Store int in reversed byte order: folds StoreI(ReverseBytesI) into STWBRX.
instruct storeI_reversed(iRegIsrc src, indirect mem) %{
  match(Set mem (StoreI mem (ReverseBytesI src)));
  ins_cost(MEMORY_REF_COST);

  size(4);
  ins_encode %{
    const Register Rvalue = $src$$Register;
    const Register Raddr  = $mem$$Register;
    __ stwbrx(Rvalue, Raddr);
  %}
  ins_pipe(pipe_class_default);
%}
12560 
// Store long in reversed byte order: folds StoreL(ReverseBytesL) into STDBRX.
instruct storeL_reversed(iRegLsrc src, indirect mem) %{
  match(Set mem (StoreL mem (ReverseBytesL src)));
  ins_cost(MEMORY_REF_COST);

  size(4);
  ins_encode %{
    const Register Rvalue = $src$$Register;
    const Register Raddr  = $mem$$Register;
    __ stdbrx(Rvalue, Raddr);
  %}
  ins_pipe(pipe_class_default);
%}
12572 
// Store unsigned short / char in reversed byte order: folds
// StoreC(ReverseBytesUS) into STHBRX.
instruct storeUS_reversed(iRegIsrc src, indirect mem) %{
  match(Set mem (StoreC mem (ReverseBytesUS src)));
  ins_cost(MEMORY_REF_COST);

  size(4);
  ins_encode %{
    const Register Rvalue = $src$$Register;
    const Register Raddr  = $mem$$Register;
    __ sthbrx(Rvalue, Raddr);
  %}
  ins_pipe(pipe_class_default);
%}
12584 
// Store signed short in reversed byte order: folds StoreC(ReverseBytesS)
// into STHBRX.
instruct storeS_reversed(iRegIsrc src, indirect mem) %{
  match(Set mem (StoreC mem (ReverseBytesS src)));
  ins_cost(MEMORY_REF_COST);

  size(4);
  ins_encode %{
    const Register Rvalue = $src$$Register;
    const Register Raddr  = $mem$$Register;
    __ sthbrx(Rvalue, Raddr);
  %}
  ins_pipe(pipe_class_default);
%}
12596 
// Helper node without match rule: moves an int GPR into a vector register
// with MTVSRWZ. Referenced from expand rules in this file.
instruct mtvsrwz(vecX temp1, iRegIsrc src) %{
  effect(DEF temp1, USE src);

  format %{ "MTVSRWZ $temp1, $src \t// Move to 16-byte register" %}
  size(4);
  ins_encode %{
    const VectorSRegister vsTarget = $temp1$$VectorRegister->to_vsr();
    __ mtvsrwz(vsTarget, $src$$Register);
  %}
  ins_pipe(pipe_class_default);
%}
12607 
// Helper node without match rule: XXSPLTW with the word index given by the
// immediate operand. Referenced from expand rules in this file.
instruct xxspltw(vecX dst, vecX src, immI8 imm1) %{
  effect(DEF dst, USE src, USE imm1);

  format %{ "XXSPLTW $dst, $src, $imm1 \t// Splat word" %}
  size(4);
  ins_encode %{
    const VectorSRegister vsTarget = $dst$$VectorRegister->to_vsr();
    const VectorSRegister vsSource = $src$$VectorRegister->to_vsr();
    __ xxspltw(vsTarget, vsSource, $imm1$$constant);
  %}
  ins_pipe(pipe_class_default);
%}
12618 
// Helper node without match rule: XSCVDPSPN from a float register into a
// vector register.
instruct xscvdpspn_regF(vecX dst, regF src) %{
  effect(DEF dst, USE src);

  format %{ "XSCVDPSPN $dst, $src \t// Convert scalar single precision to vector single precision" %}
  size(4);
  ins_encode %{
    const VectorSRegister vsTarget = $dst$$VectorRegister->to_vsr();
    const VectorSRegister vsSource = $src$$FloatRegister->to_vsr();
    __ xscvdpspn(vsTarget, vsSource);
  %}
  ins_pipe(pipe_class_default);
%}
12629 
12630 //---------- Replicate Vector Instructions ------------------------------------
12631 
// Replicate helper: INSRDI with src == dst and arguments (32, 0).
// Insrdi does replicate if src == dst.
// Never matched directly (predicate(false)); used from expand rules only.
instruct repl32(iRegLdst dst) %{
  predicate(false);
  effect(USE_DEF dst);

  format %{ "INSRDI  $dst, #0, $dst, #32 \t// replicate" %}
  size(4);
  ins_encode %{
    const Register Rval = $dst$$Register;
    __ insrdi(Rval, Rval, 32, 0);
  %}
  ins_pipe(pipe_class_default);
%}
12644 
// Replicate helper: INSRDI with src == dst and arguments (48, 0).
// Insrdi does replicate if src == dst.
// Never matched directly (predicate(false)); used from expand rules only.
instruct repl48(iRegLdst dst) %{
  predicate(false);
  effect(USE_DEF dst);

  format %{ "INSRDI  $dst, #0, $dst, #48 \t// replicate" %}
  size(4);
  ins_encode %{
    const Register Rval = $dst$$Register;
    __ insrdi(Rval, Rval, 48, 0);
  %}
  ins_pipe(pipe_class_default);
%}
12657 
// Replicate helper: INSRDI with src == dst and arguments (56, 0).
// Insrdi does replicate if src == dst.
// Never matched directly (predicate(false)); used from expand rules only.
instruct repl56(iRegLdst dst) %{
  predicate(false);
  effect(USE_DEF dst);

  format %{ "INSRDI  $dst, #0, $dst, #56 \t// replicate" %}
  size(4);
  ins_encode %{
    const Register Rval = $dst$$Register;
    __ insrdi(Rval, Rval, 56, 0);
  %}
  ins_pipe(pipe_class_default);
%}
12670 
// Replicate a byte held in a GPR into all 8 byte lanes of a long register
// (Replicate with vector length 8 and element type T_BYTE).
// Expands into a register move followed by the repl56, repl48 and repl32
// helper nodes, in that order.
instruct repl8B_reg_Ex(iRegLdst dst, iRegIsrc src) %{
  match(Set dst (Replicate src));
  predicate(n->as_Vector()->length() == 8 &&
            Matcher::vector_element_basic_type(n) == T_BYTE);
  expand %{
    moveReg(dst, src);
    repl56(dst);
    repl48(dst);
    repl32(dst);
  %}
%}
12682 
// Replicate constant 0 into 8 byte lanes of a long register: a single LI.
instruct repl8B_immI0(iRegLdst dst, immI_0 zero) %{
  match(Set dst (Replicate zero));
  predicate(n->as_Vector()->length() == 8 &&
            Matcher::vector_element_basic_type(n) == T_BYTE);
  format %{ "LI      $dst, #0 \t// replicate8B" %}
  size(4);
  ins_encode %{
    // Truncate the constant to 16 bits, then widen it again for LI.
    const short simm16 = (short)($zero$$constant & 0xFFFF);
    __ li($dst$$Register, (int)simm16);
  %}
  ins_pipe(pipe_class_default);
%}
12694 
// Replicate constant -1 into 8 byte lanes of a long register: a single LI.
instruct repl8B_immIminus1(iRegLdst dst, immI_minus1 src) %{
  match(Set dst (Replicate src));
  predicate(n->as_Vector()->length() == 8 &&
            Matcher::vector_element_basic_type(n) == T_BYTE);
  format %{ "LI      $dst, #-1 \t// replicate8B" %}
  size(4);
  ins_encode %{
    // Truncate the constant to 16 bits, then widen it again for LI.
    const short simm16 = (short)($src$$constant & 0xFFFF);
    __ li($dst$$Register, (int)simm16);
  %}
  ins_pipe(pipe_class_default);
%}
12706 
// Replicate a byte held in a GPR into all 16 byte lanes of a vector register
// (Replicate with vector length 16 and element type T_BYTE).
// The byte is first replicated inside a long temporary with the repl56 and
// repl48 helper nodes, then moved into a vector temporary (mtvsrwz) and
// splatted with xxspltw using the immediate 1.
instruct repl16B_reg_Ex(vecX dst, iRegIsrc src) %{
  match(Set dst (Replicate src));
  predicate(n->as_Vector()->length() == 16 &&
            Matcher::vector_element_basic_type(n) == T_BYTE);

  expand %{
    iRegLdst tmpL;
    vecX tmpV;
    immI8  imm1 %{ (int)  1 %}
    moveReg(tmpL, src);
    repl56(tmpL);
    repl48(tmpL);
    mtvsrwz(tmpV, tmpL);
    xxspltw(dst, tmpV, imm1);
  %}
%}
12723 
// Replicate constant 0 into 16 byte lanes: XXLXOR of dst with itself.
instruct repl16B_immI0(vecX dst, immI_0 zero) %{
  match(Set dst (Replicate zero));
  predicate(n->as_Vector()->length() == 16 &&
            Matcher::vector_element_basic_type(n) == T_BYTE);

  format %{ "XXLXOR      $dst, $zero \t// replicate16B" %}
  size(4);
  ins_encode %{
    const VectorSRegister vsDst = $dst$$VectorRegister->to_vsr();
    __ xxlxor(vsDst, vsDst, vsDst);
  %}
  ins_pipe(pipe_class_default);
%}
12736 
// Replicate constant -1 into 16 byte lanes: XXLEQV of dst with itself.
instruct repl16B_immIminus1(vecX dst, immI_minus1 src) %{
  match(Set dst (Replicate src));
  predicate(n->as_Vector()->length() == 16 &&
            Matcher::vector_element_basic_type(n) == T_BYTE);

  format %{ "XXLEQV      $dst, $src \t// replicate16B" %}
  size(4);
  ins_encode %{
    const VectorSRegister vsDst = $dst$$VectorRegister->to_vsr();
    __ xxleqv(vsDst, vsDst, vsDst);
  %}
  ins_pipe(pipe_class_default);
%}
12749 
// Replicate a short held in a GPR into all 4 short lanes of a long register
// (Replicate with vector length 4 and element type T_SHORT).
// Expands into a register move followed by the repl48 and repl32 helper nodes.
instruct repl4S_reg_Ex(iRegLdst dst, iRegIsrc src) %{
  match(Set dst (Replicate src));
  predicate(n->as_Vector()->length() == 4 &&
            Matcher::vector_element_basic_type(n) == T_SHORT);
  expand %{
    moveReg(dst, src);
    repl48(dst);
    repl32(dst);
  %}
%}
12760 
// Replicate constant 0 into 4 short lanes of a long register: a single LI.
instruct repl4S_immI0(iRegLdst dst, immI_0 zero) %{
  match(Set dst (Replicate zero));
  predicate(n->as_Vector()->length() == 4 &&
            Matcher::vector_element_basic_type(n) == T_SHORT);
  format %{ "LI      $dst, #0 \t// replicate4S" %}
  size(4);
  ins_encode %{
    // Truncate the constant to 16 bits, then widen it again for LI.
    const short simm16 = (short)($zero$$constant & 0xFFFF);
    __ li($dst$$Register, (int)simm16);
  %}
  ins_pipe(pipe_class_default);
%}
12772 
// Replicate constant -1 into 4 short lanes of a long register: a single LI.
instruct repl4S_immIminus1(iRegLdst dst, immI_minus1 src) %{
  match(Set dst (Replicate src));
  predicate(n->as_Vector()->length() == 4 &&
            Matcher::vector_element_basic_type(n) == T_SHORT);
  format %{ "LI      $dst, -1 \t// replicate4S" %}
  size(4);
  ins_encode %{
    // Truncate the constant to 16 bits, then widen it again for LI.
    const short simm16 = (short)($src$$constant & 0xFFFF);
    __ li($dst$$Register, (int)simm16);
  %}
  ins_pipe(pipe_class_default);
%}
12784 
// Replicate a short held in a GPR into all 8 short lanes of a vector register
// (Replicate with vector length 8 and element type T_SHORT).
// The short is replicated inside a long temporary (repl48, repl32), moved
// into a vector temporary (mtvsrd) and combined with itself by xxpermdi using
// the immediate 0.
instruct repl8S_reg_Ex(vecX dst, iRegIsrc src) %{
  match(Set dst (Replicate src));
  predicate(n->as_Vector()->length() == 8 &&
            Matcher::vector_element_basic_type(n) == T_SHORT);

  expand %{
    iRegLdst tmpL;
    vecX tmpV;
    immI8  zero %{ (int)  0 %}
    moveReg(tmpL, src);
    repl48(tmpL);
    repl32(tmpL);
    mtvsrd(tmpV, tmpL);
    xxpermdi(dst, tmpV, tmpV, zero);
  %}
%}
12801 
// Replicate constant 0 into 8 short lanes: XXLXOR of dst with itself.
instruct repl8S_immI0(vecX dst, immI_0 zero) %{
  match(Set dst (Replicate zero));
  predicate(n->as_Vector()->length() == 8 &&
            Matcher::vector_element_basic_type(n) == T_SHORT);

  format %{ "XXLXOR      $dst, $zero \t// replicate8S" %}
  size(4);
  ins_encode %{
    const VectorSRegister vsDst = $dst$$VectorRegister->to_vsr();
    __ xxlxor(vsDst, vsDst, vsDst);
  %}
  ins_pipe(pipe_class_default);
%}
12814 
// Replicate constant -1 into 8 short lanes: XXLEQV of dst with itself.
instruct repl8S_immIminus1(vecX dst, immI_minus1 src) %{
  match(Set dst (Replicate src));
  predicate(n->as_Vector()->length() == 8 &&
            Matcher::vector_element_basic_type(n) == T_SHORT);

  format %{ "XXLEQV      $dst, $src \t// replicate8S" %}
  size(4);
  ins_encode %{
    const VectorSRegister vsDst = $dst$$VectorRegister->to_vsr();
    __ xxleqv(vsDst, vsDst, vsDst);
  %}
  ins_pipe(pipe_class_default);
%}
12827 
// Replicate an int held in a GPR into both int lanes of a long register
// (Replicate with vector length 2 and element type T_INT).
// Expands into a register move followed by the repl32 helper node.
instruct repl2I_reg_Ex(iRegLdst dst, iRegIsrc src) %{
  match(Set dst (Replicate src));
  predicate(n->as_Vector()->length() == 2 &&
            Matcher::vector_element_basic_type(n) == T_INT);
  ins_cost(2 * DEFAULT_COST);
  expand %{
    moveReg(dst, src);
    repl32(dst);
  %}
%}
12838 
// Replicate constant 0 into 2 int lanes of a long register: a single LI.
instruct repl2I_immI0(iRegLdst dst, immI_0 zero) %{
  match(Set dst (Replicate zero));
  predicate(n->as_Vector()->length() == 2 &&
            Matcher::vector_element_basic_type(n) == T_INT);
  format %{ "LI      $dst, #0 \t// replicate2I" %}
  size(4);
  ins_encode %{
    // Truncate the constant to 16 bits, then widen it again for LI.
    const short simm16 = (short)($zero$$constant & 0xFFFF);
    __ li($dst$$Register, (int)simm16);
  %}
  ins_pipe(pipe_class_default);
%}
12850 
// Replicate constant -1 into 2 int lanes of a long register: a single LI.
instruct repl2I_immIminus1(iRegLdst dst, immI_minus1 src) %{
  match(Set dst (Replicate src));
  predicate(n->as_Vector()->length() == 2 &&
            Matcher::vector_element_basic_type(n) == T_INT);
  format %{ "LI      $dst, -1 \t// replicate2I" %}
  size(4);
  ins_encode %{
    // Truncate the constant to 16 bits, then widen it again for LI.
    const short simm16 = (short)($src$$constant & 0xFFFF);
    __ li($dst$$Register, (int)simm16);
  %}
  ins_pipe(pipe_class_default);
%}
12862 
// Replicate an int held in a GPR into all 4 int lanes of a vector register
// (Replicate with vector length 4 and element type T_INT).
// The int is replicated inside a long temporary (repl32), moved into a vector
// temporary (mtvsrd) and combined with itself by xxpermdi using the
// immediate 0.
instruct repl4I_reg_Ex(vecX dst, iRegIsrc src) %{
  match(Set dst (Replicate src));
  predicate(n->as_Vector()->length() == 4 &&
            Matcher::vector_element_basic_type(n) == T_INT);
  ins_cost(2 * DEFAULT_COST);

  expand %{
    iRegLdst tmpL;
    vecX tmpV;
    immI8  zero %{ (int)  0 %}
    moveReg(tmpL, src);
    repl32(tmpL);
    mtvsrd(tmpV, tmpL);
    xxpermdi(dst, tmpV, tmpV, zero);
  %}
%}
12879 
// Replicate constant 0 into 4 int lanes: XXLXOR of dst with itself.
instruct repl4I_immI0(vecX dst, immI_0 zero) %{
  match(Set dst (Replicate zero));
  predicate(n->as_Vector()->length() == 4 &&
            Matcher::vector_element_basic_type(n) == T_INT);

  format %{ "XXLXOR      $dst, $zero \t// replicate4I" %}
  size(4);
  ins_encode %{
    const VectorSRegister vsDst = $dst$$VectorRegister->to_vsr();
    __ xxlxor(vsDst, vsDst, vsDst);
  %}
  ins_pipe(pipe_class_default);
%}
12892 
// Replicate constant -1 into 4 int lanes: XXLEQV of dst with itself.
instruct repl4I_immIminus1(vecX dst, immI_minus1 src) %{
  match(Set dst (Replicate src));
  predicate(n->as_Vector()->length() == 4 &&
            Matcher::vector_element_basic_type(n) == T_INT);

  format %{ "XXLEQV      $dst, $dst, $dst \t// replicate4I" %}
  size(4);
  ins_encode %{
    const VectorSRegister vsDst = $dst$$VectorRegister->to_vsr();
    __ xxleqv(vsDst, vsDst, vsDst);
  %}
  ins_pipe(pipe_class_default);
%}
12905 
// Replicate a float register into both float lanes of a long register
// (Replicate with vector length 2 and element type T_FLOAT).
// Move float to int register via stack, replicate.
// The cost accounts for the two memory accesses of the stack round trip plus
// one further instruction.
instruct repl2F_reg_Ex(iRegLdst dst, regF src) %{
  match(Set dst (Replicate src));
  predicate(n->as_Vector()->length() == 2 &&
            Matcher::vector_element_basic_type(n) == T_FLOAT);
  ins_cost(2 * MEMORY_REF_COST + DEFAULT_COST);
  expand %{
    stackSlotL tmpS;
    iRegIdst tmpI;
    moveF2I_reg_stack(tmpS, src);   // Move float to stack.
    moveF2I_stack_reg(tmpI, tmpS);  // Move stack to int reg.
    moveReg(dst, tmpI);             // Move int to long reg.
    repl32(dst);                    // Replicate bitpattern.
  %}
%}
12921 
// Replicate scalar constant to packed float values in Double register
// (Replicate with vector length 2 and element type T_FLOAT).
// No encoding is given here; the node is expanded after register allocation
// by postalloc_expand_load_replF_constant, which receives the constant table
// base in addition to dst and src.
instruct repl2F_immF_Ex(iRegLdst dst, immF src) %{
  match(Set dst (Replicate src));
  predicate(n->as_Vector()->length() == 2 &&
            Matcher::vector_element_basic_type(n) == T_FLOAT);
  ins_cost(5 * DEFAULT_COST);

  format %{ "LD      $dst, offset, $constanttablebase\t// load replicated float $src $src from table, postalloc expanded" %}
  postalloc_expand( postalloc_expand_load_replF_constant(dst, src, constanttablebase) );
%}
12932 
// Replicate the float constant zero into both float lanes of a long register.
// A single LI of 0 is emitted.
instruct repl2F_immF0(iRegLdst dst, immF_0 zero) %{
  match(Set dst (Replicate zero));
  predicate(n->as_Vector()->length() == 2 &&
            Matcher::vector_element_basic_type(n) == T_FLOAT);

  format %{ "LI      $dst, #0 \t// replicate2F" %}
  size(4);
  ins_encode %{
    const Register Rresult = $dst$$Register;
    __ li(Rresult, 0);
  %}
  ins_pipe(pipe_class_default);
%}
12946 
12947 
12948 //----------Vector Arithmetic Instructions--------------------------------------
12949 
12950 // Vector Addition Instructions
12951 
// Add 16 packed bytes (AddVB, vector length 16) with VADDUBM.
instruct vadd16B_reg(vecX dst, vecX src1, vecX src2) %{
  match(Set dst (AddVB src1 src2));
  predicate(n->as_Vector()->length() == 16);
  format %{ "VADDUBM  $dst,$src1,$src2\t// add packed16B" %}
  size(4);
  ins_encode %{
    const VectorRegister vResult = $dst$$VectorRegister;
    const VectorRegister vLeft   = $src1$$VectorRegister;
    const VectorRegister vRight  = $src2$$VectorRegister;
    __ vaddubm(vResult, vLeft, vRight);
  %}
  ins_pipe(pipe_class_default);
%}
12962 
// Add 8 packed shorts (AddVS, vector length 8) with VADDUHM.
instruct vadd8S_reg(vecX dst, vecX src1, vecX src2) %{
  match(Set dst (AddVS src1 src2));
  predicate(n->as_Vector()->length() == 8);
  format %{ "VADDUHM  $dst,$src1,$src2\t// add packed8S" %}
  size(4);
  ins_encode %{
    const VectorRegister vResult = $dst$$VectorRegister;
    const VectorRegister vLeft   = $src1$$VectorRegister;
    const VectorRegister vRight  = $src2$$VectorRegister;
    __ vadduhm(vResult, vLeft, vRight);
  %}
  ins_pipe(pipe_class_default);
%}
12973 
// Add 4 packed ints (AddVI, vector length 4) with VADDUWM.
instruct vadd4I_reg(vecX dst, vecX src1, vecX src2) %{
  match(Set dst (AddVI src1 src2));
  predicate(n->as_Vector()->length() == 4);
  format %{ "VADDUWM  $dst,$src1,$src2\t// add packed4I" %}
  size(4);
  ins_encode %{
    const VectorRegister vResult = $dst$$VectorRegister;
    const VectorRegister vLeft   = $src1$$VectorRegister;
    const VectorRegister vRight  = $src2$$VectorRegister;
    __ vadduwm(vResult, vLeft, vRight);
  %}
  ins_pipe(pipe_class_default);
%}
12984 
// Add 4 packed floats (AddVF, vector length 4) with VADDFP.
instruct vadd4F_reg(vecX dst, vecX src1, vecX src2) %{
  match(Set dst (AddVF src1 src2));
  predicate(n->as_Vector()->length() == 4);
  format %{ "VADDFP  $dst,$src1,$src2\t// add packed4F" %}
  size(4);
  ins_encode %{
    const VectorRegister vResult = $dst$$VectorRegister;
    const VectorRegister vLeft   = $src1$$VectorRegister;
    const VectorRegister vRight  = $src2$$VectorRegister;
    __ vaddfp(vResult, vLeft, vRight);
  %}
  ins_pipe(pipe_class_default);
%}
12995 
// Add 2 packed longs (AddVL, vector length 2) with VADDUDM.
instruct vadd2L_reg(vecX dst, vecX src1, vecX src2) %{
  match(Set dst (AddVL src1 src2));
  predicate(n->as_Vector()->length() == 2);
  format %{ "VADDUDM  $dst,$src1,$src2\t// add packed2L" %}
  size(4);
  ins_encode %{
    const VectorRegister vResult = $dst$$VectorRegister;
    const VectorRegister vLeft   = $src1$$VectorRegister;
    const VectorRegister vRight  = $src2$$VectorRegister;
    __ vaddudm(vResult, vLeft, vRight);
  %}
  ins_pipe(pipe_class_default);
%}
13006 
// Add 2 packed doubles (AddVD, vector length 2) with the VSX form XVADDDP.
instruct vadd2D_reg(vecX dst, vecX src1, vecX src2) %{
  match(Set dst (AddVD src1 src2));
  predicate(n->as_Vector()->length() == 2);
  format %{ "XVADDDP  $dst,$src1,$src2\t// add packed2D" %}
  size(4);
  ins_encode %{
    const VectorSRegister vsResult = $dst$$VectorRegister->to_vsr();
    const VectorSRegister vsLeft   = $src1$$VectorRegister->to_vsr();
    const VectorSRegister vsRight  = $src2$$VectorRegister->to_vsr();
    __ xvadddp(vsResult, vsLeft, vsRight);
  %}
  ins_pipe(pipe_class_default);
%}
13017 
13018 // Vector Subtraction Instructions
13019 
// Subtract 16 packed bytes (SubVB, vector length 16) with VSUBUBM.
instruct vsub16B_reg(vecX dst, vecX src1, vecX src2) %{
  match(Set dst (SubVB src1 src2));
  predicate(n->as_Vector()->length() == 16);
  format %{ "VSUBUBM  $dst,$src1,$src2\t// sub packed16B" %}
  size(4);
  ins_encode %{
    const VectorRegister vResult = $dst$$VectorRegister;
    const VectorRegister vLeft   = $src1$$VectorRegister;
    const VectorRegister vRight  = $src2$$VectorRegister;
    __ vsububm(vResult, vLeft, vRight);
  %}
  ins_pipe(pipe_class_default);
%}
13030 
// Subtract 8 packed shorts (SubVS, vector length 8) with VSUBUHM.
instruct vsub8S_reg(vecX dst, vecX src1, vecX src2) %{
  match(Set dst (SubVS src1 src2));
  predicate(n->as_Vector()->length() == 8);
  format %{ "VSUBUHM  $dst,$src1,$src2\t// sub packed8S" %}
  size(4);
  ins_encode %{
    const VectorRegister vResult = $dst$$VectorRegister;
    const VectorRegister vLeft   = $src1$$VectorRegister;
    const VectorRegister vRight  = $src2$$VectorRegister;
    __ vsubuhm(vResult, vLeft, vRight);
  %}
  ins_pipe(pipe_class_default);
%}
13041 
// Subtract 4 packed ints (SubVI, vector length 4) with VSUBUWM.
instruct vsub4I_reg(vecX dst, vecX src1, vecX src2) %{
  match(Set dst (SubVI src1 src2));
  predicate(n->as_Vector()->length() == 4);
  format %{ "VSUBUWM  $dst,$src1,$src2\t// sub packed4I" %}
  size(4);
  ins_encode %{
    const VectorRegister vResult = $dst$$VectorRegister;
    const VectorRegister vLeft   = $src1$$VectorRegister;
    const VectorRegister vRight  = $src2$$VectorRegister;
    __ vsubuwm(vResult, vLeft, vRight);
  %}
  ins_pipe(pipe_class_default);
%}
13052 
// Subtract 4 packed floats (SubVF, vector length 4) with VSUBFP.
instruct vsub4F_reg(vecX dst, vecX src1, vecX src2) %{
  match(Set dst (SubVF src1 src2));
  predicate(n->as_Vector()->length() == 4);
  format %{ "VSUBFP  $dst,$src1,$src2\t// sub packed4F" %}
  size(4);
  ins_encode %{
    const VectorRegister vResult = $dst$$VectorRegister;
    const VectorRegister vLeft   = $src1$$VectorRegister;
    const VectorRegister vRight  = $src2$$VectorRegister;
    __ vsubfp(vResult, vLeft, vRight);
  %}
  ins_pipe(pipe_class_default);
%}
13063 
// Subtract 2 packed longs (SubVL, vector length 2) with VSUBUDM.
instruct vsub2L_reg(vecX dst, vecX src1, vecX src2) %{
  match(Set dst (SubVL src1 src2));
  predicate(n->as_Vector()->length() == 2);
  format %{ "VSUBUDM  $dst,$src1,$src2\t// sub packed2L" %}
  size(4);
  ins_encode %{
    const VectorRegister vResult = $dst$$VectorRegister;
    const VectorRegister vLeft   = $src1$$VectorRegister;
    const VectorRegister vRight  = $src2$$VectorRegister;
    __ vsubudm(vResult, vLeft, vRight);
  %}
  ins_pipe(pipe_class_default);
%}
13074 
// Subtract 2 packed doubles (SubVD, vector length 2) with the VSX form XVSUBDP.
instruct vsub2D_reg(vecX dst, vecX src1, vecX src2) %{
  match(Set dst (SubVD src1 src2));
  predicate(n->as_Vector()->length() == 2);
  format %{ "XVSUBDP  $dst,$src1,$src2\t// sub packed2D" %}
  size(4);
  ins_encode %{
    const VectorSRegister vsResult = $dst$$VectorRegister->to_vsr();
    const VectorSRegister vsLeft   = $src1$$VectorRegister->to_vsr();
    const VectorSRegister vsRight  = $src2$$VectorRegister->to_vsr();
    __ xvsubdp(vsResult, vsLeft, vsRight);
  %}
  ins_pipe(pipe_class_default);
%}
13085 
13086 // Vector Multiplication Instructions
13087 
// Multiply 8 packed shorts (MulVS, vector length 8).
// Two instructions are emitted (8 bytes): VSPLTISH fills the TEMP register
// with 0, then VMLADDUHM multiplies src1 by src2 with tmp as its fourth
// operand, so the zeroed tmp is the addend of the multiply-add.
instruct vmul8S_reg(vecX dst, vecX src1, vecX src2, vecX tmp) %{
  match(Set dst (MulVS src1 src2));
  predicate(n->as_Vector()->length() == 8);
  effect(TEMP tmp);
  // One format directive covering both emitted instructions. The original
  // used two separate format directives for this two-instruction encoding.
  format %{ "VSPLTISH  $tmp,0\n\t"
            "VMLADDUHM  $dst,$src1,$src2,$tmp\t// mul packed8S" %}
  size(8);
  ins_encode %{
    __ vspltish($tmp$$VectorRegister, 0);
    __ vmladduhm($dst$$VectorRegister, $src1$$VectorRegister, $src2$$VectorRegister, $tmp$$VectorRegister);
  %}
  ins_pipe(pipe_class_default);
%}
13101 
// Multiply 4 packed ints (MulVI, vector length 4) with VMULUWM.
instruct vmul4I_reg(vecX dst, vecX src1, vecX src2) %{
  match(Set dst (MulVI src1 src2));
  predicate(n->as_Vector()->length() == 4);
  format %{ "VMULUWM  $dst,$src1,$src2\t// mul packed4I" %}
  size(4);
  ins_encode %{
    const VectorRegister vResult = $dst$$VectorRegister;
    const VectorRegister vLeft   = $src1$$VectorRegister;
    const VectorRegister vRight  = $src2$$VectorRegister;
    __ vmuluwm(vResult, vLeft, vRight);
  %}
  ins_pipe(pipe_class_default);
%}
13112 
// Multiply 4 packed floats (MulVF, vector length 4) with the VSX form XVMULSP.
instruct vmul4F_reg(vecX dst, vecX src1, vecX src2) %{
  match(Set dst (MulVF src1 src2));
  predicate(n->as_Vector()->length() == 4);
  format %{ "XVMULSP  $dst,$src1,$src2\t// mul packed4F" %}
  size(4);
  ins_encode %{
    const VectorSRegister vsResult = $dst$$VectorRegister->to_vsr();
    const VectorSRegister vsLeft   = $src1$$VectorRegister->to_vsr();
    const VectorSRegister vsRight  = $src2$$VectorRegister->to_vsr();
    __ xvmulsp(vsResult, vsLeft, vsRight);
  %}
  ins_pipe(pipe_class_default);
%}
13123 
// Multiply 2 packed doubles (MulVD, vector length 2) with the VSX form XVMULDP.
instruct vmul2D_reg(vecX dst, vecX src1, vecX src2) %{
  match(Set dst (MulVD src1 src2));
  predicate(n->as_Vector()->length() == 2);
  format %{ "XVMULDP  $dst,$src1,$src2\t// mul packed2D" %}
  size(4);
  ins_encode %{
    const VectorSRegister vsResult = $dst$$VectorRegister->to_vsr();
    const VectorSRegister vsLeft   = $src1$$VectorRegister->to_vsr();
    const VectorSRegister vsRight  = $src2$$VectorRegister->to_vsr();
    __ xvmuldp(vsResult, vsLeft, vsRight);
  %}
  ins_pipe(pipe_class_default);
%}
13134 
13135 // Vector Division Instructions
13136 
// Divide 4 packed floats (DivVF, vector length 4) with the VSX form XVDIVSP.
instruct vdiv4F_reg(vecX dst, vecX src1, vecX src2) %{
  match(Set dst (DivVF src1 src2));
  predicate(n->as_Vector()->length() == 4);
  format %{ "XVDIVSP  $dst,$src1,$src2\t// div packed4F" %}
  size(4);
  ins_encode %{
    const VectorSRegister vsResult   = $dst$$VectorRegister->to_vsr();
    const VectorSRegister vsDividend = $src1$$VectorRegister->to_vsr();
    const VectorSRegister vsDivisor  = $src2$$VectorRegister->to_vsr();
    __ xvdivsp(vsResult, vsDividend, vsDivisor);
  %}
  ins_pipe(pipe_class_default);
%}
13147 
// Divide 2 packed doubles (DivVD, vector length 2) with the VSX form XVDIVDP.
instruct vdiv2D_reg(vecX dst, vecX src1, vecX src2) %{
  match(Set dst (DivVD src1 src2));
  predicate(n->as_Vector()->length() == 2);
  format %{ "XVDIVDP  $dst,$src1,$src2\t// div packed2D" %}
  size(4);
  ins_encode %{
    const VectorSRegister vsResult   = $dst$$VectorRegister->to_vsr();
    const VectorSRegister vsDividend = $src1$$VectorRegister->to_vsr();
    const VectorSRegister vsDivisor  = $src2$$VectorRegister->to_vsr();
    __ xvdivdp(vsResult, vsDividend, vsDivisor);
  %}
  ins_pipe(pipe_class_default);
%}
13158 
13159 // Vector Min / Max Instructions
13160 
// Signed vector minimum (MinV). The instruction is picked by element type:
// VMINSW for T_INT, VMINSD for T_LONG; any other type is a fatal error.
instruct vmin_reg(vecX dst, vecX src1, vecX src2) %{
  match(Set dst (MinV src1 src2));
  format %{ "VMIN  $dst,$src1,$src2\t// vector min" %}
  size(4);
  ins_encode %{
    const BasicType elem_bt = Matcher::vector_element_basic_type(this);
    const VectorRegister vResult = $dst$$VectorRegister;
    const VectorRegister vLeft   = $src1$$VectorRegister;
    const VectorRegister vRight  = $src2$$VectorRegister;
    if (elem_bt == T_INT) {
      __ vminsw(vResult, vLeft, vRight);
    } else if (elem_bt == T_LONG) {
      __ vminsd(vResult, vLeft, vRight);
    } else {
      ShouldNotReachHere();
    }
  %}
  ins_pipe(pipe_class_default);
%}
13180 
// Signed vector maximum (MaxV). The instruction is picked by element type:
// VMAXSW for T_INT, VMAXSD for T_LONG; any other type is a fatal error.
instruct vmax_reg(vecX dst, vecX src1, vecX src2) %{
  match(Set dst (MaxV src1 src2));
  format %{ "VMAX  $dst,$src1,$src2\t// vector max" %}
  size(4);
  ins_encode %{
    const BasicType elem_bt = Matcher::vector_element_basic_type(this);
    const VectorRegister vResult = $dst$$VectorRegister;
    const VectorRegister vLeft   = $src1$$VectorRegister;
    const VectorRegister vRight  = $src2$$VectorRegister;
    if (elem_bt == T_INT) {
      __ vmaxsw(vResult, vLeft, vRight);
    } else if (elem_bt == T_LONG) {
      __ vmaxsd(vResult, vLeft, vRight);
    } else {
      ShouldNotReachHere();
    }
  %}
  ins_pipe(pipe_class_default);
%}
13200 
// Unsigned vector minimum (UMinV). The instruction is picked by element type:
// VMINUW for T_INT, VMINUD for T_LONG; any other type is a fatal error.
instruct vminu_reg(vecX dst, vecX src1, vecX src2) %{
  match(Set dst (UMinV src1 src2));
  format %{ "VMINU  $dst,$src1,$src2\t// vector unsigned min" %}
  size(4);
  ins_encode %{
    const BasicType elem_bt = Matcher::vector_element_basic_type(this);
    const VectorRegister vResult = $dst$$VectorRegister;
    const VectorRegister vLeft   = $src1$$VectorRegister;
    const VectorRegister vRight  = $src2$$VectorRegister;
    if (elem_bt == T_INT) {
      __ vminuw(vResult, vLeft, vRight);
    } else if (elem_bt == T_LONG) {
      __ vminud(vResult, vLeft, vRight);
    } else {
      ShouldNotReachHere();
    }
  %}
  ins_pipe(pipe_class_default);
%}
13220 
// Unsigned vector maximum (UMaxV). The instruction is picked by element type:
// VMAXUW for T_INT, VMAXUD for T_LONG; any other type is a fatal error.
instruct vmaxu_reg(vecX dst, vecX src1, vecX src2) %{
  match(Set dst (UMaxV src1 src2));
  format %{ "VMAXU  $dst,$src1,$src2\t// vector unsigned max" %}
  size(4);
  ins_encode %{
    const BasicType elem_bt = Matcher::vector_element_basic_type(this);
    const VectorRegister vResult = $dst$$VectorRegister;
    const VectorRegister vLeft   = $src1$$VectorRegister;
    const VectorRegister vRight  = $src2$$VectorRegister;
    if (elem_bt == T_INT) {
      __ vmaxuw(vResult, vLeft, vRight);
    } else if (elem_bt == T_LONG) {
      __ vmaxud(vResult, vLeft, vRight);
    } else {
      ShouldNotReachHere();
    }
  %}
  ins_pipe(pipe_class_default);
%}
13240 
// Bitwise AND (AndV) of two vecX operands, emitted as a single vand.
instruct vand(vecX dst, vecX src1, vecX src2) %{
  match(Set dst (AndV src1 src2));
  format %{ "VAND   $dst,$src1,$src2\t// and vectors" %}
  size(4);
  ins_encode %{
    VectorRegister Vdst = $dst$$VectorRegister;
    VectorRegister Vlhs = $src1$$VectorRegister;
    VectorRegister Vrhs = $src2$$VectorRegister;
    __ vand(Vdst, Vlhs, Vrhs);
  %}
  ins_pipe(pipe_class_default);
%}
13250 
// Bitwise OR (OrV) of two vecX operands, emitted as a single vor.
instruct vor(vecX dst, vecX src1, vecX src2) %{
  match(Set dst (OrV src1 src2));
  format %{ "VOR   $dst,$src1,$src2\t// or vectors" %}
  size(4);
  ins_encode %{
    VectorRegister Vdst = $dst$$VectorRegister;
    VectorRegister Vlhs = $src1$$VectorRegister;
    VectorRegister Vrhs = $src2$$VectorRegister;
    __ vor(Vdst, Vlhs, Vrhs);
  %}
  ins_pipe(pipe_class_default);
%}
13260 
// Bitwise XOR (XorV) of two vecX operands, emitted as a single vxor.
instruct vxor(vecX dst, vecX src1, vecX src2) %{
  match(Set dst (XorV src1 src2));
  format %{ "VXOR   $dst,$src1,$src2\t// xor vectors" %}
  size(4);
  ins_encode %{
    VectorRegister Vdst = $dst$$VectorRegister;
    VectorRegister Vlhs = $src1$$VectorRegister;
    VectorRegister Vrhs = $src2$$VectorRegister;
    __ vxor(Vdst, Vlhs, Vrhs);
  %}
  ins_pipe(pipe_class_default);
%}
13270 
// Int vector reduction combined with a scalar int input. One rule serves
// add, mul, and, or and xor: the ideal opcode is handed to reduceI(), which
// selects the operation. Two vector temporaries are reserved for it.
instruct reductionI_arith_logic(iRegIdst dst, iRegIsrc srcInt, vecX srcVec, vecX tmp1, vecX tmp2) %{
  match(Set dst (AddReductionVI srcInt srcVec));
  match(Set dst (MulReductionVI srcInt srcVec));
  match(Set dst (AndReductionV  srcInt srcVec));
  match(Set dst ( OrReductionV  srcInt srcVec));
  match(Set dst (XorReductionV  srcInt srcVec));
  predicate(Matcher::vector_element_basic_type(n->in(2)) == T_INT);
  effect(TEMP tmp1, TEMP tmp2);
  ins_cost(DEFAULT_COST * 6);
  size(24);
  format %{ "REDUCEI_ARITH_LOGIC // $dst,$srcInt,$srcVec,$tmp1,$tmp2\t// reduce vector int add/mul/and/or/xor" %}
  ins_encode %{
    Register       Rresult = $dst$$Register;
    Register       Rscalar = $srcInt$$Register;
    VectorRegister Vinput  = $srcVec$$VectorRegister;
    VectorRegister Vtmp1   = $tmp1$$VectorRegister;
    VectorRegister Vtmp2   = $tmp2$$VectorRegister;
    __ reduceI(this->ideal_Opcode(), Rresult, Rscalar, Vinput, Vtmp1, Vtmp2);
  %}
  ins_pipe(pipe_class_default);
%}
13289 
// Int vector min/max reduction combined with a scalar int input. The ideal
// opcode is forwarded to reduceI(); besides two vector temporaries this rule
// declares CR0 as killed.
instruct reductionI_min_max(iRegIdst dst, iRegIsrc srcInt, vecX srcVec, vecX tmp1, vecX tmp2, flagsRegCR0 cr0) %{
  match(Set dst (MinReductionV srcInt srcVec));
  match(Set dst (MaxReductionV srcInt srcVec));
  predicate(Matcher::vector_element_basic_type(n->in(2)) == T_INT);
  effect(TEMP tmp1, TEMP tmp2, KILL cr0);
  ins_cost(DEFAULT_COST * 7);
  size(28);
  format %{ "REDUCEI_MINMAX // $dst,$srcInt,$srcVec,$tmp1,$tmp2,cr0\t// reduce vector int min/max" %}
  ins_encode %{
    Register       Rresult = $dst$$Register;
    Register       Rscalar = $srcInt$$Register;
    VectorRegister Vinput  = $srcVec$$VectorRegister;
    VectorRegister Vtmp1   = $tmp1$$VectorRegister;
    VectorRegister Vtmp2   = $tmp2$$VectorRegister;
    __ reduceI(this->ideal_Opcode(), Rresult, Rscalar, Vinput, Vtmp1, Vtmp2);
  %}
  ins_pipe(pipe_class_default);
%}
13305 
13306 // Vector Absolute Instructions
13307 
// Absolute value of a 4-lane float vector (AbsVF) via xvabssp.
instruct vabs4F_reg(vecX dst, vecX src) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (AbsVF src));
  size(4);
  format %{ "XVABSSP $dst,$src\t// absolute packed4F" %}
  ins_encode %{
    VectorSRegister Xdst = $dst$$VectorRegister->to_vsr();
    VectorSRegister Xsrc = $src$$VectorRegister->to_vsr();
    __ xvabssp(Xdst, Xsrc);
  %}
  ins_pipe(pipe_class_default);
%}
13318 
// Absolute value of a 2-lane double vector (AbsVD) via xvabsdp.
instruct vabs2D_reg(vecX dst, vecX src) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (AbsVD src));
  size(4);
  format %{ "XVABSDP $dst,$src\t// absolute packed2D" %}
  ins_encode %{
    VectorSRegister Xdst = $dst$$VectorRegister->to_vsr();
    VectorSRegister Xsrc = $src$$VectorRegister->to_vsr();
    __ xvabsdp(Xdst, Xsrc);
  %}
  ins_pipe(pipe_class_default);
%}
13329 
// Round Instructions
//
// Scalar double rounding (RoundDoubleMode). The constant rmode operand picks
// the instruction: rint uses xvrdpic on the VSR view of the float registers,
// floor uses frim and ceil uses frip. Any other mode is not expected.
instruct roundD_reg(regD dst, regD src, immI8 rmode) %{
  match(Set dst (RoundDoubleMode src rmode));
  size(4);
  format %{ "RoundDoubleMode $src,$rmode" %}
  ins_encode %{
    FloatRegister Fdst = $dst$$FloatRegister;
    FloatRegister Fsrc = $src$$FloatRegister;
    switch ($rmode$$constant) {
      case RoundDoubleModeNode::rmode_rint: {
        __ xvrdpic(Fdst->to_vsr(), Fsrc->to_vsr());
        break;
      }
      case RoundDoubleModeNode::rmode_floor: {
        __ frim(Fdst, Fsrc);
        break;
      }
      case RoundDoubleModeNode::rmode_ceil: {
        __ frip(Fdst, Fsrc);
        break;
      }
      default:
        ShouldNotReachHere();
    }
  %}
  ins_pipe(pipe_class_default);
%}
13352 
// Vector Round Instructions
//
// Rounding of a 2-lane double vector (RoundDoubleModeV). The constant rmode
// operand picks xvrdpic (rint), xvrdpim (floor) or xvrdpip (ceil).
instruct vround2D_reg(vecX dst, vecX src, immI8 rmode) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (RoundDoubleModeV src rmode));
  size(4);
  format %{ "RoundDoubleModeV $src,$rmode" %}
  ins_encode %{
    VectorSRegister Xdst = $dst$$VectorRegister->to_vsr();
    VectorSRegister Xsrc = $src$$VectorRegister->to_vsr();
    switch ($rmode$$constant) {
      case RoundDoubleModeNode::rmode_rint: {
        __ xvrdpic(Xdst, Xsrc);
        break;
      }
      case RoundDoubleModeNode::rmode_floor: {
        __ xvrdpim(Xdst, Xsrc);
        break;
      }
      case RoundDoubleModeNode::rmode_ceil: {
        __ xvrdpip(Xdst, Xsrc);
        break;
      }
      default:
        ShouldNotReachHere();
    }
  %}
  ins_pipe(pipe_class_default);
%}
13376 
13377 // Vector Negate Instructions
13378 
// Negation of a 4-lane float vector (NegVF) via xvnegsp.
instruct vneg4F_reg(vecX dst, vecX src) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (NegVF src));
  size(4);
  format %{ "XVNEGSP $dst,$src\t// negate packed4F" %}
  ins_encode %{
    VectorSRegister Xdst = $dst$$VectorRegister->to_vsr();
    VectorSRegister Xsrc = $src$$VectorRegister->to_vsr();
    __ xvnegsp(Xdst, Xsrc);
  %}
  ins_pipe(pipe_class_default);
%}
13389 
// Negation of a 2-lane double vector (NegVD) via xvnegdp.
instruct vneg2D_reg(vecX dst, vecX src) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (NegVD src));
  size(4);
  format %{ "XVNEGDP $dst,$src\t// negate packed2D" %}
  ins_encode %{
    VectorSRegister Xdst = $dst$$VectorRegister->to_vsr();
    VectorSRegister Xsrc = $src$$VectorRegister->to_vsr();
    __ xvnegdp(Xdst, Xsrc);
  %}
  ins_pipe(pipe_class_default);
%}
13400 
// Negation of an int vector (NegVI) via vnegw; the predicate restricts the
// rule to T_INT elements.
instruct vneg4I_reg(vecX dst, vecX src) %{
  predicate(Matcher::vector_element_basic_type(n) == T_INT);
  match(Set dst (NegVI src));
  size(4);
  format %{ "VNEGW $dst,$src\t// negate int vector" %}
  ins_encode %{
    VectorRegister Vdst = $dst$$VectorRegister;
    VectorRegister Vsrc = $src$$VectorRegister;
    __ vnegw(Vdst, Vsrc);
  %}
  ins_pipe(pipe_class_default);
%}
13411 
13412 // Vector Square Root Instructions
13413 
// Square root of a 4-lane float vector (SqrtVF) via xvsqrtsp.
instruct vsqrt4F_reg(vecX dst, vecX src) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (SqrtVF src));
  size(4);
  format %{ "XVSQRTSP $dst,$src\t// sqrt packed4F" %}
  ins_encode %{
    VectorSRegister Xdst = $dst$$VectorRegister->to_vsr();
    VectorSRegister Xsrc = $src$$VectorRegister->to_vsr();
    __ xvsqrtsp(Xdst, Xsrc);
  %}
  ins_pipe(pipe_class_default);
%}
13424 
// Square root of a 2-lane double vector (SqrtVD) via xvsqrtdp.
instruct vsqrt2D_reg(vecX dst, vecX src) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (SqrtVD src));
  size(4);
  format %{ "XVSQRTDP  $dst,$src\t// sqrt packed2D" %}
  ins_encode %{
    VectorSRegister Xdst = $dst$$VectorRegister->to_vsr();
    VectorSRegister Xsrc = $src$$VectorRegister->to_vsr();
    __ xvsqrtdp(Xdst, Xsrc);
  %}
  ins_pipe(pipe_class_default);
%}
13435 
13436 // Vector Population Count and Zeros Count Instructions
13437 
// Per-lane population count for PopCountVI and PopCountVL. The lane width
// is taken from the node element type and selects vpopcntb/h/w/d.
instruct vpopcnt_reg(vecX dst, vecX src) %{
  match(Set dst (PopCountVI src));
  match(Set dst (PopCountVL src));
  size(4);
  format %{ "VPOPCNT $dst,$src\t// pop count packed" %}
  ins_encode %{
    VectorRegister Vdst = $dst$$VectorRegister;
    VectorRegister Vsrc = $src$$VectorRegister;
    switch (Matcher::vector_element_basic_type(this)) {
      case T_BYTE:  { __ vpopcntb(Vdst, Vsrc); break; }
      case T_SHORT: { __ vpopcnth(Vdst, Vsrc); break; }
      case T_INT:   { __ vpopcntw(Vdst, Vsrc); break; }
      case T_LONG:  { __ vpopcntd(Vdst, Vsrc); break; }
      default:
        ShouldNotReachHere();
    }
  %}
  ins_pipe(pipe_class_default);
%}
13464 
// Per-lane count of leading zeros (CountLeadingZerosV). The lane width is
// taken from the node element type and selects vclzb/h/w/d.
instruct vcount_leading_zeros_reg(vecX dst, vecX src) %{
  match(Set dst (CountLeadingZerosV src));
  size(4);
  format %{ "VCLZ $dst,$src\t// leading zeros count packed" %}
  ins_encode %{
    VectorRegister Vdst = $dst$$VectorRegister;
    VectorRegister Vsrc = $src$$VectorRegister;
    switch (Matcher::vector_element_basic_type(this)) {
      case T_BYTE:  { __ vclzb(Vdst, Vsrc); break; }
      case T_SHORT: { __ vclzh(Vdst, Vsrc); break; }
      case T_INT:   { __ vclzw(Vdst, Vsrc); break; }
      case T_LONG:  { __ vclzd(Vdst, Vsrc); break; }
      default:
        ShouldNotReachHere();
    }
  %}
  ins_pipe(pipe_class_default);
%}
13490 
// Per-lane count of trailing zeros (CountTrailingZerosV). The lane width is
// taken from the node element type and selects vctzb/h/w/d.
instruct vcount_trailing_zeros_reg(vecX dst, vecX src) %{
  match(Set dst (CountTrailingZerosV src));
  size(4);
  format %{ "VCTZ $dst,$src\t// trailing zeros count packed" %}
  ins_encode %{
    VectorRegister Vdst = $dst$$VectorRegister;
    VectorRegister Vsrc = $src$$VectorRegister;
    switch (Matcher::vector_element_basic_type(this)) {
      case T_BYTE:  { __ vctzb(Vdst, Vsrc); break; }
      case T_SHORT: { __ vctzh(Vdst, Vsrc); break; }
      case T_INT:   { __ vctzw(Vdst, Vsrc); break; }
      case T_LONG:  { __ vctzd(Vdst, Vsrc); break; }
      default:
        ShouldNotReachHere();
    }
  %}
  ins_pipe(pipe_class_default);
%}
13516 
13517 // --------------------------------- FMA --------------------------------------
// Fused multiply-add on 4 float lanes: dst = src1 * src2 + dst.
// dst is both the addend input and the result (xvmaddasp).
instruct vfma4F(vecX dst, vecX src1, vecX src2) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (FmaVF dst (Binary src1 src2)));
  size(4);
  format %{ "XVMADDASP   $dst, $src1, $src2" %}
  ins_encode %{
    assert(UseFMA, "Needs FMA instructions support.");
    VectorSRegister Xacc = $dst$$VectorRegister->to_vsr();
    VectorSRegister Xa   = $src1$$VectorRegister->to_vsr();
    VectorSRegister Xb   = $src2$$VectorRegister->to_vsr();
    __ xvmaddasp(Xacc, Xa, Xb);
  %}
  ins_pipe(pipe_class_default);
%}
13532 
// Fused multiply-add on 4 float lanes with a negated factor:
// dst = src1 * (-src2) + dst (xvnmsubasp).
// "(-src1) * src2 + dst" has been idealized to "src2 * (-src1) + dst",
// so only the second factor needs to be matched under NegVF.
instruct vfma4F_neg1(vecX dst, vecX src1, vecX src2) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (FmaVF dst (Binary src1 (NegVF src2))));
  size(4);
  format %{ "XVNMSUBASP   $dst, $src1, $src2" %}
  ins_encode %{
    assert(UseFMA, "Needs FMA instructions support.");
    VectorSRegister Xacc = $dst$$VectorRegister->to_vsr();
    VectorSRegister Xa   = $src1$$VectorRegister->to_vsr();
    VectorSRegister Xb   = $src2$$VectorRegister->to_vsr();
    __ xvnmsubasp(Xacc, Xa, Xb);
  %}
  ins_pipe(pipe_class_default);
%}
13548 
// Fused multiply-subtract on 4 float lanes: dst = src1 * src2 - dst.
// The negated addend is matched as NegVF on dst (xvmsubasp).
instruct vfma4F_neg2(vecX dst, vecX src1, vecX src2) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (FmaVF (NegVF dst) (Binary src1 src2)));
  size(4);
  format %{ "XVMSUBASP   $dst, $src1, $src2" %}
  ins_encode %{
    assert(UseFMA, "Needs FMA instructions support.");
    VectorSRegister Xacc = $dst$$VectorRegister->to_vsr();
    VectorSRegister Xa   = $src1$$VectorRegister->to_vsr();
    VectorSRegister Xb   = $src2$$VectorRegister->to_vsr();
    __ xvmsubasp(Xacc, Xa, Xb);
  %}
  ins_pipe(pipe_class_default);
%}
13563 
// Fused multiply-add on 2 double lanes: dst = src1 * src2 + dst.
// dst is both the addend input and the result (xvmaddadp).
instruct vfma2D(vecX dst, vecX src1, vecX src2) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (FmaVD  dst (Binary src1 src2)));
  size(4);
  format %{ "XVMADDADP   $dst, $src1, $src2" %}
  ins_encode %{
    assert(UseFMA, "Needs FMA instructions support.");
    VectorSRegister Xacc = $dst$$VectorRegister->to_vsr();
    VectorSRegister Xa   = $src1$$VectorRegister->to_vsr();
    VectorSRegister Xb   = $src2$$VectorRegister->to_vsr();
    __ xvmaddadp(Xacc, Xa, Xb);
  %}
  ins_pipe(pipe_class_default);
%}
13578 
// Fused multiply-add on 2 double lanes with a negated factor:
// dst = src1 * (-src2) + dst (xvnmsubadp).
// "(-src1) * src2 + dst" has been idealized to "src2 * (-src1) + dst",
// so only the second factor needs to be matched under NegVD.
instruct vfma2D_neg1(vecX dst, vecX src1, vecX src2) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (FmaVD  dst (Binary src1 (NegVD src2))));
  size(4);
  format %{ "XVNMSUBADP   $dst, $src1, $src2" %}
  ins_encode %{
    assert(UseFMA, "Needs FMA instructions support.");
    VectorSRegister Xacc = $dst$$VectorRegister->to_vsr();
    VectorSRegister Xa   = $src1$$VectorRegister->to_vsr();
    VectorSRegister Xb   = $src2$$VectorRegister->to_vsr();
    __ xvnmsubadp(Xacc, Xa, Xb);
  %}
  ins_pipe(pipe_class_default);
%}
13594 
// Fused multiply-subtract on 2 double lanes: dst = src1 * src2 - dst.
// The negated addend is matched as NegVD on dst (xvmsubadp).
instruct vfma2D_neg2(vecX dst, vecX src1, vecX src2) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (FmaVD (NegVD dst) (Binary src1 src2)));
  size(4);
  format %{ "XVMSUBADP   $dst, $src1, $src2" %}
  ins_encode %{
    assert(UseFMA, "Needs FMA instructions support.");
    VectorSRegister Xacc = $dst$$VectorRegister->to_vsr();
    VectorSRegister Xa   = $src1$$VectorRegister->to_vsr();
    VectorSRegister Xb   = $src2$$VectorRegister->to_vsr();
    __ xvmsubadp(Xacc, Xa, Xb);
  %}
  ins_pipe(pipe_class_default);
%}
13609 
13610 //----------Overflow Math Instructions-----------------------------------------
13611 
13612 // Note that we have to make sure that XER.SO is reset before using overflow instructions.
13613 // Simple Overflow operations can be matched by very few instructions (e.g. addExact: xor, and_, bc).
// Seems like only Long intrinsics have an advantage. (The only expensive one is OverflowMulL.)
13615 
// Long add overflow check (OverflowAddL): sets CR0 from an overflow-recording
// add whose arithmetic result is discarded into R0.
// Emits three instructions (size 12): li + mtxer to clear XER.SO, then addo_.
instruct overflowAddL_reg_reg(flagsRegCR0 cr0, iRegLsrc op1, iRegLsrc op2) %{
  match(Set cr0 (OverflowAddL op1 op2));

  // The format names the mnemonic actually emitted (addo_) and its R0 target,
  // in line with the sub/neg/mul overflow rules.
  format %{ "ADDO_   R0, $op1, $op2\t# overflow check long" %}
  size(12);
  ins_encode %{
    __ li(R0, 0);
    __ mtxer(R0); // clear XER.SO
    __ addo_(R0, $op1$$Register, $op2$$Register);
  %}
  ins_pipe(pipe_class_default);
%}
13628 
// Long subtract overflow check (OverflowSubL): sets CR0 from an
// overflow-recording subtract whose result is discarded into R0.
// subfo_ takes the subtrahend first, hence op2 before op1.
instruct overflowSubL_reg_reg(flagsRegCR0 cr0, iRegLsrc op1, iRegLsrc op2) %{
  match(Set cr0 (OverflowSubL op1 op2));
  size(12);
  format %{ "SUBFO_  R0, $op2, $op1\t# overflow check long" %}
  ins_encode %{
    Register Rminuend    = $op1$$Register;
    Register Rsubtrahend = $op2$$Register;
    // Clear XER.SO before the overflow-recording instruction.
    __ li(R0, 0);
    __ mtxer(R0);
    __ subfo_(R0, Rsubtrahend, Rminuend);
  %}
  ins_pipe(pipe_class_default);
%}
13641 
// Long negate overflow check: matches OverflowSubL with a constant zero as
// first operand and sets CR0 from nego_, discarding the result into R0.
instruct overflowNegL_reg(flagsRegCR0 cr0, immL_0 zero, iRegLsrc op2) %{
  match(Set cr0 (OverflowSubL zero op2));
  size(12);
  format %{ "NEGO_   R0, $op2\t# overflow check long" %}
  ins_encode %{
    Register Rvalue = $op2$$Register;
    // Clear XER.SO before the overflow-recording instruction.
    __ li(R0, 0);
    __ mtxer(R0);
    __ nego_(R0, Rvalue);
  %}
  ins_pipe(pipe_class_default);
%}
13654 
// Long multiply overflow check (OverflowMulL): sets CR0 from mulldo_,
// discarding the product into R0.
instruct overflowMulL_reg_reg(flagsRegCR0 cr0, iRegLsrc op1, iRegLsrc op2) %{
  match(Set cr0 (OverflowMulL op1 op2));
  size(12);
  format %{ "MULLDO_ R0, $op1, $op2\t# overflow check long" %}
  ins_encode %{
    Register Rfactor1 = $op1$$Register;
    Register Rfactor2 = $op2$$Register;
    // Clear XER.SO before the overflow-recording instruction.
    __ li(R0, 0);
    __ mtxer(R0);
    __ mulldo_(R0, Rfactor1, Rfactor2);
  %}
  ins_pipe(pipe_class_default);
%}
13667 
// Replicates a scalar float register into all 4 lanes of a vecX.
// Expands into two rules defined elsewhere in this file: xscvdpspn_regF
// writes a temporary vector from src, then xxspltw splats from that
// temporary with the constant selector 0.
instruct repl4F_reg_Ex(vecX dst, regF src) %{
  match(Set dst (Replicate src));
  predicate(n->as_Vector()->length() == 4 &&
            Matcher::vector_element_basic_type(n) == T_FLOAT);
  ins_cost(DEFAULT_COST);
  expand %{
    vecX tmpV;
    immI8  zero %{ (int)  0 %}

    xscvdpspn_regF(tmpV, src);
    xxspltw(dst, tmpV, zero);
  %}
%}
13681 
// Replicates a float constant into all 4 lanes of a vecX.
// The work is delegated to the postalloc expansion
// postalloc_expand_load_replF_constant_vsx, which receives the constant
// table base and a long temporary register.
instruct repl4F_immF_Ex(vecX dst, immF src, iRegLdst tmp) %{
  match(Set dst (Replicate src));
  predicate(n->as_Vector()->length() == 4 &&
            Matcher::vector_element_basic_type(n) == T_FLOAT);
  effect(TEMP tmp);
  ins_cost(10 * DEFAULT_COST);

  postalloc_expand( postalloc_expand_load_replF_constant_vsx(dst, src, constanttablebase, tmp) );
%}
13691 
// Replicates float zero into all 4 lanes by XOR-ing dst with itself; the
// zero operand is only matched, never read by the encoding.
instruct repl4F_immF0(vecX dst, immF_0 zero) %{
  predicate(n->as_Vector()->length() == 4 &&
            Matcher::vector_element_basic_type(n) == T_FLOAT);
  match(Set dst (Replicate zero));
  size(4);
  format %{ "XXLXOR      $dst, $zero \t// replicate4F" %}
  ins_encode %{
    VectorSRegister Xdst = $dst$$VectorRegister->to_vsr();
    __ xxlxor(Xdst, Xdst, Xdst);
  %}
  ins_pipe(pipe_class_default);
%}
13704 
// Replicates a scalar double register into both lanes of a vecX with one
// xxpermdi that uses the VSR view of src for both inputs and selector 0.
instruct repl2D_reg_Ex(vecX dst, regD src) %{
  predicate(n->as_Vector()->length() == 2 &&
            Matcher::vector_element_basic_type(n) == T_DOUBLE);
  match(Set dst (Replicate src));
  size(4);
  format %{ "XXPERMDI      $dst, $src, $src, 0 \t// Splat doubleword" %}
  ins_encode %{
    VectorSRegister Xdst = $dst$$VectorRegister->to_vsr();
    VectorSRegister Xsrc = $src$$FloatRegister->to_vsr();
    __ xxpermdi(Xdst, Xsrc, Xsrc, 0);
  %}
  ins_pipe(pipe_class_default);
%}
13717 
// Replicates double zero into both lanes by XOR-ing dst with itself; the
// zero operand is only matched, never read by the encoding.
instruct repl2D_immD0(vecX dst, immD_0 zero) %{
  predicate(n->as_Vector()->length() == 2 &&
            Matcher::vector_element_basic_type(n) == T_DOUBLE);
  match(Set dst (Replicate zero));
  size(4);
  format %{ "XXLXOR      $dst, $zero \t// replicate2D" %}
  ins_encode %{
    VectorSRegister Xdst = $dst$$VectorRegister->to_vsr();
    __ xxlxor(Xdst, Xdst, Xdst);
  %}
  ins_pipe(pipe_class_default);
%}
13730 
// Helper rule without a match pattern (predicate(false)): moves a long
// general purpose register into a vector register via mtvsrd. It is used as
// a building block by expand rules such as repl2L_reg_Ex.
instruct mtvsrd(vecX dst, iRegLsrc src) %{
  effect(DEF dst, USE src);
  predicate(false);
  size(4);
  format %{ "MTVSRD      $dst, $src \t// Move to 16-byte register" %}
  ins_encode %{
    VectorSRegister Xdst = $dst$$VectorRegister->to_vsr();
    Register        Rsrc = $src$$Register;
    __ mtvsrd(Xdst, Rsrc);
  %}
  ins_pipe(pipe_class_default);
%}
13742 
// Helper rule without a match pattern: doubleword splat, encoded as xxpermdi
// with src supplied for both inputs and the immediate as selector.
instruct xxspltd(vecX dst, vecX src, immI8 zero) %{
  effect(DEF dst, USE src, USE zero);
  size(4);
  format %{ "XXSPLATD      $dst, $src, $zero \t// Splat doubleword" %}
  ins_encode %{
    VectorSRegister Xdst = $dst$$VectorRegister->to_vsr();
    VectorSRegister Xsrc = $src$$VectorRegister->to_vsr();
    __ xxpermdi(Xdst, Xsrc, Xsrc, $zero$$constant);
  %}
  ins_pipe(pipe_class_default);
%}
13753 
// Helper rule without a match pattern: xxpermdi of two vector inputs with an
// immediate selector. Used as a building block by expand rules.
instruct xxpermdi(vecX dst, vecX src1, vecX src2, immI8 zero) %{
  effect(DEF dst, USE src1, USE src2, USE zero);
  size(4);
  format %{ "XXPERMDI      $dst, $src1, $src2, $zero \t// Splat doubleword" %}
  ins_encode %{
    VectorSRegister Xdst = $dst$$VectorRegister->to_vsr();
    VectorSRegister Xa   = $src1$$VectorRegister->to_vsr();
    VectorSRegister Xb   = $src2$$VectorRegister->to_vsr();
    __ xxpermdi(Xdst, Xa, Xb, $zero$$constant);
  %}
  ins_pipe(pipe_class_default);
%}
13764 
// Replicates a long general purpose register into both lanes of a vecX.
// Expands into mtvsrd (GPR to a temporary vector register) followed by
// xxpermdi of that temporary with itself and selector 0.
//
// Both conditions are kept in a single predicate, as in the sibling repl2L
// rules, so that neither check can be lost to a second predicate statement.
instruct repl2L_reg_Ex(vecX dst, iRegLsrc src) %{
  match(Set dst (Replicate src));
  predicate(n->as_Vector()->length() == 2 &&
            Matcher::vector_element_basic_type(n) == T_LONG);
  expand %{
    vecX tmpV;
    immI8  zero %{ (int)  0 %}
    mtvsrd(tmpV, src);
    xxpermdi(dst, tmpV, tmpV, zero);
  %}
%}
13776 
// Replicates zero into both long lanes by XOR-ing dst with itself. The
// matched constant operand is declared as immI_0 and is not read by the
// encoding.
instruct repl2L_immI0(vecX dst, immI_0 zero) %{
  predicate(n->as_Vector()->length() == 2 &&
            Matcher::vector_element_basic_type(n) == T_LONG);
  match(Set dst (Replicate zero));
  size(4);
  format %{ "XXLXOR      $dst, $zero \t// replicate2L" %}
  ins_encode %{
    VectorSRegister Xdst = $dst$$VectorRegister->to_vsr();
    __ xxlxor(Xdst, Xdst, Xdst);
  %}
  ins_pipe(pipe_class_default);
%}
13789 
// Replicates -1 into both long lanes by applying xxleqv to dst with itself.
// The matched constant operand is not read by the encoding.
instruct repl2L_immIminus1(vecX dst, immI_minus1 src) %{
  predicate(n->as_Vector()->length() == 2 &&
            Matcher::vector_element_basic_type(n) == T_LONG);
  match(Set dst (Replicate src));
  size(4);
  format %{ "XXLEQV      $dst, $src \t// replicate2L" %}
  ins_encode %{
    VectorSRegister Xdst = $dst$$VectorRegister->to_vsr();
    __ xxleqv(Xdst, Xdst, Xdst);
  %}
  ins_pipe(pipe_class_default);
%}
13802 
13803 // ============================================================================
13804 // Safepoint Instruction
13805 
// Safepoint poll: a single load through the poll register, encoded by
// enc_poll with displacement 0x0.
instruct safePoint_poll(iRegPdst poll) %{
  match(SafePoint poll);

  // No KILL effect is declared for r0: adding one caused problems, and it is
  // not needed because r0 is not contained in a reg_class.

  size(4);
  format %{ "LD      R0, #0, $poll \t// Safepoint poll for GC" %}
  ins_encode( enc_poll(0x0, poll) );
  ins_pipe(pipe_class_default);
%}
13818 
13819 // ============================================================================
13820 // Call Instructions
13821 
13822 source %{
13823 
13824 #include "runtime/continuation.hpp"
13825 
13826 %}
13827 
// Call Java Static Instruction
//
// Static Java call encoded by enc_java_static_call. The declared size is
// 8 bytes when Continuations are enabled and 4 bytes otherwise.
instruct CallStaticJavaDirect(method meth) %{
  match(CallStaticJava);
  effect(USE meth);

  // Up to 3 patchable constants: inline cache, 2 call targets.
  ins_num_consts(3);
  ins_cost(CALL_COST);

  size((Continuations::enabled() ? 8 : 4));
  format %{ "CALL,static $meth \t// ==> " %}
  ins_encode( enc_java_static_call(meth) );
  ins_pipe(pipe_class_call);
%}
13842 
// Call Java Dynamic Instruction
//
// Dynamic Java call encoded by enc_java_dynamic_call, which also receives
// the constant table base. No fixed size is declared for this rule.
instruct CallDynamicJavaDirect(method meth) %{
  match(CallDynamicJava);
  effect(USE meth);

  // Up to 4 constants are reserved (the earlier note here attributes them
  // to the method data oop).
  ins_num_consts(4);
  ins_cost(CALL_COST);

  format %{ "CALL,dynamic $meth \t// ==> " %}
  ins_encode( enc_java_dynamic_call(meth, constanttablebase) );
  ins_pipe(pipe_class_call);
%}
13857 
// Call Runtime Instruction
//
// Runtime call encoded by enc_java_to_runtime_call.
instruct CallRuntimeDirect(method meth) %{
  match(CallRuntime);
  effect(USE meth);

  // enc_java_to_runtime_call needs up to 3 constants: call target,
  // env for callee, C-toc.
  ins_num_consts(3);
  ins_cost(CALL_COST);

  format %{ "CALL,runtime" %}
  ins_encode( enc_java_to_runtime_call(meth) );
  ins_pipe(pipe_class_call);
%}
13873 
13874 // Call Leaf
13875 
// First half of the postalloc expansion of CallLeafDirect_Ex: the mtctr
// step, encoded by enc_leaf_call_mtctr. It has no match rule of its own.
instruct CallLeafDirect_mtctr(iRegLdst dst, iRegLsrc src) %{
  effect(DEF dst, USE src);
  ins_num_consts(1);

  size(4);
  format %{ "MTCTR   $src" %}
  ins_encode( enc_leaf_call_mtctr(src) );
  ins_pipe(pipe_class_default);
%}
13887 
// Second half of the postalloc expansion of CallLeafDirect_Ex: the actual
// call, a bctrl followed by post_call_nop(). The declared size is 8 bytes
// when Continuations are enabled and 4 bytes otherwise.
instruct CallLeafDirect(method meth) %{
  // The match rule only provides the data fields this node needs ...
  match(CallLeaf);
  // ... the rule itself must never be selected by the matcher.
  predicate(false);
  effect(USE meth);

  size((Continuations::enabled() ? 8 : 4));
  format %{ "BCTRL     \t// leaf call $meth ==> " %}
  ins_encode %{
    __ bctrl();
    __ post_call_nop();
  %}
  ins_pipe(pipe_class_call);
%}
13902 
// Leaf runtime call (CallLeaf), realized as a postalloc expansion.
// The expansion postalloc_expand_java_to_runtime_call receives the method
// and the constant table base; per the notes in this file it loads the
// address to call from the TOC and then branches to it, using the
// CallLeafDirect_mtctr and CallLeafDirect helper rules.
instruct CallLeafDirect_Ex(method meth) %{
  match(CallLeaf);
  effect(USE meth);
  ins_cost(CALL_COST);

  // Postalloc_expand_java_to_runtime_call needs up to 3 constants: call target,
  // env for callee, C-toc.
  ins_num_consts(3);

  format %{ "CALL,runtime leaf $meth \t// postalloc expanded" %}
  postalloc_expand( postalloc_expand_java_to_runtime_call(meth, constanttablebase) );
%}
13917 
// Call runtime without safepoint (CallLeafNoFP) - handled the same way as
// CallLeaf: a postalloc expansion through
// postalloc_expand_java_to_runtime_call, which receives the method and the
// constant table base. Per the notes in this file the address to call is
// loaded from the TOC and then branched to.
instruct CallLeafNoFPDirect_Ex(method meth) %{
  match(CallLeafNoFP);
  effect(USE meth);
  ins_cost(CALL_COST);

  // The runtime call expansion needs up to 3 constants: call target,
  // env for callee, C-toc.
  ins_num_consts(3);

  format %{ "CALL,runtime leaf nofp $meth \t// postalloc expanded" %}
  postalloc_expand( postalloc_expand_java_to_runtime_call(meth, constanttablebase) );
%}
13933 
// Tail Call; jump from a runtime stub to Java code.
// Also known as an 'interprocedural jump'.
// The target of the jump will eventually return to the caller, so the
// return address is left in place (TailJump below removes it).
// method_ptr is pinned to the inline cache register and is not touched by
// the encoding; the jump goes through CTR.
instruct TailCalljmpInd(iRegPdstNoScratch jump_target, inline_cache_regP method_ptr) %{
  match(TailCall jump_target method_ptr);
  ins_cost(CALL_COST);

  size(8);
  format %{ "MTCTR   $jump_target \t// $method_ptr holds method\n\t"
            "BCTR         \t// tail call" %}
  ins_encode %{
    Register Rtarget = $jump_target$$Register;
    __ mtctr(Rtarget);
    __ bctr();
  %}
  ins_pipe(pipe_class_call);
%}
13951 
// Return Instruction
//
// Emits only the blr; LR has already been restored in MachEpilogNode.
instruct Ret() %{
  match(Return);
  size(4);
  format %{ "BLR      \t// branch to link register" %}
  ins_encode %{
    // Nothing to restore here, just return through the link register.
    __ blr();
  %}
  ins_pipe(pipe_class_default);
%}
13963 
// Tail Jump; remove the return address; jump to target.
// TailCall above leaves the return address around.
// TailJump is used in only one place, the rethrow_Java stub (fancy_jump=2).
// The exception oop is pinned to the rarg1RegP operand. The encoding loads
// the issuing pc from the lr slot of the ABI frame at R1_SP into R4_ARG2
// and then jumps through CTR.
// NOTE(review): earlier wording of this comment named the registers %o0 and
// %i0, which no operand or register in this rule uses; confirm
// that the description above covers what was meant.
instruct tailjmpInd(iRegPdstNoScratch jump_target, rarg1RegP ex_oop) %{
  match(TailJump jump_target ex_oop);
  ins_cost(CALL_COST);

  size(12);
  format %{ "LD      R4_ARG2 = LR\n\t"
            "MTCTR   $jump_target\n\t"
            "BCTR     \t// TailJump, exception oop: $ex_oop" %}
  ins_encode %{
    Register Rtarget = $jump_target$$Register;
    // R4_ARG2 receives the issuing pc.
    __ ld(R4_ARG2, _abi0(lr), R1_SP);
    __ mtctr(Rtarget);
    __ bctr();
  %}
  ins_pipe(pipe_class_call);
%}
13985 
// Forward exception: patchable 64-bit branch to the forward exception stub
// entry, emitted between set_inst_mark() and clear_inst_mark() with a
// runtime call relocation.
instruct ForwardExceptionjmp() %{
  match(ForwardException);
  ins_cost(CALL_COST);

  format %{ "JMP     forward_exception_stub" %}
  ins_encode %{
    address stub_entry = StubRoutines::forward_exception_entry();
    __ set_inst_mark();
    __ b64_patchable(stub_entry, relocInfo::runtime_call_type);
    __ clear_inst_mark();
  %}
  ins_pipe(pipe_class_call);
%}
14000 
// Create exception oop: the oop is produced by stack-crawling runtime code
// and is set up just prior to jumping to this handler, so it is already
// available in the rarg1RegP operand. No code is emitted (size 0, empty
// encoding, cost 0).
instruct CreateException(rarg1RegP ex_oop) %{
  match(Set ex_oop (CreateEx));
  size(0);
  ins_cost(0);

  format %{ " -- \t// exception oop; no code emitted" %}
  ins_encode( /*empty*/ );
  ins_pipe(pipe_class_default);
%}
14013 
// Rethrow exception: the exception oop arrives in the first argument
// position. This rule then JUMPs (it does not call) to the rethrow stub via
// a patchable 64-bit branch with a runtime call relocation.
instruct RethrowException() %{
  match(Rethrow);
  ins_cost(CALL_COST);

  format %{ "JMP     rethrow_stub" %}
  ins_encode %{
    address stub_entry = (address)OptoRuntime::rethrow_stub();
    __ set_inst_mark();
    __ b64_patchable(stub_entry, relocInfo::runtime_call_type);
    __ clear_inst_mark();
  %}
  ins_pipe(pipe_class_call);
%}
14028 
// Die now. Matches Halt; when the node is reachable, a stop with the halt
// reason text is emitted, otherwise nothing is generated.
instruct ShouldNotReachHere() %{
  match(Halt);
  ins_cost(CALL_COST);

  format %{ "ShouldNotReachHere" %}
  ins_encode %{
    if (is_reachable()) {
      __ stop(__ code_string(_halt_reason));
    }
  %}
  ins_pipe(pipe_class_default);
%}
14043 
// This name is KNOWN by the ADLC and cannot be changed.  The ADLC
// forces a 'TypeRawPtr::BOTTOM' output type for this guy.
// Get a DEF on threadRegP, no costs, no encoding, use
// 'ins_should_rematerialize(true)' to avoid spilling.
//
// Matches ThreadLocal and emits nothing: size is 0 and the encoding is
// empty, so the rule only defines dst in the thread register class.
instruct tlsLoadP(threadRegP dst) %{
  match(Set dst (ThreadLocal));
  ins_cost(0);

  // Rematerialize instead of spilling (see header comment).
  ins_should_rematerialize(true);

  format %{ " -- \t// $dst=Thread::current(), empty" %}
  size(0);
  ins_encode( /*empty*/ );
  ins_pipe(pipe_class_empty);
%}
14059 
14060 //---Some PPC specific nodes---------------------------------------------------
14061 
14062 // Nop instructions
14063 
// Nop rule without a match pattern: emits nop() and is flagged with
// ins_is_nop(true).
instruct fxNop() %{
  ins_cost(0);
  size(4);
  format %{ "fxNop" %}
  ins_encode %{
    __ nop();
  %}
  ins_is_nop(true);
  ins_pipe(pipe_class_default);
%}
14076 
// Nop rule without a match pattern: emits fpnop0() and is flagged with
// ins_is_nop(true).
instruct fpNop0() %{
  ins_cost(0);
  size(4);
  format %{ "fpNop0" %}
  ins_encode %{
    __ fpnop0();
  %}
  ins_is_nop(true);
  ins_pipe(pipe_class_default);
%}
14089 
// Nop rule without a match pattern: emits fpnop1() and is flagged with
// ins_is_nop(true).
instruct fpNop1() %{
  ins_cost(0);
  size(4);
  format %{ "fpNop1" %}
  ins_encode %{
    __ fpnop1();
  %}
  ins_is_nop(true);
  ins_pipe(pipe_class_default);
%}
14102 
// Nop rule without a match pattern: emits brnop0() and is flagged with
// ins_is_nop(true).
instruct brNop0() %{
  ins_cost(0);

  ins_is_nop(true);

  format %{ "brNop0" %}
  size(4);
  ins_encode %{
    __ brnop0();
  %}
  ins_pipe(pipe_class_default);
%}
14113 
// Nop rule without a match pattern: emits brnop1() and is flagged with
// ins_is_nop(true).
instruct brNop1() %{
  ins_cost(0);
  size(4);
  format %{ "brNop1" %}
  ins_encode %{
    __ brnop1();
  %}
  ins_is_nop(true);
  ins_pipe(pipe_class_default);
%}
14126 
// Nop rule without a match pattern: emits brnop2() and is flagged with
// ins_is_nop(true).
instruct brNop2() %{
  ins_cost(0);
  size(4);
  format %{ "brNop2" %}
  ins_encode %{
    __ brnop2();
  %}
  ins_is_nop(true);
  ins_pipe(pipe_class_default);
%}
14139 
// Cache line writeback (CacheWB) for a plain base-register address. The
// asserts document that the operand carries neither an index nor a
// displacement; cache_wb() then receives an Address built from the base.
instruct cacheWB(indirect addr) %{
  match(CacheWB addr);
  ins_cost(100);

  format %{ "cache writeback, address = $addr" %}
  ins_encode %{
    assert($addr->index_position() < 0, "should be");
    assert($addr$$disp == 0, "should be");
    Register Rbase = $addr$$base$$Register;
    __ cache_wb(Address(Rbase));
  %}
  ins_pipe(pipe_class_default);
%}
14153 
// Synchronization before a cache writeback (CacheWBPreSync): calls
// cache_wbsync() with is-presync set to true.
instruct cacheWBPreSync() %{
  match(CacheWBPreSync);
  ins_cost(0);

  format %{ "cache writeback presync" %}
  ins_encode %{
    const bool is_presync = true;
    __ cache_wbsync(is_presync);
  %}
  ins_pipe(pipe_class_default);
%}
14165 
// Matches the ideal CacheWBPostSync node and calls cache_wbsync(false).
instruct cacheWBPostSync()
%{
  match(CacheWBPostSync);

  format %{ "cache writeback postsync" %}
  ins_cost(100);
  ins_encode %{
    // 'false' is the argument used for the post-sync variant.
    __ cache_wbsync(false);
  %}
  ins_pipe(pipe_class_default);
%}
14177 
14178 //----------PEEPHOLE RULES-----------------------------------------------------
// These must follow all instruction definitions as they use the names
// defined in the instruction definitions.
14181 //
// peepmatch ( root_instr_name [preceding_instruction]* );
14183 //
// peepconstraint ( instruction_number.operand_name relational_op
//                  instruction_number.operand_name [, ...] );
14187 // // instruction numbers are zero-based using left to right order in peepmatch
14188 //
14189 // peepreplace ( instr_name ( [instruction_number.operand_name]* ) );
14190 // // provide an instruction_number.operand_name for each operand that appears
14191 // // in the replacement instruction's match rule
14192 //
14193 // ---------VM FLAGS---------------------------------------------------------
14194 //
14195 // All peephole optimizations can be turned off using -XX:-OptoPeephole
14196 //
14197 // Each peephole rule is given an identifying number starting with zero and
14198 // increasing by one in the order seen by the parser. An individual peephole
14199 // can be enabled, and all others disabled, by using -XX:OptoPeepholeAt=#
14200 // on the command-line.
14201 //
14202 // ---------CURRENT LIMITATIONS----------------------------------------------
14203 //
14204 // Only match adjacent instructions in same basic block
14205 // Only equality constraints
14206 // Only constraints between operands, not (0.dest_reg == EAX_enc)
14207 // Only one replacement instruction
14208 //
14209 // ---------EXAMPLE----------------------------------------------------------
14210 //
14211 // // pertinent parts of existing instructions in architecture description
14212 // instruct movI(eRegI dst, eRegI src) %{
14213 //   match(Set dst (CopyI src));
14214 // %}
14215 //
14216 // instruct incI_eReg(eRegI dst, immI1 src, eFlagsReg cr) %{
14217 //   match(Set dst (AddI dst src));
14218 //   effect(KILL cr);
14219 // %}
14220 //
14221 // // Change (inc mov) to lea
14222 // peephole %{
14223 //   // increment preceded by register-register move
14224 //   peepmatch ( incI_eReg movI );
14225 //   // require that the destination register of the increment
14226 //   // match the destination register of the move
14227 //   peepconstraint ( 0.dst == 1.dst );
14228 //   // construct a replacement instruction that sets
14229 //   // the destination to ( move's source register + one )
14230 //   peepreplace ( leaI_eReg_immI( 0.dst 1.src 0.src ) );
14231 // %}
14232 //
14233 // Implementation no longer uses movX instructions since
14234 // machine-independent system no longer uses CopyX nodes.
14235 //
14236 // peephole %{
14237 //   peepmatch ( incI_eReg movI );
14238 //   peepconstraint ( 0.dst == 1.dst );
14239 //   peepreplace ( leaI_eReg_immI( 0.dst 1.src 0.src ) );
14240 // %}
14241 //
14242 // peephole %{
14243 //   peepmatch ( decI_eReg movI );
14244 //   peepconstraint ( 0.dst == 1.dst );
14245 //   peepreplace ( leaI_eReg_immI( 0.dst 1.src 0.src ) );
14246 // %}
14247 //
14248 // peephole %{
14249 //   peepmatch ( addI_eReg_imm movI );
14250 //   peepconstraint ( 0.dst == 1.dst );
14251 //   peepreplace ( leaI_eReg_immI( 0.dst 1.src 0.src ) );
14252 // %}
14253 //
14254 // peephole %{
14255 //   peepmatch ( addP_eReg_imm movP );
14256 //   peepconstraint ( 0.dst == 1.dst );
14257 //   peepreplace ( leaP_eReg_immI( 0.dst 1.src 0.src ) );
14258 // %}
14259 
14260 // // Change load of spilled value to only a spill
14261 // instruct storeI(memory mem, eRegI src) %{
14262 //   match(Set mem (StoreI mem src));
14263 // %}
14264 //
14265 // instruct loadI(eRegI dst, memory mem) %{
14266 //   match(Set dst (LoadI mem));
14267 // %}
14268 //
// Peephole: a loadI (instruction 0, the root) directly preceded by a storeI
// (instruction 1) is replaced by the storeI alone. The constraint requires
// that the load's destination is the register that was just stored and that
// both instructions use the same memory operand.
peephole %{
  peepmatch ( loadI storeI );
  peepconstraint ( 1.src == 0.dst, 1.mem == 0.mem );
  peepreplace ( storeI( 1.mem 1.mem 1.src ) );
%}
14274 
// Peephole: a loadL (instruction 0, the root) directly preceded by a storeL
// (instruction 1) is replaced by the storeL alone. The constraint requires
// that the load's destination is the register that was just stored and that
// both instructions use the same memory operand.
peephole %{
  peepmatch ( loadL storeL );
  peepconstraint ( 1.src == 0.dst, 1.mem == 0.mem );
  peepreplace ( storeL( 1.mem 1.mem 1.src ) );
%}
14280 
// Peephole: a loadP (instruction 0, the root) directly preceded by a storeP
// (instruction 1) is replaced by the storeP alone. The constraint requires
// that the load's destination is the register that was just stored and that
// the load's memory operand equals the store's 'dst' operand.
// NOTE(review): the store operand is named 'dst' here, not 'mem' as in the
// loadI/loadL rules; presumably this follows storeP's operand names - verify
// against the storeP definition.
peephole %{
  peepmatch ( loadP storeP );
  peepconstraint ( 1.src == 0.dst, 1.dst == 0.mem );
  peepreplace ( storeP( 1.dst 1.dst 1.src ) );
%}
14286 
14287 //----------SMARTSPILL RULES---------------------------------------------------
// These must follow all instruction definitions as they use the names
// defined in the instruction definitions.