1 //
   2 // Copyright (c) 2003, 2019, Oracle and/or its affiliates. All rights reserved.
   3 // Copyright (c) 2014, 2019, Red Hat, Inc. All rights reserved.
   4 // DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   5 //
   6 // This code is free software; you can redistribute it and/or modify it
   7 // under the terms of the GNU General Public License version 2 only, as
   8 // published by the Free Software Foundation.
   9 //
  10 // This code is distributed in the hope that it will be useful, but WITHOUT
  11 // ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  12 // FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  13 // version 2 for more details (a copy is included in the LICENSE file that
  14 // accompanied this code).
  15 //
  16 // You should have received a copy of the GNU General Public License version
  17 // 2 along with this work; if not, write to the Free Software Foundation,
  18 // Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  19 //
  20 // Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  21 // or visit www.oracle.com if you need additional information or have any
  22 // questions.
  23 //
  24 //
  25 
  26 // AArch64 Architecture Description File
  27 
  28 //----------REGISTER DEFINITION BLOCK------------------------------------------
  29 // This information is used by the matcher and the register allocator to
  30 // describe individual registers and classes of registers within the target
// architecture.
  32 
  33 register %{
  34 //----------Architecture Description Register Definitions----------------------
  35 // General Registers
  36 // "reg_def"  name ( register save type, C convention save type,
  37 //                   ideal register type, encoding );
  38 // Register Save Types:
  39 //
  40 // NS  = No-Save:       The register allocator assumes that these registers
  41 //                      can be used without saving upon entry to the method, &
  42 //                      that they do not need to be saved at call sites.
  43 //
  44 // SOC = Save-On-Call:  The register allocator assumes that these registers
  45 //                      can be used without saving upon entry to the method,
  46 //                      but that they must be saved at call sites.
  47 //
  48 // SOE = Save-On-Entry: The register allocator assumes that these registers
  49 //                      must be saved before using them upon entry to the
  50 //                      method, but they do not need to be saved at call
  51 //                      sites.
  52 //
  53 // AS  = Always-Save:   The register allocator assumes that these registers
  54 //                      must be saved before using them upon entry to the
  55 //                      method, & that they must be saved at call sites.
  56 //
  57 // Ideal Register Type is used to determine how to save & restore a
  58 // register.  Op_RegI will get spilled with LoadI/StoreI, Op_RegP will get
  59 // spilled with LoadP/StoreP.  If the register supports both, use Op_RegI.
  60 //
  61 // The encoding number is the actual bit-pattern placed into the opcodes.
  62 
  63 // We must define the 64 bit int registers in two 32 bit halves, the
  64 // real lower register and a virtual upper half register. upper halves
  65 // are used by the register allocator but are not actually supplied as
  66 // operands to memory ops.
  67 //
  68 // follow the C1 compiler in making registers
  69 //
  70 //   r0-r7,r10-r26 volatile (caller save)
//   r27-r31 system (no save, no allocate)
  72 //   r8-r9 invisible to the allocator (so we can use them as scratch regs)
  73 //
// As regards Java usage, we don't use any callee save registers
  75 // because this makes it difficult to de-optimise a frame (see comment
  76 // in x86 implementation of Deoptimization::unwind_callee_save_values)
  77 //
  78 
  79 // General Registers
  80 
// Java argument/result registers r0-r7: volatile (SOC) in both the C
// convention and for Java. Each 64-bit register is described as a real
// low 32-bit half plus a virtual upper half (_H) so the allocator can
// track 64-bit values as adjacent pairs (see header note above).
reg_def R0      ( SOC, SOC, Op_RegI,  0, r0->as_VMReg()         );
reg_def R0_H    ( SOC, SOC, Op_RegI,  0, r0->as_VMReg()->next() );
reg_def R1      ( SOC, SOC, Op_RegI,  1, r1->as_VMReg()         );
reg_def R1_H    ( SOC, SOC, Op_RegI,  1, r1->as_VMReg()->next() );
reg_def R2      ( SOC, SOC, Op_RegI,  2, r2->as_VMReg()         );
reg_def R2_H    ( SOC, SOC, Op_RegI,  2, r2->as_VMReg()->next() );
reg_def R3      ( SOC, SOC, Op_RegI,  3, r3->as_VMReg()         );
reg_def R3_H    ( SOC, SOC, Op_RegI,  3, r3->as_VMReg()->next() );
reg_def R4      ( SOC, SOC, Op_RegI,  4, r4->as_VMReg()         );
reg_def R4_H    ( SOC, SOC, Op_RegI,  4, r4->as_VMReg()->next() );
reg_def R5      ( SOC, SOC, Op_RegI,  5, r5->as_VMReg()         );
reg_def R5_H    ( SOC, SOC, Op_RegI,  5, r5->as_VMReg()->next() );
reg_def R6      ( SOC, SOC, Op_RegI,  6, r6->as_VMReg()         );
reg_def R6_H    ( SOC, SOC, Op_RegI,  6, r6->as_VMReg()->next() );
reg_def R7      ( SOC, SOC, Op_RegI,  7, r7->as_VMReg()         );
reg_def R7_H    ( SOC, SOC, Op_RegI,  7, r7->as_VMReg()->next() );
// r8 and r9 have no reg_def on purpose: they stay invisible to the
// allocator so they can be used as scratch registers.
// Volatile temporaries r10-r18 (SOC in both conventions).
reg_def R10     ( SOC, SOC, Op_RegI, 10, r10->as_VMReg()        );
reg_def R10_H   ( SOC, SOC, Op_RegI, 10, r10->as_VMReg()->next());
reg_def R11     ( SOC, SOC, Op_RegI, 11, r11->as_VMReg()        );
reg_def R11_H   ( SOC, SOC, Op_RegI, 11, r11->as_VMReg()->next());
reg_def R12     ( SOC, SOC, Op_RegI, 12, r12->as_VMReg()        );
reg_def R12_H   ( SOC, SOC, Op_RegI, 12, r12->as_VMReg()->next());
reg_def R13     ( SOC, SOC, Op_RegI, 13, r13->as_VMReg()        );
reg_def R13_H   ( SOC, SOC, Op_RegI, 13, r13->as_VMReg()->next());
reg_def R14     ( SOC, SOC, Op_RegI, 14, r14->as_VMReg()        );
reg_def R14_H   ( SOC, SOC, Op_RegI, 14, r14->as_VMReg()->next());
reg_def R15     ( SOC, SOC, Op_RegI, 15, r15->as_VMReg()        );
reg_def R15_H   ( SOC, SOC, Op_RegI, 15, r15->as_VMReg()->next());
reg_def R16     ( SOC, SOC, Op_RegI, 16, r16->as_VMReg()        );
reg_def R16_H   ( SOC, SOC, Op_RegI, 16, r16->as_VMReg()->next());
reg_def R17     ( SOC, SOC, Op_RegI, 17, r17->as_VMReg()        );
reg_def R17_H   ( SOC, SOC, Op_RegI, 17, r17->as_VMReg()->next());
reg_def R18     ( SOC, SOC, Op_RegI, 18, r18->as_VMReg()        );
reg_def R18_H   ( SOC, SOC, Op_RegI, 18, r18->as_VMReg()->next());
// r19-r26: callee save (SOE) in the C convention but volatile (SOC)
// for Java, which keeps frames simple to deoptimize (see header note).
reg_def R19     ( SOC, SOE, Op_RegI, 19, r19->as_VMReg()        );
reg_def R19_H   ( SOC, SOE, Op_RegI, 19, r19->as_VMReg()->next());
reg_def R20     ( SOC, SOE, Op_RegI, 20, r20->as_VMReg()        ); // caller esp
reg_def R20_H   ( SOC, SOE, Op_RegI, 20, r20->as_VMReg()->next());
reg_def R21     ( SOC, SOE, Op_RegI, 21, r21->as_VMReg()        );
reg_def R21_H   ( SOC, SOE, Op_RegI, 21, r21->as_VMReg()->next());
reg_def R22     ( SOC, SOE, Op_RegI, 22, r22->as_VMReg()        );
reg_def R22_H   ( SOC, SOE, Op_RegI, 22, r22->as_VMReg()->next());
reg_def R23     ( SOC, SOE, Op_RegI, 23, r23->as_VMReg()        );
reg_def R23_H   ( SOC, SOE, Op_RegI, 23, r23->as_VMReg()->next());
reg_def R24     ( SOC, SOE, Op_RegI, 24, r24->as_VMReg()        );
reg_def R24_H   ( SOC, SOE, Op_RegI, 24, r24->as_VMReg()->next());
reg_def R25     ( SOC, SOE, Op_RegI, 25, r25->as_VMReg()        );
reg_def R25_H   ( SOC, SOE, Op_RegI, 25, r25->as_VMReg()->next());
reg_def R26     ( SOC, SOE, Op_RegI, 26, r26->as_VMReg()        );
reg_def R26_H   ( SOC, SOE, Op_RegI, 26, r26->as_VMReg()->next());
// Fixed-role system registers: never allocated for Java use (NS).
reg_def R27     (  NS, SOE, Op_RegI, 27, r27->as_VMReg()        ); // heapbase
reg_def R27_H   (  NS, SOE, Op_RegI, 27, r27->as_VMReg()->next());
reg_def R28     (  NS, SOE, Op_RegI, 28, r28->as_VMReg()        ); // thread
reg_def R28_H   (  NS, SOE, Op_RegI, 28, r28->as_VMReg()->next());
reg_def R29     (  NS,  NS, Op_RegI, 29, r29->as_VMReg()        ); // fp
reg_def R29_H   (  NS,  NS, Op_RegI, 29, r29->as_VMReg()->next());
reg_def R30     (  NS,  NS, Op_RegI, 30, r30->as_VMReg()        ); // lr
reg_def R30_H   (  NS,  NS, Op_RegI, 30, r30->as_VMReg()->next());
reg_def R31     (  NS,  NS, Op_RegI, 31, r31_sp->as_VMReg()     ); // sp
reg_def R31_H   (  NS,  NS, Op_RegI, 31, r31_sp->as_VMReg()->next());
 141 
 142 // ----------------------------
 143 // Float/Double Registers
 144 // ----------------------------
 145 
 146 // Double Registers
 147 
 148 // The rules of ADL require that double registers be defined in pairs.
 149 // Each pair must be two 32-bit values, but not necessarily a pair of
 150 // single float registers. In each pair, ADLC-assigned register numbers
 151 // must be adjacent, with the lower number even. Finally, when the
 152 // CPU stores such a register pair to memory, the word associated with
 153 // the lower ADLC-assigned number must be stored to the lower address.
 154 
// AArch64 has 32 floating-point registers. Each is 128 bits wide and
// can store a vector of single or double precision floating-point
// values: up to 4 * 32 bit floats or 2 * 64 bit doubles. We currently
// only use the first float or double element of the vector.
 159 
// For Java use, float registers v0-v15 are always save-on-call (even
// though the platform ABI treats v8-v15 as callee save). Float
// registers v16-v31 are SOC as per the platform spec.
 163 
  // FP/SIMD registers v0-v31. Each 128-bit register is described as
  // four 32-bit slices - Vn (low word), Vn_H, Vn_J and Vn_K - so the
  // allocator can model float, double and vector values. All slices
  // are SOC for Java use (see the note above this block).
  reg_def V0   ( SOC, SOC, Op_RegF,  0, v0->as_VMReg()          );
  reg_def V0_H ( SOC, SOC, Op_RegF,  0, v0->as_VMReg()->next()  );
  reg_def V0_J ( SOC, SOC, Op_RegF,  0, v0->as_VMReg()->next(2) );
  reg_def V0_K ( SOC, SOC, Op_RegF,  0, v0->as_VMReg()->next(3) );

  reg_def V1   ( SOC, SOC, Op_RegF,  1, v1->as_VMReg()          );
  reg_def V1_H ( SOC, SOC, Op_RegF,  1, v1->as_VMReg()->next()  );
  reg_def V1_J ( SOC, SOC, Op_RegF,  1, v1->as_VMReg()->next(2) );
  reg_def V1_K ( SOC, SOC, Op_RegF,  1, v1->as_VMReg()->next(3) );

  reg_def V2   ( SOC, SOC, Op_RegF,  2, v2->as_VMReg()          );
  reg_def V2_H ( SOC, SOC, Op_RegF,  2, v2->as_VMReg()->next()  );
  reg_def V2_J ( SOC, SOC, Op_RegF,  2, v2->as_VMReg()->next(2) );
  reg_def V2_K ( SOC, SOC, Op_RegF,  2, v2->as_VMReg()->next(3) );

  reg_def V3   ( SOC, SOC, Op_RegF,  3, v3->as_VMReg()          );
  reg_def V3_H ( SOC, SOC, Op_RegF,  3, v3->as_VMReg()->next()  );
  reg_def V3_J ( SOC, SOC, Op_RegF,  3, v3->as_VMReg()->next(2) );
  reg_def V3_K ( SOC, SOC, Op_RegF,  3, v3->as_VMReg()->next(3) );

  reg_def V4   ( SOC, SOC, Op_RegF,  4, v4->as_VMReg()          );
  reg_def V4_H ( SOC, SOC, Op_RegF,  4, v4->as_VMReg()->next()  );
  reg_def V4_J ( SOC, SOC, Op_RegF,  4, v4->as_VMReg()->next(2) );
  reg_def V4_K ( SOC, SOC, Op_RegF,  4, v4->as_VMReg()->next(3) );

  reg_def V5   ( SOC, SOC, Op_RegF,  5, v5->as_VMReg()          );
  reg_def V5_H ( SOC, SOC, Op_RegF,  5, v5->as_VMReg()->next()  );
  reg_def V5_J ( SOC, SOC, Op_RegF,  5, v5->as_VMReg()->next(2) );
  reg_def V5_K ( SOC, SOC, Op_RegF,  5, v5->as_VMReg()->next(3) );

  reg_def V6   ( SOC, SOC, Op_RegF,  6, v6->as_VMReg()          );
  reg_def V6_H ( SOC, SOC, Op_RegF,  6, v6->as_VMReg()->next()  );
  reg_def V6_J ( SOC, SOC, Op_RegF,  6, v6->as_VMReg()->next(2) );
  reg_def V6_K ( SOC, SOC, Op_RegF,  6, v6->as_VMReg()->next(3) );

  reg_def V7   ( SOC, SOC, Op_RegF,  7, v7->as_VMReg()          );
  reg_def V7_H ( SOC, SOC, Op_RegF,  7, v7->as_VMReg()->next()  );
  reg_def V7_J ( SOC, SOC, Op_RegF,  7, v7->as_VMReg()->next(2) );
  reg_def V7_K ( SOC, SOC, Op_RegF,  7, v7->as_VMReg()->next(3) );

  reg_def V8   ( SOC, SOC, Op_RegF,  8, v8->as_VMReg()          );
  reg_def V8_H ( SOC, SOC, Op_RegF,  8, v8->as_VMReg()->next()  );
  reg_def V8_J ( SOC, SOC, Op_RegF,  8, v8->as_VMReg()->next(2) );
  reg_def V8_K ( SOC, SOC, Op_RegF,  8, v8->as_VMReg()->next(3) );

  reg_def V9   ( SOC, SOC, Op_RegF,  9, v9->as_VMReg()          );
  reg_def V9_H ( SOC, SOC, Op_RegF,  9, v9->as_VMReg()->next()  );
  reg_def V9_J ( SOC, SOC, Op_RegF,  9, v9->as_VMReg()->next(2) );
  reg_def V9_K ( SOC, SOC, Op_RegF,  9, v9->as_VMReg()->next(3) );

  reg_def V10  ( SOC, SOC, Op_RegF, 10, v10->as_VMReg()         );
  reg_def V10_H( SOC, SOC, Op_RegF, 10, v10->as_VMReg()->next() );
  reg_def V10_J( SOC, SOC, Op_RegF, 10, v10->as_VMReg()->next(2));
  reg_def V10_K( SOC, SOC, Op_RegF, 10, v10->as_VMReg()->next(3));

  reg_def V11  ( SOC, SOC, Op_RegF, 11, v11->as_VMReg()         );
  reg_def V11_H( SOC, SOC, Op_RegF, 11, v11->as_VMReg()->next() );
  reg_def V11_J( SOC, SOC, Op_RegF, 11, v11->as_VMReg()->next(2));
  reg_def V11_K( SOC, SOC, Op_RegF, 11, v11->as_VMReg()->next(3));

  reg_def V12  ( SOC, SOC, Op_RegF, 12, v12->as_VMReg()         );
  reg_def V12_H( SOC, SOC, Op_RegF, 12, v12->as_VMReg()->next() );
  reg_def V12_J( SOC, SOC, Op_RegF, 12, v12->as_VMReg()->next(2));
  reg_def V12_K( SOC, SOC, Op_RegF, 12, v12->as_VMReg()->next(3));

  reg_def V13  ( SOC, SOC, Op_RegF, 13, v13->as_VMReg()         );
  reg_def V13_H( SOC, SOC, Op_RegF, 13, v13->as_VMReg()->next() );
  reg_def V13_J( SOC, SOC, Op_RegF, 13, v13->as_VMReg()->next(2));
  reg_def V13_K( SOC, SOC, Op_RegF, 13, v13->as_VMReg()->next(3));

  reg_def V14  ( SOC, SOC, Op_RegF, 14, v14->as_VMReg()         );
  reg_def V14_H( SOC, SOC, Op_RegF, 14, v14->as_VMReg()->next() );
  reg_def V14_J( SOC, SOC, Op_RegF, 14, v14->as_VMReg()->next(2));
  reg_def V14_K( SOC, SOC, Op_RegF, 14, v14->as_VMReg()->next(3));

  reg_def V15  ( SOC, SOC, Op_RegF, 15, v15->as_VMReg()         );
  reg_def V15_H( SOC, SOC, Op_RegF, 15, v15->as_VMReg()->next() );
  reg_def V15_J( SOC, SOC, Op_RegF, 15, v15->as_VMReg()->next(2));
  reg_def V15_K( SOC, SOC, Op_RegF, 15, v15->as_VMReg()->next(3));

  reg_def V16  ( SOC, SOC, Op_RegF, 16, v16->as_VMReg()         );
  reg_def V16_H( SOC, SOC, Op_RegF, 16, v16->as_VMReg()->next() );
  reg_def V16_J( SOC, SOC, Op_RegF, 16, v16->as_VMReg()->next(2));
  reg_def V16_K( SOC, SOC, Op_RegF, 16, v16->as_VMReg()->next(3));

  reg_def V17  ( SOC, SOC, Op_RegF, 17, v17->as_VMReg()         );
  reg_def V17_H( SOC, SOC, Op_RegF, 17, v17->as_VMReg()->next() );
  reg_def V17_J( SOC, SOC, Op_RegF, 17, v17->as_VMReg()->next(2));
  reg_def V17_K( SOC, SOC, Op_RegF, 17, v17->as_VMReg()->next(3));

  reg_def V18  ( SOC, SOC, Op_RegF, 18, v18->as_VMReg()         );
  reg_def V18_H( SOC, SOC, Op_RegF, 18, v18->as_VMReg()->next() );
  reg_def V18_J( SOC, SOC, Op_RegF, 18, v18->as_VMReg()->next(2));
  reg_def V18_K( SOC, SOC, Op_RegF, 18, v18->as_VMReg()->next(3));

  reg_def V19  ( SOC, SOC, Op_RegF, 19, v19->as_VMReg()         );
  reg_def V19_H( SOC, SOC, Op_RegF, 19, v19->as_VMReg()->next() );
  reg_def V19_J( SOC, SOC, Op_RegF, 19, v19->as_VMReg()->next(2));
  reg_def V19_K( SOC, SOC, Op_RegF, 19, v19->as_VMReg()->next(3));

  reg_def V20  ( SOC, SOC, Op_RegF, 20, v20->as_VMReg()         );
  reg_def V20_H( SOC, SOC, Op_RegF, 20, v20->as_VMReg()->next() );
  reg_def V20_J( SOC, SOC, Op_RegF, 20, v20->as_VMReg()->next(2));
  reg_def V20_K( SOC, SOC, Op_RegF, 20, v20->as_VMReg()->next(3));

  reg_def V21  ( SOC, SOC, Op_RegF, 21, v21->as_VMReg()         );
  reg_def V21_H( SOC, SOC, Op_RegF, 21, v21->as_VMReg()->next() );
  reg_def V21_J( SOC, SOC, Op_RegF, 21, v21->as_VMReg()->next(2));
  reg_def V21_K( SOC, SOC, Op_RegF, 21, v21->as_VMReg()->next(3));

  reg_def V22  ( SOC, SOC, Op_RegF, 22, v22->as_VMReg()         );
  reg_def V22_H( SOC, SOC, Op_RegF, 22, v22->as_VMReg()->next() );
  reg_def V22_J( SOC, SOC, Op_RegF, 22, v22->as_VMReg()->next(2));
  reg_def V22_K( SOC, SOC, Op_RegF, 22, v22->as_VMReg()->next(3));

  reg_def V23  ( SOC, SOC, Op_RegF, 23, v23->as_VMReg()         );
  reg_def V23_H( SOC, SOC, Op_RegF, 23, v23->as_VMReg()->next() );
  reg_def V23_J( SOC, SOC, Op_RegF, 23, v23->as_VMReg()->next(2));
  reg_def V23_K( SOC, SOC, Op_RegF, 23, v23->as_VMReg()->next(3));

  reg_def V24  ( SOC, SOC, Op_RegF, 24, v24->as_VMReg()         );
  reg_def V24_H( SOC, SOC, Op_RegF, 24, v24->as_VMReg()->next() );
  reg_def V24_J( SOC, SOC, Op_RegF, 24, v24->as_VMReg()->next(2));
  reg_def V24_K( SOC, SOC, Op_RegF, 24, v24->as_VMReg()->next(3));

  reg_def V25  ( SOC, SOC, Op_RegF, 25, v25->as_VMReg()         );
  reg_def V25_H( SOC, SOC, Op_RegF, 25, v25->as_VMReg()->next() );
  reg_def V25_J( SOC, SOC, Op_RegF, 25, v25->as_VMReg()->next(2));
  reg_def V25_K( SOC, SOC, Op_RegF, 25, v25->as_VMReg()->next(3));

  reg_def V26  ( SOC, SOC, Op_RegF, 26, v26->as_VMReg()         );
  reg_def V26_H( SOC, SOC, Op_RegF, 26, v26->as_VMReg()->next() );
  reg_def V26_J( SOC, SOC, Op_RegF, 26, v26->as_VMReg()->next(2));
  reg_def V26_K( SOC, SOC, Op_RegF, 26, v26->as_VMReg()->next(3));

  reg_def V27  ( SOC, SOC, Op_RegF, 27, v27->as_VMReg()         );
  reg_def V27_H( SOC, SOC, Op_RegF, 27, v27->as_VMReg()->next() );
  reg_def V27_J( SOC, SOC, Op_RegF, 27, v27->as_VMReg()->next(2));
  reg_def V27_K( SOC, SOC, Op_RegF, 27, v27->as_VMReg()->next(3));

  reg_def V28  ( SOC, SOC, Op_RegF, 28, v28->as_VMReg()         );
  reg_def V28_H( SOC, SOC, Op_RegF, 28, v28->as_VMReg()->next() );
  reg_def V28_J( SOC, SOC, Op_RegF, 28, v28->as_VMReg()->next(2));
  reg_def V28_K( SOC, SOC, Op_RegF, 28, v28->as_VMReg()->next(3));

  reg_def V29  ( SOC, SOC, Op_RegF, 29, v29->as_VMReg()         );
  reg_def V29_H( SOC, SOC, Op_RegF, 29, v29->as_VMReg()->next() );
  reg_def V29_J( SOC, SOC, Op_RegF, 29, v29->as_VMReg()->next(2));
  reg_def V29_K( SOC, SOC, Op_RegF, 29, v29->as_VMReg()->next(3));

  reg_def V30  ( SOC, SOC, Op_RegF, 30, v30->as_VMReg()         );
  reg_def V30_H( SOC, SOC, Op_RegF, 30, v30->as_VMReg()->next() );
  reg_def V30_J( SOC, SOC, Op_RegF, 30, v30->as_VMReg()->next(2));
  reg_def V30_K( SOC, SOC, Op_RegF, 30, v30->as_VMReg()->next(3));

  reg_def V31  ( SOC, SOC, Op_RegF, 31, v31->as_VMReg()         );
  reg_def V31_H( SOC, SOC, Op_RegF, 31, v31->as_VMReg()->next() );
  reg_def V31_J( SOC, SOC, Op_RegF, 31, v31->as_VMReg()->next(2));
  reg_def V31_K( SOC, SOC, Op_RegF, 31, v31->as_VMReg()->next(3));
 323 
 324 // ----------------------------
 325 // Special Registers
 326 // ----------------------------
 327 
// the AArch64 CPSR status flag register is not directly accessible as
// an instruction operand. the FPSR status flag register is a system
// register which can be written/read using MSR/MRS but again does not
// appear as an operand (a code identifying the FPSR occurs as an
// immediate value in the instruction).
 333 
 334 reg_def RFLAGS(SOC, SOC, 0, 32, VMRegImpl::Bad());
 335 
 336 
 337 // Specify priority of register selection within phases of register
 338 // allocation.  Highest priority is first.  A useful heuristic is to
 339 // give registers a low priority when they are required by machine
 340 // instructions, like EAX and EDX on I486, and choose no-save registers
 341 // before save-on-call, & save-on-call before save-on-entry.  Registers
 342 // which participate in fixed calling sequences should come last.
 343 // Registers which are used as pairs must fall on an even boundary.
 344 
// Allocation priority for the integer registers: scratch temporaries
// first, then the argument registers, then the C-ABI callee saves.
// The fixed system registers come last: they are listed so the
// allocator knows about them but are defined non-allocatable (NS).
alloc_class chunk0(
    // volatiles
    R10, R10_H,
    R11, R11_H,
    R12, R12_H,
    R13, R13_H,
    R14, R14_H,
    R15, R15_H,
    R16, R16_H,
    R17, R17_H,
    R18, R18_H,

    // arg registers
    R0, R0_H,
    R1, R1_H,
    R2, R2_H,
    R3, R3_H,
    R4, R4_H,
    R5, R5_H,
    R6, R6_H,
    R7, R7_H,

    // non-volatiles
    R19, R19_H,
    R20, R20_H,
    R21, R21_H,
    R22, R22_H,
    R23, R23_H,
    R24, R24_H,
    R25, R25_H,
    R26, R26_H,

    // non-allocatable registers

    R27, R27_H, // heapbase
    R28, R28_H, // thread
    R29, R29_H, // fp
    R30, R30_H, // lr
    R31, R31_H, // sp
);
 385 
// Allocation priority for the FP/SIMD registers: v16-v31 first (SOC
// per the platform ABI), then the argument registers v0-v7, and the
// ABI callee saves v8-v15 last.
alloc_class chunk1(

    // no save
    V16, V16_H, V16_J, V16_K,
    V17, V17_H, V17_J, V17_K,
    V18, V18_H, V18_J, V18_K,
    V19, V19_H, V19_J, V19_K,
    V20, V20_H, V20_J, V20_K,
    V21, V21_H, V21_J, V21_K,
    V22, V22_H, V22_J, V22_K,
    V23, V23_H, V23_J, V23_K,
    V24, V24_H, V24_J, V24_K,
    V25, V25_H, V25_J, V25_K,
    V26, V26_H, V26_J, V26_K,
    V27, V27_H, V27_J, V27_K,
    V28, V28_H, V28_J, V28_K,
    V29, V29_H, V29_J, V29_K,
    V30, V30_H, V30_J, V30_K,
    V31, V31_H, V31_J, V31_K,

    // arg registers
    V0, V0_H, V0_J, V0_K,
    V1, V1_H, V1_J, V1_K,
    V2, V2_H, V2_J, V2_K,
    V3, V3_H, V3_J, V3_K,
    V4, V4_H, V4_J, V4_K,
    V5, V5_H, V5_J, V5_K,
    V6, V6_H, V6_J, V6_K,
    V7, V7_H, V7_J, V7_K,

    // non-volatiles
    V8, V8_H, V8_J, V8_K,
    V9, V9_H, V9_J, V9_K,
    V10, V10_H, V10_J, V10_K,
    V11, V11_H, V11_J, V11_K,
    V12, V12_H, V12_J, V12_K,
    V13, V13_H, V13_J, V13_K,
    V14, V14_H, V14_J, V14_K,
    V15, V15_H, V15_J, V15_K,
);
 426 
 427 alloc_class chunk2(RFLAGS);
 428 
 429 //----------Architecture Description Register Classes--------------------------
 430 // Several register classes are automatically defined based upon information in
 431 // this architecture description.
// 1) reg_class inline_cache_reg           ( /* as def'd in frame section */ )
// 2) reg_class compiler_method_oop_reg    ( /* as def'd in frame section */ )
// 3) reg_class interpreter_method_oop_reg ( /* as def'd in frame section */ )
// 4) reg_class stack_slots( /* one chunk of stack-based "registers" */ )
 436 //
 437 
 438 // Class for all 32 bit integer registers -- excludes SP which will
 439 // never be used as an integer register
reg_class any_reg32(
    R0,
    R1,
    R2,
    R3,
    R4,
    R5,
    R6,
    R7,
    R10,
    R11,
    R12,
    R13,
    R14,
    R15,
    R16,
    R17,
    R18,
    R19,
    R20,
    R21,
    R22,
    R23,
    R24,
    R25,
    R26,
    R27,
    R28,
    R29,
    R30
    // R31 (sp) intentionally not listed - see the class comment above
);
 471 
// Singleton classes pin an operand to one specific register, for
// instructions or calling sequences that require a fixed register.
// Singleton class for R0 int register
reg_class int_r0_reg(R0);

// Singleton class for R2 int register
reg_class int_r2_reg(R2);

// Singleton class for R3 int register
reg_class int_r3_reg(R3);

// Singleton class for R4 int register
reg_class int_r4_reg(R4);
 483 
// Class for all long integer registers (including SP)
// Every defined 64-bit integer register pair, r0-r31 (sp included).
reg_class any_reg(
    R0, R0_H,
    R1, R1_H,
    R2, R2_H,
    R3, R3_H,
    R4, R4_H,
    R5, R5_H,
    R6, R6_H,
    R7, R7_H,
    R10, R10_H,
    R11, R11_H,
    R12, R12_H,
    R13, R13_H,
    R14, R14_H,
    R15, R15_H,
    R16, R16_H,
    R17, R17_H,
    R18, R18_H,
    R19, R19_H,
    R20, R20_H,
    R21, R21_H,
    R22, R22_H,
    R23, R23_H,
    R24, R24_H,
    R25, R25_H,
    R26, R26_H,
    R27, R27_H,
    R28, R28_H,
    R29, R29_H,
    R30, R30_H,
    R31, R31_H
);
 517 
 518 // Class for all non-special integer registers
// Excludes the fixed-role registers r27-r31. One of the two variants
// combined by reg_class_dynamic no_special_reg32 below; this one also
// keeps r29 (fp) out of the allocatable set.
reg_class no_special_reg32_no_fp(
    R0,
    R1,
    R2,
    R3,
    R4,
    R5,
    R6,
    R7,
    R10,
    R11,
    R12,                        // rmethod
    R13,
    R14,
    R15,
    R16,
    R17,
    R18,
    R19,
    R20,
    R21,
    R22,
    R23,
    R24,
    R25,
    R26
 /* R27, */                     // heapbase
 /* R28, */                     // thread
 /* R29, */                     // fp
 /* R30, */                     // lr
 /* R31 */                      // sp
);
 551 
// Same as no_special_reg32_no_fp except r29 (fp) is allocatable.
reg_class no_special_reg32_with_fp(
    R0,
    R1,
    R2,
    R3,
    R4,
    R5,
    R6,
    R7,
    R10,
    R11,
    R12,                        // rmethod
    R13,
    R14,
    R15,
    R16,
    R17,
    R18,
    R19,
    R20,
    R21,
    R22,
    R23,
    R24,
    R25,
    R26
 /* R27, */                     // heapbase
 /* R28, */                     // thread
    R29,                        // fp
 /* R30, */                     // lr
 /* R31 */                      // sp
);
 584 
 585 reg_class_dynamic no_special_reg32(no_special_reg32_no_fp, no_special_reg32_with_fp, %{ PreserveFramePointer %});
 586 
 587 // Class for all non-special long integer registers
// 64-bit (pair) variant of no_special_reg32_no_fp: excludes r27-r31.
reg_class no_special_reg_no_fp(
    R0, R0_H,
    R1, R1_H,
    R2, R2_H,
    R3, R3_H,
    R4, R4_H,
    R5, R5_H,
    R6, R6_H,
    R7, R7_H,
    R10, R10_H,
    R11, R11_H,
    R12, R12_H,                 // rmethod
    R13, R13_H,
    R14, R14_H,
    R15, R15_H,
    R16, R16_H,
    R17, R17_H,
    R18, R18_H,
    R19, R19_H,
    R20, R20_H,
    R21, R21_H,
    R22, R22_H,
    R23, R23_H,
    R24, R24_H,
    R25, R25_H,
    R26, R26_H,
 /* R27, R27_H, */              // heapbase
 /* R28, R28_H, */              // thread
 /* R29, R29_H, */              // fp
 /* R30, R30_H, */              // lr
 /* R31, R31_H */               // sp
);
 620 
// 64-bit (pair) variant of no_special_reg32_with_fp: r29 allocatable.
reg_class no_special_reg_with_fp(
    R0, R0_H,
    R1, R1_H,
    R2, R2_H,
    R3, R3_H,
    R4, R4_H,
    R5, R5_H,
    R6, R6_H,
    R7, R7_H,
    R10, R10_H,
    R11, R11_H,
    R12, R12_H,                 // rmethod
    R13, R13_H,
    R14, R14_H,
    R15, R15_H,
    R16, R16_H,
    R17, R17_H,
    R18, R18_H,
    R19, R19_H,
    R20, R20_H,
    R21, R21_H,
    R22, R22_H,
    R23, R23_H,
    R24, R24_H,
    R25, R25_H,
    R26, R26_H,
 /* R27, R27_H, */              // heapbase
 /* R28, R28_H, */              // thread
    R29, R29_H,                 // fp
 /* R30, R30_H, */              // lr
 /* R31, R31_H */               // sp
);
 653 
 654 reg_class_dynamic no_special_reg(no_special_reg_no_fp, no_special_reg_with_fp, %{ PreserveFramePointer %});
 655 
// Class for 64 bit register r0
reg_class r0_reg(
    R0, R0_H
);

// Class for 64 bit register r1
reg_class r1_reg(
    R1, R1_H
);

// Class for 64 bit register r2
reg_class r2_reg(
    R2, R2_H
);

// Class for 64 bit register r3
reg_class r3_reg(
    R3, R3_H
);

// Class for 64 bit register r4
reg_class r4_reg(
    R4, R4_H
);

// Class for 64 bit register r5
reg_class r5_reg(
    R5, R5_H
);

// Class for 64 bit register r10
reg_class r10_reg(
    R10, R10_H
);

// Class for 64 bit register r11
reg_class r11_reg(
    R11, R11_H
);

// Class for method register (r12 = rmethod; see no_special_* above)
reg_class method_reg(
    R12, R12_H
);

// Class for heapbase register (r27)
reg_class heapbase_reg(
    R27, R27_H
);

// Class for thread register (r28)
reg_class thread_reg(
    R28, R28_H
);

// Class for frame pointer register (r29)
reg_class fp_reg(
    R29, R29_H
);

// Class for link register (r30)
reg_class lr_reg(
    R30, R30_H
);

// Class for long sp register (r31)
reg_class sp_reg(
  R31, R31_H
);
 725 
// Class for all pointer registers (the full set r0-r31, including sp
// and the fixed-role registers).
reg_class ptr_reg(
    R0, R0_H,
    R1, R1_H,
    R2, R2_H,
    R3, R3_H,
    R4, R4_H,
    R5, R5_H,
    R6, R6_H,
    R7, R7_H,
    R10, R10_H,
    R11, R11_H,
    R12, R12_H,
    R13, R13_H,
    R14, R14_H,
    R15, R15_H,
    R16, R16_H,
    R17, R17_H,
    R18, R18_H,
    R19, R19_H,
    R20, R20_H,
    R21, R21_H,
    R22, R22_H,
    R23, R23_H,
    R24, R24_H,
    R25, R25_H,
    R26, R26_H,
    R27, R27_H,
    R28, R28_H,
    R29, R29_H,
    R30, R30_H,
    R31, R31_H
);
 759 
// Class for all non_special pointer registers (ptr_reg minus the
// fixed-role registers r27-r31).
reg_class no_special_ptr_reg(
    R0, R0_H,
    R1, R1_H,
    R2, R2_H,
    R3, R3_H,
    R4, R4_H,
    R5, R5_H,
    R6, R6_H,
    R7, R7_H,
    R10, R10_H,
    R11, R11_H,
    R12, R12_H,
    R13, R13_H,
    R14, R14_H,
    R15, R15_H,
    R16, R16_H,
    R17, R17_H,
    R18, R18_H,
    R19, R19_H,
    R20, R20_H,
    R21, R21_H,
    R22, R22_H,
    R23, R23_H,
    R24, R24_H,
    R25, R25_H,
    R26, R26_H,
 /* R27, R27_H, */              // heapbase
 /* R28, R28_H, */              // thread
 /* R29, R29_H, */              // fp
 /* R30, R30_H, */              // lr
 /* R31, R31_H */               // sp
);
 793 
// Class for all float registers (only the low 32-bit slice Vn of each
// FP/SIMD register is needed for a single-precision value).
reg_class float_reg(
    V0,
    V1,
    V2,
    V3,
    V4,
    V5,
    V6,
    V7,
    V8,
    V9,
    V10,
    V11,
    V12,
    V13,
    V14,
    V15,
    V16,
    V17,
    V18,
    V19,
    V20,
    V21,
    V22,
    V23,
    V24,
    V25,
    V26,
    V27,
    V28,
    V29,
    V30,
    V31
);
 829 
// Double precision float registers have virtual `high halves' that
// are needed by the allocator.
// Class for all double registers (each as the Vn/Vn_H slice pair).
reg_class double_reg(
    V0, V0_H,
    V1, V1_H,
    V2, V2_H,
    V3, V3_H,
    V4, V4_H,
    V5, V5_H,
    V6, V6_H,
    V7, V7_H,
    V8, V8_H,
    V9, V9_H,
    V10, V10_H,
    V11, V11_H,
    V12, V12_H,
    V13, V13_H,
    V14, V14_H,
    V15, V15_H,
    V16, V16_H,
    V17, V17_H,
    V18, V18_H,
    V19, V19_H,
    V20, V20_H,
    V21, V21_H,
    V22, V22_H,
    V23, V23_H,
    V24, V24_H,
    V25, V25_H,
    V26, V26_H,
    V27, V27_H,
    V28, V28_H,
    V29, V29_H,
    V30, V30_H,
    V31, V31_H
);
 867 
// Class for all 64bit vector registers (two 32-bit slices per
// register, i.e. the same slice pairs as double_reg).
reg_class vectord_reg(
    V0, V0_H,
    V1, V1_H,
    V2, V2_H,
    V3, V3_H,
    V4, V4_H,
    V5, V5_H,
    V6, V6_H,
    V7, V7_H,
    V8, V8_H,
    V9, V9_H,
    V10, V10_H,
    V11, V11_H,
    V12, V12_H,
    V13, V13_H,
    V14, V14_H,
    V15, V15_H,
    V16, V16_H,
    V17, V17_H,
    V18, V18_H,
    V19, V19_H,
    V20, V20_H,
    V21, V21_H,
    V22, V22_H,
    V23, V23_H,
    V24, V24_H,
    V25, V25_H,
    V26, V26_H,
    V27, V27_H,
    V28, V28_H,
    V29, V29_H,
    V30, V30_H,
    V31, V31_H
);
 903 
// Class for all 128bit vector registers
// each register contributes four allocator slots: Vn, Vn_H, Vn_J, Vn_K.
reg_class vectorx_reg(
    V0, V0_H, V0_J, V0_K,
    V1, V1_H, V1_J, V1_K,
    V2, V2_H, V2_J, V2_K,
    V3, V3_H, V3_J, V3_K,
    V4, V4_H, V4_J, V4_K,
    V5, V5_H, V5_J, V5_K,
    V6, V6_H, V6_J, V6_K,
    V7, V7_H, V7_J, V7_K,
    V8, V8_H, V8_J, V8_K,
    V9, V9_H, V9_J, V9_K,
    V10, V10_H, V10_J, V10_K,
    V11, V11_H, V11_J, V11_K,
    V12, V12_H, V12_J, V12_K,
    V13, V13_H, V13_J, V13_K,
    V14, V14_H, V14_J, V14_K,
    V15, V15_H, V15_J, V15_K,
    V16, V16_H, V16_J, V16_K,
    V17, V17_H, V17_J, V17_K,
    V18, V18_H, V18_J, V18_K,
    V19, V19_H, V19_J, V19_K,
    V20, V20_H, V20_J, V20_K,
    V21, V21_H, V21_J, V21_K,
    V22, V22_H, V22_J, V22_K,
    V23, V23_H, V23_J, V23_K,
    V24, V24_H, V24_J, V24_K,
    V25, V25_H, V25_J, V25_K,
    V26, V26_H, V26_J, V26_K,
    V27, V27_H, V27_J, V27_K,
    V28, V28_H, V28_J, V28_K,
    V29, V29_H, V29_J, V29_K,
    V30, V30_H, V30_J, V30_K,
    V31, V31_H, V31_J, V31_K
);
 939 
// Class for 128 bit register v0
// NOTE(review): only the Vn/Vn_H slot pair is listed here, unlike
// vectorx_reg above which also lists the _J/_K slots -- confirm this
// is how the allocator expects these singleton classes to be sized.
reg_class v0_reg(
    V0, V0_H
);

// Class for 128 bit register v1
reg_class v1_reg(
    V1, V1_H
);

// Class for 128 bit register v2
reg_class v2_reg(
    V2, V2_H
);

// Class for 128 bit register v3
reg_class v3_reg(
    V3, V3_H
);

// Singleton class for condition codes
reg_class int_flags(RFLAGS);
 962 
 963 %}
 964 
 965 //----------DEFINITION BLOCK---------------------------------------------------
 966 // Define name --> value mappings to inform the ADLC of an integer valued name
 967 // Current support includes integer values in the range [0, 0x7FFFFFFF]
 968 // Format:
 969 //        int_def  <name>         ( <int_value>, <expression>);
 970 // Generated Code in ad_<arch>.hpp
 971 //        #define  <name>   (<expression>)
 972 //        // value == <int_value>
 973 // Generated code in ad_<arch>.cpp adlc_verification()
 974 //        assert( <name> == <int_value>, "Expect (<expression>) to equal <int_value>");
 975 //
 976 
 977 // we follow the ppc-aix port in using a simple cost model which ranks
 978 // register operations as cheap, memory ops as more expensive and
 979 // branches as most expensive. the first two have a low as well as a
 980 // normal cost. huge cost appears to be a way of saying don't do
 981 // something
 982 
definitions %{
  // The default cost (of a register move instruction).
  int_def INSN_COST            (    100,     100);
  // Branches and calls rank twice as expensive as a register op.
  int_def BRANCH_COST          (    200,     2 * INSN_COST);
  int_def CALL_COST            (    200,     2 * INSN_COST);
  // Memory ops with ordering semantics are costed at 10 register ops.
  int_def VOLATILE_REF_COST    (   1000,     10 * INSN_COST);
%}
 990 
 991 
 992 //----------SOURCE BLOCK-------------------------------------------------------
 993 // This is a block of C++ code which provides values, functions, and
 994 // definitions necessary in the rest of the architecture description
 995 
 996 source_hpp %{
 997 
 998 #include "asm/macroAssembler.hpp"
 999 #include "gc/shared/cardTable.hpp"
1000 #include "gc/shared/cardTableBarrierSet.hpp"
1001 #include "gc/shared/collectedHeap.hpp"
1002 #include "opto/addnode.hpp"
1003 
class CallStubImpl {

  //--------------------------------------------------------------
  //---<  Used for optimization in Compile::shorten_branches  >---
  //--------------------------------------------------------------

  // This platform does not use call trampoline stubs, so both
  // size and relocation queries report zero.

 public:
  // Size of call trampoline stub.
  static uint size_call_trampoline() {
    return 0; // no call trampolines on this platform
  }

  // number of relocations needed by a call trampoline stub
  static uint reloc_call_trampoline() {
    return 0; // no call trampolines on this platform
  }
};
1021 
class HandlerImpl {

 public:

  // emitters for the exception and deopt handler stubs; bodies are
  // provided elsewhere in this file's source block
  static int emit_exception_handler(CodeBuffer &cbuf);
  static int emit_deopt_handler(CodeBuffer& cbuf);

  // worst-case size of the exception handler: one far branch
  static uint size_exception_handler() {
    return MacroAssembler::far_branch_size();
  }

  static uint size_deopt_handler() {
    // count one adr and one far branch instruction
    // NOTE(review): four instruction slots are reserved here --
    // presumably adr (1) plus a far branch of up to 3 instructions;
    // confirm against MacroAssembler::far_branch_size().
    return 4 * NativeInstruction::instruction_size;
  }
};
1038 
  // graph traversal helpers
  //
  // forward declarations for the volatile put/get and CAS matching
  // helpers; the definitions (and detailed commentary) follow in the
  // source block below.

  MemBarNode *parent_membar(const Node *n);
  MemBarNode *child_membar(const MemBarNode *n);
  bool leading_membar(const MemBarNode *barrier);

  bool is_card_mark_membar(const MemBarNode *barrier);
  bool is_CAS(int opcode);

  // walkers linking the leading/card-mark/trailing membars of a
  // volatile put or CAS subgraph to one another
  MemBarNode *leading_to_normal(MemBarNode *leading);
  MemBarNode *normal_to_leading(const MemBarNode *barrier);
  MemBarNode *card_mark_to_trailing(const MemBarNode *barrier);
  MemBarNode *trailing_to_card_mark(const MemBarNode *trailing);
  MemBarNode *trailing_to_leading(const MemBarNode *trailing);

  // predicates controlling emit of ldr<x>/ldar<x> and associated dmb

  bool unnecessary_acquire(const Node *barrier);
  bool needs_acquiring_load(const Node *load);

  // predicates controlling emit of str<x>/stlr<x> and associated dmbs

  bool unnecessary_release(const Node *barrier);
  bool unnecessary_volatile(const Node *barrier);
  bool needs_releasing_store(const Node *store);

  // predicate controlling translation of CompareAndSwapX
  bool needs_acquiring_load_exclusive(const Node *load);

  // predicate controlling translation of StoreCM
  bool unnecessary_storestore(const Node *storecm);

  // predicate controlling addressing modes
  bool size_fits_all_mem_uses(AddPNode* addp, int shift);
1073 %}
1074 
1075 source %{
1076 
  // Optimization of volatile gets and puts
1078   // -------------------------------------
1079   //
1080   // AArch64 has ldar<x> and stlr<x> instructions which we can safely
1081   // use to implement volatile reads and writes. For a volatile read
1082   // we simply need
1083   //
1084   //   ldar<x>
1085   //
1086   // and for a volatile write we need
1087   //
1088   //   stlr<x>
1089   //
1090   // Alternatively, we can implement them by pairing a normal
1091   // load/store with a memory barrier. For a volatile read we need
1092   //
1093   //   ldr<x>
1094   //   dmb ishld
1095   //
1096   // for a volatile write
1097   //
1098   //   dmb ish
1099   //   str<x>
1100   //   dmb ish
1101   //
1102   // We can also use ldaxr and stlxr to implement compare and swap CAS
1103   // sequences. These are normally translated to an instruction
1104   // sequence like the following
1105   //
1106   //   dmb      ish
1107   // retry:
1108   //   ldxr<x>   rval raddr
1109   //   cmp       rval rold
1110   //   b.ne done
1111   //   stlxr<x>  rval, rnew, rold
1112   //   cbnz      rval retry
1113   // done:
1114   //   cset      r0, eq
1115   //   dmb ishld
1116   //
1117   // Note that the exclusive store is already using an stlxr
1118   // instruction. That is required to ensure visibility to other
1119   // threads of the exclusive write (assuming it succeeds) before that
1120   // of any subsequent writes.
1121   //
1122   // The following instruction sequence is an improvement on the above
1123   //
1124   // retry:
1125   //   ldaxr<x>  rval raddr
1126   //   cmp       rval rold
1127   //   b.ne done
1128   //   stlxr<x>  rval, rnew, rold
1129   //   cbnz      rval retry
1130   // done:
1131   //   cset      r0, eq
1132   //
1133   // We don't need the leading dmb ish since the stlxr guarantees
1134   // visibility of prior writes in the case that the swap is
1135   // successful. Crucially we don't have to worry about the case where
1136   // the swap is not successful since no valid program should be
1137   // relying on visibility of prior changes by the attempting thread
1138   // in the case where the CAS fails.
1139   //
1140   // Similarly, we don't need the trailing dmb ishld if we substitute
1141   // an ldaxr instruction since that will provide all the guarantees we
1142   // require regarding observation of changes made by other threads
1143   // before any change to the CAS address observed by the load.
1144   //
1145   // In order to generate the desired instruction sequence we need to
1146   // be able to identify specific 'signature' ideal graph node
1147   // sequences which i) occur as a translation of a volatile reads or
1148   // writes or CAS operations and ii) do not occur through any other
1149   // translation or graph transformation. We can then provide
  // alternative adlc matching rules which translate these node
1151   // sequences to the desired machine code sequences. Selection of the
1152   // alternative rules can be implemented by predicates which identify
1153   // the relevant node sequences.
1154   //
1155   // The ideal graph generator translates a volatile read to the node
1156   // sequence
1157   //
1158   //   LoadX[mo_acquire]
1159   //   MemBarAcquire
1160   //
1161   // As a special case when using the compressed oops optimization we
1162   // may also see this variant
1163   //
1164   //   LoadN[mo_acquire]
1165   //   DecodeN
1166   //   MemBarAcquire
1167   //
1168   // A volatile write is translated to the node sequence
1169   //
1170   //   MemBarRelease
1171   //   StoreX[mo_release] {CardMark}-optional
1172   //   MemBarVolatile
1173   //
1174   // n.b. the above node patterns are generated with a strict
1175   // 'signature' configuration of input and output dependencies (see
1176   // the predicates below for exact details). The card mark may be as
1177   // simple as a few extra nodes or, in a few GC configurations, may
1178   // include more complex control flow between the leading and
1179   // trailing memory barriers. However, whatever the card mark
1180   // configuration these signatures are unique to translated volatile
1181   // reads/stores -- they will not appear as a result of any other
1182   // bytecode translation or inlining nor as a consequence of
1183   // optimizing transforms.
1184   //
1185   // We also want to catch inlined unsafe volatile gets and puts and
1186   // be able to implement them using either ldar<x>/stlr<x> or some
1187   // combination of ldr<x>/stlr<x> and dmb instructions.
1188   //
1189   // Inlined unsafe volatiles puts manifest as a minor variant of the
1190   // normal volatile put node sequence containing an extra cpuorder
1191   // membar
1192   //
1193   //   MemBarRelease
1194   //   MemBarCPUOrder
1195   //   StoreX[mo_release] {CardMark}-optional
1196   //   MemBarCPUOrder
1197   //   MemBarVolatile
1198   //
1199   // n.b. as an aside, a cpuorder membar is not itself subject to
1200   // matching and translation by adlc rules.  However, the rule
1201   // predicates need to detect its presence in order to correctly
1202   // select the desired adlc rules.
1203   //
1204   // Inlined unsafe volatile gets manifest as a slightly different
1205   // node sequence to a normal volatile get because of the
1206   // introduction of some CPUOrder memory barriers to bracket the
  // Load. However, the same basic skeleton of a LoadX feeding a
  // MemBarAcquire, possibly through an optional DecodeN, is still
1209   // present
1210   //
1211   //   MemBarCPUOrder
1212   //        ||       \\
1213   //   MemBarCPUOrder LoadX[mo_acquire]
1214   //        ||            |
1215   //        ||       {DecodeN} optional
1216   //        ||       /
1217   //     MemBarAcquire
1218   //
1219   // In this case the acquire membar does not directly depend on the
1220   // load. However, we can be sure that the load is generated from an
1221   // inlined unsafe volatile get if we see it dependent on this unique
1222   // sequence of membar nodes. Similarly, given an acquire membar we
1223   // can know that it was added because of an inlined unsafe volatile
1224   // get if it is fed and feeds a cpuorder membar and if its feed
1225   // membar also feeds an acquiring load.
1226   //
1227   // Finally an inlined (Unsafe) CAS operation is translated to the
1228   // following ideal graph
1229   //
1230   //   MemBarRelease
1231   //   MemBarCPUOrder
1232   //   CompareAndSwapX {CardMark}-optional
1233   //   MemBarCPUOrder
1234   //   MemBarAcquire
1235   //
1236   // So, where we can identify these volatile read and write
1237   // signatures we can choose to plant either of the above two code
1238   // sequences. For a volatile read we can simply plant a normal
1239   // ldr<x> and translate the MemBarAcquire to a dmb. However, we can
1240   // also choose to inhibit translation of the MemBarAcquire and
1241   // inhibit planting of the ldr<x>, instead planting an ldar<x>.
1242   //
1243   // When we recognise a volatile store signature we can choose to
1244   // plant at a dmb ish as a translation for the MemBarRelease, a
1245   // normal str<x> and then a dmb ish for the MemBarVolatile.
1246   // Alternatively, we can inhibit translation of the MemBarRelease
1247   // and MemBarVolatile and instead plant a simple stlr<x>
1248   // instruction.
1249   //
1250   // when we recognise a CAS signature we can choose to plant a dmb
1251   // ish as a translation for the MemBarRelease, the conventional
1252   // macro-instruction sequence for the CompareAndSwap node (which
1253   // uses ldxr<x>) and then a dmb ishld for the MemBarAcquire.
1254   // Alternatively, we can elide generation of the dmb instructions
1255   // and plant the alternative CompareAndSwap macro-instruction
1256   // sequence (which uses ldaxr<x>).
1257   //
1258   // Of course, the above only applies when we see these signature
1259   // configurations. We still want to plant dmb instructions in any
1260   // other cases where we may see a MemBarAcquire, MemBarRelease or
1261   // MemBarVolatile. For example, at the end of a constructor which
1262   // writes final/volatile fields we will see a MemBarRelease
1263   // instruction and this needs a 'dmb ish' lest we risk the
1264   // constructed object being visible without making the
1265   // final/volatile field writes visible.
1266   //
1267   // n.b. the translation rules below which rely on detection of the
1268   // volatile signatures and insert ldar<x> or stlr<x> are failsafe.
1269   // If we see anything other than the signature configurations we
1270   // always just translate the loads and stores to ldr<x> and str<x>
1271   // and translate acquire, release and volatile membars to the
1272   // relevant dmb instructions.
1273   //
1274 
1275   // graph traversal helpers used for volatile put/get and CAS
1276   // optimization
1277 
1278   // 1) general purpose helpers
1279 
1280   // if node n is linked to a parent MemBarNode by an intervening
1281   // Control and Memory ProjNode return the MemBarNode otherwise return
1282   // NULL.
1283   //
1284   // n may only be a Load or a MemBar.
1285 
1286   MemBarNode *parent_membar(const Node *n)
1287   {
1288     Node *ctl = NULL;
1289     Node *mem = NULL;
1290     Node *membar = NULL;
1291 
1292     if (n->is_Load()) {
1293       ctl = n->lookup(LoadNode::Control);
1294       mem = n->lookup(LoadNode::Memory);
1295     } else if (n->is_MemBar()) {
1296       ctl = n->lookup(TypeFunc::Control);
1297       mem = n->lookup(TypeFunc::Memory);
1298     } else {
1299         return NULL;
1300     }
1301 
1302     if (!ctl || !mem || !ctl->is_Proj() || !mem->is_Proj()) {
1303       return NULL;
1304     }
1305 
1306     membar = ctl->lookup(0);
1307 
1308     if (!membar || !membar->is_MemBar()) {
1309       return NULL;
1310     }
1311 
1312     if (mem->lookup(0) != membar) {
1313       return NULL;
1314     }
1315 
1316     return membar->as_MemBar();
1317   }
1318 
1319   // if n is linked to a child MemBarNode by intervening Control and
1320   // Memory ProjNodes return the MemBarNode otherwise return NULL.
1321 
1322   MemBarNode *child_membar(const MemBarNode *n)
1323   {
1324     ProjNode *ctl = n->proj_out_or_null(TypeFunc::Control);
1325     ProjNode *mem = n->proj_out_or_null(TypeFunc::Memory);
1326 
1327     // MemBar needs to have both a Ctl and Mem projection
1328     if (! ctl || ! mem)
1329       return NULL;
1330 
1331     MemBarNode *child = NULL;
1332     Node *x;
1333 
1334     for (DUIterator_Fast imax, i = ctl->fast_outs(imax); i < imax; i++) {
1335       x = ctl->fast_out(i);
1336       // if we see a membar we keep hold of it. we may also see a new
1337       // arena copy of the original but it will appear later
1338       if (x->is_MemBar()) {
1339           child = x->as_MemBar();
1340           break;
1341       }
1342     }
1343 
1344     if (child == NULL) {
1345       return NULL;
1346     }
1347 
1348     for (DUIterator_Fast imax, i = mem->fast_outs(imax); i < imax; i++) {
1349       x = mem->fast_out(i);
1350       // if we see a membar we keep hold of it. we may also see a new
1351       // arena copy of the original but it will appear later
1352       if (x == child) {
1353         return child;
1354       }
1355     }
1356     return NULL;
1357   }
1358 
1359   // helper predicate use to filter candidates for a leading memory
1360   // barrier
1361   //
1362   // returns true if barrier is a MemBarRelease or a MemBarCPUOrder
1363   // whose Ctl and Mem feeds come from a MemBarRelease otherwise false
1364 
1365   bool leading_membar(const MemBarNode *barrier)
1366   {
1367     int opcode = barrier->Opcode();
1368     // if this is a release membar we are ok
1369     if (opcode == Op_MemBarRelease) {
1370       return true;
1371     }
1372     // if its a cpuorder membar . . .
1373     if (opcode != Op_MemBarCPUOrder) {
1374       return false;
1375     }
1376     // then the parent has to be a release membar
1377     MemBarNode *parent = parent_membar(barrier);
1378     if (!parent) {
1379       return false;
1380     }
1381     opcode = parent->Opcode();
1382     return opcode == Op_MemBarRelease;
1383   }
1384 
1385   // 2) card mark detection helper
1386 
1387   // helper predicate which can be used to detect a volatile membar
1388   // introduced as part of a conditional card mark sequence either by
1389   // G1 or by CMS when UseCondCardMark is true.
1390   //
1391   // membar can be definitively determined to be part of a card mark
1392   // sequence if and only if all the following hold
1393   //
1394   // i) it is a MemBarVolatile
1395   //
1396   // ii) either UseG1GC or (UseConcMarkSweepGC && UseCondCardMark) is
1397   // true
1398   //
1399   // iii) the node's Mem projection feeds a StoreCM node.
1400 
1401   bool is_card_mark_membar(const MemBarNode *barrier)
1402   {
1403     if (!UseG1GC && !(UseConcMarkSweepGC && UseCondCardMark)) {
1404       return false;
1405     }
1406 
1407     if (barrier->Opcode() != Op_MemBarVolatile) {
1408       return false;
1409     }
1410 
1411     ProjNode *mem = barrier->proj_out(TypeFunc::Memory);
1412 
1413     for (DUIterator_Fast imax, i = mem->fast_outs(imax); i < imax ; i++) {
1414       Node *y = mem->fast_out(i);
1415       if (y->Opcode() == Op_StoreCM) {
1416         return true;
1417       }
1418     }
1419 
1420     return false;
1421   }
1422 
1423 
1424   // 3) helper predicates to traverse volatile put or CAS graphs which
1425   // may contain GC barrier subgraphs
1426 
1427   // Preamble
1428   // --------
1429   //
1430   // for volatile writes we can omit generating barriers and employ a
  // releasing store when we see a node sequence with a
1432   // leading MemBarRelease and a trailing MemBarVolatile as follows
1433   //
1434   //   MemBarRelease
1435   //  {      ||      } -- optional
1436   //  {MemBarCPUOrder}
1437   //         ||     \\
1438   //         ||     StoreX[mo_release]
1439   //         | \     /
1440   //         | MergeMem
1441   //         | /
1442   //  {MemBarCPUOrder} -- optional
1443   //  {      ||      }
1444   //   MemBarVolatile
1445   //
1446   // where
1447   //  || and \\ represent Ctl and Mem feeds via Proj nodes
1448   //  | \ and / indicate further routing of the Ctl and Mem feeds
1449   //
1450   // this is the graph we see for non-object stores. however, for a
1451   // volatile Object store (StoreN/P) we may see other nodes below the
1452   // leading membar because of the need for a GC pre- or post-write
1453   // barrier.
1454   //
  // with most GC configurations we will see this simple variant which
1456   // includes a post-write barrier card mark.
1457   //
1458   //   MemBarRelease______________________________
1459   //         ||    \\               Ctl \        \\
1460   //         ||    StoreN/P[mo_release] CastP2X  StoreB/CM
1461   //         | \     /                       . . .  /
1462   //         | MergeMem
1463   //         | /
1464   //         ||      /
1465   //  {MemBarCPUOrder} -- optional
1466   //  {      ||      }
1467   //   MemBarVolatile
1468   //
1469   // i.e. the leading membar feeds Ctl to a CastP2X (which converts
1470   // the object address to an int used to compute the card offset) and
1471   // Ctl+Mem to a StoreB node (which does the actual card mark).
1472   //
1473   // n.b. a StoreCM node will only appear in this configuration when
1474   // using CMS or G1. StoreCM differs from a normal card mark write (StoreB)
1475   // because it implies a requirement to order visibility of the card
1476   // mark (StoreCM) relative to the object put (StoreP/N) using a
1477   // StoreStore memory barrier (arguably this ought to be represented
1478   // explicitly in the ideal graph but that is not how it works). This
1479   // ordering is required for both non-volatile and volatile
1480   // puts. Normally that means we need to translate a StoreCM using
1481   // the sequence
1482   //
1483   //   dmb ishst
1484   //   strb
1485   //
1486   // However, when using G1 or CMS with conditional card marking (as
1487   // we shall see) we don't need to insert the dmb when translating
1488   // StoreCM because there is already an intervening StoreLoad barrier
1489   // between it and the StoreP/N.
1490   //
1491   // It is also possible to perform the card mark conditionally on it
1492   // currently being unmarked in which case the volatile put graph
1493   // will look slightly different
1494   //
1495   //   MemBarRelease____________________________________________
1496   //         ||    \\               Ctl \     Ctl \     \\  Mem \
1497   //         ||    StoreN/P[mo_release] CastP2X   If   LoadB     |
1498   //         | \     /                              \            |
1499   //         | MergeMem                            . . .      StoreB
1500   //         | /                                                /
1501   //         ||     /
1502   //   MemBarVolatile
1503   //
1504   // It is worth noting at this stage that both the above
1505   // configurations can be uniquely identified by checking that the
1506   // memory flow includes the following subgraph:
1507   //
1508   //   MemBarRelease
1509   //  {MemBarCPUOrder}
1510   //          |  \      . . .
1511   //          |  StoreX[mo_release]  . . .
1512   //          |   /
1513   //         MergeMem
1514   //          |
1515   //  {MemBarCPUOrder}
1516   //   MemBarVolatile
1517   //
1518   // This is referred to as a *normal* subgraph. It can easily be
1519   // detected starting from any candidate MemBarRelease,
1520   // StoreX[mo_release] or MemBarVolatile.
1521   //
1522   // A simple variation on this normal case occurs for an unsafe CAS
1523   // operation. The basic graph for a non-object CAS is
1524   //
1525   //   MemBarRelease
1526   //         ||
1527   //   MemBarCPUOrder
1528   //         ||     \\   . . .
1529   //         ||     CompareAndSwapX
1530   //         ||       |
1531   //         ||     SCMemProj
1532   //         | \     /
1533   //         | MergeMem
1534   //         | /
1535   //   MemBarCPUOrder
1536   //         ||
1537   //   MemBarAcquire
1538   //
1539   // The same basic variations on this arrangement (mutatis mutandis)
  // occur when a card mark is introduced. i.e. we see the same basic
  // shape but the StoreP/N is replaced with CompareAndSwapP/N and the
1542   // tail of the graph is a pair comprising a MemBarCPUOrder +
1543   // MemBarAcquire.
1544   //
1545   // So, in the case of a CAS the normal graph has the variant form
1546   //
1547   //   MemBarRelease
1548   //   MemBarCPUOrder
1549   //          |   \      . . .
1550   //          |  CompareAndSwapX  . . .
1551   //          |    |
1552   //          |   SCMemProj
1553   //          |   /  . . .
1554   //         MergeMem
1555   //          |
1556   //   MemBarCPUOrder
1557   //   MemBarAcquire
1558   //
1559   // This graph can also easily be detected starting from any
1560   // candidate MemBarRelease, CompareAndSwapX or MemBarAcquire.
1561   //
1562   // the code below uses two helper predicates, leading_to_normal and
1563   // normal_to_leading to identify these normal graphs, one validating
1564   // the layout starting from the top membar and searching down and
1565   // the other validating the layout starting from the lower membar
1566   // and searching up.
1567   //
1568   // There are two special case GC configurations when a normal graph
1569   // may not be generated: when using G1 (which always employs a
1570   // conditional card mark); and when using CMS with conditional card
1571   // marking configured. These GCs are both concurrent rather than
  // stop-the-world GCs. So they introduce extra Ctl+Mem flow into the
  // graph between the leading and trailing membar nodes, in
  // particular enforcing stronger memory serialisation between the
1575   // object put and the corresponding conditional card mark. CMS
1576   // employs a post-write GC barrier while G1 employs both a pre- and
1577   // post-write GC barrier. Of course the extra nodes may be absent --
1578   // they are only inserted for object puts/swaps. This significantly
1579   // complicates the task of identifying whether a MemBarRelease,
1580   // StoreX[mo_release] or MemBarVolatile forms part of a volatile put
1581   // when using these GC configurations (see below). It adds similar
1582   // complexity to the task of identifying whether a MemBarRelease,
1583   // CompareAndSwapX or MemBarAcquire forms part of a CAS.
1584   //
1585   // In both cases the post-write subtree includes an auxiliary
1586   // MemBarVolatile (StoreLoad barrier) separating the object put/swap
1587   // and the read of the corresponding card. This poses two additional
1588   // problems.
1589   //
1590   // Firstly, a card mark MemBarVolatile needs to be distinguished
1591   // from a normal trailing MemBarVolatile. Resolving this first
1592   // problem is straightforward: a card mark MemBarVolatile always
1593   // projects a Mem feed to a StoreCM node and that is a unique marker
1594   //
1595   //      MemBarVolatile (card mark)
1596   //       C |    \     . . .
1597   //         |   StoreCM   . . .
1598   //       . . .
1599   //
1600   // The second problem is how the code generator is to translate the
1601   // card mark barrier? It always needs to be translated to a "dmb
1602   // ish" instruction whether or not it occurs as part of a volatile
1603   // put. A StoreLoad barrier is needed after the object put to ensure
1604   // i) visibility to GC threads of the object put and ii) visibility
1605   // to the mutator thread of any card clearing write by a GC
1606   // thread. Clearly a normal store (str) will not guarantee this
1607   // ordering but neither will a releasing store (stlr). The latter
1608   // guarantees that the object put is visible but does not guarantee
1609   // that writes by other threads have also been observed.
1610   //
1611   // So, returning to the task of translating the object put and the
1612   // leading/trailing membar nodes: what do the non-normal node graph
1613   // look like for these 2 special cases? and how can we determine the
1614   // status of a MemBarRelease, StoreX[mo_release] or MemBarVolatile
1615   // in both normal and non-normal cases?
1616   //
1617   // A CMS GC post-barrier wraps its card write (StoreCM) inside an If
  // which selects conditional execution based on the value loaded
1619   // (LoadB) from the card. Ctl and Mem are fed to the If via an
1620   // intervening StoreLoad barrier (MemBarVolatile).
1621   //
1622   // So, with CMS we may see a node graph for a volatile object store
1623   // which looks like this
1624   //
1625   //   MemBarRelease
1626   //  {MemBarCPUOrder}_(leading)_________________
1627   //     C |    M \       \\                   C \
1628   //       |       \    StoreN/P[mo_release]  CastP2X
1629   //       |    Bot \    /
1630   //       |       MergeMem
1631   //       |         /
1632   //      MemBarVolatile (card mark)
1633   //     C |  ||    M |
1634   //       | LoadB    |
1635   //       |   |      |
1636   //       | Cmp      |\
1637   //       | /        | \
1638   //       If         |  \
1639   //       | \        |   \
1640   // IfFalse  IfTrue  |    \
1641   //       \     / \  |     \
1642   //        \   / StoreCM    |
1643   //         \ /      |      |
1644   //        Region   . . .   |
1645   //          | \           /
1646   //          |  . . .  \  / Bot
1647   //          |       MergeMem
1648   //          |          |
1649   //       {MemBarCPUOrder}
1650   //        MemBarVolatile (trailing)
1651   //
1652   // The first MergeMem merges the AliasIdxBot Mem slice from the
1653   // leading membar and the oopptr Mem slice from the Store into the
1654   // card mark membar. The trailing MergeMem merges the AliasIdxBot
1655   // Mem slice from the card mark membar and the AliasIdxRaw slice
1656   // from the StoreCM into the trailing membar (n.b. the latter
1657   // proceeds via a Phi associated with the If region).
1658   //
1659   // The graph for a CAS varies slightly, the difference being
1660   // that the StoreN/P node is replaced by a CompareAndSwapP/N node
1661   // and the trailing MemBarVolatile by a MemBarCPUOrder +
1662   // MemBarAcquire pair (also the MemBarCPUOrder nodes are not optional).
1663   //
1664   //   MemBarRelease
1665   //   MemBarCPUOrder_(leading)_______________
1666   //     C |    M \       \\                C \
1667   //       |       \    CompareAndSwapN/P  CastP2X
1668   //       |        \      |
1669   //       |         \   SCMemProj
1670   //       |      Bot \   /
1671   //       |        MergeMem
1672   //       |         /
1673   //      MemBarVolatile (card mark)
1674   //     C |  ||    M |
1675   //       | LoadB    |
1676   //       |   |      |
1677   //       | Cmp      |\
1678   //       | /        | \
1679   //       If         |  \
1680   //       | \        |   \
1681   // IfFalse  IfTrue  |    \
1682   //       \     / \  |     \
1683   //        \   / StoreCM    |
1684   //         \ /      |      |
1685   //        Region   . . .   |
1686   //          | \           /
1687   //          |  . . .  \  / Bot
1688   //          |       MergeMem
1689   //          |          |
1690   //        MemBarCPUOrder
1691   //        MemBarVolatile (trailing)
1692   //
1693   //
1694   // G1 is quite a lot more complicated. The nodes inserted on behalf
1695   // of G1 may comprise: a pre-write graph which adds the old value to
1696   // the SATB queue; the releasing store itself; and, finally, a
1697   // post-write graph which performs a card mark.
1698   //
1699   // The pre-write graph may be omitted, but only when the put is
1700   // writing to a newly allocated (young gen) object and then only if
1701   // there is a direct memory chain to the Initialize node for the
1702   // object allocation. This will not happen for a volatile put since
1703   // any memory chain passes through the leading membar.
1704   //
1705   // The pre-write graph includes a series of 3 If tests. The outermost
1706   // If tests whether SATB is enabled (no else case). The next If tests
1707   // whether the old value is non-NULL (no else case). The third tests
1708   // whether the SATB queue index is > 0, if so updating the queue. The
1709   // else case for this third If calls out to the runtime to allocate a
1710   // new queue buffer.
1711   //
1712   // So with G1 the pre-write and releasing store subgraph looks like
1713   // this (the nested Ifs are omitted).
1714   //
1715   //  MemBarRelease
1716   // {MemBarCPUOrder}_(leading)___________
1717   //     C |  ||  M \   M \    M \  M \ . . .
1718   //       | LoadB   \  LoadL  LoadN   \
1719   //       | /        \                 \
1720   //       If         |\                 \
1721   //       | \        | \                 \
1722   //  IfFalse  IfTrue |  \                 \
1723   //       |     |    |   \                 |
1724   //       |     If   |   /\                |
1725   //       |     |          \               |
1726   //       |                 \              |
1727   //       |    . . .         \             |
1728   //       | /       | /       |            |
1729   //      Region  Phi[M]       |            |
1730   //       | \       |         |            |
1731   //       |  \_____ | ___     |            |
1732   //     C | C \     |   C \ M |            |
1733   //       | CastP2X | StoreN/P[mo_release] |
1734   //       |         |         |            |
1735   //     C |       M |       M |          M |
1736   //        \        |         |           /
1737   //                  . . .
1738   //          (post write subtree elided)
1739   //                    . . .
1740   //             C \         M /
1741   //                \         /
1742   //             {MemBarCPUOrder}
1743   //              MemBarVolatile (trailing)
1744   //
1745   // n.b. the LoadB in this subgraph is not the card read -- it's a
1746   // read of the SATB queue active flag.
1747   //
1748   // The G1 post-write subtree is also optional, this time when the
1749   // new value being written is either null or can be identified as a
1750   // newly allocated (young gen) object with no intervening control
1751   // flow. The latter cannot happen but the former may, in which case
  // the card mark membar is omitted and the memory feeds from the
  // leading membar and the StoreN/P are merged direct into the
1754   // trailing membar as per the normal subgraph. So, the only special
1755   // case which arises is when the post-write subgraph is generated.
1756   //
1757   // The kernel of the post-write G1 subgraph is the card mark itself
1758   // which includes a card mark memory barrier (MemBarVolatile), a
1759   // card test (LoadB), and a conditional update (If feeding a
1760   // StoreCM). These nodes are surrounded by a series of nested Ifs
1761   // which try to avoid doing the card mark. The top level If skips if
1762   // the object reference does not cross regions (i.e. it tests if
1763   // (adr ^ val) >> log2(regsize) != 0) -- intra-region references
1764   // need not be recorded. The next If, which skips on a NULL value,
1765   // may be absent (it is not generated if the type of value is >=
1766   // OopPtr::NotNull). The 3rd If skips writes to young regions (by
1767   // checking if card_val != young).  n.b. although this test requires
1768   // a pre-read of the card it can safely be done before the StoreLoad
1769   // barrier. However that does not bypass the need to reread the card
1770   // after the barrier. A final, 4th If tests if the card is already
1771   // marked.
1772   //
1773   //                (pre-write subtree elided)
1774   //        . . .                  . . .    . . .  . . .
1775   //        C |                    M |     M |    M |
1776   //       Region                  Phi[M] StoreN    |
1777   //          |                     / \      |      |
1778   //         / \_______            /   \     |      |
1779   //      C / C \      . . .            \    |      |
1780   //       If   CastP2X . . .            |   |      |
1781   //       / \                           |   |      |
1782   //      /   \                          |   |      |
1783   // IfFalse IfTrue                      |   |      |
1784   //   |       |                         |   |     /|
1785   //   |       If                        |   |    / |
1786   //   |      / \                        |   |   /  |
1787   //   |     /   \                        \  |  /   |
1788   //   | IfFalse IfTrue                   MergeMem  |
1789   //   |  . . .    / \                       /      |
1790   //   |          /   \                     /       |
1791   //   |     IfFalse IfTrue                /        |
1792   //   |      . . .    |                  /         |
1793   //   |               If                /          |
1794   //   |               / \              /           |
1795   //   |              /   \            /            |
1796   //   |         IfFalse IfTrue       /             |
1797   //   |           . . .   |         /              |
1798   //   |                    \       /               |
1799   //   |                     \     /                |
1800   //   |             MemBarVolatile__(card mark)    |
1801   //   |                ||   C |  M \  M \          |
1802   //   |               LoadB   If    |    |         |
1803   //   |                      / \    |    |         |
1804   //   |                     . . .   |    |         |
1805   //   |                          \  |    |        /
1806   //   |                        StoreCM   |       /
1807   //   |                          . . .   |      /
1808   //   |                        _________/      /
1809   //   |                       /  _____________/
1810   //   |   . . .       . . .  |  /            /
1811   //   |    |                 | /   _________/
1812   //   |    |               Phi[M] /        /
1813   //   |    |                 |   /        /
1814   //   |    |                 |  /        /
1815   //   |  Region  . . .     Phi[M]  _____/
1816   //   |    /                 |    /
1817   //   |                      |   /
1818   //   | . . .   . . .        |  /
1819   //   | /                    | /
1820   // Region           |  |  Phi[M]
1821   //   |              |  |  / Bot
1822   //    \            MergeMem
1823   //     \            /
1824   //    {MemBarCPUOrder}
1825   //     MemBarVolatile
1826   //
1827   // As with CMS the initial MergeMem merges the AliasIdxBot Mem slice
1828   // from the leading membar and the oopptr Mem slice from the Store
1829   // into the card mark membar i.e. the memory flow to the card mark
1830   // membar still looks like a normal graph.
1831   //
1832   // The trailing MergeMem merges an AliasIdxBot Mem slice with other
1833   // Mem slices (from the StoreCM and other card mark queue stores).
1834   // However in this case the AliasIdxBot Mem slice does not come
1835   // direct from the card mark membar. It is merged through a series
1836   // of Phi nodes. These are needed to merge the AliasIdxBot Mem flow
1837   // from the leading membar with the Mem feed from the card mark
1838   // membar. Each Phi corresponds to one of the Ifs which may skip
1839   // around the card mark membar. So when the If implementing the NULL
1840   // value check has been elided the total number of Phis is 2
1841   // otherwise it is 3.
1842   //
1843   // The CAS graph when using G1GC also includes a pre-write subgraph
1844   // and an optional post-write subgraph. The same variations are
1845   // introduced as for CMS with conditional card marking i.e. the
1846   // StoreP/N is swapped for a CompareAndSwapP/N with a following
1847   // SCMemProj, the trailing MemBarVolatile for a MemBarCPUOrder +
1848   // MemBarAcquire pair. There may be an extra If test introduced in
1849   // the CAS case, when the boolean result of the CAS is tested by the
1850   // caller. In that case an extra Region and AliasIdxBot Phi may be
1851   // introduced before the MergeMem
1852   //
1853   // So, the upshot is that in all cases the subgraph will include a
  // *normal* memory subgraph between the leading membar and its child
1855   // membar: either a normal volatile put graph including a releasing
1856   // StoreX and terminating with a trailing volatile membar or card
1857   // mark volatile membar; or a normal CAS graph including a
1858   // CompareAndSwapX + SCMemProj pair and terminating with a card mark
1859   // volatile membar or a trailing cpu order and acquire membar
1860   // pair. If the child membar is not a (volatile) card mark membar
1861   // then it marks the end of the volatile put or CAS subgraph. If the
1862   // child is a card mark membar then the normal subgraph will form
1863   // part of a larger volatile put or CAS subgraph if and only if the
1864   // child feeds an AliasIdxBot Mem feed to a trailing barrier via a
1865   // MergeMem. That feed is either direct (for CMS) or via 2, 3 or 4
1866   // Phi nodes merging the leading barrier memory flow (for G1).
1867   //
1868   // The predicates controlling generation of instructions for store
1869   // and barrier nodes employ a few simple helper functions (described
1870   // below) which identify the presence or absence of all these
1871   // subgraph configurations and provide a means of traversing from
1872   // one node in the subgraph to another.
1873 
1874   // is_CAS(int opcode)
1875   //
1876   // return true if opcode is one of the possible CompareAndSwapX
1877   // values otherwise false.
1878 
1879   bool is_CAS(int opcode)
1880   {
1881     switch(opcode) {
1882       // We handle these
1883     case Op_CompareAndSwapI:
1884     case Op_CompareAndSwapL:
1885     case Op_CompareAndSwapP:
1886     case Op_CompareAndSwapN:
1887  // case Op_CompareAndSwapB:
1888  // case Op_CompareAndSwapS:
1889       return true;
1890       // These are TBD
1891     case Op_WeakCompareAndSwapB:
1892     case Op_WeakCompareAndSwapS:
1893     case Op_WeakCompareAndSwapI:
1894     case Op_WeakCompareAndSwapL:
1895     case Op_WeakCompareAndSwapP:
1896     case Op_WeakCompareAndSwapN:
1897     case Op_CompareAndExchangeB:
1898     case Op_CompareAndExchangeS:
1899     case Op_CompareAndExchangeI:
1900     case Op_CompareAndExchangeL:
1901     case Op_CompareAndExchangeP:
1902     case Op_CompareAndExchangeN:
1903       return false;
1904     default:
1905       return false;
1906     }
1907   }
1908 
1909   // helper to determine the maximum number of Phi nodes we may need to
1910   // traverse when searching from a card mark membar for the merge mem
1911   // feeding a trailing membar or vice versa
1912 
1913   int max_phis()
1914   {
1915     if (UseG1GC) {
1916       return 4;
1917     } else if (UseConcMarkSweepGC && UseCondCardMark) {
1918       return 1;
1919     } else {
1920       return 0;
1921     }
1922   }
1923 
1924   // leading_to_normal
1925   //
1926   // graph traversal helper which detects the normal case Mem feed
1927   // from a release membar (or, optionally, its cpuorder child) to a
1928   // dependent volatile or acquire membar i.e. it ensures that one of
1929   // the following 3 Mem flow subgraphs is present.
1930   //
1931   //   MemBarRelease
1932   //  {MemBarCPUOrder} {leading}
1933   //          |  \      . . .
1934   //          |  StoreN/P[mo_release]  . . .
1935   //          |   /
1936   //         MergeMem
1937   //          |
1938   //  {MemBarCPUOrder}
1939   //   MemBarVolatile {trailing or card mark}
1940   //
1941   //   MemBarRelease
1942   //   MemBarCPUOrder {leading}
1943   //          |  \      . . .
1944   //          |  CompareAndSwapX  . . .
1945   //          |   /
1946   //         MergeMem
1947   //          |
1948   //   MemBarVolatile {card mark}
1949   //
1950   //   MemBarRelease
1951   //   MemBarCPUOrder {leading}
1952   //          |  \      . . .
1953   //          |  CompareAndSwapX  . . .
1954   //          |   /
1955   //         MergeMem
1956   //          |
1957   //   MemBarCPUOrder
1958   //   MemBarAcquire {trailing}
1959   //
1960   // if the correct configuration is present returns the trailing
1961   // or cardmark membar otherwise NULL.
1962   //
1963   // the input membar is expected to be either a cpuorder membar or a
1964   // release membar. in the latter case it should not have a cpu membar
1965   // child.
1966   //
1967   // the returned value may be a card mark or trailing membar
1968   //
1969 
1970   MemBarNode *leading_to_normal(MemBarNode *leading)
1971   {
1972     assert((leading->Opcode() == Op_MemBarRelease ||
1973             leading->Opcode() == Op_MemBarCPUOrder),
1974            "expecting a volatile or cpuroder membar!");
1975 
1976     // check the mem flow
1977     ProjNode *mem = leading->proj_out(TypeFunc::Memory);
1978 
1979     if (!mem) {
1980       return NULL;
1981     }
1982 
1983     Node *x = NULL;
1984     StoreNode * st = NULL;
1985     LoadStoreNode *cas = NULL;
1986     MergeMemNode *mm = NULL;
1987 
1988     for (DUIterator_Fast imax, i = mem->fast_outs(imax); i < imax; i++) {
1989       x = mem->fast_out(i);
1990       if (x->is_MergeMem()) {
1991         if (mm != NULL) {
1992           return NULL;
1993         }
1994         // two merge mems is one too many
1995         mm = x->as_MergeMem();
1996       } else if (x->is_Store() && x->as_Store()->is_release() && x->Opcode() != Op_StoreCM) {
1997         // two releasing stores/CAS nodes is one too many
1998         if (st != NULL || cas != NULL) {
1999           return NULL;
2000         }
2001         st = x->as_Store();
2002       } else if (is_CAS(x->Opcode())) {
2003         if (st != NULL || cas != NULL) {
2004           return NULL;
2005         }
2006         cas = x->as_LoadStore();
2007       }
2008     }
2009 
2010     // must have a store or a cas
2011     if (!st && !cas) {
2012       return NULL;
2013     }
2014 
2015     // must have a merge
2016     if (!mm) {
2017       return NULL;
2018     }
2019 
2020     Node *feed = NULL;
2021     if (cas) {
2022       // look for an SCMemProj
2023       for (DUIterator_Fast imax, i = cas->fast_outs(imax); i < imax; i++) {
2024         x = cas->fast_out(i);
2025         if (x->Opcode() == Op_SCMemProj) {
2026           feed = x;
2027           break;
2028         }
2029       }
2030       if (feed == NULL) {
2031         return NULL;
2032       }
2033     } else {
2034       feed = st;
2035     }
2036     // ensure the feed node feeds the existing mergemem;
2037     for (DUIterator_Fast imax, i = feed->fast_outs(imax); i < imax; i++) {
2038       x = feed->fast_out(i);
2039       if (x == mm) {
2040         break;
2041       }
2042     }
2043     if (x != mm) {
2044       return NULL;
2045     }
2046 
2047     MemBarNode *mbar = NULL;
2048     // ensure the merge feeds to the expected type of membar
2049     for (DUIterator_Fast imax, i = mm->fast_outs(imax); i < imax; i++) {
2050       x = mm->fast_out(i);
2051       if (x->is_MemBar()) {
2052         if (x->Opcode() == Op_MemBarCPUOrder) {
2053           // with a store any cpu order membar should precede a
2054           // trailing volatile membar. with a cas it should precede a
2055           // trailing acquire membar. in either case try to skip to
2056           // that next membar
2057           MemBarNode *y =  x->as_MemBar();
2058           y = child_membar(y);
2059           if (y != NULL) {
2060             // skip to this new membar to do the check
2061             x = y;
2062           }
2063           
2064         }
2065         if (x->Opcode() == Op_MemBarVolatile) {
2066           mbar = x->as_MemBar();
2067           // for a volatile store this can be either a trailing membar
2068           // or a card mark membar. for a cas it must be a card mark
2069           // membar
2070           guarantee(cas == NULL || is_card_mark_membar(mbar),
2071                     "in CAS graph volatile membar must be a card mark");
2072         } else if (cas != NULL && x->Opcode() == Op_MemBarAcquire) {
2073           mbar = x->as_MemBar();
2074         }
2075         break;
2076       }
2077     }
2078 
2079     return mbar;
2080   }
2081 
2082   // normal_to_leading
2083   //
2084   // graph traversal helper which detects the normal case Mem feed
2085   // from either a card mark or a trailing membar to a preceding
2086   // release membar (optionally its cpuorder child) i.e. it ensures
2087   // that one of the following 3 Mem flow subgraphs is present.
2088   //
2089   //   MemBarRelease
2090   //  {MemBarCPUOrder} {leading}
2091   //          |  \      . . .
2092   //          |  StoreN/P[mo_release]  . . .
2093   //          |   /
2094   //         MergeMem
2095   //          |
2096   //  {MemBarCPUOrder}
2097   //   MemBarVolatile {trailing or card mark}
2098   //
2099   //   MemBarRelease
2100   //   MemBarCPUOrder {leading}
2101   //          |  \      . . .
2102   //          |  CompareAndSwapX  . . .
2103   //          |   /
2104   //         MergeMem
2105   //          |
2106   //   MemBarVolatile {card mark}
2107   //
2108   //   MemBarRelease
2109   //   MemBarCPUOrder {leading}
2110   //          |  \      . . .
2111   //          |  CompareAndSwapX  . . .
2112   //          |   /
2113   //         MergeMem
2114   //          |
2115   //   MemBarCPUOrder
2116   //   MemBarAcquire {trailing}
2117   //
2118   // this predicate checks for the same flow as the previous predicate
2119   // but starting from the bottom rather than the top.
2120   //
  // if the configuration is present returns the cpuorder membar for
  // preference or when absent the release membar otherwise NULL.
2123   //
2124   // n.b. the input membar is expected to be a MemBarVolatile but
2125   // need not be a card mark membar.
2126 
  MemBarNode *normal_to_leading(const MemBarNode *barrier)
  {
    // input must be a volatile membar
    assert((barrier->Opcode() == Op_MemBarVolatile ||
            barrier->Opcode() == Op_MemBarAcquire),
           "expecting a volatile or an acquire membar");
    // remember whether we entered via a trailing acquire (CAS graph)
    // so we can sanity check what we find at the top of the graph
    bool barrier_is_acquire = barrier->Opcode() == Op_MemBarAcquire;

    // if we have an intervening cpu order membar then start the
    // search from it

    Node *x = parent_membar(barrier);

    if (x == NULL) {
      // stick with the original barrier
      x = (Node *)barrier;
    } else if (x->Opcode() != Op_MemBarCPUOrder) {
      // any other barrier means this is not the graph we want
      return NULL;
    }

    // the Mem feed to the membar should be a merge
    x = x ->in(TypeFunc::Memory);
    if (!x->is_MergeMem())
      return NULL;

    MergeMemNode *mm = x->as_MergeMem();

    // the merge should get its Bottom mem feed from the leading membar
    x = mm->in(Compile::AliasIdxBot);

    // ensure this is a non control projection
    if (!x->is_Proj() || x->is_CFG()) {
      return NULL;
    }
    // if it is fed by a membar that's the one we want
    x = x->in(0);

    if (!x->is_MemBar()) {
      return NULL;
    }

    MemBarNode *leading = x->as_MemBar();
    // reject invalid candidates
    if (!leading_membar(leading)) {
      return NULL;
    }

    // ok, we have a leading membar, now for the sanity clauses

    // the leading membar must feed Mem to a releasing store or CAS
    ProjNode *mem = leading->proj_out(TypeFunc::Memory);
    StoreNode *st = NULL;
    LoadStoreNode *cas = NULL;
    for (DUIterator_Fast imax, i = mem->fast_outs(imax); i < imax; i++) {
      x = mem->fast_out(i);
      if (x->is_Store() && x->as_Store()->is_release() && x->Opcode() != Op_StoreCM) {
        // two stores or CASes is one too many
        if (st != NULL || cas != NULL) {
          return NULL;
        }
        st = x->as_Store();
      } else if (is_CAS(x->Opcode())) {
        if (st != NULL || cas != NULL) {
          return NULL;
        }
        cas = x->as_LoadStore();
      }
    }

    // the scan above guarantees at most one of st and cas was found;
    // now check that we found at least one of them
    if (st == NULL && cas == NULL) {
      // we have neither -- this is not a normal graph
      return NULL;
    }
    if (st == NULL) {
      // if we started from a volatile membar and found a CAS then the
      // original membar ought to be for a card mark
      guarantee((barrier_is_acquire || is_card_mark_membar(barrier)),
                "unexpected volatile barrier (i.e. not card mark) in CAS graph");
      // check that the CAS feeds the merge we used to get here via an
      // intermediary SCMemProj
      Node *scmemproj = NULL;
      for (DUIterator_Fast imax, i = cas->fast_outs(imax); i < imax; i++) {
        x = cas->fast_out(i);
        if (x->Opcode() == Op_SCMemProj) {
          scmemproj = x;
          break;
        }
      }
      if (scmemproj == NULL) {
        return NULL;
      }
      // only succeed if the SCMemProj is a user of our original merge
      for (DUIterator_Fast imax, i = scmemproj->fast_outs(imax); i < imax; i++) {
        x = scmemproj->fast_out(i);
        if (x == mm) {
          return leading;
        }
      }
    } else {
      // we should not have found a store if we started from an acquire
      guarantee(!barrier_is_acquire,
                "unexpected trailing acquire barrier in volatile store graph");

      // the store should feed the merge we used to get here
      for (DUIterator_Fast imax, i = st->fast_outs(imax); i < imax; i++) {
        if (st->fast_out(i) == mm) {
          return leading;
        }
      }
    }

    // the feed did not reach our merge -- not the graph we want
    return NULL;
  }
2241 
2242   // card_mark_to_trailing
2243   //
2244   // graph traversal helper which detects extra, non-normal Mem feed
2245   // from a card mark volatile membar to a trailing membar i.e. it
2246   // ensures that one of the following three GC post-write Mem flow
2247   // subgraphs is present.
2248   //
2249   // 1)
2250   //     . . .
2251   //       |
2252   //   MemBarVolatile (card mark)
2253   //      |          |
2254   //      |        StoreCM
2255   //      |          |
2256   //      |        . . .
2257   //  Bot |  /
2258   //   MergeMem
2259   //      |
2260   //   {MemBarCPUOrder}            OR  MemBarCPUOrder
2261   //    MemBarVolatile {trailing}      MemBarAcquire {trailing}
2262   //                                 
2263   //
2264   // 2)
2265   //   MemBarRelease/CPUOrder (leading)
2266   //    |
2267   //    |
2268   //    |\       . . .
2269   //    | \        |
2270   //    |  \  MemBarVolatile (card mark)
2271   //    |   \   |     |
2272   //     \   \  |   StoreCM    . . .
2273   //      \   \ |
2274   //       \  Phi
2275   //        \ /
2276   //        Phi  . . .
2277   //     Bot |   /
2278   //       MergeMem
2279   //         |
2280   //   {MemBarCPUOrder}            OR  MemBarCPUOrder
2281   //    MemBarVolatile {trailing}      MemBarAcquire {trailing}
2282   //
2283   // 3)
2284   //   MemBarRelease/CPUOrder (leading)
2285   //    |
2286   //    |\
2287   //    | \
2288   //    |  \      . . .
2289   //    |   \       |
2290   //    |\   \  MemBarVolatile (card mark)
2291   //    | \   \   |     |
2292   //    |  \   \  |   StoreCM    . . .
2293   //    |   \   \ |
2294   //     \   \  Phi
2295   //      \   \ /
2296   //       \  Phi
2297   //        \ /
2298   //        Phi  . . .
2299   //     Bot |   /
2300   //       MergeMem
2301   //         |
2302   //         |
2303   //   {MemBarCPUOrder}            OR  MemBarCPUOrder
2304   //    MemBarVolatile {trailing}      MemBarAcquire {trailing}
2305   //
2306   // 4)
2307   //   MemBarRelease/CPUOrder (leading)
2308   //    |
2309   //    |\
2310   //    | \
2311   //    |  \
2312   //    |   \
2313   //    |\   \
2314   //    | \   \
2315   //    |  \   \        . . .
2316   //    |   \   \         |
2317   //    |\   \   \   MemBarVolatile (card mark)
2318   //    | \   \   \   /   |
2319   //    |  \   \   \ /  StoreCM    . . .
2320   //    |   \   \  Phi
2321   //     \   \   \ /
2322   //      \   \  Phi
2323   //       \   \ /
2324   //        \  Phi
2325   //         \ /
2326   //         Phi  . . .
2327   //      Bot |   /
2328   //       MergeMem
2329   //          |
2330   //          |
2331   //    MemBarCPUOrder
2332   //    MemBarAcquire {trailing}
2333   //
2334   // configuration 1 is only valid if UseConcMarkSweepGC &&
2335   // UseCondCardMark
2336   //
2337   // configuration 2, is only valid if UseConcMarkSweepGC &&
2338   // UseCondCardMark or if UseG1GC
2339   //
2340   // configurations 3 and 4 are only valid if UseG1GC.
2341   //
2342   // if a valid configuration is present returns the trailing membar
2343   // otherwise NULL.
2344   //
2345   // n.b. the supplied membar is expected to be a card mark
2346   // MemBarVolatile i.e. the caller must ensure the input node has the
2347   // correct operand and feeds Mem to a StoreCM node
2348 
  MemBarNode *card_mark_to_trailing(const MemBarNode *barrier)
  {
    // input must be a card mark volatile membar
    assert(is_card_mark_membar(barrier), "expecting a card mark membar");

    // start the search from the card mark membar's Memory projection
    Node *feed = barrier->proj_out(TypeFunc::Memory);
    Node *x;
    MergeMemNode *mm = NULL;

    const int MAX_PHIS = max_phis(); // max phis we will search through
    int phicount = 0;                // current search count

    // walk down from the membar, possibly chaining through up to
    // MAX_PHIS Bot memory Phis, until we reach the MergeMem that
    // should feed the trailing membar
    bool retry_feed = true;
    while (retry_feed) {
      // see if we have a direct MergeMem feed
      for (DUIterator_Fast imax, i = feed->fast_outs(imax); i < imax; i++) {
        x = feed->fast_out(i);
        // the correct Phi will be merging a Bot memory slice
        if (x->is_MergeMem()) {
          mm = x->as_MergeMem();
          break;
        }
      }
      if (mm) {
        retry_feed = false;
      } else if (phicount++ < MAX_PHIS) {
        // the barrier may feed indirectly via one or two Phi nodes
        PhiNode *phi = NULL;
        for (DUIterator_Fast imax, i = feed->fast_outs(imax); i < imax; i++) {
          x = feed->fast_out(i);
          // the correct Phi will be merging a Bot memory slice
          if (x->is_Phi() && x->adr_type() == TypePtr::BOTTOM) {
            phi = x->as_Phi();
            break;
          }
        }
        if (!phi) {
          return NULL;
        }
        // look for another merge below this phi
        feed = phi;
      } else {
        // couldn't find a merge
        return NULL;
      }
    }

    // sanity check this feed turns up as the expected slice
    guarantee(mm->as_MergeMem()->in(Compile::AliasIdxBot) == feed, "expecting membar to feed AliasIdxBot slice to Merge");

    MemBarNode *trailing = NULL;
    // be sure we have a trailing membar fed by the merge
    for (DUIterator_Fast imax, i = mm->fast_outs(imax); i < imax; i++) {
      x = mm->fast_out(i);
      if (x->is_MemBar()) {
        // if this is an intervening cpu order membar skip to the
        // following membar
        if (x->Opcode() == Op_MemBarCPUOrder) {
          MemBarNode *y =  x->as_MemBar();
          y = child_membar(y);
          if (y != NULL) {
            x = y;
          }
        }
        // accept a trailing volatile membar (store graph) or a
        // trailing acquire membar (CAS graph); anything else fails
        if (x->Opcode() == Op_MemBarVolatile ||
            x->Opcode() == Op_MemBarAcquire) {
          trailing = x->as_MemBar();
        }
        break;
      }
    }

    return trailing;
  }
2423 
2424   // trailing_to_card_mark
2425   //
2426   // graph traversal helper which detects extra, non-normal Mem feed
2427   // from a trailing volatile membar to a preceding card mark volatile
2428   // membar i.e. it identifies whether one of the three possible extra
2429   // GC post-write Mem flow subgraphs is present
2430   //
2431   // this predicate checks for the same flow as the previous predicate
2432   // but starting from the bottom rather than the top.
2433   //
2434   // if the configuration is present returns the card mark membar
2435   // otherwise NULL
2436   //
2437   // n.b. the supplied membar is expected to be a trailing
2438   // MemBarVolatile or MemBarAcquire i.e. the caller must ensure the
2439   // input node has the correct opcode
2440 
  MemBarNode *trailing_to_card_mark(const MemBarNode *trailing)
  {
    assert(trailing->Opcode() == Op_MemBarVolatile ||
           trailing->Opcode() == Op_MemBarAcquire,
           "expecting a volatile or acquire membar");
    assert(!is_card_mark_membar(trailing),
           "not expecting a card mark membar");

    Node *x = (Node *)trailing;

    // look for a preceding cpu order membar
    MemBarNode *y = parent_membar(x->as_MemBar());
    if (y != NULL) {
      // make sure it is a cpu order membar
      if (y->Opcode() != Op_MemBarCPUOrder) {
        // this is not the graph we were looking for
        return NULL;
      }
      // start the search from here
      x = y;
    }

    // the Mem feed to the membar should be a merge
    x = x->in(TypeFunc::Memory);
    if (!x->is_MergeMem()) {
      return NULL;
    }

    MergeMemNode *mm = x->as_MergeMem();

    // follow the merge's Bot slice back towards the card mark membar
    x = mm->in(Compile::AliasIdxBot);
    // with G1 we may possibly see a Phi or two before we see a Memory
    // Proj from the card mark membar

    const int MAX_PHIS = max_phis(); // max phis we will search through
    int phicount = 0;                    // current search count

    bool retry_feed = !x->is_Proj();

    while (retry_feed) {
      if (x->is_Phi() && phicount++ < MAX_PHIS) {
        PhiNode *phi = x->as_Phi();
        ProjNode *proj = NULL;
        PhiNode *nextphi = NULL;
        bool found_leading = false;
        // examine the phi's inputs looking for either the card mark
        // membar's Mem proj, a further Bot memory Phi to chase, or a
        // feed from a (probable) leading membar
        for (uint i = 1; i < phi->req(); i++) {
          x = phi->in(i);
          if (x->is_Phi() && x->adr_type() == TypePtr::BOTTOM) {
            nextphi = x->as_Phi();
          } else if (x->is_Proj()) {
            int opcode = x->in(0)->Opcode();
            if (opcode == Op_MemBarVolatile) {
              proj = x->as_Proj();
            } else if (opcode == Op_MemBarRelease ||
                       opcode == Op_MemBarCPUOrder) {
              // probably a leading membar
              found_leading = true;
            }
          }
        }
        // if we found a correct looking proj then retry from there
        // otherwise we must see a leading and a phi or this is the
        // wrong config
        if (proj != NULL) {
          x = proj;
          retry_feed = false;
        } else if (found_leading && nextphi != NULL) {
          // retry from this phi to check phi2
          x = nextphi;
        } else {
          // not what we were looking for
          return NULL;
        }
      } else {
        // either not a Phi or we have exhausted the Phi budget
        return NULL;
      }
    }
    // the proj has to come from the card mark membar
    x = x->in(0);
    if (!x->is_MemBar()) {
      return NULL;
    }

    MemBarNode *card_mark_membar = x->as_MemBar();

    if (!is_card_mark_membar(card_mark_membar)) {
      return NULL;
    }

    return card_mark_membar;
  }
2532 
  // trailing_to_leading
  //
  // graph traversal helper which checks the Mem flow up the graph
  // from a (non-card mark) trailing membar attempting to locate and
  // return an associated leading membar. it first looks for a
  // subgraph in the normal configuration (relying on helper
  // normal_to_leading). failing that it then looks for one of the
  // possible post-write card mark subgraphs linking the trailing node
  // to the card mark membar (relying on helper
  // trailing_to_card_mark), and then checks that the card mark membar
  // is fed by a leading membar (once again relying on auxiliary
  // predicate normal_to_leading).
  //
  // if the configuration is valid returns the cpuorder membar for
  // preference or when absent the release membar otherwise NULL.
  //
  // n.b. the input membar is expected to be either a volatile or
  // acquire membar but in the former case must *not* be a card mark
  // membar.

  MemBarNode *trailing_to_leading(const MemBarNode *trailing)
  {
    assert((trailing->Opcode() == Op_MemBarAcquire ||
            trailing->Opcode() == Op_MemBarVolatile),
           "expecting an acquire or volatile membar");
    assert((trailing->Opcode() != Op_MemBarVolatile ||
            !is_card_mark_membar(trailing)),
           "not expecting a card mark membar");

    // try the simple case first: trailing is reached directly from
    // the leading membar of a normal subgraph
    MemBarNode *leading = normal_to_leading(trailing);

    if (leading) {
      return leading;
    }

    // there is no normal path from trailing to leading membar. see if
    // we can arrive via a card mark membar

    MemBarNode *card_mark_membar = trailing_to_card_mark(trailing);

    if (!card_mark_membar) {
      return NULL;
    }

    // the card mark membar must itself be fed by a leading membar
    return normal_to_leading(card_mark_membar);
  }
2579 
2580   // predicates controlling emit of ldr<x>/ldar<x> and associated dmb
2581 
bool unnecessary_acquire(const Node *barrier)
{
  assert(barrier->is_MemBar(), "expecting a membar");

  if (UseBarriersForVolatile) {
    // we need to plant a dmb
    return false;
  }

  // a volatile read derived from bytecode (or also from an inlined
  // SHA field read via LibraryCallKit::load_field_from_object)
  // manifests as a LoadX[mo_acquire] followed by an acquire membar
  // with a bogus read dependency on its preceding load. so in those
  // cases we will find the load node at the PARMS offset of the
  // acquire membar.  n.b. there may be an intervening DecodeN node.

  Node *x = barrier->lookup(TypeFunc::Parms);
  if (x) {
    // we are starting from an acquire and it has a fake dependency
    //
    // need to check for
    //
    //   LoadX[mo_acquire]
    //   {  |1   }
    //   {DecodeN}
    //      |Parms
    //   MemBarAcquire*
    //
    // where * tags node we were passed
    // and |k means input k
    if (x->is_DecodeNarrowPtr()) {
      x = x->in(1);
    }

    // the membar is redundant iff it is fed by an acquiring load
    return (x->is_Load() && x->as_Load()->is_acquire());
  }

  // other option for unnecessary membar is that it is a trailing node
  // belonging to a CAS

  MemBarNode *leading = trailing_to_leading(barrier->as_MemBar());

  return leading != NULL;
}
2626 
2627 bool needs_acquiring_load(const Node *n)
2628 {
2629   assert(n->is_Load(), "expecting a load");
2630   if (UseBarriersForVolatile) {
2631     // we use a normal load and a dmb
2632     return false;
2633   }
2634 
2635   LoadNode *ld = n->as_Load();
2636 
2637   if (!ld->is_acquire()) {
2638     return false;
2639   }
2640 
2641   // check if this load is feeding an acquire membar
2642   //
2643   //   LoadX[mo_acquire]
2644   //   {  |1   }
2645   //   {DecodeN}
2646   //      |Parms
2647   //   MemBarAcquire*
2648   //
2649   // where * tags node we were passed
2650   // and |k means input k
2651 
2652   Node *start = ld;
2653   Node *mbacq = NULL;
2654 
2655   // if we hit a DecodeNarrowPtr we reset the start node and restart
2656   // the search through the outputs
2657  restart:
2658 
2659   for (DUIterator_Fast imax, i = start->fast_outs(imax); i < imax; i++) {
2660     Node *x = start->fast_out(i);
2661     if (x->is_MemBar() && x->Opcode() == Op_MemBarAcquire) {
2662       mbacq = x;
2663     } else if (!mbacq &&
2664                (x->is_DecodeNarrowPtr() ||
2665                 (x->is_Mach() && x->Opcode() == Op_DecodeN))) {
2666       start = x;
2667       goto restart;
2668     }
2669   }
2670 
2671   if (mbacq) {
2672     return true;
2673   }
2674 
2675   return false;
2676 }
2677 
bool unnecessary_release(const Node *n)
{
  assert((n->is_MemBar() &&
          n->Opcode() == Op_MemBarRelease),
         "expecting a release membar");

  if (UseBarriersForVolatile) {
    // we need to plant a dmb
    return false;
  }

  // if there is a dependent CPUOrder barrier then use that as the
  // leading

  MemBarNode *barrier = n->as_MemBar();
  // check for an intervening cpuorder membar
  MemBarNode *b = child_membar(barrier);
  if (b && b->Opcode() == Op_MemBarCPUOrder) {
    // ok, so start the check from the dependent cpuorder barrier
    barrier = b;
  }

  // must start with a normal feed
  MemBarNode *child_barrier = leading_to_normal(barrier);

  if (!child_barrier) {
    // not part of a recognised volatile put graph
    return false;
  }

  if (!is_card_mark_membar(child_barrier)) {
    // this is the trailing membar and we are done
    return true;
  }

  // must be sure this card mark feeds a trailing membar
  MemBarNode *trailing = card_mark_to_trailing(child_barrier);
  return (trailing != NULL);
}
2716 
2717 bool unnecessary_volatile(const Node *n)
2718 {
2719   // assert n->is_MemBar();
2720   if (UseBarriersForVolatile) {
2721     // we need to plant a dmb
2722     return false;
2723   }
2724 
2725   MemBarNode *mbvol = n->as_MemBar();
2726 
2727   // first we check if this is part of a card mark. if so then we have
2728   // to generate a StoreLoad barrier
2729 
2730   if (is_card_mark_membar(mbvol)) {
2731       return false;
2732   }
2733 
2734   // ok, if it's not a card mark then we still need to check if it is
2735   // a trailing membar of a volatile put graph.
2736 
2737   return (trailing_to_leading(mbvol) != NULL);
2738 }
2739 
2740 // predicates controlling emit of str<x>/stlr<x> and associated dmbs
2741 
// returns true when the store can be emitted as a releasing store
// (i.e. it is a release store sitting in a recognised volatile put
// subgraph fed by a leading membar)
bool needs_releasing_store(const Node *n)
{
  // assert n->is_Store();
  if (UseBarriersForVolatile) {
    // we use a normal store and dmb combination
    return false;
  }

  StoreNode *st = n->as_Store();

  // the store must be marked as releasing
  if (!st->is_release()) {
    return false;
  }

  // the store must be fed by a membar

  Node *x = st->lookup(StoreNode::Memory);

  if (! x || !x->is_Proj()) {
    return false;
  }

  ProjNode *proj = x->as_Proj();

  x = proj->lookup(0);

  if (!x || !x->is_MemBar()) {
    return false;
  }

  MemBarNode *barrier = x->as_MemBar();

  // if the barrier is a release membar or a cpuorder membar fed by a
  // release membar then we need to check whether that forms part of a
  // volatile put graph.

  // reject invalid candidates
  if (!leading_membar(barrier)) {
    return false;
  }

  // does this lead a normal subgraph?
  MemBarNode *mbvol = leading_to_normal(barrier);

  if (!mbvol) {
    return false;
  }

  // all done unless this is a card mark
  if (!is_card_mark_membar(mbvol)) {
    return true;
  }

  // we found a card mark -- just make sure we have a trailing barrier

  return (card_mark_to_trailing(mbvol) != NULL);
}
2800 
2801 // predicate controlling translation of CAS
2802 //
2803 // returns true if CAS needs to use an acquiring load otherwise false
2804 
bool needs_acquiring_load_exclusive(const Node *n)
{
  assert(is_CAS(n->Opcode()), "expecting a compare and swap");
  if (UseBarriersForVolatile) {
    return false;
  }

  // CAS nodes only ought to turn up in inlined unsafe CAS operations
#ifdef ASSERT
  // in debug builds, verify the expected graph shape around the CAS;
  // the answer is unconditionally true either way
  LoadStoreNode *st = n->as_LoadStore();

  // the store must be fed by a membar

  Node *x = st->lookup(StoreNode::Memory);

  assert (x && x->is_Proj(), "CAS not fed by memory proj!");

  ProjNode *proj = x->as_Proj();

  x = proj->lookup(0);

  assert (x && x->is_MemBar(), "CAS not fed by membar!");

  MemBarNode *barrier = x->as_MemBar();

  // the barrier must be a cpuorder membar fed by a release membar

  guarantee(barrier->Opcode() == Op_MemBarCPUOrder,
            "CAS not fed by cpuorder membar!");

  MemBarNode *b = parent_membar(barrier);
  assert ((b != NULL && b->Opcode() == Op_MemBarRelease),
          "CAS not fed by cpuorder+release membar pair!");

  // does this lead a normal subgraph?
  MemBarNode *mbar = leading_to_normal(barrier);

  guarantee(mbar != NULL, "CAS not embedded in normal graph!");

  // if this is a card mark membar check we have a trailing acquire

  if (is_card_mark_membar(mbar)) {
    mbar = card_mark_to_trailing(mbar);
  }

  guarantee(mbar != NULL, "card mark membar for CAS not embedded in normal graph!");

  guarantee(mbar->Opcode() == Op_MemBarAcquire, "trailing membar should be an acquire");
#endif // ASSERT
  // so we can just return true here
  return true;
}
2857 
2858 // predicate controlling translation of StoreCM
2859 //
2860 // returns true if a StoreStore must precede the card write otherwise
2861 // false
2862 
2863 bool unnecessary_storestore(const Node *storecm)
2864 {
2865   assert(storecm->Opcode()  == Op_StoreCM, "expecting a StoreCM");
2866 
2867   // we need to generate a dmb ishst between an object put and the
2868   // associated card mark when we are using CMS without conditional
2869   // card marking
2870 
2871   if (UseConcMarkSweepGC && !UseCondCardMark) {
2872     return false;
2873   }
2874 
2875   // a storestore is unnecesary in all other cases
2876 
2877   return true;
2878 }
2879 
2880 
2881 #define __ _masm.
2882 
2883 // advance declarations for helper functions to convert register
2884 // indices to register objects
2885 
2886 // the ad file has to provide implementations of certain methods
2887 // expected by the generic code
2888 //
2889 // REQUIRED FUNCTIONALITY
2890 
2891 //=============================================================================
2892 
2893 // !!!!! Special hack to get all types of calls to specify the byte offset
2894 //       from the start of the call to the point where the return address
2895 //       will point.
2896 
2897 int MachCallStaticJavaNode::ret_addr_offset()
2898 {
2899   // call should be a simple bl
2900   int off = 4;
2901   return off;
2902 }
2903 
2904 int MachCallDynamicJavaNode::ret_addr_offset()
2905 {
2906   return 16; // movz, movk, movk, bl
2907 }
2908 
2909 int MachCallRuntimeNode::ret_addr_offset() {
2910   // for generated stubs the call will be
2911   //   far_call(addr)
2912   // for real runtime callouts it will be six instructions
2913   // see aarch64_enc_java_to_runtime
2914   //   adr(rscratch2, retaddr)
2915   //   lea(rscratch1, RuntimeAddress(addr)
2916   //   stp(zr, rscratch2, Address(__ pre(sp, -2 * wordSize)))
2917   //   blrt rscratch1
2918   CodeBlob *cb = CodeCache::find_blob(_entry_point);
2919   if (cb) {
2920     return MacroAssembler::far_branch_size();
2921   } else {
2922     return 6 * NativeInstruction::instruction_size;
2923   }
2924 }
2925 
2926 // Indicate if the safepoint node needs the polling page as an input
2927 
2928 // the shared code plants the oop data at the start of the generated
// code for the safepoint node and that needs to be at the load
2930 // instruction itself. so we cannot plant a mov of the safepoint poll
2931 // address followed by a load. setting this to true means the mov is
2932 // scheduled as a prior instruction. that's better for scheduling
2933 // anyway.
2934 
// always true: the poll address mov must be scheduled as a separate,
// prior instruction (see the explanation above)
bool SafePointNode::needs_polling_address_input()
{
  return true;
}
2939 
2940 //=============================================================================
2941 
2942 #ifndef PRODUCT
// debug listing for a breakpoint node
void MachBreakpointNode::format(PhaseRegAlloc *ra_, outputStream *st) const {
  st->print("BREAKPOINT");
}
2946 #endif
2947 
// a breakpoint is emitted as a single brk instruction
void MachBreakpointNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
  MacroAssembler _masm(&cbuf);
  __ brk(0);
}

// defer to the generic size computation
uint MachBreakpointNode::size(PhaseRegAlloc *ra_) const {
  return MachNode::size(ra_);
}
2956 
2957 //=============================================================================
2958 
2959 #ifndef PRODUCT
  // debug listing for a nop padding node
  void MachNopNode::format(PhaseRegAlloc*, outputStream* st) const {
    st->print("nop \t# %d bytes pad for loops and calls", _count);
  }
2963 #endif
2964 
  // emit _count nop instructions as alignment padding
  void MachNopNode::emit(CodeBuffer &cbuf, PhaseRegAlloc*) const {
    MacroAssembler _masm(&cbuf);
    for (int i = 0; i < _count; i++) {
      __ nop();
    }
  }

  // size is exact: one 4-byte instruction per requested nop
  uint MachNopNode::size(PhaseRegAlloc*) const {
    return _count * NativeInstruction::instruction_size;
  }
2975 
2976 //=============================================================================
// MachConstantBaseNode writes no registers (constants are reached via
// absolute addressing, see emit/size below)
const RegMask& MachConstantBaseNode::_out_RegMask = RegMask::Empty;

int Compile::ConstantTable::calculate_table_base_offset() const {
  return 0;  // absolute addressing, no offset
}

bool MachConstantBaseNode::requires_postalloc_expand() const { return false; }
void MachConstantBaseNode::postalloc_expand(GrowableArray <Node *> *nodes, PhaseRegAlloc *ra_) {
  // never reached: requires_postalloc_expand() returns false
  ShouldNotReachHere();
}
2987 
void MachConstantBaseNode::emit(CodeBuffer& cbuf, PhaseRegAlloc* ra_) const {
  // Empty encoding
}

// zero-size node: no instructions are emitted (see emit above)
uint MachConstantBaseNode::size(PhaseRegAlloc* ra_) const {
  return 0;
}
2995 
2996 #ifndef PRODUCT
// debug listing for the (empty) constant base node
void MachConstantBaseNode::format(PhaseRegAlloc* ra_, outputStream* st) const {
  st->print("-- \t// MachConstantBaseNode (empty encoding)");
}
3000 #endif
3001 
3002 #ifndef PRODUCT
// debug listing of the prolog; mirrors the two frame-build strategies
// used by MacroAssembler::build_frame (small frames use an immediate
// sp adjustment, large ones go via rscratch1)
void MachPrologNode::format(PhaseRegAlloc *ra_, outputStream *st) const {
  Compile* C = ra_->C;

  int framesize = C->frame_slots() << LogBytesPerInt;

  if (C->need_stack_bang(framesize))
    st->print("# stack bang size=%d\n\t", framesize);

  if (framesize < ((1 << 9) + 2 * wordSize)) {
    // frame fits in the immediate field of sub
    st->print("sub  sp, sp, #%d\n\t", framesize);
    st->print("stp  rfp, lr, [sp, #%d]", framesize - 2 * wordSize);
    if (PreserveFramePointer) st->print("\n\tadd  rfp, sp, #%d", framesize - 2 * wordSize);
  } else {
    // large frame: push the link area then drop sp via a scratch reg
    st->print("stp  lr, rfp, [sp, #%d]!\n\t", -(2 * wordSize));
    if (PreserveFramePointer) st->print("mov  rfp, sp\n\t");
    st->print("mov  rscratch1, #%d\n\t", framesize - 2 * wordSize);
    st->print("sub  sp, sp, rscratch1");
  }
}
3022 #endif
3023 
// emit the method prolog: patchable nop, optional stack bang, frame
// build, simulator/verification hooks, and constant table base setup
void MachPrologNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
  Compile* C = ra_->C;
  MacroAssembler _masm(&cbuf);

  // n.b. frame size includes space for return pc and rfp
  const long framesize = C->frame_size_in_bytes();
  assert(framesize%(2*wordSize) == 0, "must preserve 2*wordSize alignment");

  // insert a nop at the start of the prolog so we can patch in a
  // branch if we need to invalidate the method later
  __ nop();

  int bangsize = C->bang_size_in_bytes();
  if (C->need_stack_bang(bangsize) && UseStackBanging)
    __ generate_stack_overflow_check(bangsize);

  __ build_frame(framesize);

  if (NotifySimulator) {
    __ notify(Assembler::method_entry);
  }

  if (VerifyStackAtCalls) {
    Unimplemented();
  }

  C->set_frame_complete(cbuf.insts_size());

  if (C->has_mach_constant_base_node()) {
    // NOTE: We set the table base offset here because users might be
    // emitted before MachConstantBaseNode.
    Compile::ConstantTable& constant_table = C->constant_table();
    constant_table.set_table_base_offset(constant_table.calculate_table_base_offset());
  }
}
3059 
uint MachPrologNode::size(PhaseRegAlloc* ra_) const
{
  return MachNode::size(ra_); // too many variables; just compute it
                              // the hard way
}

// the prolog contains no relocatable values
int MachPrologNode::reloc() const
{
  return 0;
}
3070 
3071 //=============================================================================
3072 
3073 #ifndef PRODUCT
// debug listing of the epilog; mirrors the frame-teardown strategies
// used in emit() below plus the return-poll load
void MachEpilogNode::format(PhaseRegAlloc *ra_, outputStream *st) const {
  Compile* C = ra_->C;
  int framesize = C->frame_slots() << LogBytesPerInt;

  st->print("# pop frame %d\n\t",framesize);

  if (framesize == 0) {
    st->print("ldp  lr, rfp, [sp],#%d\n\t", (2 * wordSize));
  } else if (framesize < ((1 << 9) + 2 * wordSize)) {
    // small frame: restore the link area then bump sp by an immediate
    st->print("ldp  lr, rfp, [sp,#%d]\n\t", framesize - 2 * wordSize);
    st->print("add  sp, sp, #%d\n\t", framesize);
  } else {
    // large frame: bump sp via a scratch register
    st->print("mov  rscratch1, #%d\n\t", framesize - 2 * wordSize);
    st->print("add  sp, sp, rscratch1\n\t");
    st->print("ldp  lr, rfp, [sp],#%d\n\t", (2 * wordSize));
  }

  if (do_polling() && C->is_method_compilation()) {
    st->print("# touch polling page\n\t");
    st->print("mov  rscratch1, #0x%lx\n\t", p2i(os::get_polling_page()));
    st->print("ldr zr, [rscratch1]");
  }
}
3097 #endif
3098 
// emit the method epilog: frame removal, simulator/reserved-stack
// hooks, and the safepoint return poll
void MachEpilogNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
  Compile* C = ra_->C;
  MacroAssembler _masm(&cbuf);
  int framesize = C->frame_slots() << LogBytesPerInt;

  __ remove_frame(framesize);

  if (NotifySimulator) {
    __ notify(Assembler::method_reentry);
  }

  if (StackReservedPages > 0 && C->has_reserved_stack_access()) {
    __ reserved_stack_check();
  }

  if (do_polling() && C->is_method_compilation()) {
    // safepoint poll on return
    __ read_polling_page(rscratch1, os::get_polling_page(), relocInfo::poll_return_type);
  }
}
3118 
uint MachEpilogNode::size(PhaseRegAlloc *ra_) const {
  // Variable size. Determine dynamically.
  return MachNode::size(ra_);
}

int MachEpilogNode::reloc() const {
  // Return number of relocatable values contained in this instruction.
  return 1; // 1 for polling page.
}

// use the default pipeline class for scheduling
const Pipeline * MachEpilogNode::pipeline() const {
  return MachNode::pipeline_class();
}

// This method seems to be obsolete. It is declared in machnode.hpp
// and defined in all *.ad files, but it is never called. Should we
// get rid of it?
int MachEpilogNode::safepoint_offset() const {
  assert(do_polling(), "no return for this epilog node");
  return 4;
}
3140 
3141 //=============================================================================
3142 
3143 // Figure out which register class each belongs in: rc_int, rc_float or
3144 // rc_stack.
enum RC { rc_bad, rc_int, rc_float, rc_stack };

// map an allocator register name to its register class (int, float or
// stack slot) so that implementation() below can pick the right copy
static enum RC rc_class(OptoReg::Name reg) {

  if (reg == OptoReg::Bad) {
    return rc_bad;
  }

  // we have 30 int registers * 2 halves
  // (rscratch1 and rscratch2 are omitted)

  if (reg < 60) {
    return rc_int;
  }

  // we have 32 float register * 2 halves
  if (reg < 60 + 128) {
    return rc_float;
  }

  // Between float regs & stack is the flags regs.
  assert(OptoReg::is_stack(reg), "blow up if spilling flags");

  return rc_stack;
}
3170 
3171 uint MachSpillCopyNode::implementation(CodeBuffer *cbuf, PhaseRegAlloc *ra_, bool do_size, outputStream *st) const {
3172   Compile* C = ra_->C;
3173 
3174   // Get registers to move.
3175   OptoReg::Name src_hi = ra_->get_reg_second(in(1));
3176   OptoReg::Name src_lo = ra_->get_reg_first(in(1));
3177   OptoReg::Name dst_hi = ra_->get_reg_second(this);
3178   OptoReg::Name dst_lo = ra_->get_reg_first(this);
3179 
3180   enum RC src_hi_rc = rc_class(src_hi);
3181   enum RC src_lo_rc = rc_class(src_lo);
3182   enum RC dst_hi_rc = rc_class(dst_hi);
3183   enum RC dst_lo_rc = rc_class(dst_lo);
3184 
3185   assert(src_lo != OptoReg::Bad && dst_lo != OptoReg::Bad, "must move at least 1 register");
3186 
3187   if (src_hi != OptoReg::Bad) {
3188     assert((src_lo&1)==0 && src_lo+1==src_hi &&
3189            (dst_lo&1)==0 && dst_lo+1==dst_hi,
3190            "expected aligned-adjacent pairs");
3191   }
3192 
3193   if (src_lo == dst_lo && src_hi == dst_hi) {
3194     return 0;            // Self copy, no move.
3195   }
3196 
3197   bool is64 = (src_lo & 1) == 0 && src_lo + 1 == src_hi &&
3198               (dst_lo & 1) == 0 && dst_lo + 1 == dst_hi;
3199   int src_offset = ra_->reg2offset(src_lo);
3200   int dst_offset = ra_->reg2offset(dst_lo);
3201 
3202   if (bottom_type()->isa_vect() != NULL) {
3203     uint ireg = ideal_reg();
3204     assert(ireg == Op_VecD || ireg == Op_VecX, "must be 64 bit or 128 bit vector");
3205     if (cbuf) {
3206       MacroAssembler _masm(cbuf);
3207       assert((src_lo_rc != rc_int && dst_lo_rc != rc_int), "sanity");
3208       if (src_lo_rc == rc_stack && dst_lo_rc == rc_stack) {
3209         // stack->stack
3210         assert((src_offset & 7) == 0 && (dst_offset & 7) == 0, "unaligned stack offset");
3211         if (ireg == Op_VecD) {
3212           __ unspill(rscratch1, true, src_offset);
3213           __ spill(rscratch1, true, dst_offset);
3214         } else {
3215           __ spill_copy128(src_offset, dst_offset);
3216         }
3217       } else if (src_lo_rc == rc_float && dst_lo_rc == rc_float) {
3218         __ mov(as_FloatRegister(Matcher::_regEncode[dst_lo]),
3219                ireg == Op_VecD ? __ T8B : __ T16B,
3220                as_FloatRegister(Matcher::_regEncode[src_lo]));
3221       } else if (src_lo_rc == rc_float && dst_lo_rc == rc_stack) {
3222         __ spill(as_FloatRegister(Matcher::_regEncode[src_lo]),
3223                        ireg == Op_VecD ? __ D : __ Q,
3224                        ra_->reg2offset(dst_lo));
3225       } else if (src_lo_rc == rc_stack && dst_lo_rc == rc_float) {
3226         __ unspill(as_FloatRegister(Matcher::_regEncode[dst_lo]),
3227                        ireg == Op_VecD ? __ D : __ Q,
3228                        ra_->reg2offset(src_lo));
3229       } else {
3230         ShouldNotReachHere();
3231       }
3232     }
3233   } else if (cbuf) {
3234     MacroAssembler _masm(cbuf);
3235     switch (src_lo_rc) {
3236     case rc_int:
3237       if (dst_lo_rc == rc_int) {  // gpr --> gpr copy
3238         if (is64) {
3239             __ mov(as_Register(Matcher::_regEncode[dst_lo]),
3240                    as_Register(Matcher::_regEncode[src_lo]));
3241         } else {
3242             MacroAssembler _masm(cbuf);
3243             __ movw(as_Register(Matcher::_regEncode[dst_lo]),
3244                     as_Register(Matcher::_regEncode[src_lo]));
3245         }
3246       } else if (dst_lo_rc == rc_float) { // gpr --> fpr copy
3247         if (is64) {
3248             __ fmovd(as_FloatRegister(Matcher::_regEncode[dst_lo]),
3249                      as_Register(Matcher::_regEncode[src_lo]));
3250         } else {
3251             __ fmovs(as_FloatRegister(Matcher::_regEncode[dst_lo]),
3252                      as_Register(Matcher::_regEncode[src_lo]));
3253         }
3254       } else {                    // gpr --> stack spill
3255         assert(dst_lo_rc == rc_stack, "spill to bad register class");
3256         __ spill(as_Register(Matcher::_regEncode[src_lo]), is64, dst_offset);
3257       }
3258       break;
3259     case rc_float:
3260       if (dst_lo_rc == rc_int) {  // fpr --> gpr copy
3261         if (is64) {
3262             __ fmovd(as_Register(Matcher::_regEncode[dst_lo]),
3263                      as_FloatRegister(Matcher::_regEncode[src_lo]));
3264         } else {
3265             __ fmovs(as_Register(Matcher::_regEncode[dst_lo]),
3266                      as_FloatRegister(Matcher::_regEncode[src_lo]));
3267         }
3268       } else if (dst_lo_rc == rc_float) { // fpr --> fpr copy
3269           if (cbuf) {
3270             __ fmovd(as_FloatRegister(Matcher::_regEncode[dst_lo]),
3271                      as_FloatRegister(Matcher::_regEncode[src_lo]));
3272         } else {
3273             __ fmovs(as_FloatRegister(Matcher::_regEncode[dst_lo]),
3274                      as_FloatRegister(Matcher::_regEncode[src_lo]));
3275         }
3276       } else {                    // fpr --> stack spill
3277         assert(dst_lo_rc == rc_stack, "spill to bad register class");
3278         __ spill(as_FloatRegister(Matcher::_regEncode[src_lo]),
3279                  is64 ? __ D : __ S, dst_offset);
3280       }
3281       break;
3282     case rc_stack:
3283       if (dst_lo_rc == rc_int) {  // stack --> gpr load
3284         __ unspill(as_Register(Matcher::_regEncode[dst_lo]), is64, src_offset);
3285       } else if (dst_lo_rc == rc_float) { // stack --> fpr load
3286         __ unspill(as_FloatRegister(Matcher::_regEncode[dst_lo]),
3287                    is64 ? __ D : __ S, src_offset);
3288       } else {                    // stack --> stack copy
3289         assert(dst_lo_rc == rc_stack, "spill to bad register class");
3290         __ unspill(rscratch1, is64, src_offset);
3291         __ spill(rscratch1, is64, dst_offset);
3292       }
3293       break;
3294     default:
3295       assert(false, "bad rc_class for spill");
3296       ShouldNotReachHere();
3297     }
3298   }
3299 
3300   if (st) {
3301     st->print("spill ");
3302     if (src_lo_rc == rc_stack) {
3303       st->print("[sp, #%d] -> ", ra_->reg2offset(src_lo));
3304     } else {
3305       st->print("%s -> ", Matcher::regName[src_lo]);
3306     }
3307     if (dst_lo_rc == rc_stack) {
3308       st->print("[sp, #%d]", ra_->reg2offset(dst_lo));
3309     } else {
3310       st->print("%s", Matcher::regName[dst_lo]);
3311     }
3312     if (bottom_type()->isa_vect() != NULL) {
3313       st->print("\t# vector spill size = %d", ideal_reg()==Op_VecD ? 64:128);
3314     } else {
3315       st->print("\t# spill size = %d", is64 ? 64:32);
3316     }
3317   }
3318 
3319   return 0;
3320 
3321 }
3322 
3323 #ifndef PRODUCT
// debug listing: delegate to implementation() with no code buffer
void MachSpillCopyNode::format(PhaseRegAlloc *ra_, outputStream *st) const {
  if (!ra_)
    st->print("N%d = SpillCopy(N%d)", _idx, in(1)->_idx);
  else
    implementation(NULL, ra_, false, st);
}
#endif

void MachSpillCopyNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
  implementation(&cbuf, ra_, false, NULL);
}

uint MachSpillCopyNode::size(PhaseRegAlloc *ra_) const {
  return MachNode::size(ra_);
}
3339 
3340 //=============================================================================
3341 
3342 #ifndef PRODUCT
3343 void BoxLockNode::format(PhaseRegAlloc *ra_, outputStream *st) const {
3344   int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
3345   int reg = ra_->get_reg_first(this);
3346   st->print("add %s, rsp, #%d]\t# box lock",
3347             Matcher::regName[reg], offset);
3348 }
3349 #endif
3350 
// materialize the address of the in-frame lock box into the node's
// destination register
void BoxLockNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
  MacroAssembler _masm(&cbuf);

  int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
  int reg    = ra_->get_encode(this);

  if (Assembler::operand_valid_for_add_sub_immediate(offset)) {
    __ add(as_Register(reg), sp, offset);
  } else {
    // offsets too large for an add immediate are not expected here
    ShouldNotReachHere();
  }
}

uint BoxLockNode::size(PhaseRegAlloc *ra_) const {
  // BoxLockNode is not a MachNode, so we can't just call MachNode::size(ra_).
  return 4;
}
3368 
3369 //=============================================================================
3370 
3371 #ifndef PRODUCT
3372 void MachUEPNode::format(PhaseRegAlloc* ra_, outputStream* st) const
3373 {
3374   st->print_cr("# MachUEPNode");
3375   if (UseCompressedClassPointers) {
3376     st->print_cr("\tldrw rscratch1, j_rarg0 + oopDesc::klass_offset_in_bytes()]\t# compressed klass");
3377     if (Universe::narrow_klass_shift() != 0) {
3378       st->print_cr("\tdecode_klass_not_null rscratch1, rscratch1");
3379     }
3380   } else {
3381    st->print_cr("\tldr rscratch1, j_rarg0 + oopDesc::klass_offset_in_bytes()]\t# compressed klass");
3382   }
3383   st->print_cr("\tcmp r0, rscratch1\t # Inline cache check");
3384   st->print_cr("\tbne, SharedRuntime::_ic_miss_stub");
3385 }
3386 #endif
3387 
void MachUEPNode::emit(CodeBuffer& cbuf, PhaseRegAlloc* ra_) const
{
  // This is the unverified entry point.
  MacroAssembler _masm(&cbuf);

  // compare the receiver's klass against the expected inline cache
  // value and jump to the miss stub on mismatch
  __ cmp_klass(j_rarg0, rscratch2, rscratch1);
  Label skip;
  // TODO
  // can we avoid this skip and still use a reloc?
  __ br(Assembler::EQ, skip);
  __ far_jump(RuntimeAddress(SharedRuntime::get_ic_miss_stub()));
  __ bind(skip);
}

// variable size (far_jump length depends on code cache layout):
// defer to the generic size computation
uint MachUEPNode::size(PhaseRegAlloc* ra_) const
{
  return MachNode::size(ra_);
}
3406 
3407 // REQUIRED EMIT CODE
3408 
3409 //=============================================================================
3410 
3411 // Emit exception handler code.
// Emit exception handler code.
// Returns the offset of the handler within the stub section, or 0 on
// code cache exhaustion.
int HandlerImpl::emit_exception_handler(CodeBuffer& cbuf)
{
  // mov rscratch1 #exception_blob_entry_point
  // br rscratch1
  // Note that the code buffer's insts_mark is always relative to insts.
  // That's why we must use the macroassembler to generate a handler.
  MacroAssembler _masm(&cbuf);
  address base = __ start_a_stub(size_exception_handler());
  if (base == NULL) {
    ciEnv::current()->record_failure("CodeCache is full");
    return 0;  // CodeBuffer::expand failed
  }
  int offset = __ offset();
  __ far_jump(RuntimeAddress(OptoRuntime::exception_blob()->entry_point()));
  assert(__ offset() - offset <= (int) size_exception_handler(), "overflow");
  __ end_a_stub();
  return offset;
}
3430 
3431 // Emit deopt handler code.
// Emit deopt handler code.
// Returns the offset of the handler within the stub section, or 0 on
// code cache exhaustion.
int HandlerImpl::emit_deopt_handler(CodeBuffer& cbuf)
{
  // Note that the code buffer's insts_mark is always relative to insts.
  // That's why we must use the macroassembler to generate a handler.
  MacroAssembler _masm(&cbuf);
  address base = __ start_a_stub(size_deopt_handler());
  if (base == NULL) {
    ciEnv::current()->record_failure("CodeCache is full");
    return 0;  // CodeBuffer::expand failed
  }
  int offset = __ offset();

  // materialize the return address in lr before jumping to the
  // deopt blob's unpack entry
  __ adr(lr, __ pc());
  __ far_jump(RuntimeAddress(SharedRuntime::deopt_blob()->unpack()));

  assert(__ offset() - offset <= (int) size_deopt_handler(), "overflow");
  __ end_a_stub();
  return offset;
}
3451 
3452 // REQUIRED MATCHER CODE
3453 
3454 //=============================================================================
3455 
3456 const bool Matcher::match_rule_supported(int opcode) {
3457 
3458   switch (opcode) {
3459   default:
3460     break;
3461   }
3462 
3463   if (!has_match_rule(opcode)) {
3464     return false;
3465   }
3466 
3467   return true;  // Per default match rules are supported.
3468 }
3469 
3470 const bool Matcher::match_rule_supported_vector(int opcode, int vlen) {
3471 
3472   // TODO
3473   // identify extra cases that we might want to provide match rules for
3474   // e.g. Op_ vector nodes and other intrinsics while guarding with vlen
3475   bool ret_value = match_rule_supported(opcode);
3476   // Add rules here.
3477 
3478   return ret_value;  // Per default match rules are supported.
3479 }
3480 
// This port does not claim predicated (masked) vector support.
const bool Matcher::has_predicated_vectors(void) {
  return false;
}

// Use the shared default register-pressure threshold for floats.
const int Matcher::float_pressure(int default_pressure_threshold) {
  return default_pressure_threshold;
}

// Not used on this port; reaching it is a hard error.
int Matcher::regnum_to_fpu_offset(int regnum)
{
  Unimplemented();
  return 0;
}
3494 
3495 // Is this branch offset short enough that a short branch can be used?
3496 //
3497 // NOTE: If the platform does not provide any short branch variants, then
3498 //       this method should return false for offset 0.
3499 bool Matcher::is_short_branch_offset(int rule, int br_size, int offset) {
3500   // The passed offset is relative to address of the branch.
3501 
3502   return (-32768 <= offset && offset < 32768);
3503 }
3504 
const bool Matcher::isSimpleConstant64(jlong value) {
  // Will one (StoreL ConL) be cheaper than two (StoreI ConI)?.
  // Probably always true, even if a temp register is required.
  return true;
}

// true just means we have fast l2f conversion
const bool Matcher::convL2FSupported(void) {
  return true;
}

// Vector width in bytes.
const int Matcher::vector_width_in_bytes(BasicType bt) {
  // Cap at 16 bytes (one 128-bit vector register), or less if
  // MaxVectorSize requests a narrower limit.
  int size = MIN2(16,(int)MaxVectorSize);
  // Minimum 2 values in vector
  if (size < 2*type2aelembytes(bt)) size = 0;
  // But never < 4
  if (size < 4) size = 0;
  return size;
}
3525 
// Limits on vector size (number of elements) loaded into vector.
const int Matcher::max_vector_size(const BasicType bt) {
  return vector_width_in_bytes(bt)/type2aelembytes(bt);
}
// Smallest vector length (in elements) the auto-vectorizer may use.
const int Matcher::min_vector_size(const BasicType bt) {
//  For the moment limit the vector size to 8 bytes
    int size = 8 / type2aelembytes(bt);
    if (size < 2) size = 2;
    return size;
}

// Vector ideal reg.
const uint Matcher::vector_ideal_reg(int len) {
  switch(len) {
    case  8: return Op_VecD;   // 64-bit vector (D register)
    case 16: return Op_VecX;   // 128-bit vector (Q register)
  }
  ShouldNotReachHere();
  return 0;
}

// Vector shift counts are always materialized in a 128-bit register.
const uint Matcher::vector_shift_count_ideal_reg(int size) {
  return Op_VecX;
}
3550 
// AES support not yet implemented
const bool Matcher::pass_original_key_for_aes() {
  return false;
}

// Misaligned vector loads/stores are allowed unless the AlignVector
// flag demands aligned accesses.  (Comment previously said "x86";
// this is the AArch64 file.)
const bool Matcher::misaligned_vectors_ok() {
  return !AlignVector; // can be changed by flag
}
3560 
// false => size gets scaled to BytesPerLong, ok.
const bool Matcher::init_array_count_is_in_bytes = false;

// Use conditional move (CMOVL)
const int Matcher::long_cmove_cost() {
  // long cmoves are no more expensive than int cmoves
  return 0;
}

const int Matcher::float_cmove_cost() {
  // float cmoves are no more expensive than int cmoves
  return 0;
}

// Does the CPU require late expand (see block.cpp for description of late expand)?
const bool Matcher::require_postalloc_expand = false;

// Do we need to mask the count passed to shift instructions or does
// the cpu only look at the lower 5/6 bits anyway?
// false: the hardware already ignores the high bits of the count.
const bool Matcher::need_masked_shift_count = false;
3581 
// This affects two different things:
//  - how Decode nodes are matched
//  - how ImplicitNullCheck opportunities are recognized
// If true, the matcher will try to remove all Decodes and match them
// (as operands) into nodes. NullChecks are not prepared to deal with
// Decodes by final_graph_reshaping().
// If false, final_graph_reshaping() forces the decode behind the Cmp
// for a NullCheck. The matcher matches the Decode node into a register.
// Implicit_null_check optimization moves the Decode along with the
// memory operation back up before the NullCheck.
bool Matcher::narrow_oop_use_complex_address() {
  // Only fold the decode into the address expression when there is no
  // shift, i.e. the narrow oop is directly usable as an offset.
  return Universe::narrow_oop_shift() == 0;
}

bool Matcher::narrow_klass_use_complex_address() {
// TODO
// decide whether we need to set this to true
  return false;
}

bool Matcher::const_oop_prefer_decode() {
  // Prefer ConN+DecodeN over ConP in simple compressed oops mode.
  return Universe::narrow_oop_base() == NULL;
}

bool Matcher::const_klass_prefer_decode() {
  // Prefer ConNKlass+DecodeNKlass over ConP in simple compressed klass mode.
  return Universe::narrow_klass_base() == NULL;
}
3611 
// Is it better to copy float constants, or load them directly from
// memory?  Intel can load a float constant from a direct address,
// requiring no extra registers.  Most RISCs will have to materialize
// an address into a register first, so they would do better to copy
// the constant from stack.
const bool Matcher::rematerialize_float_constants = false;

// If CPU can load and store mis-aligned doubles directly then no
// fixup is needed.  Else we split the double into 2 integer pieces
// and move it piece-by-piece.  Only happens when passing doubles into
// C code as the Java calling convention forces doubles to be aligned.
const bool Matcher::misaligned_doubles_ok = true;

// Not expected to be called on AArch64 (the old "No-op on amd64"
// comment was inherited from the x86 AD file); reaching it is an error.
void Matcher::pd_implicit_null_fixup(MachNode *node, uint idx) {
  Unimplemented();
}

// Advertise here if the CPU requires explicit rounding operations to
// implement the UseStrictFP mode.
const bool Matcher::strict_fp_requires_explicit_rounding = false;

// Are floats converted to double when stored to stack during
// deoptimization?
bool Matcher::float_in_double() { return false; }

// Do ints take an entire long register or just half?
// The relevant question is how the int is callee-saved:
// the whole long is written but de-opt'ing will have to extract
// the relevant 32 bits.
const bool Matcher::int_in_long = true;
3643 
// Return whether or not this register is ever used as an argument.
// This function is used on startup to build the trampoline stubs in
// generateOptoStub.  Registers not mentioned will be killed by the VM
// call in the trampoline, and arguments in those registers not be
// available to the callee.
bool Matcher::can_be_java_arg(int reg)
{
  // Java arguments live in r0-r7 (integer) and v0-v7 (FP); each is
  // represented by its low (_num) and high (_H_num) halves.
  return
    reg ==  R0_num || reg == R0_H_num ||
    reg ==  R1_num || reg == R1_H_num ||
    reg ==  R2_num || reg == R2_H_num ||
    reg ==  R3_num || reg == R3_H_num ||
    reg ==  R4_num || reg == R4_H_num ||
    reg ==  R5_num || reg == R5_H_num ||
    reg ==  R6_num || reg == R6_H_num ||
    reg ==  R7_num || reg == R7_H_num ||
    reg ==  V0_num || reg == V0_H_num ||
    reg ==  V1_num || reg == V1_H_num ||
    reg ==  V2_num || reg == V2_H_num ||
    reg ==  V3_num || reg == V3_H_num ||
    reg ==  V4_num || reg == V4_H_num ||
    reg ==  V5_num || reg == V5_H_num ||
    reg ==  V6_num || reg == V6_H_num ||
    reg ==  V7_num || reg == V7_H_num;
}
3669 
// Any Java argument register may also be used for spilling.
bool Matcher::is_spillable_arg(int reg)
{
  return can_be_java_arg(reg);
}

// No platform-specific code sequence for long division by a constant;
// use the generic transformation instead.
bool Matcher::use_asm_for_ldiv_by_con(jlong divisor) {
  return false;
}
3678 
// Register for DIVI projection of divmodI; never requested on this
// port (ShouldNotReachHere), as are the other div/mod projections below.
RegMask Matcher::divI_proj_mask() {
  ShouldNotReachHere();
  return RegMask();
}

// Register for MODI projection of divmodI.
RegMask Matcher::modI_proj_mask() {
  ShouldNotReachHere();
  return RegMask();
}

// Register for DIVL projection of divmodL.
RegMask Matcher::divL_proj_mask() {
  ShouldNotReachHere();
  return RegMask();
}

// Register for MODL projection of divmodL.
RegMask Matcher::modL_proj_mask() {
  ShouldNotReachHere();
  return RegMask();
}

// SP is preserved in the frame pointer register across method handle
// invokes, so report FP's mask here.
const RegMask Matcher::method_handle_invoke_SP_save_mask() {
  return FP_REG_mask();
}
3705 
3706 bool size_fits_all_mem_uses(AddPNode* addp, int shift) {
3707   for (DUIterator_Fast imax, i = addp->fast_outs(imax); i < imax; i++) {
3708     Node* u = addp->fast_out(i);
3709     if (u->is_Mem()) {
3710       int opsize = u->as_Mem()->memory_size();
3711       assert(opsize > 0, "unexpected memory operand size");
3712       if (u->as_Mem()->memory_size() != (1<<shift)) {
3713         return false;
3714       }
3715     }
3716   }
3717   return true;
3718 }
3719 
// ConvI2L nodes feeding addresses need not carry a precise type.
const bool Matcher::convi2l_type_required = false;

// Should the Matcher clone shifts on addressing modes, expecting them
// to be subsumed into complex addressing expressions or compute them
// into registers?
bool Matcher::clone_address_expressions(AddPNode* m, Matcher::MStack& mstack, VectorSet& address_visited) {
  // Simple base + constant-offset addresses are handled generically.
  if (clone_base_plus_offset_address(m, mstack, address_visited)) {
    return true;
  }

  Node *off = m->in(AddPNode::Offset);
  // base + (index << scale): fold the shift into the addressing mode
  // when every memory use matches the implied operand size.
  if (off->Opcode() == Op_LShiftL && off->in(2)->is_Con() &&
      size_fits_all_mem_uses(m, off->in(2)->get_int()) &&
      // Are there other uses besides address expressions?
      !is_visited(off)) {
    address_visited.set(off->_idx); // Flag as address_visited
    mstack.push(off->in(2), Visit);
    Node *conv = off->in(1);
    // Also subsume a ConvI2L feeding the shift (sign-extended index).
    if (conv->Opcode() == Op_ConvI2L &&
        // Are there other uses besides address expressions?
        !is_visited(conv)) {
      address_visited.set(conv->_idx); // Flag as address_visited
      mstack.push(conv->in(1), Pre_Visit);
    } else {
      mstack.push(conv, Pre_Visit);
    }
    address_visited.test_set(m->_idx); // Flag as address_visited
    mstack.push(m->in(AddPNode::Address), Pre_Visit);
    mstack.push(m->in(AddPNode::Base), Pre_Visit);
    return true;
  } else if (off->Opcode() == Op_ConvI2L &&
             // Are there other uses besides address expressions?
             !is_visited(off)) {
    // base + sign-extended 32-bit index (no shift).
    address_visited.test_set(m->_idx); // Flag as address_visited
    address_visited.set(off->_idx); // Flag as address_visited
    mstack.push(off->in(1), Pre_Visit);
    mstack.push(m->in(AddPNode::Address), Pre_Visit);
    mstack.push(m->in(AddPNode::Base), Pre_Visit);
    return true;
  }
  return false;
}
3762 
// No platform-specific AddP reshaping on AArch64; address folding is
// handled during matching instead.
void Compile::reshape_address(AddPNode* addp) {
}
3765 
3766 // helper for encoding java_to_runtime calls on sim
3767 //
3768 // this is needed to compute the extra arguments required when
3769 // planting a call to the simulator blrt instruction. the TypeFunc
3770 // can be queried to identify the counts for integral, and floating
3771 // arguments and the return type
3772 
// Compute, for a call with the given TypeFunc, the number of integral
// (gpcnt) and floating (fpcnt) arguments and the return-type category
// (rtype) needed by the simulator's blrt instruction.
static void getCallInfo(const TypeFunc *tf, int &gpcnt, int &fpcnt, int &rtype)
{
  int gps = 0;
  int fps = 0;
  const TypeTuple *domain = tf->domain();
  int max = domain->cnt();
  for (int i = TypeFunc::Parms; i < max; i++) {
    const Type *t = domain->field_at(i);
    switch(t->basic_type()) {
    case T_FLOAT:
    case T_DOUBLE:
      fps++;
      // NOTE(review): no break here, so FP arguments are also counted
      // in gps below -- confirm this fall-through is intentional for
      // the simulator calling convention.
    default:
      gps++;
    }
  }
  gpcnt = gps;
  fpcnt = fps;
  // Classify the return type; note the default case sits between
  // T_VOID and the FP cases, which is legal (case order is irrelevant).
  BasicType rt = tf->return_type();
  switch (rt) {
  case T_VOID:
    rtype = MacroAssembler::ret_type_void;
    break;
  default:
    rtype = MacroAssembler::ret_type_integral;
    break;
  case T_FLOAT:
    rtype = MacroAssembler::ret_type_float;
    break;
  case T_DOUBLE:
    rtype = MacroAssembler::ret_type_double;
    break;
  }
}
3807 
// Emit a volatile load/store (INSN) of REG using register-direct
// addressing only: index/scale/displacement modes are rejected.
// Deliberately NOT wrapped in do { } while (0): the macro introduces
// a MacroAssembler named _masm into the enclosing scope so that
// subsequent "__" statements in the same enc_class can reuse it.
#define MOV_VOLATILE(REG, BASE, INDEX, SCALE, DISP, SCRATCH, INSN)      \
  MacroAssembler _masm(&cbuf);                                          \
  {                                                                     \
    guarantee(INDEX == -1, "mode not permitted for volatile");          \
    guarantee(DISP == 0, "mode not permitted for volatile");            \
    guarantee(SCALE == 0, "mode not permitted for volatile");           \
    __ INSN(REG, as_Register(BASE));                                    \
  }
3816 
// Member-function-pointer types for the MacroAssembler load/store
// emitters used by the loadStore() helpers below.
typedef void (MacroAssembler::* mem_insn)(Register Rt, const Address &adr);
typedef void (MacroAssembler::* mem_float_insn)(FloatRegister Rt, const Address &adr);
typedef void (MacroAssembler::* mem_vector_insn)(FloatRegister Rt,
                                  MacroAssembler::SIMD_RegVariant T, const Address &adr);

  // Used for all non-volatile memory accesses.  The use of
  // $mem->opcode() to discover whether this pattern uses sign-extended
  // offsets is something of a kludge.
  static void loadStore(MacroAssembler masm, mem_insn insn,
                         Register reg, int opcode,
                         Register base, int index, int size, int disp)
  {
    Address::extend scale;

    // Hooboy, this is fugly.  We need a way to communicate to the
    // encoder that the index needs to be sign extended, so we have to
    // enumerate all the cases.
    switch (opcode) {
    case INDINDEXSCALEDI2L:
    case INDINDEXSCALEDI2LN:
    case INDINDEXI2L:
    case INDINDEXI2LN:
      // 32-bit index: sign-extend (sxtw) while applying the scale.
      scale = Address::sxtw(size);
      break;
    default:
      scale = Address::lsl(size);
    }

    if (index == -1) {
      // No index register: base + displacement addressing.
      (masm.*insn)(reg, Address(base, disp));
    } else {
      assert(disp == 0, "unsupported address mode: disp = %d", disp);
      (masm.*insn)(reg, Address(base, as_Register(index), scale));
    }
  }
3852 
  // Float-register variant of loadStore(); same addressing-mode logic,
  // but only the scaled-index opcodes need sign extension here.
  static void loadStore(MacroAssembler masm, mem_float_insn insn,
                         FloatRegister reg, int opcode,
                         Register base, int index, int size, int disp)
  {
    Address::extend scale;

    switch (opcode) {
    case INDINDEXSCALEDI2L:
    case INDINDEXSCALEDI2LN:
      scale = Address::sxtw(size);
      break;
    default:
      scale = Address::lsl(size);
    }

     if (index == -1) {
      (masm.*insn)(reg, Address(base, disp));
    } else {
      assert(disp == 0, "unsupported address mode: disp = %d", disp);
      (masm.*insn)(reg, Address(base, as_Register(index), scale));
    }
  }
3875 
  // Vector-register variant of loadStore(); indices are always
  // zero-extended/shifted (lsl), never sign-extended.
  static void loadStore(MacroAssembler masm, mem_vector_insn insn,
                         FloatRegister reg, MacroAssembler::SIMD_RegVariant T,
                         int opcode, Register base, int index, int size, int disp)
  {
    if (index == -1) {
      (masm.*insn)(reg, T, Address(base, disp));
    } else {
      assert(disp == 0, "unsupported address mode");
      (masm.*insn)(reg, T, Address(base, as_Register(index), Address::lsl(size)));
    }
  }
3887 
3888 %}
3889 
3890 
3891 
3892 //----------ENCODING BLOCK-----------------------------------------------------
3893 // This block specifies the encoding classes used by the compiler to
3894 // output byte streams.  Encoding classes are parameterized macros
3895 // used by Machine Instruction Nodes in order to generate the bit
3896 // encoding of the instruction.  Operands specify their base encoding
3897 // interface with the interface keyword.  There are currently
3898 // supported four interfaces, REG_INTER, CONST_INTER, MEMORY_INTER, &
3899 // COND_INTER.  REG_INTER causes an operand to generate a function
3900 // which returns its register number when queried.  CONST_INTER causes
3901 // an operand to generate a function which returns the value of the
3902 // constant when queried.  MEMORY_INTER causes an operand to generate
3903 // four functions which return the Base Register, the Index Register,
3904 // the Scale Value, and the Offset Value of the operand when queried.
3905 // COND_INTER causes an operand to generate six functions which return
3906 // the encoding code (ie - encoding bits for the instruction)
3907 // associated with each basic boolean condition for a conditional
3908 // instruction.
3909 //
3910 // Instructions specify two basic values for encoding.  Again, a
3911 // function is available to check if the constant displacement is an
3912 // oop. They use the ins_encode keyword to specify their encoding
3913 // classes (which must be a sequence of enc_class names, and their
3914 // parameters, specified in the encoding block), and they use the
3915 // opcode keyword to specify, in order, their primary, secondary, and
3916 // tertiary opcode.  Only the opcode sections which a particular
3917 // instruction needs for encoding need to be specified.
3918 encode %{
3919   // Build emit functions for each basic byte or larger field in the
3920   // intel encoding scheme (opcode, rm, sib, immediate), and call them
3921   // from C++ code in the enc_class source block.  Emit functions will
3922   // live in the main source block for now.  In future, we can
3923   // generalize this by adding a syntax that specifies the sizes of
3924   // fields in an order, so that the adlc can build the emit functions
3925   // automagically
3926 
  // catch all for unimplemented encodings: stops the VM with a
  // diagnostic rather than emitting wrong code.
  enc_class enc_unimplemented %{
    MacroAssembler _masm(&cbuf);
    __ unimplemented("C2 catch all");
  %}
3932 
  // BEGIN Non-volatile memory access

  // Plain (non-acquiring) loads.  Each enc_class simply forwards to
  // loadStore(), which selects the addressing mode from the memory
  // operand's opcode/base/index/scale/disp.  Naming: ldrsb/ldrsh/ldrsw
  // sign-extend, ldrb/ldrh zero-extend; a trailing 'w' means the
  // destination is treated as 32 bits.

  enc_class aarch64_enc_ldrsbw(iRegI dst, memory mem) %{
    Register dst_reg = as_Register($dst$$reg);
    loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldrsbw, dst_reg, $mem->opcode(),
               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}

  enc_class aarch64_enc_ldrsb(iRegI dst, memory mem) %{
    Register dst_reg = as_Register($dst$$reg);
    loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldrsb, dst_reg, $mem->opcode(),
               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}

  enc_class aarch64_enc_ldrb(iRegI dst, memory mem) %{
    Register dst_reg = as_Register($dst$$reg);
    loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldrb, dst_reg, $mem->opcode(),
               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}

  // Same name as above; ADL distinguishes enc_classes by signature
  // (iRegL destination here).
  enc_class aarch64_enc_ldrb(iRegL dst, memory mem) %{
    Register dst_reg = as_Register($dst$$reg);
    loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldrb, dst_reg, $mem->opcode(),
               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}

  enc_class aarch64_enc_ldrshw(iRegI dst, memory mem) %{
    Register dst_reg = as_Register($dst$$reg);
    loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldrshw, dst_reg, $mem->opcode(),
               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}

  enc_class aarch64_enc_ldrsh(iRegI dst, memory mem) %{
    Register dst_reg = as_Register($dst$$reg);
    loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldrsh, dst_reg, $mem->opcode(),
               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}

  enc_class aarch64_enc_ldrh(iRegI dst, memory mem) %{
    Register dst_reg = as_Register($dst$$reg);
    loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldrh, dst_reg, $mem->opcode(),
               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}

  enc_class aarch64_enc_ldrh(iRegL dst, memory mem) %{
    Register dst_reg = as_Register($dst$$reg);
    loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldrh, dst_reg, $mem->opcode(),
               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}

  enc_class aarch64_enc_ldrw(iRegI dst, memory mem) %{
    Register dst_reg = as_Register($dst$$reg);
    loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldrw, dst_reg, $mem->opcode(),
               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}

  enc_class aarch64_enc_ldrw(iRegL dst, memory mem) %{
    Register dst_reg = as_Register($dst$$reg);
    loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldrw, dst_reg, $mem->opcode(),
               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}

  enc_class aarch64_enc_ldrsw(iRegL dst, memory mem) %{
    Register dst_reg = as_Register($dst$$reg);
    loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldrsw, dst_reg, $mem->opcode(),
               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}

  enc_class aarch64_enc_ldr(iRegL dst, memory mem) %{
    Register dst_reg = as_Register($dst$$reg);
    loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldr, dst_reg, $mem->opcode(),
               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}

  // Scalar FP loads (32-bit ldrs / 64-bit ldrd).

  enc_class aarch64_enc_ldrs(vRegF dst, memory mem) %{
    FloatRegister dst_reg = as_FloatRegister($dst$$reg);
    loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldrs, dst_reg, $mem->opcode(),
               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}

  enc_class aarch64_enc_ldrd(vRegD dst, memory mem) %{
    FloatRegister dst_reg = as_FloatRegister($dst$$reg);
    loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldrd, dst_reg, $mem->opcode(),
               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}

  // Vector loads: S = 32-bit, D = 64-bit, Q = 128-bit register variant.

  enc_class aarch64_enc_ldrvS(vecD dst, memory mem) %{
    FloatRegister dst_reg = as_FloatRegister($dst$$reg);
    loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldr, dst_reg, MacroAssembler::S,
       $mem->opcode(), as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}

  enc_class aarch64_enc_ldrvD(vecD dst, memory mem) %{
    FloatRegister dst_reg = as_FloatRegister($dst$$reg);
    loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldr, dst_reg, MacroAssembler::D,
       $mem->opcode(), as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}

  enc_class aarch64_enc_ldrvQ(vecX dst, memory mem) %{
    FloatRegister dst_reg = as_FloatRegister($dst$$reg);
    loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldr, dst_reg, MacroAssembler::Q,
       $mem->opcode(), as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}
4036 
  // Plain (non-releasing) stores.  The *0 variants store the zero
  // register (zr) instead of taking a source operand.

  enc_class aarch64_enc_strb(iRegI src, memory mem) %{
    Register src_reg = as_Register($src$$reg);
    loadStore(MacroAssembler(&cbuf), &MacroAssembler::strb, src_reg, $mem->opcode(),
               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}

  enc_class aarch64_enc_strb0(memory mem) %{
    MacroAssembler _masm(&cbuf);
    loadStore(_masm, &MacroAssembler::strb, zr, $mem->opcode(),
               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}

  // Zero-byte store preceded by a StoreStore barrier (ordered variant).
  enc_class aarch64_enc_strb0_ordered(memory mem) %{
    MacroAssembler _masm(&cbuf);
    __ membar(Assembler::StoreStore);
    loadStore(_masm, &MacroAssembler::strb, zr, $mem->opcode(),
               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}

  enc_class aarch64_enc_strh(iRegI src, memory mem) %{
    Register src_reg = as_Register($src$$reg);
    loadStore(MacroAssembler(&cbuf), &MacroAssembler::strh, src_reg, $mem->opcode(),
               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}

  enc_class aarch64_enc_strh0(memory mem) %{
    MacroAssembler _masm(&cbuf);
    loadStore(_masm, &MacroAssembler::strh, zr, $mem->opcode(),
               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}

  enc_class aarch64_enc_strw(iRegI src, memory mem) %{
    Register src_reg = as_Register($src$$reg);
    loadStore(MacroAssembler(&cbuf), &MacroAssembler::strw, src_reg, $mem->opcode(),
               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}

  enc_class aarch64_enc_strw0(memory mem) %{
    MacroAssembler _masm(&cbuf);
    loadStore(_masm, &MacroAssembler::strw, zr, $mem->opcode(),
               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}

  enc_class aarch64_enc_str(iRegL src, memory mem) %{
    Register src_reg = as_Register($src$$reg);
    // we sometimes get asked to store the stack pointer into the
    // current thread -- we cannot do that directly on AArch64
    if (src_reg == r31_sp) {
      MacroAssembler _masm(&cbuf);
      assert(as_Register($mem$$base) == rthread, "unexpected store for sp");
      // Copy sp through rscratch2 since str cannot encode sp as source.
      __ mov(rscratch2, sp);
      src_reg = rscratch2;
    }
    loadStore(MacroAssembler(&cbuf), &MacroAssembler::str, src_reg, $mem->opcode(),
               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}

  enc_class aarch64_enc_str0(memory mem) %{
    MacroAssembler _masm(&cbuf);
    loadStore(_masm, &MacroAssembler::str, zr, $mem->opcode(),
               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}

  // Scalar FP stores (32-bit strs / 64-bit strd).

  enc_class aarch64_enc_strs(vRegF src, memory mem) %{
    FloatRegister src_reg = as_FloatRegister($src$$reg);
    loadStore(MacroAssembler(&cbuf), &MacroAssembler::strs, src_reg, $mem->opcode(),
               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}

  enc_class aarch64_enc_strd(vRegD src, memory mem) %{
    FloatRegister src_reg = as_FloatRegister($src$$reg);
    loadStore(MacroAssembler(&cbuf), &MacroAssembler::strd, src_reg, $mem->opcode(),
               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}

  // Vector stores: S = 32-bit, D = 64-bit, Q = 128-bit register variant.

  enc_class aarch64_enc_strvS(vecD src, memory mem) %{
    FloatRegister src_reg = as_FloatRegister($src$$reg);
    loadStore(MacroAssembler(&cbuf), &MacroAssembler::str, src_reg, MacroAssembler::S,
       $mem->opcode(), as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}

  enc_class aarch64_enc_strvD(vecD src, memory mem) %{
    FloatRegister src_reg = as_FloatRegister($src$$reg);
    loadStore(MacroAssembler(&cbuf), &MacroAssembler::str, src_reg, MacroAssembler::D,
       $mem->opcode(), as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}

  enc_class aarch64_enc_strvQ(vecX src, memory mem) %{
    FloatRegister src_reg = as_FloatRegister($src$$reg);
    loadStore(MacroAssembler(&cbuf), &MacroAssembler::str, src_reg, MacroAssembler::Q,
       $mem->opcode(), as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}
4129 
4130   // END Non-volatile memory access
4131 
4132   // volatile loads and stores
4133 
4134   enc_class aarch64_enc_stlrb(iRegI src, memory mem) %{
4135     MOV_VOLATILE(as_Register($src$$reg), $mem$$base, $mem$$index, $mem$$scale, $mem$$disp,
4136                  rscratch1, stlrb);
4137   %}
4138 
4139   enc_class aarch64_enc_stlrh(iRegI src, memory mem) %{
4140     MOV_VOLATILE(as_Register($src$$reg), $mem$$base, $mem$$index, $mem$$scale, $mem$$disp,
4141                  rscratch1, stlrh);
4142   %}
4143 
4144   enc_class aarch64_enc_stlrw(iRegI src, memory mem) %{
4145     MOV_VOLATILE(as_Register($src$$reg), $mem$$base, $mem$$index, $mem$$scale, $mem$$disp,
4146                  rscratch1, stlrw);
4147   %}
4148 
4149 
4150   enc_class aarch64_enc_ldarsbw(iRegI dst, memory mem) %{
4151     Register dst_reg = as_Register($dst$$reg);
4152     MOV_VOLATILE(dst_reg, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp,
4153              rscratch1, ldarb);
4154     __ sxtbw(dst_reg, dst_reg);
4155   %}
4156 
4157   enc_class aarch64_enc_ldarsb(iRegL dst, memory mem) %{
4158     Register dst_reg = as_Register($dst$$reg);
4159     MOV_VOLATILE(dst_reg, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp,
4160              rscratch1, ldarb);
4161     __ sxtb(dst_reg, dst_reg);
4162   %}
4163 
  // Acquiring-load encodings.  Each emits a load-acquire (LDAR and its
  // sub-word variants) through the MOV_VOLATILE helper macro (defined
  // earlier in this file), which presumably resolves the base/index/
  // scale/disp addressing into a form LDAR accepts, using rscratch1 as
  // a scratch address register.  Sign-extending variants add an explicit
  // sxth/sxthw after the zero-extending ldarh.

  // load-acquire byte, zero-extended into a 32-bit register
  enc_class aarch64_enc_ldarbw(iRegI dst, memory mem) %{
    MOV_VOLATILE(as_Register($dst$$reg), $mem$$base, $mem$$index, $mem$$scale, $mem$$disp,
             rscratch1, ldarb);
  %}

  // load-acquire byte, zero-extended into a 64-bit register
  enc_class aarch64_enc_ldarb(iRegL dst, memory mem) %{
    MOV_VOLATILE(as_Register($dst$$reg), $mem$$base, $mem$$index, $mem$$scale, $mem$$disp,
             rscratch1, ldarb);
  %}

  // load-acquire halfword, then sign-extend to 32 bits
  enc_class aarch64_enc_ldarshw(iRegI dst, memory mem) %{
    Register dst_reg = as_Register($dst$$reg);
    MOV_VOLATILE(dst_reg, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp,
             rscratch1, ldarh);
    __ sxthw(dst_reg, dst_reg);
  %}

  // load-acquire halfword, then sign-extend to 64 bits
  enc_class aarch64_enc_ldarsh(iRegL dst, memory mem) %{
    Register dst_reg = as_Register($dst$$reg);
    MOV_VOLATILE(dst_reg, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp,
             rscratch1, ldarh);
    __ sxth(dst_reg, dst_reg);
  %}

  // load-acquire halfword, zero-extended into a 32-bit register
  enc_class aarch64_enc_ldarhw(iRegI dst, memory mem) %{
    MOV_VOLATILE(as_Register($dst$$reg), $mem$$base, $mem$$index, $mem$$scale, $mem$$disp,
             rscratch1, ldarh);
  %}

  // load-acquire halfword, zero-extended into a 64-bit register
  enc_class aarch64_enc_ldarh(iRegL dst, memory mem) %{
    MOV_VOLATILE(as_Register($dst$$reg), $mem$$base, $mem$$index, $mem$$scale, $mem$$disp,
             rscratch1, ldarh);
  %}

  // load-acquire 32-bit word
  enc_class aarch64_enc_ldarw(iRegI dst, memory mem) %{
    MOV_VOLATILE(as_Register($dst$$reg), $mem$$base, $mem$$index, $mem$$scale, $mem$$disp,
             rscratch1, ldarw);
  %}

  // load-acquire 32-bit word into a 64-bit register
  // NOTE(review): same enc_class name as above but with an iRegL operand;
  // this duplication matches upstream -- confirm ADLC disambiguates by
  // operand type before renaming.
  enc_class aarch64_enc_ldarw(iRegL dst, memory mem) %{
    MOV_VOLATILE(as_Register($dst$$reg), $mem$$base, $mem$$index, $mem$$scale, $mem$$disp,
             rscratch1, ldarw);
  %}

  // load-acquire 64-bit doubleword
  enc_class aarch64_enc_ldar(iRegL dst, memory mem) %{
    MOV_VOLATILE(as_Register($dst$$reg), $mem$$base, $mem$$index, $mem$$scale, $mem$$disp,
             rscratch1, ldar);
  %}

  // load-acquire float: load the bits via ldarw into rscratch1, then
  // move them into the FP register with fmovs
  enc_class aarch64_enc_fldars(vRegF dst, memory mem) %{
    MOV_VOLATILE(rscratch1, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp,
             rscratch1, ldarw);
    __ fmovs(as_FloatRegister($dst$$reg), rscratch1);
  %}

  // load-acquire double: load the bits via ldar into rscratch1, then
  // move them into the FP register with fmovd
  enc_class aarch64_enc_fldard(vRegD dst, memory mem) %{
    MOV_VOLATILE(rscratch1, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp,
             rscratch1, ldar);
    __ fmovd(as_FloatRegister($dst$$reg), rscratch1);
  %}
4224 
  // Releasing-store encodings (STLR), the store counterparts of the
  // load-acquire encodings above.

  // store-release 64-bit doubleword
  enc_class aarch64_enc_stlr(iRegL src, memory mem) %{
    Register src_reg = as_Register($src$$reg);
    // we sometimes get asked to store the stack pointer into the
    // current thread -- we cannot do that directly on AArch64
    if (src_reg == r31_sp) {
        MacroAssembler _masm(&cbuf);
      assert(as_Register($mem$$base) == rthread, "unexpected store for sp");
      // copy sp into rscratch2 and store that instead, since STLR
      // cannot encode the stack pointer as its data operand
      __ mov(rscratch2, sp);
      src_reg = rscratch2;
    }
    MOV_VOLATILE(src_reg, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp,
                 rscratch1, stlr);
  %}

  // store-release float: move the FP bits into rscratch2, then
  // store-release them with stlrw
  enc_class aarch64_enc_fstlrs(vRegF src, memory mem) %{
    {
      // inner scope so this _masm does not clash with the one
      // MOV_VOLATILE creates below
      MacroAssembler _masm(&cbuf);
      FloatRegister src_reg = as_FloatRegister($src$$reg);
      __ fmovs(rscratch2, src_reg);
    }
    MOV_VOLATILE(rscratch2, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp,
                 rscratch1, stlrw);
  %}

  // store-release double: move the FP bits into rscratch2, then
  // store-release them with stlr
  enc_class aarch64_enc_fstlrd(vRegD src, memory mem) %{
    {
      // inner scope so this _masm does not clash with the one
      // MOV_VOLATILE creates below
      MacroAssembler _masm(&cbuf);
      FloatRegister src_reg = as_FloatRegister($src$$reg);
      __ fmovd(rscratch2, src_reg);
    }
    MOV_VOLATILE(rscratch2, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp,
                 rscratch1, stlr);
  %}
4258 
4259   // synchronized read/update encodings
4260 
  // Load-acquire-exclusive of a 64-bit value.  LDAXR only takes a bare
  // base register, so any index/displacement is first folded into
  // rscratch1 with lea.
  enc_class aarch64_enc_ldaxr(iRegL dst, memory mem) %{
    MacroAssembler _masm(&cbuf);
    Register dst_reg = as_Register($dst$$reg);
    Register base = as_Register($mem$$base);
    int index = $mem$$index;
    int scale = $mem$$scale;
    int disp = $mem$$disp;
    if (index == -1) {
       if (disp != 0) {
        // base + disp: materialize the address first
        __ lea(rscratch1, Address(base, disp));
        __ ldaxr(dst_reg, rscratch1);
      } else {
        // TODO
        // should we ever get anything other than this case?
        __ ldaxr(dst_reg, base);
      }
    } else {
      Register index_reg = as_Register(index);
      if (disp == 0) {
        // base + (index << scale)
        __ lea(rscratch1, Address(base, index_reg, Address::lsl(scale)));
        __ ldaxr(dst_reg, rscratch1);
      } else {
        // base + disp + (index << scale), folded in two steps
        __ lea(rscratch1, Address(base, disp));
        __ lea(rscratch1, Address(rscratch1, index_reg, Address::lsl(scale)));
        __ ldaxr(dst_reg, rscratch1);
      }
    }
  %}
4289 
  // Store-release-exclusive of a 64-bit value.  Mirrors the ldaxr
  // encoding above: the effective address is folded into rscratch2,
  // the exclusive-store status lands in rscratch1, and the final cmpw
  // against zr sets flags so the matcher can branch on success
  // (stlxr writes 0 on success, 1 on failure).
  enc_class aarch64_enc_stlxr(iRegLNoSp src, memory mem) %{
    MacroAssembler _masm(&cbuf);
    Register src_reg = as_Register($src$$reg);
    Register base = as_Register($mem$$base);
    int index = $mem$$index;
    int scale = $mem$$scale;
    int disp = $mem$$disp;
    if (index == -1) {
       if (disp != 0) {
        // base + disp: materialize the address first
        __ lea(rscratch2, Address(base, disp));
        __ stlxr(rscratch1, src_reg, rscratch2);
      } else {
        // TODO
        // should we ever get anything other than this case?
        __ stlxr(rscratch1, src_reg, base);
      }
    } else {
      Register index_reg = as_Register(index);
      if (disp == 0) {
        // base + (index << scale)
        __ lea(rscratch2, Address(base, index_reg, Address::lsl(scale)));
        __ stlxr(rscratch1, src_reg, rscratch2);
      } else {
        // base + disp + (index << scale), folded in two steps
        __ lea(rscratch2, Address(base, disp));
        __ lea(rscratch2, Address(rscratch2, index_reg, Address::lsl(scale)));
        __ stlxr(rscratch1, src_reg, rscratch2);
      }
    }
    // expose the exclusive-store status (0 == success) in the flags
    __ cmpw(rscratch1, zr);
  %}
4319 
  // Compare-and-exchange encodings (no acquire on the load side; release
  // on success).  Each delegates to MacroAssembler::cmpxchg at a
  // different operand width.  All of them require a plain base-register
  // address (no index, no displacement), which the guarantee enforces.

  // 64-bit CAS
  enc_class aarch64_enc_cmpxchg(memory mem, iRegLNoSp oldval, iRegLNoSp newval) %{
    MacroAssembler _masm(&cbuf);
    guarantee($mem$$index == -1 && $mem$$disp == 0, "impossible encoding");
    __ cmpxchg($mem$$base$$Register, $oldval$$Register, $newval$$Register,
               Assembler::xword, /*acquire*/ false, /*release*/ true,
               /*weak*/ false, noreg);
  %}

  // 32-bit CAS
  enc_class aarch64_enc_cmpxchgw(memory mem, iRegINoSp oldval, iRegINoSp newval) %{
    MacroAssembler _masm(&cbuf);
    guarantee($mem$$index == -1 && $mem$$disp == 0, "impossible encoding");
    __ cmpxchg($mem$$base$$Register, $oldval$$Register, $newval$$Register,
               Assembler::word, /*acquire*/ false, /*release*/ true,
               /*weak*/ false, noreg);
  %}

  // 16-bit (short) CAS
  enc_class aarch64_enc_cmpxchgs(memory mem, iRegINoSp oldval, iRegINoSp newval) %{
    MacroAssembler _masm(&cbuf);
    guarantee($mem$$index == -1 && $mem$$disp == 0, "impossible encoding");
    __ cmpxchg($mem$$base$$Register, $oldval$$Register, $newval$$Register,
               Assembler::halfword, /*acquire*/ false, /*release*/ true,
               /*weak*/ false, noreg);
  %}

  // 8-bit (byte) CAS
  enc_class aarch64_enc_cmpxchgb(memory mem, iRegINoSp oldval, iRegINoSp newval) %{
    MacroAssembler _masm(&cbuf);
    guarantee($mem$$index == -1 && $mem$$disp == 0, "impossible encoding");
    __ cmpxchg($mem$$base$$Register, $oldval$$Register, $newval$$Register,
               Assembler::byte, /*acquire*/ false, /*release*/ true,
               /*weak*/ false, noreg);
  %}
4351 
4352 
4353   // The only difference between aarch64_enc_cmpxchg and
4354   // aarch64_enc_cmpxchg_acq is that we use load-acquire in the
4355   // CompareAndSwap sequence to serve as a barrier on acquiring a
4356   // lock.
  // The only difference between aarch64_enc_cmpxchg and
  // aarch64_enc_cmpxchg_acq is that we use load-acquire in the
  // CompareAndSwap sequence to serve as a barrier on acquiring a
  // lock.

  // 64-bit CAS with acquire semantics
  enc_class aarch64_enc_cmpxchg_acq(memory mem, iRegLNoSp oldval, iRegLNoSp newval) %{
    MacroAssembler _masm(&cbuf);
    guarantee($mem$$index == -1 && $mem$$disp == 0, "impossible encoding");
    __ cmpxchg($mem$$base$$Register, $oldval$$Register, $newval$$Register,
               Assembler::xword, /*acquire*/ true, /*release*/ true,
               /*weak*/ false, noreg);
  %}

  // 32-bit CAS with acquire semantics
  enc_class aarch64_enc_cmpxchgw_acq(memory mem, iRegINoSp oldval, iRegINoSp newval) %{
    MacroAssembler _masm(&cbuf);
    guarantee($mem$$index == -1 && $mem$$disp == 0, "impossible encoding");
    __ cmpxchg($mem$$base$$Register, $oldval$$Register, $newval$$Register,
               Assembler::word, /*acquire*/ true, /*release*/ true,
               /*weak*/ false, noreg);
  %}
4372 
4373 
4374   // auxiliary used for CompareAndSwapX to set result register
  // auxiliary used for CompareAndSwapX to set result register:
  // res = 1 if the flags say EQ (CAS succeeded), else 0
  enc_class aarch64_enc_cset_eq(iRegINoSp res) %{
    MacroAssembler _masm(&cbuf);
    Register res_reg = as_Register($res$$reg);
    __ cset(res_reg, Assembler::EQ);
  %}
4380 
4381   // prefetch encodings
4382 
  // Prefetch-for-store encoding: emits PRFM PSTL1KEEP (prefetch for
  // store, L1, temporal) for the given memory operand, folding any
  // base+disp+index combination into an addressable form.
  enc_class aarch64_enc_prefetchw(memory mem) %{
    MacroAssembler _masm(&cbuf);
    Register base = as_Register($mem$$base);
    int index = $mem$$index;
    int scale = $mem$$scale;
    int disp = $mem$$disp;
    if (index == -1) {
      // base + disp is directly encodable by prfm
      __ prfm(Address(base, disp), PSTL1KEEP);
    } else {
      Register index_reg = as_Register(index);
      if (disp == 0) {
        __ prfm(Address(base, index_reg, Address::lsl(scale)), PSTL1KEEP);
      } else {
        // fold base + disp first, then add the scaled index
        __ lea(rscratch1, Address(base, disp));
        __ prfm(Address(rscratch1, index_reg, Address::lsl(scale)), PSTL1KEEP);
      }
    }
  %}
4401 
  /// mov encodings
4403 
  // Load a 32-bit immediate into a register; zero gets the cheaper
  // move-from-zr form.
  enc_class aarch64_enc_movw_imm(iRegI dst, immI src) %{
    MacroAssembler _masm(&cbuf);
    u_int32_t con = (u_int32_t)$src$$constant;
    Register dst_reg = as_Register($dst$$reg);
    if (con == 0) {
      __ movw(dst_reg, zr);
    } else {
      __ movw(dst_reg, con);
    }
  %}

  // Load a 64-bit immediate into a register; zero gets the cheaper
  // move-from-zr form.
  enc_class aarch64_enc_mov_imm(iRegL dst, immL src) %{
    MacroAssembler _masm(&cbuf);
    Register dst_reg = as_Register($dst$$reg);
    u_int64_t con = (u_int64_t)$src$$constant;
    if (con == 0) {
      __ mov(dst_reg, zr);
    } else {
      __ mov(dst_reg, con);
    }
  %}
4425 
  // Load a pointer constant, dispatching on its relocation type:
  // oops and metadata go through the relocatable movoop/mov_metadata
  // paths; plain addresses use mov for small values or adrp+add for
  // page-relative materialization.  NULL and 1 are handled by the
  // dedicated mov_p0/mov_p1 encodings below, so they are unreachable
  // here.
  enc_class aarch64_enc_mov_p(iRegP dst, immP src) %{
    MacroAssembler _masm(&cbuf);
    Register dst_reg = as_Register($dst$$reg);
    address con = (address)$src$$constant;
    if (con == NULL || con == (address)1) {
      ShouldNotReachHere();
    } else {
      relocInfo::relocType rtype = $src->constant_reloc();
      if (rtype == relocInfo::oop_type) {
        __ movoop(dst_reg, (jobject)con, /*immediate*/true);
      } else if (rtype == relocInfo::metadata_type) {
        __ mov_metadata(dst_reg, (Metadata*)con);
      } else {
        assert(rtype == relocInfo::none, "unexpected reloc type");
        if (con < (address)(uintptr_t)os::vm_page_size()) {
          // small constant: a plain mov suffices
          __ mov(dst_reg, con);
        } else {
          // materialize page base with adrp, then add the in-page offset
          unsigned long offset;
          __ adrp(dst_reg, con, offset);
          __ add(dst_reg, dst_reg, offset);
        }
      }
    }
  %}
4450 
  // pointer constant == NULL
  enc_class aarch64_enc_mov_p0(iRegP dst, immP0 src) %{
    MacroAssembler _masm(&cbuf);
    Register dst_reg = as_Register($dst$$reg);
    __ mov(dst_reg, zr);
  %}

  // pointer constant == 1 (used as a marker value elsewhere in HotSpot)
  enc_class aarch64_enc_mov_p1(iRegP dst, immP_1 src) %{
    MacroAssembler _masm(&cbuf);
    Register dst_reg = as_Register($dst$$reg);
    __ mov(dst_reg, (u_int64_t)1);
  %}

  // polling page address: page-aligned, so adrp with a poll_type
  // relocation must yield a zero in-page offset
  enc_class aarch64_enc_mov_poll_page(iRegP dst, immPollPage src) %{
    MacroAssembler _masm(&cbuf);
    address page = (address)$src$$constant;
    Register dst_reg = as_Register($dst$$reg);
    unsigned long off;
    __ adrp(dst_reg, Address(page, relocInfo::poll_type), off);
    assert(off == 0, "assumed offset == 0");
  %}

  // card-table byte map base, loaded via the dedicated macro-assembler
  // helper
  enc_class aarch64_enc_mov_byte_map_base(iRegP dst, immByteMapBase src) %{
    MacroAssembler _masm(&cbuf);
    __ load_byte_map_base($dst$$Register);
  %}
4476 
  // Load a narrow (compressed) oop constant; must carry an oop
  // relocation.  NULL is handled by mov_n0 below.
  enc_class aarch64_enc_mov_n(iRegN dst, immN src) %{
    MacroAssembler _masm(&cbuf);
    Register dst_reg = as_Register($dst$$reg);
    address con = (address)$src$$constant;
    if (con == NULL) {
      ShouldNotReachHere();
    } else {
      relocInfo::relocType rtype = $src->constant_reloc();
      assert(rtype == relocInfo::oop_type, "unexpected reloc type");
      __ set_narrow_oop(dst_reg, (jobject)con);
    }
  %}

  // narrow oop constant == NULL
  enc_class aarch64_enc_mov_n0(iRegN dst, immN0 src) %{
    MacroAssembler _masm(&cbuf);
    Register dst_reg = as_Register($dst$$reg);
    __ mov(dst_reg, zr);
  %}

  // Load a narrow (compressed) klass constant; must carry a metadata
  // relocation.
  enc_class aarch64_enc_mov_nk(iRegN dst, immNKlass src) %{
    MacroAssembler _masm(&cbuf);
    Register dst_reg = as_Register($dst$$reg);
    address con = (address)$src$$constant;
    if (con == NULL) {
      ShouldNotReachHere();
    } else {
      relocInfo::relocType rtype = $src->constant_reloc();
      assert(rtype == relocInfo::metadata_type, "unexpected reloc type");
      __ set_narrow_klass(dst_reg, (Klass *)con);
    }
  %}
4508 
4509   // arithmetic encodings
4510 
  // arithmetic encodings

  // 32-bit add/subtract of an immediate.  One encoding serves both
  // instructions: the instruct's $primary selector flips the sign, and
  // a negative constant is then emitted as the opposite operation so
  // the immediate is always non-negative (as the A64 add/sub immediate
  // field requires).
  enc_class aarch64_enc_addsubw_imm(iRegI dst, iRegI src1, immIAddSub src2) %{
    MacroAssembler _masm(&cbuf);
    Register dst_reg = as_Register($dst$$reg);
    Register src_reg = as_Register($src1$$reg);
    int32_t con = (int32_t)$src2$$constant;
    // add has primary == 0, subtract has primary == 1
    if ($primary) { con = -con; }
    if (con < 0) {
      __ subw(dst_reg, src_reg, -con);
    } else {
      __ addw(dst_reg, src_reg, con);
    }
  %}

  // 64-bit variant of the above
  enc_class aarch64_enc_addsub_imm(iRegL dst, iRegL src1, immLAddSub src2) %{
    MacroAssembler _masm(&cbuf);
    Register dst_reg = as_Register($dst$$reg);
    Register src_reg = as_Register($src1$$reg);
    int32_t con = (int32_t)$src2$$constant;
    // add has primary == 0, subtract has primary == 1
    if ($primary) { con = -con; }
    if (con < 0) {
      __ sub(dst_reg, src_reg, -con);
    } else {
      __ add(dst_reg, src_reg, con);
    }
  %}
4538 
4539   enc_class aarch64_enc_divw(iRegI dst, iRegI src1, iRegI src2) %{
4540     MacroAssembler _masm(&cbuf);
4541    Register dst_reg = as_Register($dst$$reg);
4542    Register src1_reg = as_Register($src1$$reg);
4543    Register src2_reg = as_Register($src2$$reg);
4544     __ corrected_idivl(dst_reg, src1_reg, src2_reg, false, rscratch1);
4545   %}
4546 
4547   enc_class aarch64_enc_div(iRegI dst, iRegI src1, iRegI src2) %{
4548     MacroAssembler _masm(&cbuf);
4549    Register dst_reg = as_Register($dst$$reg);
4550    Register src1_reg = as_Register($src1$$reg);
4551    Register src2_reg = as_Register($src2$$reg);
4552     __ corrected_idivq(dst_reg, src1_reg, src2_reg, false, rscratch1);
4553   %}
4554 
4555   enc_class aarch64_enc_modw(iRegI dst, iRegI src1, iRegI src2) %{
4556     MacroAssembler _masm(&cbuf);
4557    Register dst_reg = as_Register($dst$$reg);
4558    Register src1_reg = as_Register($src1$$reg);
4559    Register src2_reg = as_Register($src2$$reg);
4560     __ corrected_idivl(dst_reg, src1_reg, src2_reg, true, rscratch1);
4561   %}
4562 
4563   enc_class aarch64_enc_mod(iRegI dst, iRegI src1, iRegI src2) %{
4564     MacroAssembler _masm(&cbuf);
4565    Register dst_reg = as_Register($dst$$reg);
4566    Register src1_reg = as_Register($src1$$reg);
4567    Register src2_reg = as_Register($src2$$reg);
4568     __ corrected_idivq(dst_reg, src1_reg, src2_reg, true, rscratch1);
4569   %}
4570 
4571   // compare instruction encodings
4572 
  // compare instruction encodings

  // 32-bit register-register compare
  enc_class aarch64_enc_cmpw(iRegI src1, iRegI src2) %{
    MacroAssembler _masm(&cbuf);
    Register reg1 = as_Register($src1$$reg);
    Register reg2 = as_Register($src2$$reg);
    __ cmpw(reg1, reg2);
  %}

  // 32-bit compare against an add/sub-encodable immediate.  A negative
  // constant is compared via addsw with the negated value so that the
  // immediate field stays non-negative.
  enc_class aarch64_enc_cmpw_imm_addsub(iRegI src1, immIAddSub src2) %{
    MacroAssembler _masm(&cbuf);
    Register reg = as_Register($src1$$reg);
    int32_t val = $src2$$constant;
    if (val >= 0) {
      __ subsw(zr, reg, val);
    } else {
      __ addsw(zr, reg, -val);
    }
  %}

  // 32-bit compare against an arbitrary immediate: materialize it in
  // rscratch1 first
  enc_class aarch64_enc_cmpw_imm(iRegI src1, immI src2) %{
    MacroAssembler _masm(&cbuf);
    Register reg1 = as_Register($src1$$reg);
    u_int32_t val = (u_int32_t)$src2$$constant;
    __ movw(rscratch1, val);
    __ cmpw(reg1, rscratch1);
  %}

  // 64-bit register-register compare
  enc_class aarch64_enc_cmp(iRegL src1, iRegL src2) %{
    MacroAssembler _masm(&cbuf);
    Register reg1 = as_Register($src1$$reg);
    Register reg2 = as_Register($src2$$reg);
    __ cmp(reg1, reg2);
  %}

  // 64-bit compare against a 12-bit add/sub immediate.  Negative values
  // compare via adds with the negated value; Long.MIN_VALUE (the one
  // value equal to its own negation) is materialized via orr instead.
  enc_class aarch64_enc_cmp_imm_addsub(iRegL src1, immL12 src2) %{
    MacroAssembler _masm(&cbuf);
    Register reg = as_Register($src1$$reg);
    int64_t val = $src2$$constant;
    if (val >= 0) {
      __ subs(zr, reg, val);
    } else if (val != -val) {
      __ adds(zr, reg, -val);
    } else {
    // aargh, Long.MIN_VALUE is a special case
      __ orr(rscratch1, zr, (u_int64_t)val);
      __ subs(zr, reg, rscratch1);
    }
  %}

  // 64-bit compare against an arbitrary immediate: materialize it in
  // rscratch1 first
  enc_class aarch64_enc_cmp_imm(iRegL src1, immL src2) %{
    MacroAssembler _masm(&cbuf);
    Register reg1 = as_Register($src1$$reg);
    u_int64_t val = (u_int64_t)$src2$$constant;
    __ mov(rscratch1, val);
    __ cmp(reg1, rscratch1);
  %}

  // pointer compare (full-width)
  enc_class aarch64_enc_cmpp(iRegP src1, iRegP src2) %{
    MacroAssembler _masm(&cbuf);
    Register reg1 = as_Register($src1$$reg);
    Register reg2 = as_Register($src2$$reg);
    __ cmp(reg1, reg2);
  %}

  // narrow (compressed) oop compare (32-bit)
  enc_class aarch64_enc_cmpn(iRegN src1, iRegN src2) %{
    MacroAssembler _masm(&cbuf);
    Register reg1 = as_Register($src1$$reg);
    Register reg2 = as_Register($src2$$reg);
    __ cmpw(reg1, reg2);
  %}

  // pointer null-test
  enc_class aarch64_enc_testp(iRegP src) %{
    MacroAssembler _masm(&cbuf);
    Register reg = as_Register($src$$reg);
    __ cmp(reg, zr);
  %}

  // narrow oop null-test
  enc_class aarch64_enc_testn(iRegN src) %{
    MacroAssembler _masm(&cbuf);
    Register reg = as_Register($src$$reg);
    __ cmpw(reg, zr);
  %}
4654 
  // unconditional branch to a label
  enc_class aarch64_enc_b(label lbl) %{
    MacroAssembler _masm(&cbuf);
    Label *L = $lbl$$label;
    __ b(*L);
  %}

  // conditional branch; the condition code comes from the cmpOp operand
  enc_class aarch64_enc_br_con(cmpOp cmp, label lbl) %{
    MacroAssembler _masm(&cbuf);
    Label *L = $lbl$$label;
    __ br ((Assembler::Condition)$cmp$$cmpcode, *L);
  %}

  // unsigned-condition variant; identical emission, the cmpOpU operand
  // supplies the unsigned condition code
  enc_class aarch64_enc_br_conU(cmpOpU cmp, label lbl) %{
    MacroAssembler _masm(&cbuf);
    Label *L = $lbl$$label;
    __ br ((Assembler::Condition)$cmp$$cmpcode, *L);
  %}
4672 
  // Slow-path subtype check: walks the secondary-supers list via
  // check_klass_subtype_slow_path.  $primary selects a variant that
  // zeroes the result register on the hit path before falling through
  // to the miss label.
  enc_class aarch64_enc_partial_subtype_check(iRegP sub, iRegP super, iRegP temp, iRegP result)
  %{
     Register sub_reg = as_Register($sub$$reg);
     Register super_reg = as_Register($super$$reg);
     Register temp_reg = as_Register($temp$$reg);
     Register result_reg = as_Register($result$$reg);

     Label miss;
     MacroAssembler _masm(&cbuf);
     __ check_klass_subtype_slow_path(sub_reg, super_reg, temp_reg, result_reg,
                                     NULL, &miss,
                                     /*set_cond_codes:*/ true);
     if ($primary) {
       // primary variant: report success by zeroing the result register
       __ mov(result_reg, zr);
     }
     __ bind(miss);
  %}
4690 
  // Static Java call.  Runtime-wrapper calls (no _method) get a plain
  // runtime-call relocation; real Java calls get an (optimized-)virtual
  // or static relocation plus a to-interpreter stub.  On any emission
  // failure the compile is bailed out with "CodeCache is full".
  enc_class aarch64_enc_java_static_call(method meth) %{
    MacroAssembler _masm(&cbuf);

    address addr = (address)$meth$$method;
    address call;
    if (!_method) {
      // A call to a runtime wrapper, e.g. new, new_typeArray_Java, uncommon_trap.
      call = __ trampoline_call(Address(addr, relocInfo::runtime_call_type), &cbuf);
    } else {
      int method_index = resolved_method_index(cbuf);
      RelocationHolder rspec = _optimized_virtual ? opt_virtual_call_Relocation::spec(method_index)
                                                  : static_call_Relocation::spec(method_index);
      call = __ trampoline_call(Address(addr, rspec), &cbuf);

      // Emit stub for static call
      address stub = CompiledStaticCall::emit_to_interp_stub(cbuf);
      if (stub == NULL) {
        ciEnv::current()->record_failure("CodeCache is full");
        return;
      }
    }
    if (call == NULL) {
      ciEnv::current()->record_failure("CodeCache is full");
      return;
    }
  %}

  // Dynamic (inline-cache) Java call
  enc_class aarch64_enc_java_dynamic_call(method meth) %{
    MacroAssembler _masm(&cbuf);
    int method_index = resolved_method_index(cbuf);
    address call = __ ic_call((address)$meth$$method, method_index);
    if (call == NULL) {
      ciEnv::current()->record_failure("CodeCache is full");
      return;
    }
  %}

  // Post-call epilog; stack-depth verification is unimplemented on
  // AArch64 and traps if VerifyStackAtCalls is set.
  enc_class aarch64_enc_call_epilog() %{
    MacroAssembler _masm(&cbuf);
    if (VerifyStackAtCalls) {
      // Check that stack depth is unchanged: find majik cookie on stack
      __ call_Unimplemented();
    }
  %}
4735 
  // Call from compiled Java code into the runtime.
  enc_class aarch64_enc_java_to_runtime(method meth) %{
    MacroAssembler _masm(&cbuf);

    // some calls to generated routines (arraycopy code) are scheduled
    // by C2 as runtime calls. if so we can call them using a br (they
    // will be in a reachable segment) otherwise we have to use a blrt
    // which loads the absolute address into a register.
    address entry = (address)$meth$$method;
    CodeBlob *cb = CodeCache::find_blob(entry);
    if (cb) {
      // target lives in the code cache: reachable via trampoline call
      address call = __ trampoline_call(Address(entry, relocInfo::runtime_call_type));
      if (call == NULL) {
        ciEnv::current()->record_failure("CodeCache is full");
        return;
      }
    } else {
      // target is outside the code cache: use blrt with the call-info
      // derived from this call's type function
      int gpcnt;
      int fpcnt;
      int rtype;
      getCallInfo(tf(), gpcnt, fpcnt, rtype);
      Label retaddr;
      __ adr(rscratch2, retaddr);
      __ lea(rscratch1, RuntimeAddress(entry));
      // Leave a breadcrumb for JavaFrameAnchor::capture_last_Java_pc()
      __ stp(zr, rscratch2, Address(__ pre(sp, -2 * wordSize)));
      __ blrt(rscratch1, gpcnt, fpcnt, rtype);
      __ bind(retaddr);
      // pop the breadcrumb slots pushed above
      __ add(sp, sp, 2 * wordSize);
    }
  %}
4766 
  // rethrow an exception via the rethrow stub
  enc_class aarch64_enc_rethrow() %{
    MacroAssembler _masm(&cbuf);
    __ far_jump(RuntimeAddress(OptoRuntime::rethrow_stub()));
  %}

  // normal method return
  enc_class aarch64_enc_ret() %{
    MacroAssembler _masm(&cbuf);
    __ ret(lr);
  %}

  // tail call: indirect jump to the target register
  enc_class aarch64_enc_tail_call(iRegP jump_target) %{
    MacroAssembler _masm(&cbuf);
    Register target_reg = as_Register($jump_target$$reg);
    __ br(target_reg);
  %}

  // tail jump used for exception forwarding
  enc_class aarch64_enc_tail_jmp(iRegP jump_target) %{
    MacroAssembler _masm(&cbuf);
    Register target_reg = as_Register($jump_target$$reg);
    // exception oop should be in r0
    // ret addr has been popped into lr
    // callee expects it in r3
    __ mov(r3, lr);
    __ br(target_reg);
  %}
4792 
  // Fast-path monitor enter (biased / stack / inflated locking).
  // On exit, flag == EQ indicates the lock was acquired, flag == NE
  // sends the caller to the slow path.
  enc_class aarch64_enc_fast_lock(iRegP object, iRegP box, iRegP tmp, iRegP tmp2) %{
    MacroAssembler _masm(&cbuf);
    Register oop = as_Register($object$$reg);
    Register box = as_Register($box$$reg);
    Register disp_hdr = as_Register($tmp$$reg);
    Register tmp = as_Register($tmp2$$reg);
    Label cont;
    Label object_has_monitor;
    Label cas_failed;

    assert_different_registers(oop, box, tmp, disp_hdr);

    // Load markOop from object into displaced_header.
    __ ldr(disp_hdr, Address(oop, oopDesc::mark_offset_in_bytes()));

    // Always do locking in runtime.
    if (EmitSync & 0x01) {
      // oop is non-null here, so this compare forces NE (slow path)
      __ cmp(oop, zr);
      return;
    }

    if (UseBiasedLocking && !UseOptoBiasInlining) {
      __ biased_locking_enter(box, oop, disp_hdr, tmp, true, cont);
    }

    // Handle existing monitor
    if ((EmitSync & 0x02) == 0) {
      // mark word has the monitor bit set: object is inflated
      __ tbnz(disp_hdr, exact_log2(markOopDesc::monitor_value), object_has_monitor);
    }

    // Set displaced_header to be (markOop of object | UNLOCK_VALUE).
    __ orr(disp_hdr, disp_hdr, markOopDesc::unlocked_value);

    // Load Compare Value application register.

    // Initialize the box. (Must happen before we update the object mark!)
    __ str(disp_hdr, Address(box, BasicLock::displaced_header_offset_in_bytes()));

    // Compare object markOop with mark and if equal exchange scratch1
    // with object markOop.
    if (UseLSE) {
      // LSE path: single casal instruction
      __ mov(tmp, disp_hdr);
      __ casal(Assembler::xword, tmp, box, oop);
      __ cmp(tmp, disp_hdr);
      __ br(Assembler::EQ, cont);
    } else {
      // LL/SC path: ldaxr/stlxr retry loop
      Label retry_load;
      if ((VM_Version::features() & VM_Version::CPU_STXR_PREFETCH))
        __ prfm(Address(oop), PSTL1STRM);
      __ bind(retry_load);
      __ ldaxr(tmp, oop);
      __ cmp(tmp, disp_hdr);
      __ br(Assembler::NE, cas_failed);
      // use stlxr to ensure update is immediately visible
      __ stlxr(tmp, box, oop);
      __ cbzw(tmp, cont);
      __ b(retry_load);
    }

    assert(oopDesc::mark_offset_in_bytes() == 0, "offset of _mark is not 0");

    // If the compare-and-exchange succeeded, then we found an unlocked
    // object and have now locked it; we continue at label cont.

    __ bind(cas_failed);
    // We did not see an unlocked object so try the fast recursive case.

    // Check if the owner is self by comparing the value in the
    // markOop of object (disp_hdr) with the stack pointer.
    __ mov(rscratch1, sp);
    __ sub(disp_hdr, disp_hdr, rscratch1);
    __ mov(tmp, (address) (~(os::vm_page_size()-1) | markOopDesc::lock_mask_in_place));
    // If condition is true we are cont and hence we can store 0 as the
    // displaced header in the box, which indicates that it is a recursive lock.
    __ ands(tmp/*==0?*/, disp_hdr, tmp);
    __ str(tmp/*==0, perhaps*/, Address(box, BasicLock::displaced_header_offset_in_bytes()));

    // Handle existing monitor.
    if ((EmitSync & 0x02) == 0) {
      __ b(cont);

      __ bind(object_has_monitor);
      // The object's monitor m is unlocked iff m->owner == NULL,
      // otherwise m->owner may contain a thread or a stack address.
      //
      // Try to CAS m->owner from NULL to current thread.
      __ add(tmp, disp_hdr, (ObjectMonitor::owner_offset_in_bytes()-markOopDesc::monitor_value));
      __ mov(disp_hdr, zr);

      if (UseLSE) {
        __ mov(rscratch1, disp_hdr);
        __ casal(Assembler::xword, rscratch1, rthread, tmp);
        __ cmp(rscratch1, disp_hdr);
      } else {
        Label retry_load, fail;
        if ((VM_Version::features() & VM_Version::CPU_STXR_PREFETCH))
          __ prfm(Address(tmp), PSTL1STRM);
        __ bind(retry_load);
        __ ldaxr(rscratch1, tmp);
        __ cmp(disp_hdr, rscratch1);
        __ br(Assembler::NE, fail);
        // use stlxr to ensure update is immediately visible
        __ stlxr(rscratch1, rthread, tmp);
        __ cbnzw(rscratch1, retry_load);
        __ bind(fail);
      }

      // Store a non-null value into the box to avoid looking like a re-entrant
      // lock. The fast-path monitor unlock code checks for
      // markOopDesc::monitor_value so use markOopDesc::unused_mark which has the
      // relevant bit set, and also matches ObjectSynchronizer::slow_enter.
      __ mov(tmp, (address)markOopDesc::unused_mark());
      __ str(tmp, Address(box, BasicLock::displaced_header_offset_in_bytes()));
    }

    __ bind(cont);
    // flag == EQ indicates success
    // flag == NE indicates failure
  %}
4912 
  // Fast-path monitor exit, the inverse of fast_lock above.
  // On exit, flag == EQ indicates the unlock succeeded, flag == NE
  // sends the caller to the slow path.
  enc_class aarch64_enc_fast_unlock(iRegP object, iRegP box, iRegP tmp, iRegP tmp2) %{
    MacroAssembler _masm(&cbuf);
    Register oop = as_Register($object$$reg);
    Register box = as_Register($box$$reg);
    Register disp_hdr = as_Register($tmp$$reg);
    Register tmp = as_Register($tmp2$$reg);
    Label cont;
    Label object_has_monitor;

    assert_different_registers(oop, box, tmp, disp_hdr);

    // Always do locking in runtime.
    if (EmitSync & 0x01) {
      __ cmp(oop, zr); // Oop can't be 0 here => always false.
      return;
    }

    if (UseBiasedLocking && !UseOptoBiasInlining) {
      __ biased_locking_exit(oop, tmp, cont);
    }

    // Find the lock address and load the displaced header from the stack.
    __ ldr(disp_hdr, Address(box, BasicLock::displaced_header_offset_in_bytes()));

    // If the displaced header is 0, we have a recursive unlock.
    __ cmp(disp_hdr, zr);
    __ br(Assembler::EQ, cont);

    // Handle existing monitor.
    if ((EmitSync & 0x02) == 0) {
      __ ldr(tmp, Address(oop, oopDesc::mark_offset_in_bytes()));
      __ tbnz(disp_hdr, exact_log2(markOopDesc::monitor_value), object_has_monitor);
    }

    // Check if it is still a light weight lock, this is is true if we
    // see the stack address of the basicLock in the markOop of the
    // object.

    if (UseLSE) {
      // LSE path: try to swap the displaced header back in with casl
      __ mov(tmp, box);
      __ casl(Assembler::xword, tmp, disp_hdr, oop);
      __ cmp(tmp, box);
      __ b(cont);
    } else {
      // LL/SC path: ldxr/stlxr retry loop
      Label retry_load;
      if ((VM_Version::features() & VM_Version::CPU_STXR_PREFETCH))
        __ prfm(Address(oop), PSTL1STRM);
      __ bind(retry_load);
      __ ldxr(tmp, oop);
      __ cmp(box, tmp);
      __ br(Assembler::NE, cont);
      // use stlxr to ensure update is immediately visible
      __ stlxr(tmp, disp_hdr, oop);
      __ cbzw(tmp, cont);
      __ b(retry_load);
    }

    assert(oopDesc::mark_offset_in_bytes() == 0, "offset of _mark is not 0");

    // Handle existing monitor.
    if ((EmitSync & 0x02) == 0) {
      __ bind(object_has_monitor);
      __ add(tmp, tmp, -markOopDesc::monitor_value); // monitor
      __ ldr(rscratch1, Address(tmp, ObjectMonitor::owner_offset_in_bytes()));
      __ ldr(disp_hdr, Address(tmp, ObjectMonitor::recursions_offset_in_bytes()));
      __ eor(rscratch1, rscratch1, rthread); // Will be 0 if we are the owner.
      __ orr(rscratch1, rscratch1, disp_hdr); // Will be 0 if there are 0 recursions
      __ cmp(rscratch1, zr);
      __ br(Assembler::NE, cont);

      __ ldr(rscratch1, Address(tmp, ObjectMonitor::EntryList_offset_in_bytes()));
      __ ldr(disp_hdr, Address(tmp, ObjectMonitor::cxq_offset_in_bytes()));
      __ orr(rscratch1, rscratch1, disp_hdr); // Will be 0 if both are 0.
      __ cmp(rscratch1, zr);
      __ cbnz(rscratch1, cont);
      // need a release store here
      __ lea(tmp, Address(tmp, ObjectMonitor::owner_offset_in_bytes()));
      __ stlr(zr, tmp); // set unowned
    }

    __ bind(cont);
    // flag == EQ indicates success
    // flag == NE indicates failure
  %}
4997 
4998 %}
4999 
5000 //----------FRAME--------------------------------------------------------------
5001 // Definition of frame structure and management information.
5002 //
5003 //  S T A C K   L A Y O U T    Allocators stack-slot number
5004 //                             |   (to get allocators register number
5005 //  G  Owned by    |        |  v    add OptoReg::stack0())
5006 //  r   CALLER     |        |
5007 //  o     |        +--------+      pad to even-align allocators stack-slot
5008 //  w     V        |  pad0  |        numbers; owned by CALLER
5009 //  t   -----------+--------+----> Matcher::_in_arg_limit, unaligned
5010 //  h     ^        |   in   |  5
5011 //        |        |  args  |  4   Holes in incoming args owned by SELF
5012 //  |     |        |        |  3
5013 //  |     |        +--------+
5014 //  V     |        | old out|      Empty on Intel, window on Sparc
5015 //        |    old |preserve|      Must be even aligned.
5016 //        |     SP-+--------+----> Matcher::_old_SP, even aligned
5017 //        |        |   in   |  3   area for Intel ret address
5018 //     Owned by    |preserve|      Empty on Sparc.
5019 //       SELF      +--------+
5020 //        |        |  pad2  |  2   pad to align old SP
5021 //        |        +--------+  1
5022 //        |        | locks  |  0
5023 //        |        +--------+----> OptoReg::stack0(), even aligned
5024 //        |        |  pad1  | 11   pad to align new SP
5025 //        |        +--------+
5026 //        |        |        | 10
5027 //        |        | spills |  9   spills
5028 //        V        |        |  8   (pad0 slot for callee)
5029 //      -----------+--------+----> Matcher::_out_arg_limit, unaligned
5030 //        ^        |  out   |  7
5031 //        |        |  args  |  6   Holes in outgoing args owned by CALLEE
5032 //     Owned by    +--------+
5033 //      CALLEE     | new out|  6   Empty on Intel, window on Sparc
5034 //        |    new |preserve|      Must be even-aligned.
5035 //        |     SP-+--------+----> Matcher::_new_SP, even aligned
5036 //        |        |        |
5037 //
5038 // Note 1: Only region 8-11 is determined by the allocator.  Region 0-5 is
5039 //         known from SELF's arguments and the Java calling convention.
5040 //         Region 6-7 is determined per call site.
5041 // Note 2: If the calling convention leaves holes in the incoming argument
5042 //         area, those holes are owned by SELF.  Holes in the outgoing area
//         are owned by the CALLEE.  Holes should not be necessary in the
5044 //         incoming area, as the Java calling convention is completely under
5045 //         the control of the AD file.  Doubles can be sorted and packed to
//         avoid holes.  Holes in the outgoing arguments may be necessary for
5047 //         varargs C calling conventions.
5048 // Note 3: Region 0-3 is even aligned, with pad2 as needed.  Region 3-5 is
5049 //         even aligned with pad0 as needed.
5050 //         Region 6 is even aligned.  Region 6-7 is NOT even aligned;
5051 //           (the latter is true on Intel but is it false on AArch64?)
5052 //         region 6-11 is even aligned; it may be padded out more so that
5053 //         the region from SP to FP meets the minimum stack alignment.
5054 // Note 4: For I2C adapters, the incoming FP may not meet the minimum stack
5055 //         alignment.  Region 11, pad1, may be dynamically extended so that
5056 //         SP meets the minimum alignment.
5057 
frame %{
  // What direction does stack grow in (assumed to be same for C & Java)
  stack_direction(TOWARDS_LOW);

  // These three registers define part of the calling convention
  // between compiled code and the interpreter.

  // Inline Cache Register or methodOop for I2C.
  inline_cache_reg(R12);

  // Method Oop Register when calling interpreter.
  interpreter_method_oop_reg(R12);

  // Number of stack slots consumed by locking an object
  sync_stack_slots(2);

  // Compiled code's Frame Pointer
  frame_pointer(R31);

  // Interpreter stores its frame pointer in a register which is
  // stored to the stack by I2CAdaptors.
  // I2CAdaptors convert from interpreted java to compiled java.
  interpreter_frame_pointer(R29);

  // Stack alignment requirement
  stack_alignment(StackAlignmentInBytes); // Alignment size in bytes (128-bit -> 16 bytes)

  // Number of stack slots between incoming argument block and the start of
  // a new frame.  The PROLOG must add this many slots to the stack.  The
  // EPILOG must remove this many slots. aarch64 needs two slots for
  // return address and fp.
  // NOTE: stack slots are 32-bit units, so the two 64-bit words (return
  // address and saved fp) account for the 4 slots passed here.
  // TODO think this is correct but check
  in_preserve_stack_slots(4);

  // Number of outgoing stack slots killed above the out_preserve_stack_slots
  // for calls to C.  Supports the var-args backing area for register parms.
  varargs_C_out_slots_killed(frame::arg_reg_save_area_bytes/BytesPerInt);

  // The after-PROLOG location of the return address.  Location of
  // return address specifies a type (REG or STACK) and a number
  // representing the register number (i.e. - use a register name) or
  // stack slot.
  // Ret Addr is on stack in slot 0 if no locks or verification or alignment.
  // Otherwise, it is above the locks and verification slot and alignment word
  // TODO this may well be correct but need to check why that - 2 is there
  // ppc port uses 0 but we definitely need to allow for fixed_slots
  // which folds in the space used for monitors
  return_addr(STACK - 2 +
              align_up((Compile::current()->in_preserve_stack_slots() +
                        Compile::current()->fixed_slots()),
                       stack_alignment_in_slots()));

  // Body of function which returns an integer array locating
  // arguments either in registers or in stack slots.  Passed an array
  // of ideal registers called "sig" and a "length" count.  Stack-slot
  // offsets are based on outgoing arguments, i.e. a CALLER setting up
  // arguments for a CALLEE.  Incoming stack arguments are
  // automatically biased by the preserve_stack_slots field above.

  calling_convention
  %{
    // No difference between incoming/outgoing, just pass false
    SharedRuntime::java_calling_convention(sig_bt, regs, length, false);
  %}

  c_calling_convention
  %{
    // This is obviously always outgoing
    (void) SharedRuntime::c_calling_convention(sig_bt, regs, NULL, length);
  %}

  // Location of compiled Java return values.  Same as C for now.
  return_value
  %{
    // TODO do we allow ideal_reg == Op_RegN???
    assert(ideal_reg >= Op_RegI && ideal_reg <= Op_RegL,
           "only return normal values");

    // Low half of the return register pair, indexed by ideal register type.
    static const int lo[Op_RegL + 1] = { // enum name
      0,                                 // Op_Node
      0,                                 // Op_Set
      R0_num,                            // Op_RegN
      R0_num,                            // Op_RegI
      R0_num,                            // Op_RegP
      V0_num,                            // Op_RegF
      V0_num,                            // Op_RegD
      R0_num                             // Op_RegL
    };

    // High half of the return register pair; OptoReg::Bad for 32-bit values.
    static const int hi[Op_RegL + 1] = { // enum name
      0,                                 // Op_Node
      0,                                 // Op_Set
      OptoReg::Bad,                       // Op_RegN
      OptoReg::Bad,                      // Op_RegI
      R0_H_num,                          // Op_RegP
      OptoReg::Bad,                      // Op_RegF
      V0_H_num,                          // Op_RegD
      R0_H_num                           // Op_RegL
    };

    return OptoRegPair(hi[ideal_reg], lo[ideal_reg]);
  %}
%}
5161 
//----------ATTRIBUTES---------------------------------------------------------
//----------Operand Attributes-------------------------------------------------
op_attrib op_cost(1);        // Required cost attribute
//----------Instruction Attributes---------------------------------------------
ins_attrib ins_cost(INSN_COST); // Required cost attribute (default
                                // per-instruction cost)
ins_attrib ins_size(32);        // Required size attribute (in bits)
ins_attrib ins_short_branch(0); // Required flag: is this instruction
                                // a non-matching short branch variant
                                // of some long branch?
ins_attrib ins_alignment(4);    // Required alignment attribute (must
                                // be a power of 2) specifies the
                                // alignment that some part of the
                                // instruction (not necessarily the
                                // start) requires.  If > 1, a
                                // compute_padding() function must be
                                // provided for the instruction
5179 
//----------OPERANDS-----------------------------------------------------------
// Operand definitions must precede instruction definitions for correct parsing
// in the ADLC because operands constitute user defined types which are used in
// instruction definitions.

//----------Simple Operands----------------------------------------------------

// Integer operands 32 bit
// 32 bit immediate
operand immI()
%{
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// 32 bit zero
operand immI0()
%{
  predicate(n->get_int() == 0);
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// 32 bit unit increment
operand immI_1()
%{
  predicate(n->get_int() == 1);
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// 32 bit unit decrement
operand immI_M1()
%{
  predicate(n->get_int() == -1);
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Shift values for add/sub extension shift (0..4)
operand immIExt()
%{
  predicate(0 <= n->get_int() && (n->get_int() <= 4));
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// 32 bit constant no larger than 4
// NOTE(review): no lower bound in the predicate -- negative values
// also match; confirm users only feed small non-negative counts.
operand immI_le_4()
%{
  predicate(n->get_int() <= 4);
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// 32 bit constant 31
operand immI_31()
%{
  predicate(n->get_int() == 31);
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// 32 bit constant 8
operand immI_8()
%{
  predicate(n->get_int() == 8);
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// 32 bit constant 16
operand immI_16()
%{
  predicate(n->get_int() == 16);
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// 32 bit constant 24
operand immI_24()
%{
  predicate(n->get_int() == 24);
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// 32 bit constant 32
operand immI_32()
%{
  predicate(n->get_int() == 32);
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// 32 bit constant 48
operand immI_48()
%{
  predicate(n->get_int() == 48);
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// 32 bit constant 56
operand immI_56()
%{
  predicate(n->get_int() == 56);
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// 32 bit constant 63
operand immI_63()
%{
  predicate(n->get_int() == 63);
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// 32 bit constant 64
operand immI_64()
%{
  predicate(n->get_int() == 64);
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// 32 bit constant 255 (0xff)
operand immI_255()
%{
  predicate(n->get_int() == 255);
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// 32 bit constant 65535 (0xffff)
operand immI_65535()
%{
  predicate(n->get_int() == 65535);
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// 64 bit constant 255 (0xff)
operand immL_255()
%{
  predicate(n->get_long() == 255L);
  match(ConL);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// 64 bit constant 65535 (0xffff)
operand immL_65535()
%{
  predicate(n->get_long() == 65535L);
  match(ConL);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// 64 bit constant 4294967295 (0xffffffff)
operand immL_4294967295()
%{
  predicate(n->get_long() == 4294967295L);
  match(ConL);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// 64 bit mask of contiguous low-order one bits: value+1 is a power of
// two and the top two bits are clear.
operand immL_bitmask()
%{
  predicate(((n->get_long() & 0xc000000000000000l) == 0)
            && is_power_of_2(n->get_long() + 1));
  match(ConL);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// 32 bit mask of contiguous low-order one bits: value+1 is a power of
// two and the top two bits are clear.
operand immI_bitmask()
%{
  predicate(((n->get_int() & 0xc0000000) == 0)
            && is_power_of_2(n->get_int() + 1));
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}
5413 
// Scale values for scaled offset addressing modes (up to long but not quad)
operand immIScale()
%{
  predicate(0 <= n->get_int() && (n->get_int() <= 3));
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// 26 bit signed offset -- for pc-relative branches
operand immI26()
%{
  predicate(((-(1 << 25)) <= n->get_int()) && (n->get_int() < (1 << 25)));
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// 19 bit signed offset -- for pc-relative loads
operand immI19()
%{
  predicate(((-(1 << 18)) <= n->get_int()) && (n->get_int() < (1 << 18)));
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// 12 bit unsigned offset -- for base plus immediate loads
operand immIU12()
%{
  predicate((0 <= n->get_int()) && (n->get_int() < (1 << 12)));
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// 12 bit unsigned offset (long variant of immIU12)
operand immLU12()
%{
  predicate((0 <= n->get_long()) && (n->get_long() < (1 << 12)));
  match(ConL);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Offset for scaled or unscaled immediate loads and stores
operand immIOffset()
%{
  predicate(Address::offset_ok_for_immed(n->get_int()));
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// As above, but for a 4 byte (2^2) access size
operand immIOffset4()
%{
  predicate(Address::offset_ok_for_immed(n->get_int(), 2));
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// As above, but for an 8 byte (2^3) access size
operand immIOffset8()
%{
  predicate(Address::offset_ok_for_immed(n->get_int(), 3));
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// As above, but for a 16 byte (2^4) access size
operand immIOffset16()
%{
  predicate(Address::offset_ok_for_immed(n->get_int(), 4));
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Long offset for scaled or unscaled immediate loads and stores
operand immLoffset()
%{
  predicate(Address::offset_ok_for_immed(n->get_long()));
  match(ConL);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// As above, but for a 4 byte (2^2) access size
operand immLoffset4()
%{
  predicate(Address::offset_ok_for_immed(n->get_long(), 2));
  match(ConL);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// As above, but for an 8 byte (2^3) access size
operand immLoffset8()
%{
  predicate(Address::offset_ok_for_immed(n->get_long(), 3));
  match(ConL);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// As above, but for a 16 byte (2^4) access size
operand immLoffset16()
%{
  predicate(Address::offset_ok_for_immed(n->get_long(), 4));
  match(ConL);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}
5548 
// 32 bit integer valid for add sub immediate
operand immIAddSub()
%{
  predicate(Assembler::operand_valid_for_add_sub_immediate((long)n->get_int()));
  match(ConI);
  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// 32 bit unsigned integer valid for logical immediate
// TODO -- check this is right when e.g the mask is 0x80000000
operand immILog()
%{
  predicate(Assembler::operand_valid_for_logical_immediate(/*is32*/true, (unsigned long)n->get_int()));
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Integer operands 64 bit
// 64 bit immediate
operand immL()
%{
  match(ConL);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// 64 bit zero
operand immL0()
%{
  predicate(n->get_long() == 0);
  match(ConL);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// 64 bit unit increment
operand immL_1()
%{
  predicate(n->get_long() == 1);
  match(ConL);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// 64 bit unit decrement
operand immL_M1()
%{
  predicate(n->get_long() == -1);
  match(ConL);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// 32 bit offset of pc in thread anchor
// (byte offset of last_Java_pc within the thread's JavaFrameAnchor)

operand immL_pc_off()
%{
  predicate(n->get_long() == in_bytes(JavaThread::frame_anchor_offset()) +
                             in_bytes(JavaFrameAnchor::last_Java_pc_offset()));
  match(ConL);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// 64 bit integer valid for add sub immediate
operand immLAddSub()
%{
  predicate(Assembler::operand_valid_for_add_sub_immediate(n->get_long()));
  match(ConL);
  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// 64 bit integer valid for logical immediate
operand immLLog()
%{
  predicate(Assembler::operand_valid_for_logical_immediate(/*is32*/false, (unsigned long)n->get_long()));
  match(ConL);
  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Long Immediate: low 32-bit mask
operand immL_32bits()
%{
  predicate(n->get_long() == 0xFFFFFFFFL);
  match(ConL);
  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}
5657 
// Pointer operands
// Pointer Immediate
operand immP()
%{
  match(ConP);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// NULL Pointer Immediate
operand immP0()
%{
  predicate(n->get_ptr() == 0);
  match(ConP);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Pointer Immediate One
// this is used in object initialization (initial object header)
operand immP_1()
%{
  predicate(n->get_ptr() == 1);
  match(ConP);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Polling Page Pointer Immediate
// (matches only the address of the VM's safepoint polling page)
operand immPollPage()
%{
  predicate((address)n->get_ptr() == os::get_polling_page());
  match(ConP);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Card Table Byte Map Base
operand immByteMapBase()
%{
  // Get base of card map
  predicate(BarrierSet::barrier_set()->is_a(BarrierSet::CardTableBarrierSet) &&
            (jbyte*)n->get_ptr() == ((CardTableBarrierSet*)(BarrierSet::barrier_set()))->card_table()->byte_map_base());
  match(ConP);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Pointer Immediate Minus One
// this is used when we want to write the current PC to the thread anchor
operand immP_M1()
%{
  predicate(n->get_ptr() == -1);
  match(ConP);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Pointer Immediate Minus Two
// this is used when we want to write the current PC to the thread anchor
operand immP_M2()
%{
  predicate(n->get_ptr() == -2);
  match(ConP);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}
5739 
// Float and Double operands
// Double Immediate
operand immD()
%{
  match(ConD);
  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Double Immediate: +0.0d
operand immD0()
%{
  predicate(jlong_cast(n->getd()) == 0);
  match(ConD);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Double Immediate: value encodable as a packed FP immediate
// (see Assembler::operand_valid_for_float_immediate)
operand immDPacked()
%{
  predicate(Assembler::operand_valid_for_float_immediate(n->getd()));
  match(ConD);
  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Float Immediate
operand immF()
%{
  match(ConF);
  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Float Immediate: +0.0f.
operand immF0()
%{
  predicate(jint_cast(n->getf()) == 0);
  match(ConF);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Float Immediate: value encodable as a packed FP immediate
// (see Assembler::operand_valid_for_float_immediate)
operand immFPacked()
%{
  predicate(Assembler::operand_valid_for_float_immediate((double)n->getf()));
  match(ConF);
  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Narrow pointer operands
// Narrow Pointer Immediate
operand immN()
%{
  match(ConN);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Narrow NULL Pointer Immediate
operand immN0()
%{
  predicate(n->get_narrowcon() == 0);
  match(ConN);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Narrow Klass Pointer Immediate
operand immNKlass()
%{
  match(ConNKlass);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}
5831 
// Integer 32 bit Register Operands
// Integer 32 bit Register (excludes SP)
operand iRegI()
%{
  constraint(ALLOC_IN_RC(any_reg32));
  match(RegI);
  match(iRegINoSp);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}

// Integer 32 bit Register not Special
operand iRegINoSp()
%{
  constraint(ALLOC_IN_RC(no_special_reg32));
  match(RegI);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}

// Integer 64 bit Register Operands
// Integer 64 bit Register (includes SP)
operand iRegL()
%{
  constraint(ALLOC_IN_RC(any_reg));
  match(RegL);
  match(iRegLNoSp);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}

// Integer 64 bit Register not Special
operand iRegLNoSp()
%{
  constraint(ALLOC_IN_RC(no_special_reg));
  match(RegL);
  match(iRegL_R0);
  format %{ %}
  interface(REG_INTER);
%}

// Pointer Register Operands
// Pointer Register
operand iRegP()
%{
  constraint(ALLOC_IN_RC(ptr_reg));
  match(RegP);
  match(iRegPNoSp);
  match(iRegP_R0);
  //match(iRegP_R2);
  //match(iRegP_R4);
  //match(iRegP_R5);
  match(thread_RegP);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}

// Pointer 64 bit Register not Special
operand iRegPNoSp()
%{
  constraint(ALLOC_IN_RC(no_special_ptr_reg));
  match(RegP);
  // match(iRegP);
  // match(iRegP_R0);
  // match(iRegP_R2);
  // match(iRegP_R4);
  // match(iRegP_R5);
  // match(thread_RegP);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}
5908 
// Pointer 64 bit Register R0 only
operand iRegP_R0()
%{
  constraint(ALLOC_IN_RC(r0_reg));
  match(RegP);
  // match(iRegP);
  match(iRegPNoSp);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}

// Pointer 64 bit Register R1 only
operand iRegP_R1()
%{
  constraint(ALLOC_IN_RC(r1_reg));
  match(RegP);
  // match(iRegP);
  match(iRegPNoSp);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}

// Pointer 64 bit Register R2 only
operand iRegP_R2()
%{
  constraint(ALLOC_IN_RC(r2_reg));
  match(RegP);
  // match(iRegP);
  match(iRegPNoSp);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}

// Pointer 64 bit Register R3 only
operand iRegP_R3()
%{
  constraint(ALLOC_IN_RC(r3_reg));
  match(RegP);
  // match(iRegP);
  match(iRegPNoSp);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}

// Pointer 64 bit Register R4 only
operand iRegP_R4()
%{
  constraint(ALLOC_IN_RC(r4_reg));
  match(RegP);
  // match(iRegP);
  match(iRegPNoSp);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}

// Pointer 64 bit Register R5 only
operand iRegP_R5()
%{
  constraint(ALLOC_IN_RC(r5_reg));
  match(RegP);
  // match(iRegP);
  match(iRegPNoSp);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}

// Pointer 64 bit Register R10 only
operand iRegP_R10()
%{
  constraint(ALLOC_IN_RC(r10_reg));
  match(RegP);
  // match(iRegP);
  match(iRegPNoSp);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}

// Long 64 bit Register R0 only
operand iRegL_R0()
%{
  constraint(ALLOC_IN_RC(r0_reg));
  match(RegL);
  match(iRegLNoSp);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}

// Long 64 bit Register R2 only
operand iRegL_R2()
%{
  constraint(ALLOC_IN_RC(r2_reg));
  match(RegL);
  match(iRegLNoSp);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}

// Long 64 bit Register R3 only
operand iRegL_R3()
%{
  constraint(ALLOC_IN_RC(r3_reg));
  match(RegL);
  match(iRegLNoSp);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}

// Long 64 bit Register R11 only
operand iRegL_R11()
%{
  constraint(ALLOC_IN_RC(r11_reg));
  match(RegL);
  match(iRegLNoSp);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}

// Pointer 64 bit Register FP only
operand iRegP_FP()
%{
  constraint(ALLOC_IN_RC(fp_reg));
  match(RegP);
  // match(iRegP);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}
6047 
// Integer 32 bit Register R0 only
operand iRegI_R0()
%{
  constraint(ALLOC_IN_RC(int_r0_reg));
  match(RegI);
  match(iRegINoSp);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}

// Integer 32 bit Register R2 only
operand iRegI_R2()
%{
  constraint(ALLOC_IN_RC(int_r2_reg));
  match(RegI);
  match(iRegINoSp);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}

// Integer 32 bit Register R3 only
operand iRegI_R3()
%{
  constraint(ALLOC_IN_RC(int_r3_reg));
  match(RegI);
  match(iRegINoSp);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}


// Integer 32 bit Register R4 only
operand iRegI_R4()
%{
  constraint(ALLOC_IN_RC(int_r4_reg));
  match(RegI);
  match(iRegINoSp);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}
6092 
6093 
6094 // Pointer Register Operands
6095 // Narrow Pointer Register
6096 operand iRegN()
6097 %{
6098   constraint(ALLOC_IN_RC(any_reg32));
6099   match(RegN);
6100   match(iRegNNoSp);
6101   op_cost(0);
6102   format %{ %}
6103   interface(REG_INTER);
6104 %}
6105 
6106 operand iRegN_R0()
6107 %{
6108   constraint(ALLOC_IN_RC(r0_reg));
6109   match(iRegN);
6110   op_cost(0);
6111   format %{ %}
6112   interface(REG_INTER);
6113 %}
6114 
6115 operand iRegN_R2()
6116 %{
6117   constraint(ALLOC_IN_RC(r2_reg));
6118   match(iRegN);
6119   op_cost(0);
6120   format %{ %}
6121   interface(REG_INTER);
6122 %}
6123 
6124 operand iRegN_R3()
6125 %{
6126   constraint(ALLOC_IN_RC(r3_reg));
6127   match(iRegN);
6128   op_cost(0);
6129   format %{ %}
6130   interface(REG_INTER);
6131 %}
6132 
6133 // Integer 64 bit Register not Special
6134 operand iRegNNoSp()
6135 %{
6136   constraint(ALLOC_IN_RC(no_special_reg32));
6137   match(RegN);
6138   op_cost(0);
6139   format %{ %}
6140   interface(REG_INTER);
6141 %}
6142 
6143 // heap base register -- used for encoding immN0
6144 
6145 operand iRegIHeapbase()
6146 %{
6147   constraint(ALLOC_IN_RC(heapbase_reg));
6148   match(RegI);
6149   op_cost(0);
6150   format %{ %}
6151   interface(REG_INTER);
6152 %}
6153 
// Float Register
// Float register operands
operand vRegF()
%{
  constraint(ALLOC_IN_RC(float_reg));
  match(RegF);

  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}

// Double Register
// Double register operands
operand vRegD()
%{
  constraint(ALLOC_IN_RC(double_reg));
  match(RegD);

  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}

// 64 bit (D-sized) vector register operand
operand vecD()
%{
  constraint(ALLOC_IN_RC(vectord_reg));
  match(VecD);

  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}

// 128 bit (X-sized) vector register operand
operand vecX()
%{
  constraint(ALLOC_IN_RC(vectorx_reg));
  match(VecX);

  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}

// Double Register V0 only
operand vRegD_V0()
%{
  constraint(ALLOC_IN_RC(v0_reg));
  match(RegD);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}

// Double Register V1 only
operand vRegD_V1()
%{
  constraint(ALLOC_IN_RC(v1_reg));
  match(RegD);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}

// Double Register V2 only
operand vRegD_V2()
%{
  constraint(ALLOC_IN_RC(v2_reg));
  match(RegD);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}

// Double Register V3 only
operand vRegD_V3()
%{
  constraint(ALLOC_IN_RC(v3_reg));
  match(RegD);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}
6233 
6234 // Flags register, used as output of signed compare instructions
6235 
6236 // note that on AArch64 we also use this register as the output for
6237 // for floating point compare instructions (CmpF CmpD). this ensures
6238 // that ordered inequality tests use GT, GE, LT or LE none of which
6239 // pass through cases where the result is unordered i.e. one or both
6240 // inputs to the compare is a NaN. this means that the ideal code can
6241 // replace e.g. a GT with an LE and not end up capturing the NaN case
6242 // (where the comparison should always fail). EQ and NE tests are
6243 // always generated in ideal code so that unordered folds into the NE
6244 // case, matching the behaviour of AArch64 NE.
6245 //
6246 // This differs from x86 where the outputs of FP compares use a
6247 // special FP flags registers and where compares based on this
6248 // register are distinguished into ordered inequalities (cmpOpUCF) and
6249 // EQ/NEQ tests (cmpOpUCF2). x86 has to special case the latter tests
6250 // to explicitly handle the unordered case in branches. x86 also has
6251 // to include extra CMoveX rules to accept a cmpOpUCF input.
6252 
// Flags register operand for signed integral and FP compares
// (see the commentary above on why FP compares share this register).
operand rFlagsReg()
%{
  constraint(ALLOC_IN_RC(int_flags));
  match(RegFlags);

  op_cost(0);
  format %{ "RFLAGS" %}
  interface(REG_INTER);
%}

// Flags register, used as output of unsigned compare instructions
operand rFlagsRegU()
%{
  constraint(ALLOC_IN_RC(int_flags));
  match(RegFlags);

  op_cost(0);
  format %{ "RFLAGSU" %}
  interface(REG_INTER);
%}

// Special Registers

// Method Register
operand inline_cache_RegP(iRegP reg)
%{
  constraint(ALLOC_IN_RC(method_reg)); // inline_cache_reg
  match(reg);
  match(iRegPNoSp);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}

operand interpreter_method_oop_RegP(iRegP reg)
%{
  constraint(ALLOC_IN_RC(method_reg)); // interpreter_method_oop_reg
  match(reg);
  match(iRegPNoSp);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}

// Thread Register
operand thread_RegP(iRegP reg)
%{
  constraint(ALLOC_IN_RC(thread_reg)); // thread_reg (comment previously said link_reg)
  match(reg);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}

// Link Register
operand lr_RegP(iRegP reg)
%{
  constraint(ALLOC_IN_RC(lr_reg)); // link_reg
  match(reg);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}
6315 
//----------Memory Operands----------------------------------------------------
// In the MEMORY_INTER descriptions below, index(0xffffffff) is the
// sentinel meaning "no index register" (used by all base-only and
// base+displacement forms).

// [base] -- register-indirect, no index, no displacement
operand indirect(iRegP reg)
%{
  constraint(ALLOC_IN_RC(ptr_reg));
  match(reg);
  op_cost(0);
  format %{ "[$reg]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index(0xffffffff);
    scale(0x0);
    disp(0x0);
  %}
%}

// [base, index sxtw #scale] -- 32-bit index sign-extended to 64 bits
// and scaled. The predicate only accepts the AddP when the scaled
// form fits every memory use of the address.
operand indIndexScaledI2L(iRegP reg, iRegI ireg, immIScale scale)
%{
  constraint(ALLOC_IN_RC(ptr_reg));
  predicate(size_fits_all_mem_uses(n->as_AddP(), n->in(AddPNode::Offset)->in(2)->get_int()));
  match(AddP reg (LShiftL (ConvI2L ireg) scale));
  op_cost(0);
  format %{ "$reg, $ireg sxtw($scale), 0, I2L" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index($ireg);
    scale($scale);
    disp(0x0);
  %}
%}

// [base, index lsl #scale] -- 64-bit index shifted left by scale
operand indIndexScaled(iRegP reg, iRegL lreg, immIScale scale)
%{
  constraint(ALLOC_IN_RC(ptr_reg));
  predicate(size_fits_all_mem_uses(n->as_AddP(), n->in(AddPNode::Offset)->in(2)->get_int()));
  match(AddP reg (LShiftL lreg scale));
  op_cost(0);
  format %{ "$reg, $lreg lsl($scale)" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index($lreg);
    scale($scale);
    disp(0x0);
  %}
%}

// [base, index sxtw] -- 32-bit index sign-extended, unscaled
operand indIndexI2L(iRegP reg, iRegI ireg)
%{
  constraint(ALLOC_IN_RC(ptr_reg));
  match(AddP reg (ConvI2L ireg));
  op_cost(0);
  format %{ "$reg, $ireg, 0, I2L" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index($ireg);
    scale(0x0);
    disp(0x0);
  %}
%}

// [base, index] -- 64-bit index, unscaled
operand indIndex(iRegP reg, iRegL lreg)
%{
  constraint(ALLOC_IN_RC(ptr_reg));
  match(AddP reg lreg);
  op_cost(0);
  format %{ "$reg, $lreg" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index($lreg);
    scale(0x0);
    disp(0x0);
  %}
%}

// [base, #imm] -- base plus 32-bit immediate offset
operand indOffI(iRegP reg, immIOffset off)
%{
  constraint(ALLOC_IN_RC(ptr_reg));
  match(AddP reg off);
  op_cost(0);
  format %{ "[$reg, $off]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index(0xffffffff);
    scale(0x0);
    disp($off);
  %}
%}

// As indOffI but restricted to immIOffset4 offsets
// (offset forms valid for 4-byte accesses, per the operand name).
operand indOffI4(iRegP reg, immIOffset4 off)
%{
  constraint(ALLOC_IN_RC(ptr_reg));
  match(AddP reg off);
  op_cost(0);
  format %{ "[$reg, $off]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index(0xffffffff);
    scale(0x0);
    disp($off);
  %}
%}

// As indOffI but restricted to immIOffset8 offsets (8-byte accesses)
operand indOffI8(iRegP reg, immIOffset8 off)
%{
  constraint(ALLOC_IN_RC(ptr_reg));
  match(AddP reg off);
  op_cost(0);
  format %{ "[$reg, $off]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index(0xffffffff);
    scale(0x0);
    disp($off);
  %}
%}

// As indOffI but restricted to immIOffset16 offsets (16-byte accesses)
operand indOffI16(iRegP reg, immIOffset16 off)
%{
  constraint(ALLOC_IN_RC(ptr_reg));
  match(AddP reg off);
  op_cost(0);
  format %{ "[$reg, $off]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index(0xffffffff);
    scale(0x0);
    disp($off);
  %}
%}

// [base, #imm] -- base plus long immediate offset
operand indOffL(iRegP reg, immLoffset off)
%{
  constraint(ALLOC_IN_RC(ptr_reg));
  match(AddP reg off);
  op_cost(0);
  format %{ "[$reg, $off]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index(0xffffffff);
    scale(0x0);
    disp($off);
  %}
%}

// As indOffL but restricted to immLoffset4 offsets
operand indOffL4(iRegP reg, immLoffset4 off)
%{
  constraint(ALLOC_IN_RC(ptr_reg));
  match(AddP reg off);
  op_cost(0);
  format %{ "[$reg, $off]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index(0xffffffff);
    scale(0x0);
    disp($off);
  %}
%}

// As indOffL but restricted to immLoffset8 offsets
operand indOffL8(iRegP reg, immLoffset8 off)
%{
  constraint(ALLOC_IN_RC(ptr_reg));
  match(AddP reg off);
  op_cost(0);
  format %{ "[$reg, $off]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index(0xffffffff);
    scale(0x0);
    disp($off);
  %}
%}

// As indOffL but restricted to immLoffset16 offsets
operand indOffL16(iRegP reg, immLoffset16 off)
%{
  constraint(ALLOC_IN_RC(ptr_reg));
  match(AddP reg off);
  op_cost(0);
  format %{ "[$reg, $off]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index(0xffffffff);
    scale(0x0);
    disp($off);
  %}
%}
6501 
// Narrow-oop (compressed pointer) addressing variants. Each predicate
// requires Universe::narrow_oop_shift() == 0, i.e. the DecodeN is a
// no-op shift so the compressed register can serve directly as the
// address base.
operand indirectN(iRegN reg)
%{
  predicate(Universe::narrow_oop_shift() == 0);
  constraint(ALLOC_IN_RC(ptr_reg));
  match(DecodeN reg);
  op_cost(0);
  format %{ "[$reg]\t# narrow" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index(0xffffffff);
    scale(0x0);
    disp(0x0);
  %}
%}

// narrow-base [base, index sxtw #scale]; also requires the scaled
// form to fit every memory use of the address
operand indIndexScaledI2LN(iRegN reg, iRegI ireg, immIScale scale)
%{
  predicate(Universe::narrow_oop_shift() == 0 && size_fits_all_mem_uses(n->as_AddP(), n->in(AddPNode::Offset)->in(2)->get_int()));
  constraint(ALLOC_IN_RC(ptr_reg));
  match(AddP (DecodeN reg) (LShiftL (ConvI2L ireg) scale));
  op_cost(0);
  format %{ "$reg, $ireg sxtw($scale), 0, I2L\t# narrow" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index($ireg);
    scale($scale);
    disp(0x0);
  %}
%}

// narrow-base [base, index lsl #scale]
operand indIndexScaledN(iRegN reg, iRegL lreg, immIScale scale)
%{
  predicate(Universe::narrow_oop_shift() == 0 && size_fits_all_mem_uses(n->as_AddP(), n->in(AddPNode::Offset)->in(2)->get_int()));
  constraint(ALLOC_IN_RC(ptr_reg));
  match(AddP (DecodeN reg) (LShiftL lreg scale));
  op_cost(0);
  format %{ "$reg, $lreg lsl($scale)\t# narrow" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index($lreg);
    scale($scale);
    disp(0x0);
  %}
%}

// narrow-base [base, index sxtw]
operand indIndexI2LN(iRegN reg, iRegI ireg)
%{
  predicate(Universe::narrow_oop_shift() == 0);
  constraint(ALLOC_IN_RC(ptr_reg));
  match(AddP (DecodeN reg) (ConvI2L ireg));
  op_cost(0);
  format %{ "$reg, $ireg, 0, I2L\t# narrow" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index($ireg);
    scale(0x0);
    disp(0x0);
  %}
%}

// narrow-base [base, index]
operand indIndexN(iRegN reg, iRegL lreg)
%{
  predicate(Universe::narrow_oop_shift() == 0);
  constraint(ALLOC_IN_RC(ptr_reg));
  match(AddP (DecodeN reg) lreg);
  op_cost(0);
  format %{ "$reg, $lreg\t# narrow" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index($lreg);
    scale(0x0);
    disp(0x0);
  %}
%}

// narrow-base [base, #imm] with 32-bit offset
operand indOffIN(iRegN reg, immIOffset off)
%{
  predicate(Universe::narrow_oop_shift() == 0);
  constraint(ALLOC_IN_RC(ptr_reg));
  match(AddP (DecodeN reg) off);
  op_cost(0);
  format %{ "[$reg, $off]\t# narrow" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index(0xffffffff);
    scale(0x0);
    disp($off);
  %}
%}

// narrow-base [base, #imm] with long offset
operand indOffLN(iRegN reg, immLoffset off)
%{
  predicate(Universe::narrow_oop_shift() == 0);
  constraint(ALLOC_IN_RC(ptr_reg));
  match(AddP (DecodeN reg) off);
  op_cost(0);
  format %{ "[$reg, $off]\t# narrow" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index(0xffffffff);
    scale(0x0);
    disp($off);
  %}
%}



// AArch64 opto stubs need to write to the pc slot in the thread anchor
operand thread_anchor_pc(thread_RegP reg, immL_pc_off off)
%{
  constraint(ALLOC_IN_RC(ptr_reg));
  match(AddP reg off);
  op_cost(0);
  format %{ "[$reg, $off]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index(0xffffffff);
    scale(0x0);
    disp($off);
  %}
%}
6623 
//----------Special Memory Operands--------------------------------------------
// Stack Slot Operand - This operand is used for loading and storing temporary
//                      values on the stack where a match requires a value to
//                      flow through memory.
//
// NOTE(review): the base encoding comments below say "RSP", which is
// x86 terminology apparently carried over; 0x1e here should denote the
// AArch64 stack pointer -- confirm against the reg_def encodings.
// Also, only stackSlotP specifies op_cost(100); the other stack-slot
// operands specify no op_cost -- confirm the asymmetry is intended.
operand stackSlotP(sRegP reg)
%{
  constraint(ALLOC_IN_RC(stack_slots));
  op_cost(100);
  // No match rule because this operand is only generated in matching
  // match(RegP);
  format %{ "[$reg]" %}
  interface(MEMORY_INTER) %{
    base(0x1e);  // RSP
    index(0x0);  // No Index
    scale(0x0);  // No Scale
    disp($reg);  // Stack Offset
  %}
%}

operand stackSlotI(sRegI reg)
%{
  constraint(ALLOC_IN_RC(stack_slots));
  // No match rule because this operand is only generated in matching
  // match(RegI);
  format %{ "[$reg]" %}
  interface(MEMORY_INTER) %{
    base(0x1e);  // RSP
    index(0x0);  // No Index
    scale(0x0);  // No Scale
    disp($reg);  // Stack Offset
  %}
%}

operand stackSlotF(sRegF reg)
%{
  constraint(ALLOC_IN_RC(stack_slots));
  // No match rule because this operand is only generated in matching
  // match(RegF);
  format %{ "[$reg]" %}
  interface(MEMORY_INTER) %{
    base(0x1e);  // RSP
    index(0x0);  // No Index
    scale(0x0);  // No Scale
    disp($reg);  // Stack Offset
  %}
%}

operand stackSlotD(sRegD reg)
%{
  constraint(ALLOC_IN_RC(stack_slots));
  // No match rule because this operand is only generated in matching
  // match(RegD);
  format %{ "[$reg]" %}
  interface(MEMORY_INTER) %{
    base(0x1e);  // RSP
    index(0x0);  // No Index
    scale(0x0);  // No Scale
    disp($reg);  // Stack Offset
  %}
%}

operand stackSlotL(sRegL reg)
%{
  constraint(ALLOC_IN_RC(stack_slots));
  // No match rule because this operand is only generated in matching
  // match(RegL);
  format %{ "[$reg]" %}
  interface(MEMORY_INTER) %{
    base(0x1e);  // RSP
    index(0x0);  // No Index
    scale(0x0);  // No Scale
    disp($reg);  // Stack Offset
  %}
%}
6698 
// Operands for expressing Control Flow
// NOTE: Label is a predefined operand which should not be redefined in
//       the AD file. It is generically handled within the ADLC.

//----------Conditional Branch Operands----------------------------------------
// Comparison Op  - This is the operation of the comparison, and is limited to
//                  the following set of codes:
//                  L (<), LE (<=), G (>), GE (>=), E (==), NE (!=)
//
// Other attributes of the comparison, such as unsignedness, are specified
// by the comparison instruction that sets a condition code flags register.
// That result is represented by a flags operand whose subtype is appropriate
// to the unsignedness (etc.) of the comparison.
//
// Later, the instruction which matches both the Comparison Op (a Bool) and
// the flags (produced by the Cmp) specifies the coding of the comparison op
// by matching a specific subtype of Bool operand below, such as cmpOpU.
//
// The hex encodings in the COND_INTER blocks below are the AArch64
// condition field values: eq=0b0000, ne=0b0001, lt=0b1011, ge=0b1010,
// le=0b1101, gt=0b1100, vs=0b0110, vc=0b0111, and for the unsigned
// forms lo=0b0011, hs=0b0010, ls=0b1001, hi=0b1000.

// used for signed integral comparisons and fp comparisons

operand cmpOp()
%{
  match(Bool);

  format %{ "" %}
  interface(COND_INTER) %{
    equal(0x0, "eq");
    not_equal(0x1, "ne");
    less(0xb, "lt");
    greater_equal(0xa, "ge");
    less_equal(0xd, "le");
    greater(0xc, "gt");
    overflow(0x6, "vs");
    no_overflow(0x7, "vc");
  %}
%}

// used for unsigned integral comparisons

operand cmpOpU()
%{
  match(Bool);

  format %{ "" %}
  interface(COND_INTER) %{
    equal(0x0, "eq");
    not_equal(0x1, "ne");
    less(0x3, "lo");
    greater_equal(0x2, "hs");
    less_equal(0x9, "ls");
    greater(0x8, "hi");
    overflow(0x6, "vs");
    no_overflow(0x7, "vc");
  %}
%}

// used for certain integral comparisons which can be
// converted to cbxx or tbxx instructions

operand cmpOpEqNe()
%{
  match(Bool);
  match(CmpOp);
  op_cost(0);
  // restricted to eq/ne tests only
  predicate(n->as_Bool()->_test._test == BoolTest::ne
            || n->as_Bool()->_test._test == BoolTest::eq);

  format %{ "" %}
  interface(COND_INTER) %{
    equal(0x0, "eq");
    not_equal(0x1, "ne");
    less(0xb, "lt");
    greater_equal(0xa, "ge");
    less_equal(0xd, "le");
    greater(0xc, "gt");
    overflow(0x6, "vs");
    no_overflow(0x7, "vc");
  %}
%}

// used for certain integral comparisons which can be
// converted to cbxx or tbxx instructions

operand cmpOpLtGe()
%{
  match(Bool);
  match(CmpOp);
  op_cost(0);

  // restricted to lt/ge tests only
  predicate(n->as_Bool()->_test._test == BoolTest::lt
            || n->as_Bool()->_test._test == BoolTest::ge);

  format %{ "" %}
  interface(COND_INTER) %{
    equal(0x0, "eq");
    not_equal(0x1, "ne");
    less(0xb, "lt");
    greater_equal(0xa, "ge");
    less_equal(0xd, "le");
    greater(0xc, "gt");
    overflow(0x6, "vs");
    no_overflow(0x7, "vc");
  %}
%}

// used for certain unsigned integral comparisons which can be
// converted to cbxx or tbxx instructions

operand cmpOpUEqNeLtGe()
%{
  match(Bool);
  match(CmpOp);
  op_cost(0);

  // restricted to eq/ne/lt/ge tests only
  predicate(n->as_Bool()->_test._test == BoolTest::eq
            || n->as_Bool()->_test._test == BoolTest::ne
            || n->as_Bool()->_test._test == BoolTest::lt
            || n->as_Bool()->_test._test == BoolTest::ge);

  format %{ "" %}
  interface(COND_INTER) %{
    equal(0x0, "eq");
    not_equal(0x1, "ne");
    less(0xb, "lt");
    greater_equal(0xa, "ge");
    less_equal(0xd, "le");
    greater(0xc, "gt");
    overflow(0x6, "vs");
    no_overflow(0x7, "vc");
  %}
%}
6830 
// Special operand allowing long args to int ops to be truncated for free

operand iRegL2I(iRegL reg) %{

  op_cost(0);

  // matches the ConvL2I so the truncation can be folded into the user
  match(ConvL2I reg);

  format %{ "l2i($reg)" %}

  interface(REG_INTER)
%}

// Memory opclasses restricted to the addressing modes used by vector
// load/store of 4, 8 and 16 bytes respectively (base-only, base+index,
// or base + suitably-restricted immediate offset).
opclass vmem4(indirect, indIndex, indOffI4, indOffL4);
opclass vmem8(indirect, indIndex, indOffI8, indOffL8);
opclass vmem16(indirect, indIndex, indOffI16, indOffL16);
6847 
//----------OPERAND CLASSES----------------------------------------------------
// Operand Classes are groups of operands that are used to simplify
// instruction definitions by not requiring the AD writer to specify
// separate instructions for every form of operand when the
// instruction accepts multiple operand types with the same basic
// encoding and format. The classic case of this is memory operands.

// memory is used to define read/write location for load/store
// instruction defs. we can turn a memory op into an Address

opclass memory(indirect, indIndexScaled, indIndexScaledI2L, indIndexI2L, indIndex, indOffI, indOffL,
               indirectN, indIndexScaledN, indIndexScaledI2LN, indIndexI2LN, indIndexN, indOffIN, indOffLN);

// iRegIorL2I is used for src inputs in rules for 32 bit int (I)
// operations. it allows the src to be either an iRegI or a (ConvL2I
// iRegL). in the latter case the l2i normally planted for a ConvL2I
// can be elided because the 32-bit instruction will just employ the
// lower 32 bits anyway.
//
// n.b. this does not elide all L2I conversions. if the truncated
// value is consumed by more than one operation then the ConvL2I
// cannot be bundled into the consuming nodes so an l2i gets planted
// (actually a movw $dst $src) and the downstream instructions consume
// the result of the l2i as an iRegI input. That's a shame since the
// movw is actually redundant but it's not too costly.

opclass iRegIorL2I(iRegI, iRegL2I);
6875 
//----------PIPELINE-----------------------------------------------------------
// Rules which define the behavior of the target architectures pipeline.

// For specific pipelines, eg A53, define the stages of that pipeline
//pipe_desc(ISS, EX1, EX2, WR);
// Map the A53-style stage names (issue, execute 1/2, writeback) onto
// the generic S0..S5 stages declared by pipe_desc below.
#define ISS S0
#define EX1 S1
#define EX2 S2
#define WR  S3
6885 
6886 // Integer ALU reg operation
6887 pipeline %{
6888 
attributes %{
  // ARM instructions are of fixed length
  fixed_size_instructions;        // Fixed size instructions
  max_instructions_per_bundle = 2;   // A53 = 2, A57 = 4
  // ARM instructions come in 32-bit word units
  instruction_unit_size = 4;         // An instruction is 4 bytes long
  instruction_fetch_unit_size = 64;  // The processor fetches one line
  instruction_fetch_units = 1;       // of 64 bytes

  // List of nop instructions
  nops( MachNop );
%}
6901 
6902 // We don't use an actual pipeline model so don't care about resources
6903 // or description. we do use pipeline classes to introduce fixed
6904 // latencies
6905 
//----------RESOURCES----------------------------------------------------------
// Resources are the functional units available to the machine

// INS0/INS1 are the two issue slots; INS01 means an instruction may
// issue in either slot, while INS0 alone restricts it to slot 0
// (see the pipe_class definitions below).
resources( INS0, INS1, INS01 = INS0 | INS1,
           ALU0, ALU1, ALU = ALU0 | ALU1,
           MAC,
           DIV,
           BRANCH,
           LDST,
           NEON_FP);
6916 
//----------PIPELINE DESCRIPTION-----------------------------------------------
// Pipeline Description specifies the stages in the machine's pipeline

// Define the pipeline as a generic 6 stage pipeline
// (S0..S3 are aliased as ISS/EX1/EX2/WR by the #defines above)
pipe_desc(S0, S1, S2, S3, S4, S5);
6922 
//----------PIPELINE CLASSES---------------------------------------------------
// Pipeline Classes describe the stages in which input and output are
// referenced by the hardware pipeline.
//
// Each class names the stage at which every operand is read or
// written and the issue resource it occupies. INS01 : ISS means the
// instruction can issue in either slot; INS0 : ISS restricts it to
// slot 0.

// FP two-operand (dyadic) op, single precision
pipe_class fp_dop_reg_reg_s(vRegF dst, vRegF src1, vRegF src2)
%{
  single_instruction;
  src1   : S1(read);
  src2   : S2(read);
  dst    : S5(write);
  INS01  : ISS;
  NEON_FP : S5;
%}

// FP two-operand (dyadic) op, double precision
pipe_class fp_dop_reg_reg_d(vRegD dst, vRegD src1, vRegD src2)
%{
  single_instruction;
  src1   : S1(read);
  src2   : S2(read);
  dst    : S5(write);
  INS01  : ISS;
  NEON_FP : S5;
%}

// FP one-operand (unary) op, single precision
pipe_class fp_uop_s(vRegF dst, vRegF src)
%{
  single_instruction;
  src    : S1(read);
  dst    : S5(write);
  INS01  : ISS;
  NEON_FP : S5;
%}

// FP one-operand (unary) op, double precision
pipe_class fp_uop_d(vRegD dst, vRegD src)
%{
  single_instruction;
  src    : S1(read);
  dst    : S5(write);
  INS01  : ISS;
  NEON_FP : S5;
%}

// FP convert double -> float
pipe_class fp_d2f(vRegF dst, vRegD src)
%{
  single_instruction;
  src    : S1(read);
  dst    : S5(write);
  INS01  : ISS;
  NEON_FP : S5;
%}

// FP convert float -> double
pipe_class fp_f2d(vRegD dst, vRegF src)
%{
  single_instruction;
  src    : S1(read);
  dst    : S5(write);
  INS01  : ISS;
  NEON_FP : S5;
%}

// FP convert float -> int
pipe_class fp_f2i(iRegINoSp dst, vRegF src)
%{
  single_instruction;
  src    : S1(read);
  dst    : S5(write);
  INS01  : ISS;
  NEON_FP : S5;
%}

// FP convert float -> long
pipe_class fp_f2l(iRegLNoSp dst, vRegF src)
%{
  single_instruction;
  src    : S1(read);
  dst    : S5(write);
  INS01  : ISS;
  NEON_FP : S5;
%}

// FP convert int -> float
pipe_class fp_i2f(vRegF dst, iRegIorL2I src)
%{
  single_instruction;
  src    : S1(read);
  dst    : S5(write);
  INS01  : ISS;
  NEON_FP : S5;
%}

// FP convert long -> float
pipe_class fp_l2f(vRegF dst, iRegL src)
%{
  single_instruction;
  src    : S1(read);
  dst    : S5(write);
  INS01  : ISS;
  NEON_FP : S5;
%}

// FP convert double -> int
pipe_class fp_d2i(iRegINoSp dst, vRegD src)
%{
  single_instruction;
  src    : S1(read);
  dst    : S5(write);
  INS01  : ISS;
  NEON_FP : S5;
%}

// FP convert double -> long
pipe_class fp_d2l(iRegLNoSp dst, vRegD src)
%{
  single_instruction;
  src    : S1(read);
  dst    : S5(write);
  INS01  : ISS;
  NEON_FP : S5;
%}

// FP convert int -> double
pipe_class fp_i2d(vRegD dst, iRegIorL2I src)
%{
  single_instruction;
  src    : S1(read);
  dst    : S5(write);
  INS01  : ISS;
  NEON_FP : S5;
%}

// FP convert long -> double
pipe_class fp_l2d(vRegD dst, iRegIorL2I src)
%{
  single_instruction;
  src    : S1(read);
  dst    : S5(write);
  INS01  : ISS;
  NEON_FP : S5;
%}

// FP divide, single precision (issue slot 0 only)
pipe_class fp_div_s(vRegF dst, vRegF src1, vRegF src2)
%{
  single_instruction;
  src1   : S1(read);
  src2   : S2(read);
  dst    : S5(write);
  INS0   : ISS;
  NEON_FP : S5;
%}

// FP divide, double precision (issue slot 0 only)
pipe_class fp_div_d(vRegD dst, vRegD src1, vRegD src2)
%{
  single_instruction;
  src1   : S1(read);
  src2   : S2(read);
  dst    : S5(write);
  INS0   : ISS;
  NEON_FP : S5;
%}

// FP conditional select, single precision (also reads the flags)
pipe_class fp_cond_reg_reg_s(vRegF dst, vRegF src1, vRegF src2, rFlagsReg cr)
%{
  single_instruction;
  cr     : S1(read);
  src1   : S1(read);
  src2   : S1(read);
  dst    : S3(write);
  INS01  : ISS;
  NEON_FP : S3;
%}

// FP conditional select, double precision (also reads the flags)
pipe_class fp_cond_reg_reg_d(vRegD dst, vRegD src1, vRegD src2, rFlagsReg cr)
%{
  single_instruction;
  cr     : S1(read);
  src1   : S1(read);
  src2   : S1(read);
  dst    : S3(write);
  INS01  : ISS;
  NEON_FP : S3;
%}

// FP move-immediate, single precision
pipe_class fp_imm_s(vRegF dst)
%{
  single_instruction;
  dst    : S3(write);
  INS01  : ISS;
  NEON_FP : S3;
%}

// FP move-immediate, double precision
pipe_class fp_imm_d(vRegD dst)
%{
  single_instruction;
  dst    : S3(write);
  INS01  : ISS;
  NEON_FP : S3;
%}

// FP load of a constant, single precision
pipe_class fp_load_constant_s(vRegF dst)
%{
  single_instruction;
  dst    : S4(write);
  INS01  : ISS;
  NEON_FP : S4;
%}

// FP load of a constant, double precision
pipe_class fp_load_constant_d(vRegD dst)
%{
  single_instruction;
  dst    : S4(write);
  INS01  : ISS;
  NEON_FP : S4;
%}
7128 
// Vector (NEON) pipeline classes. The 64-bit (vecD) forms generally
// issue in either slot (INS01) while the 128-bit (vecX) forms are
// restricted to slot 0 (INS0); the load/store classes below are
// exceptions.

// vector multiply, 64-bit
pipe_class vmul64(vecD dst, vecD src1, vecD src2)
%{
  single_instruction;
  dst    : S5(write);
  src1   : S1(read);
  src2   : S1(read);
  INS01  : ISS;
  NEON_FP : S5;
%}

// vector multiply, 128-bit
pipe_class vmul128(vecX dst, vecX src1, vecX src2)
%{
  single_instruction;
  dst    : S5(write);
  src1   : S1(read);
  src2   : S1(read);
  INS0   : ISS;
  NEON_FP : S5;
%}

// vector multiply-accumulate, 64-bit; dst is also read (accumulator)
pipe_class vmla64(vecD dst, vecD src1, vecD src2)
%{
  single_instruction;
  dst    : S5(write);
  src1   : S1(read);
  src2   : S1(read);
  dst    : S1(read);
  INS01  : ISS;
  NEON_FP : S5;
%}

// vector multiply-accumulate, 128-bit; dst is also read (accumulator)
pipe_class vmla128(vecX dst, vecX src1, vecX src2)
%{
  single_instruction;
  dst    : S5(write);
  src1   : S1(read);
  src2   : S1(read);
  dst    : S1(read);
  INS0   : ISS;
  NEON_FP : S5;
%}

// vector two-operand integer op, 64-bit
pipe_class vdop64(vecD dst, vecD src1, vecD src2)
%{
  single_instruction;
  dst    : S4(write);
  src1   : S2(read);
  src2   : S2(read);
  INS01  : ISS;
  NEON_FP : S4;
%}

// vector two-operand integer op, 128-bit
pipe_class vdop128(vecX dst, vecX src1, vecX src2)
%{
  single_instruction;
  dst    : S4(write);
  src1   : S2(read);
  src2   : S2(read);
  INS0   : ISS;
  NEON_FP : S4;
%}

// vector logical op, 64-bit
pipe_class vlogical64(vecD dst, vecD src1, vecD src2)
%{
  single_instruction;
  dst    : S3(write);
  src1   : S2(read);
  src2   : S2(read);
  INS01  : ISS;
  NEON_FP : S3;
%}

// vector logical op, 128-bit
pipe_class vlogical128(vecX dst, vecX src1, vecX src2)
%{
  single_instruction;
  dst    : S3(write);
  src1   : S2(read);
  src2   : S2(read);
  INS0   : ISS;
  NEON_FP : S3;
%}

// vector shift by register, 64-bit
pipe_class vshift64(vecD dst, vecD src, vecX shift)
%{
  single_instruction;
  dst    : S3(write);
  src    : S1(read);
  shift  : S1(read);
  INS01  : ISS;
  NEON_FP : S3;
%}

// vector shift by register, 128-bit
pipe_class vshift128(vecX dst, vecX src, vecX shift)
%{
  single_instruction;
  dst    : S3(write);
  src    : S1(read);
  shift  : S1(read);
  INS0   : ISS;
  NEON_FP : S3;
%}

// vector shift by immediate, 64-bit
pipe_class vshift64_imm(vecD dst, vecD src, immI shift)
%{
  single_instruction;
  dst    : S3(write);
  src    : S1(read);
  INS01  : ISS;
  NEON_FP : S3;
%}

// vector shift by immediate, 128-bit
pipe_class vshift128_imm(vecX dst, vecX src, immI shift)
%{
  single_instruction;
  dst    : S3(write);
  src    : S1(read);
  INS0   : ISS;
  NEON_FP : S3;
%}

// vector two-operand FP op, 64-bit
pipe_class vdop_fp64(vecD dst, vecD src1, vecD src2)
%{
  single_instruction;
  dst    : S5(write);
  src1   : S1(read);
  src2   : S1(read);
  INS01  : ISS;
  NEON_FP : S5;
%}

// vector two-operand FP op, 128-bit
pipe_class vdop_fp128(vecX dst, vecX src1, vecX src2)
%{
  single_instruction;
  dst    : S5(write);
  src1   : S1(read);
  src2   : S1(read);
  INS0   : ISS;
  NEON_FP : S5;
%}

// vector FP multiply/divide, 64-bit (slot 0 only, unlike most vecD ops)
pipe_class vmuldiv_fp64(vecD dst, vecD src1, vecD src2)
%{
  single_instruction;
  dst    : S5(write);
  src1   : S1(read);
  src2   : S1(read);
  INS0   : ISS;
  NEON_FP : S5;
%}

// vector FP multiply/divide, 128-bit
pipe_class vmuldiv_fp128(vecX dst, vecX src1, vecX src2)
%{
  single_instruction;
  dst    : S5(write);
  src1   : S1(read);
  src2   : S1(read);
  INS0   : ISS;
  NEON_FP : S5;
%}

// vector FP square root, 128-bit
pipe_class vsqrt_fp128(vecX dst, vecX src)
%{
  single_instruction;
  dst    : S5(write);
  src    : S1(read);
  INS0   : ISS;
  NEON_FP : S5;
%}

// vector unary FP op, 64-bit
pipe_class vunop_fp64(vecD dst, vecD src)
%{
  single_instruction;
  dst    : S5(write);
  src    : S1(read);
  INS01  : ISS;
  NEON_FP : S5;
%}

// vector unary FP op, 128-bit
pipe_class vunop_fp128(vecX dst, vecX src)
%{
  single_instruction;
  dst    : S5(write);
  src    : S1(read);
  INS0   : ISS;
  NEON_FP : S5;
%}

// duplicate general register into all lanes, 64-bit
pipe_class vdup_reg_reg64(vecD dst, iRegI src)
%{
  single_instruction;
  dst    : S3(write);
  src    : S1(read);
  INS01  : ISS;
  NEON_FP : S3;
%}

// duplicate general register into all lanes, 128-bit
pipe_class vdup_reg_reg128(vecX dst, iRegI src)
%{
  single_instruction;
  dst    : S3(write);
  src    : S1(read);
  INS01  : ISS;
  NEON_FP : S3;
%}

// duplicate float register into all lanes, 64-bit
pipe_class vdup_reg_freg64(vecD dst, vRegF src)
%{
  single_instruction;
  dst    : S3(write);
  src    : S1(read);
  INS01  : ISS;
  NEON_FP : S3;
%}

// duplicate float register into all lanes, 128-bit
pipe_class vdup_reg_freg128(vecX dst, vRegF src)
%{
  single_instruction;
  dst    : S3(write);
  src    : S1(read);
  INS01  : ISS;
  NEON_FP : S3;
%}

// duplicate double register into all lanes, 128-bit
pipe_class vdup_reg_dreg128(vecX dst, vRegD src)
%{
  single_instruction;
  dst    : S3(write);
  src    : S1(read);
  INS01  : ISS;
  NEON_FP : S3;
%}

// vector move-immediate, 64-bit
pipe_class vmovi_reg_imm64(vecD dst)
%{
  single_instruction;
  dst    : S3(write);
  INS01  : ISS;
  NEON_FP : S3;
%}

// vector move-immediate, 128-bit
pipe_class vmovi_reg_imm128(vecX dst)
%{
  single_instruction;
  dst    : S3(write);
  INS0   : ISS;
  NEON_FP : S3;
%}

// vector load, 64-bit
pipe_class vload_reg_mem64(vecD dst, vmem8 mem)
%{
  single_instruction;
  dst    : S5(write);
  mem    : ISS(read);
  INS01  : ISS;
  NEON_FP : S3;
%}

// vector load, 128-bit
pipe_class vload_reg_mem128(vecX dst, vmem16 mem)
%{
  single_instruction;
  dst    : S5(write);
  mem    : ISS(read);
  INS01  : ISS;
  NEON_FP : S3;
%}

// vector store, 64-bit
pipe_class vstore_reg_mem64(vecD src, vmem8 mem)
%{
  single_instruction;
  mem    : ISS(read);
  src    : S2(read);
  INS01  : ISS;
  NEON_FP : S3;
%}

// vector store, 128-bit
// NOTE(review): src is declared vecD although this is the 128-bit
// store class -- probably should be vecX; confirm against callers.
pipe_class vstore_reg_mem128(vecD src, vmem16 mem)
%{
  single_instruction;
  mem    : ISS(read);
  src    : S2(read);
  INS01  : ISS;
  NEON_FP : S3;
%}
7412 
7413 //------- Integer ALU operations --------------------------
7414 
7415 // Integer ALU reg-reg operation
7416 // Operands needed in EX1, result generated in EX2
7417 // Eg.  ADD     x0, x1, x2
7418 pipe_class ialu_reg_reg(iRegI dst, iRegI src1, iRegI src2)
7419 %{
7420   single_instruction;
7421   dst    : EX2(write);
7422   src1   : EX1(read);
7423   src2   : EX1(read);
7424   INS01  : ISS; // Dual issue as instruction 0 or 1
7425   ALU    : EX2;
7426 %}
7427 
7428 // Integer ALU reg-reg operation with constant shift
7429 // Shifted register must be available in LATE_ISS instead of EX1
7430 // Eg.  ADD     x0, x1, x2, LSL #2
7431 pipe_class ialu_reg_reg_shift(iRegI dst, iRegI src1, iRegI src2, immI shift)
7432 %{
7433   single_instruction;
7434   dst    : EX2(write);
7435   src1   : EX1(read);
7436   src2   : ISS(read);
7437   INS01  : ISS;
7438   ALU    : EX2;
7439 %}
7440 
7441 // Integer ALU reg operation with constant shift
7442 // Eg.  LSL     x0, x1, #shift
7443 pipe_class ialu_reg_shift(iRegI dst, iRegI src1)
7444 %{
7445   single_instruction;
7446   dst    : EX2(write);
7447   src1   : ISS(read);
7448   INS01  : ISS;
7449   ALU    : EX2;
7450 %}
7451 
7452 // Integer ALU reg-reg operation with variable shift
7453 // Both operands must be available in LATE_ISS instead of EX1
7454 // Result is available in EX1 instead of EX2
7455 // Eg.  LSLV    x0, x1, x2
7456 pipe_class ialu_reg_reg_vshift(iRegI dst, iRegI src1, iRegI src2)
7457 %{
7458   single_instruction;
7459   dst    : EX1(write);
7460   src1   : ISS(read);
7461   src2   : ISS(read);
7462   INS01  : ISS;
7463   ALU    : EX1;
7464 %}
7465 
// Integer ALU reg-reg operation with extract
// As for _vshift above, but result generated in EX2
// Eg.  EXTR    x0, x1, x2, #N
pipe_class ialu_reg_reg_extr(iRegI dst, iRegI src1, iRegI src2)
%{
  single_instruction;
  dst    : EX2(write);
  src1   : ISS(read);
  src2   : ISS(read);
  INS1   : ISS; // Can only dual issue as Instruction 1
  ALU    : EX1; // NOTE(review): header says result generated in EX2 and dst is
                // written in EX2; confirm whether this was meant to be ALU : EX2
                // (compare ialu_reg_reg_vshift, which pairs EX1/EX1).
%}
7478 
7479 // Integer ALU reg operation
7480 // Eg.  NEG     x0, x1
7481 pipe_class ialu_reg(iRegI dst, iRegI src)
7482 %{
7483   single_instruction;
7484   dst    : EX2(write);
7485   src    : EX1(read);
7486   INS01  : ISS;
7487   ALU    : EX2;
7488 %}
7489 
// Integer ALU reg-immediate operation
7491 // Eg.  ADD     x0, x1, #N
7492 pipe_class ialu_reg_imm(iRegI dst, iRegI src1)
7493 %{
7494   single_instruction;
7495   dst    : EX2(write);
7496   src1   : EX1(read);
7497   INS01  : ISS;
7498   ALU    : EX2;
7499 %}
7500 
7501 // Integer ALU immediate operation (no source operands)
7502 // Eg.  MOV     x0, #N
7503 pipe_class ialu_imm(iRegI dst)
7504 %{
7505   single_instruction;
7506   dst    : EX1(write);
7507   INS01  : ISS;
7508   ALU    : EX1;
7509 %}
7510 
7511 //------- Compare operation -------------------------------
7512 
7513 // Compare reg-reg
7514 // Eg.  CMP     x0, x1
7515 pipe_class icmp_reg_reg(rFlagsReg cr, iRegI op1, iRegI op2)
7516 %{
7517   single_instruction;
7518 //  fixed_latency(16);
7519   cr     : EX2(write);
7520   op1    : EX1(read);
7521   op2    : EX1(read);
7522   INS01  : ISS;
7523   ALU    : EX2;
7524 %}
7525 
// Compare reg-imm
// Eg.  CMP     x0, #N
7528 pipe_class icmp_reg_imm(rFlagsReg cr, iRegI op1)
7529 %{
7530   single_instruction;
7531 //  fixed_latency(16);
7532   cr     : EX2(write);
7533   op1    : EX1(read);
7534   INS01  : ISS;
7535   ALU    : EX2;
7536 %}
7537 
7538 //------- Conditional instructions ------------------------
7539 
7540 // Conditional no operands
7541 // Eg.  CSINC   x0, zr, zr, <cond>
7542 pipe_class icond_none(iRegI dst, rFlagsReg cr)
7543 %{
7544   single_instruction;
7545   cr     : EX1(read);
7546   dst    : EX2(write);
7547   INS01  : ISS;
7548   ALU    : EX2;
7549 %}
7550 
7551 // Conditional 2 operand
7552 // EG.  CSEL    X0, X1, X2, <cond>
7553 pipe_class icond_reg_reg(iRegI dst, iRegI src1, iRegI src2, rFlagsReg cr)
7554 %{
7555   single_instruction;
7556   cr     : EX1(read);
7557   src1   : EX1(read);
7558   src2   : EX1(read);
7559   dst    : EX2(write);
7560   INS01  : ISS;
7561   ALU    : EX2;
7562 %}
7563 
// Conditional 1 operand
// EG.  CSEL    X0, X1, ZR, <cond>
7566 pipe_class icond_reg(iRegI dst, iRegI src, rFlagsReg cr)
7567 %{
7568   single_instruction;
7569   cr     : EX1(read);
7570   src    : EX1(read);
7571   dst    : EX2(write);
7572   INS01  : ISS;
7573   ALU    : EX2;
7574 %}
7575 
7576 //------- Multiply pipeline operations --------------------
7577 
7578 // Multiply reg-reg
7579 // Eg.  MUL     w0, w1, w2
7580 pipe_class imul_reg_reg(iRegI dst, iRegI src1, iRegI src2)
7581 %{
7582   single_instruction;
7583   dst    : WR(write);
7584   src1   : ISS(read);
7585   src2   : ISS(read);
7586   INS01  : ISS;
7587   MAC    : WR;
7588 %}
7589 
7590 // Multiply accumulate
7591 // Eg.  MADD    w0, w1, w2, w3
7592 pipe_class imac_reg_reg(iRegI dst, iRegI src1, iRegI src2, iRegI src3)
7593 %{
7594   single_instruction;
7595   dst    : WR(write);
7596   src1   : ISS(read);
7597   src2   : ISS(read);
7598   src3   : ISS(read);
7599   INS01  : ISS;
7600   MAC    : WR;
7601 %}
7602 
// Long multiply reg-reg
// Eg.  MUL     x0, x1, x2
7604 pipe_class lmul_reg_reg(iRegI dst, iRegI src1, iRegI src2)
7605 %{
7606   single_instruction;
7607   fixed_latency(3); // Maximum latency for 64 bit mul
7608   dst    : WR(write);
7609   src1   : ISS(read);
7610   src2   : ISS(read);
7611   INS01  : ISS;
7612   MAC    : WR;
7613 %}
7614 
// Long multiply accumulate
// Eg.  MADD    x0, x1, x2, x3
7617 pipe_class lmac_reg_reg(iRegI dst, iRegI src1, iRegI src2, iRegI src3)
7618 %{
7619   single_instruction;
7620   fixed_latency(3); // Maximum latency for 64 bit mul
7621   dst    : WR(write);
7622   src1   : ISS(read);
7623   src2   : ISS(read);
7624   src3   : ISS(read);
7625   INS01  : ISS;
7626   MAC    : WR;
7627 %}
7628 
7629 //------- Divide pipeline operations --------------------
7630 
7631 // Eg.  SDIV    w0, w1, w2
7632 pipe_class idiv_reg_reg(iRegI dst, iRegI src1, iRegI src2)
7633 %{
7634   single_instruction;
7635   fixed_latency(8); // Maximum latency for 32 bit divide
7636   dst    : WR(write);
7637   src1   : ISS(read);
7638   src2   : ISS(read);
7639   INS0   : ISS; // Can only dual issue as instruction 0
7640   DIV    : WR;
7641 %}
7642 
7643 // Eg.  SDIV    x0, x1, x2
7644 pipe_class ldiv_reg_reg(iRegI dst, iRegI src1, iRegI src2)
7645 %{
7646   single_instruction;
7647   fixed_latency(16); // Maximum latency for 64 bit divide
7648   dst    : WR(write);
7649   src1   : ISS(read);
7650   src2   : ISS(read);
7651   INS0   : ISS; // Can only dual issue as instruction 0
7652   DIV    : WR;
7653 %}
7654 
7655 //------- Load pipeline operations ------------------------
7656 
7657 // Load - prefetch
7658 // Eg.  PFRM    <mem>
7659 pipe_class iload_prefetch(memory mem)
7660 %{
7661   single_instruction;
7662   mem    : ISS(read);
7663   INS01  : ISS;
7664   LDST   : WR;
7665 %}
7666 
7667 // Load - reg, mem
7668 // Eg.  LDR     x0, <mem>
7669 pipe_class iload_reg_mem(iRegI dst, memory mem)
7670 %{
7671   single_instruction;
7672   dst    : WR(write);
7673   mem    : ISS(read);
7674   INS01  : ISS;
7675   LDST   : WR;
7676 %}
7677 
7678 // Load - reg, reg
7679 // Eg.  LDR     x0, [sp, x1]
7680 pipe_class iload_reg_reg(iRegI dst, iRegI src)
7681 %{
7682   single_instruction;
7683   dst    : WR(write);
7684   src    : ISS(read);
7685   INS01  : ISS;
7686   LDST   : WR;
7687 %}
7688 
7689 //------- Store pipeline operations -----------------------
7690 
7691 // Store - zr, mem
7692 // Eg.  STR     zr, <mem>
7693 pipe_class istore_mem(memory mem)
7694 %{
7695   single_instruction;
7696   mem    : ISS(read);
7697   INS01  : ISS;
7698   LDST   : WR;
7699 %}
7700 
7701 // Store - reg, mem
7702 // Eg.  STR     x0, <mem>
7703 pipe_class istore_reg_mem(iRegI src, memory mem)
7704 %{
7705   single_instruction;
7706   mem    : ISS(read);
7707   src    : EX2(read);
7708   INS01  : ISS;
7709   LDST   : WR;
7710 %}
7711 
// Store - reg, reg
// Eg. STR      x0, [sp, x1]
pipe_class istore_reg_reg(iRegI dst, iRegI src)
%{
  single_instruction;
  dst    : ISS(read); // 'dst' is the index/address register of the store
                      // and is only read, despite the name.
  src    : EX2(read);
  INS01  : ISS;
  LDST   : WR;
%}
7722 
//------- Branch pipeline operations ----------------------
7724 
7725 // Branch
7726 pipe_class pipe_branch()
7727 %{
7728   single_instruction;
7729   INS01  : ISS;
7730   BRANCH : EX1;
7731 %}
7732 
7733 // Conditional branch
7734 pipe_class pipe_branch_cond(rFlagsReg cr)
7735 %{
7736   single_instruction;
7737   cr     : EX1(read);
7738   INS01  : ISS;
7739   BRANCH : EX1;
7740 %}
7741 
7742 // Compare & Branch
7743 // EG.  CBZ/CBNZ
7744 pipe_class pipe_cmp_branch(iRegI op1)
7745 %{
7746   single_instruction;
7747   op1    : EX1(read);
7748   INS01  : ISS;
7749   BRANCH : EX1;
7750 %}
7751 
7752 //------- Synchronisation operations ----------------------
7753 
7754 // Any operation requiring serialization.
7755 // EG.  DMB/Atomic Ops/Load Acquire/Str Release
7756 pipe_class pipe_serial()
7757 %{
7758   single_instruction;
7759   force_serialization;
7760   fixed_latency(16);
7761   INS01  : ISS(2); // Cannot dual issue with any other instruction
7762   LDST   : WR;
7763 %}
7764 
7765 // Generic big/slow expanded idiom - also serialized
7766 pipe_class pipe_slow()
7767 %{
7768   instruction_count(10);
7769   multiple_bundles;
7770   force_serialization;
7771   fixed_latency(16);
7772   INS01  : ISS(2); // Cannot dual issue with any other instruction
7773   LDST   : WR;
7774 %}
7775 
7776 // Empty pipeline class
7777 pipe_class pipe_class_empty()
7778 %{
7779   single_instruction;
7780   fixed_latency(0);
7781 %}
7782 
7783 // Default pipeline class.
7784 pipe_class pipe_class_default()
7785 %{
7786   single_instruction;
7787   fixed_latency(2);
7788 %}
7789 
7790 // Pipeline class for compares.
7791 pipe_class pipe_class_compare()
7792 %{
7793   single_instruction;
7794   fixed_latency(16);
7795 %}
7796 
7797 // Pipeline class for memory operations.
7798 pipe_class pipe_class_memory()
7799 %{
7800   single_instruction;
7801   fixed_latency(16);
7802 %}
7803 
7804 // Pipeline class for call.
7805 pipe_class pipe_class_call()
7806 %{
7807   single_instruction;
7808   fixed_latency(100);
7809 %}
7810 
7811 // Define the class for the Nop node.
7812 define %{
7813    MachNop = pipe_class_empty;
7814 %}
7815 
7816 %}
7817 //----------INSTRUCTIONS-------------------------------------------------------
7818 //
7819 // match      -- States which machine-independent subtree may be replaced
7820 //               by this instruction.
7821 // ins_cost   -- The estimated cost of this instruction is used by instruction
7822 //               selection to identify a minimum cost tree of machine
7823 //               instructions that matches a tree of machine-independent
7824 //               instructions.
7825 // format     -- A string providing the disassembly for this instruction.
7826 //               The value of an instruction's operand may be inserted
7827 //               by referring to it with a '$' prefix.
7828 // opcode     -- Three instruction opcodes may be provided.  These are referred
7829 //               to within an encode class as $primary, $secondary, and $tertiary
//               respectively.  The primary opcode is commonly used to
7831 //               indicate the type of machine instruction, while secondary
7832 //               and tertiary are often used for prefix options or addressing
7833 //               modes.
7834 // ins_encode -- A list of encode classes with parameters. The encode class
7835 //               name must have been defined in an 'enc_class' specification
7836 //               in the encode section of the architecture description.
7837 
7838 // ============================================================================
7839 // Memory (Load/Store) Instructions
7840 
7841 // Load Instructions
7842 
7843 // Load Byte (8 bit signed)
7844 instruct loadB(iRegINoSp dst, memory mem)
7845 %{
7846   match(Set dst (LoadB mem));
7847   predicate(!needs_acquiring_load(n));
7848 
7849   ins_cost(4 * INSN_COST);
7850   format %{ "ldrsbw  $dst, $mem\t# byte" %}
7851 
7852   ins_encode(aarch64_enc_ldrsbw(dst, mem));
7853 
7854   ins_pipe(iload_reg_mem);
7855 %}
7856 
7857 // Load Byte (8 bit signed) into long
7858 instruct loadB2L(iRegLNoSp dst, memory mem)
7859 %{
7860   match(Set dst (ConvI2L (LoadB mem)));
7861   predicate(!needs_acquiring_load(n->in(1)));
7862 
7863   ins_cost(4 * INSN_COST);
7864   format %{ "ldrsb  $dst, $mem\t# byte" %}
7865 
7866   ins_encode(aarch64_enc_ldrsb(dst, mem));
7867 
7868   ins_pipe(iload_reg_mem);
7869 %}
7870 
7871 // Load Byte (8 bit unsigned)
7872 instruct loadUB(iRegINoSp dst, memory mem)
7873 %{
7874   match(Set dst (LoadUB mem));
7875   predicate(!needs_acquiring_load(n));
7876 
7877   ins_cost(4 * INSN_COST);
7878   format %{ "ldrbw  $dst, $mem\t# byte" %}
7879 
7880   ins_encode(aarch64_enc_ldrb(dst, mem));
7881 
7882   ins_pipe(iload_reg_mem);
7883 %}
7884 
7885 // Load Byte (8 bit unsigned) into long
7886 instruct loadUB2L(iRegLNoSp dst, memory mem)
7887 %{
7888   match(Set dst (ConvI2L (LoadUB mem)));
7889   predicate(!needs_acquiring_load(n->in(1)));
7890 
7891   ins_cost(4 * INSN_COST);
7892   format %{ "ldrb  $dst, $mem\t# byte" %}
7893 
7894   ins_encode(aarch64_enc_ldrb(dst, mem));
7895 
7896   ins_pipe(iload_reg_mem);
7897 %}
7898 
7899 // Load Short (16 bit signed)
7900 instruct loadS(iRegINoSp dst, memory mem)
7901 %{
7902   match(Set dst (LoadS mem));
7903   predicate(!needs_acquiring_load(n));
7904 
7905   ins_cost(4 * INSN_COST);
7906   format %{ "ldrshw  $dst, $mem\t# short" %}
7907 
7908   ins_encode(aarch64_enc_ldrshw(dst, mem));
7909 
7910   ins_pipe(iload_reg_mem);
7911 %}
7912 
7913 // Load Short (16 bit signed) into long
7914 instruct loadS2L(iRegLNoSp dst, memory mem)
7915 %{
7916   match(Set dst (ConvI2L (LoadS mem)));
7917   predicate(!needs_acquiring_load(n->in(1)));
7918 
7919   ins_cost(4 * INSN_COST);
7920   format %{ "ldrsh  $dst, $mem\t# short" %}
7921 
7922   ins_encode(aarch64_enc_ldrsh(dst, mem));
7923 
7924   ins_pipe(iload_reg_mem);
7925 %}
7926 
7927 // Load Char (16 bit unsigned)
7928 instruct loadUS(iRegINoSp dst, memory mem)
7929 %{
7930   match(Set dst (LoadUS mem));
7931   predicate(!needs_acquiring_load(n));
7932 
7933   ins_cost(4 * INSN_COST);
7934   format %{ "ldrh  $dst, $mem\t# short" %}
7935 
7936   ins_encode(aarch64_enc_ldrh(dst, mem));
7937 
7938   ins_pipe(iload_reg_mem);
7939 %}
7940 
7941 // Load Short/Char (16 bit unsigned) into long
7942 instruct loadUS2L(iRegLNoSp dst, memory mem)
7943 %{
7944   match(Set dst (ConvI2L (LoadUS mem)));
7945   predicate(!needs_acquiring_load(n->in(1)));
7946 
7947   ins_cost(4 * INSN_COST);
7948   format %{ "ldrh  $dst, $mem\t# short" %}
7949 
7950   ins_encode(aarch64_enc_ldrh(dst, mem));
7951 
7952   ins_pipe(iload_reg_mem);
7953 %}
7954 
7955 // Load Integer (32 bit signed)
7956 instruct loadI(iRegINoSp dst, memory mem)
7957 %{
7958   match(Set dst (LoadI mem));
7959   predicate(!needs_acquiring_load(n));
7960 
7961   ins_cost(4 * INSN_COST);
7962   format %{ "ldrw  $dst, $mem\t# int" %}
7963 
7964   ins_encode(aarch64_enc_ldrw(dst, mem));
7965 
7966   ins_pipe(iload_reg_mem);
7967 %}
7968 
7969 // Load Integer (32 bit signed) into long
7970 instruct loadI2L(iRegLNoSp dst, memory mem)
7971 %{
7972   match(Set dst (ConvI2L (LoadI mem)));
7973   predicate(!needs_acquiring_load(n->in(1)));
7974 
7975   ins_cost(4 * INSN_COST);
7976   format %{ "ldrsw  $dst, $mem\t# int" %}
7977 
7978   ins_encode(aarch64_enc_ldrsw(dst, mem));
7979 
7980   ins_pipe(iload_reg_mem);
7981 %}
7982 
7983 // Load Integer (32 bit unsigned) into long
7984 instruct loadUI2L(iRegLNoSp dst, memory mem, immL_32bits mask)
7985 %{
7986   match(Set dst (AndL (ConvI2L (LoadI mem)) mask));
7987   predicate(!needs_acquiring_load(n->in(1)->in(1)->as_Load()));
7988 
7989   ins_cost(4 * INSN_COST);
7990   format %{ "ldrw  $dst, $mem\t# int" %}
7991 
7992   ins_encode(aarch64_enc_ldrw(dst, mem));
7993 
7994   ins_pipe(iload_reg_mem);
7995 %}
7996 
// Load Long (64 bit signed)
instruct loadL(iRegLNoSp dst, memory mem)
%{
  match(Set dst (LoadL mem));
  predicate(!needs_acquiring_load(n));

  ins_cost(4 * INSN_COST);
  // Disassembly tag corrected: this loads a long, not an int.
  format %{ "ldr  $dst, $mem\t# long" %}

  ins_encode(aarch64_enc_ldr(dst, mem));

  ins_pipe(iload_reg_mem);
%}
8010 
8011 // Load Range
8012 instruct loadRange(iRegINoSp dst, memory mem)
8013 %{
8014   match(Set dst (LoadRange mem));
8015 
8016   ins_cost(4 * INSN_COST);
8017   format %{ "ldrw  $dst, $mem\t# range" %}
8018 
8019   ins_encode(aarch64_enc_ldrw(dst, mem));
8020 
8021   ins_pipe(iload_reg_mem);
8022 %}
8023 
8024 // Load Pointer
8025 instruct loadP(iRegPNoSp dst, memory mem)
8026 %{
8027   match(Set dst (LoadP mem));
8028   predicate(!needs_acquiring_load(n));
8029 
8030   ins_cost(4 * INSN_COST);
8031   format %{ "ldr  $dst, $mem\t# ptr" %}
8032 
8033   ins_encode(aarch64_enc_ldr(dst, mem));
8034 
8035   ins_pipe(iload_reg_mem);
8036 %}
8037 
8038 // Load Compressed Pointer
8039 instruct loadN(iRegNNoSp dst, memory mem)
8040 %{
8041   match(Set dst (LoadN mem));
8042   predicate(!needs_acquiring_load(n));
8043 
8044   ins_cost(4 * INSN_COST);
8045   format %{ "ldrw  $dst, $mem\t# compressed ptr" %}
8046 
8047   ins_encode(aarch64_enc_ldrw(dst, mem));
8048 
8049   ins_pipe(iload_reg_mem);
8050 %}
8051 
8052 // Load Klass Pointer
8053 instruct loadKlass(iRegPNoSp dst, memory mem)
8054 %{
8055   match(Set dst (LoadKlass mem));
8056   predicate(!needs_acquiring_load(n));
8057 
8058   ins_cost(4 * INSN_COST);
8059   format %{ "ldr  $dst, $mem\t# class" %}
8060 
8061   ins_encode(aarch64_enc_ldr(dst, mem));
8062 
8063   ins_pipe(iload_reg_mem);
8064 %}
8065 
8066 // Load Narrow Klass Pointer
8067 instruct loadNKlass(iRegNNoSp dst, memory mem)
8068 %{
8069   match(Set dst (LoadNKlass mem));
8070   predicate(!needs_acquiring_load(n));
8071 
8072   ins_cost(4 * INSN_COST);
8073   format %{ "ldrw  $dst, $mem\t# compressed class ptr" %}
8074 
8075   ins_encode(aarch64_enc_ldrw(dst, mem));
8076 
8077   ins_pipe(iload_reg_mem);
8078 %}
8079 
8080 // Load Float
8081 instruct loadF(vRegF dst, memory mem)
8082 %{
8083   match(Set dst (LoadF mem));
8084   predicate(!needs_acquiring_load(n));
8085 
8086   ins_cost(4 * INSN_COST);
8087   format %{ "ldrs  $dst, $mem\t# float" %}
8088 
8089   ins_encode( aarch64_enc_ldrs(dst, mem) );
8090 
8091   ins_pipe(pipe_class_memory);
8092 %}
8093 
8094 // Load Double
8095 instruct loadD(vRegD dst, memory mem)
8096 %{
8097   match(Set dst (LoadD mem));
8098   predicate(!needs_acquiring_load(n));
8099 
8100   ins_cost(4 * INSN_COST);
8101   format %{ "ldrd  $dst, $mem\t# double" %}
8102 
8103   ins_encode( aarch64_enc_ldrd(dst, mem) );
8104 
8105   ins_pipe(pipe_class_memory);
8106 %}
8107 
8108 
8109 // Load Int Constant
8110 instruct loadConI(iRegINoSp dst, immI src)
8111 %{
8112   match(Set dst src);
8113 
8114   ins_cost(INSN_COST);
8115   format %{ "mov $dst, $src\t# int" %}
8116 
8117   ins_encode( aarch64_enc_movw_imm(dst, src) );
8118 
8119   ins_pipe(ialu_imm);
8120 %}
8121 
8122 // Load Long Constant
8123 instruct loadConL(iRegLNoSp dst, immL src)
8124 %{
8125   match(Set dst src);
8126 
8127   ins_cost(INSN_COST);
8128   format %{ "mov $dst, $src\t# long" %}
8129 
8130   ins_encode( aarch64_enc_mov_imm(dst, src) );
8131 
8132   ins_pipe(ialu_imm);
8133 %}
8134 
8135 // Load Pointer Constant
8136 
8137 instruct loadConP(iRegPNoSp dst, immP con)
8138 %{
8139   match(Set dst con);
8140 
8141   ins_cost(INSN_COST * 4);
8142   format %{
8143     "mov  $dst, $con\t# ptr\n\t"
8144   %}
8145 
8146   ins_encode(aarch64_enc_mov_p(dst, con));
8147 
8148   ins_pipe(ialu_imm);
8149 %}
8150 
8151 // Load Null Pointer Constant
8152 
8153 instruct loadConP0(iRegPNoSp dst, immP0 con)
8154 %{
8155   match(Set dst con);
8156 
8157   ins_cost(INSN_COST);
8158   format %{ "mov  $dst, $con\t# NULL ptr" %}
8159 
8160   ins_encode(aarch64_enc_mov_p0(dst, con));
8161 
8162   ins_pipe(ialu_imm);
8163 %}
8164 
// Load Pointer Constant One

instruct loadConP1(iRegPNoSp dst, immP_1 con)
%{
  match(Set dst con);

  ins_cost(INSN_COST);
  // Disassembly tag corrected: this materializes the constant one,
  // not NULL (copy-paste from loadConP0).
  format %{ "mov  $dst, $con\t# ptr 1" %}

  ins_encode(aarch64_enc_mov_p1(dst, con));

  ins_pipe(ialu_imm);
%}
8178 
8179 // Load Poll Page Constant
8180 
8181 instruct loadConPollPage(iRegPNoSp dst, immPollPage con)
8182 %{
8183   match(Set dst con);
8184 
8185   ins_cost(INSN_COST);
8186   format %{ "adr  $dst, $con\t# Poll Page Ptr" %}
8187 
8188   ins_encode(aarch64_enc_mov_poll_page(dst, con));
8189 
8190   ins_pipe(ialu_imm);
8191 %}
8192 
8193 // Load Byte Map Base Constant
8194 
8195 instruct loadByteMapBase(iRegPNoSp dst, immByteMapBase con)
8196 %{
8197   match(Set dst con);
8198 
8199   ins_cost(INSN_COST);
8200   format %{ "adr  $dst, $con\t# Byte Map Base" %}
8201 
8202   ins_encode(aarch64_enc_mov_byte_map_base(dst, con));
8203 
8204   ins_pipe(ialu_imm);
8205 %}
8206 
8207 // Load Narrow Pointer Constant
8208 
8209 instruct loadConN(iRegNNoSp dst, immN con)
8210 %{
8211   match(Set dst con);
8212 
8213   ins_cost(INSN_COST * 4);
8214   format %{ "mov  $dst, $con\t# compressed ptr" %}
8215 
8216   ins_encode(aarch64_enc_mov_n(dst, con));
8217 
8218   ins_pipe(ialu_imm);
8219 %}
8220 
8221 // Load Narrow Null Pointer Constant
8222 
8223 instruct loadConN0(iRegNNoSp dst, immN0 con)
8224 %{
8225   match(Set dst con);
8226 
8227   ins_cost(INSN_COST);
8228   format %{ "mov  $dst, $con\t# compressed NULL ptr" %}
8229 
8230   ins_encode(aarch64_enc_mov_n0(dst, con));
8231 
8232   ins_pipe(ialu_imm);
8233 %}
8234 
8235 // Load Narrow Klass Constant
8236 
8237 instruct loadConNKlass(iRegNNoSp dst, immNKlass con)
8238 %{
8239   match(Set dst con);
8240 
8241   ins_cost(INSN_COST);
8242   format %{ "mov  $dst, $con\t# compressed klass ptr" %}
8243 
8244   ins_encode(aarch64_enc_mov_nk(dst, con));
8245 
8246   ins_pipe(ialu_imm);
8247 %}
8248 
8249 // Load Packed Float Constant
8250 
8251 instruct loadConF_packed(vRegF dst, immFPacked con) %{
8252   match(Set dst con);
8253   ins_cost(INSN_COST * 4);
8254   format %{ "fmovs  $dst, $con"%}
8255   ins_encode %{
8256     __ fmovs(as_FloatRegister($dst$$reg), (double)$con$$constant);
8257   %}
8258 
8259   ins_pipe(fp_imm_s);
8260 %}
8261 
8262 // Load Float Constant
8263 
8264 instruct loadConF(vRegF dst, immF con) %{
8265   match(Set dst con);
8266 
8267   ins_cost(INSN_COST * 4);
8268 
8269   format %{
8270     "ldrs $dst, [$constantaddress]\t# load from constant table: float=$con\n\t"
8271   %}
8272 
8273   ins_encode %{
8274     __ ldrs(as_FloatRegister($dst$$reg), $constantaddress($con));
8275   %}
8276 
8277   ins_pipe(fp_load_constant_s);
8278 %}
8279 
8280 // Load Packed Double Constant
8281 
8282 instruct loadConD_packed(vRegD dst, immDPacked con) %{
8283   match(Set dst con);
8284   ins_cost(INSN_COST);
8285   format %{ "fmovd  $dst, $con"%}
8286   ins_encode %{
8287     __ fmovd(as_FloatRegister($dst$$reg), $con$$constant);
8288   %}
8289 
8290   ins_pipe(fp_imm_d);
8291 %}
8292 
// Load Double Constant

instruct loadConD(vRegD dst, immD con) %{
  match(Set dst con);

  ins_cost(INSN_COST * 5);
  // Format corrected: the constant is a double ("float=" was a
  // copy-paste from loadConF).
  format %{
    "ldrd $dst, [$constantaddress]\t# load from constant table: double=$con\n\t"
  %}

  ins_encode %{
    __ ldrd(as_FloatRegister($dst$$reg), $constantaddress($con));
  %}

  ins_pipe(fp_load_constant_d);
%}
8309 
8310 // Store Instructions
8311 
// Store CMS card-mark Immediate
// The storestore barrier is elided here: the predicate has proved it
// redundant for this particular card mark (see unnecessary_storestore()).
// The ordered variant below emits the barrier.
instruct storeimmCM0(immI0 zero, memory mem)
%{
  match(Set mem (StoreCM mem zero));
  predicate(unnecessary_storestore(n));

  ins_cost(INSN_COST);
  format %{ "storestore (elided)\n\t"
            "strb zr, $mem\t# byte" %}

  ins_encode(aarch64_enc_strb0(mem));

  ins_pipe(istore_mem);
%}
8326 
8327 // Store CMS card-mark Immediate with intervening StoreStore
8328 // needed when using CMS with no conditional card marking
8329 instruct storeimmCM0_ordered(immI0 zero, memory mem)
8330 %{
8331   match(Set mem (StoreCM mem zero));
8332 
8333   ins_cost(INSN_COST * 2);
8334   format %{ "storestore\n\t"
8335             "dmb ishst"
8336             "\n\tstrb zr, $mem\t# byte" %}
8337 
8338   ins_encode(aarch64_enc_strb0_ordered(mem));
8339 
8340   ins_pipe(istore_mem);
8341 %}
8342 
8343 // Store Byte
8344 instruct storeB(iRegIorL2I src, memory mem)
8345 %{
8346   match(Set mem (StoreB mem src));
8347   predicate(!needs_releasing_store(n));
8348 
8349   ins_cost(INSN_COST);
8350   format %{ "strb  $src, $mem\t# byte" %}
8351 
8352   ins_encode(aarch64_enc_strb(src, mem));
8353 
8354   ins_pipe(istore_reg_mem);
8355 %}
8356 
8357 
// Store Byte zero
instruct storeimmB0(immI0 zero, memory mem)
%{
  match(Set mem (StoreB mem zero));
  predicate(!needs_releasing_store(n));

  ins_cost(INSN_COST);
  // Format corrected: aarch64_enc_strb0 stores the zero register; the
  // previous string named a nonexistent register ("rscractch2").
  format %{ "strb zr, $mem\t# byte" %}

  ins_encode(aarch64_enc_strb0(mem));

  ins_pipe(istore_mem);
%}
8370 
8371 // Store Char/Short
8372 instruct storeC(iRegIorL2I src, memory mem)
8373 %{
8374   match(Set mem (StoreC mem src));
8375   predicate(!needs_releasing_store(n));
8376 
8377   ins_cost(INSN_COST);
8378   format %{ "strh  $src, $mem\t# short" %}
8379 
8380   ins_encode(aarch64_enc_strh(src, mem));
8381 
8382   ins_pipe(istore_reg_mem);
8383 %}
8384 
8385 instruct storeimmC0(immI0 zero, memory mem)
8386 %{
8387   match(Set mem (StoreC mem zero));
8388   predicate(!needs_releasing_store(n));
8389 
8390   ins_cost(INSN_COST);
8391   format %{ "strh  zr, $mem\t# short" %}
8392 
8393   ins_encode(aarch64_enc_strh0(mem));
8394 
8395   ins_pipe(istore_mem);
8396 %}
8397 
8398 // Store Integer
8399 
8400 instruct storeI(iRegIorL2I src, memory mem)
8401 %{
8402   match(Set mem(StoreI mem src));
8403   predicate(!needs_releasing_store(n));
8404 
8405   ins_cost(INSN_COST);
8406   format %{ "strw  $src, $mem\t# int" %}
8407 
8408   ins_encode(aarch64_enc_strw(src, mem));
8409 
8410   ins_pipe(istore_reg_mem);
8411 %}
8412 
8413 instruct storeimmI0(immI0 zero, memory mem)
8414 %{
8415   match(Set mem(StoreI mem zero));
8416   predicate(!needs_releasing_store(n));
8417 
8418   ins_cost(INSN_COST);
8419   format %{ "strw  zr, $mem\t# int" %}
8420 
8421   ins_encode(aarch64_enc_strw0(mem));
8422 
8423   ins_pipe(istore_mem);
8424 %}
8425 
// Store Long (64 bit signed)
instruct storeL(iRegL src, memory mem)
%{
  match(Set mem (StoreL mem src));
  predicate(!needs_releasing_store(n));

  ins_cost(INSN_COST);
  // Disassembly tag corrected: this stores a long, not an int.
  format %{ "str  $src, $mem\t# long" %}

  ins_encode(aarch64_enc_str(src, mem));

  ins_pipe(istore_reg_mem);
%}
8439 
// Store Long zero (64 bit signed)
instruct storeimmL0(immL0 zero, memory mem)
%{
  match(Set mem (StoreL mem zero));
  predicate(!needs_releasing_store(n));

  ins_cost(INSN_COST);
  // Disassembly tag corrected: this stores a long, not an int.
  format %{ "str  zr, $mem\t# long" %}

  ins_encode(aarch64_enc_str0(mem));

  ins_pipe(istore_mem);
%}
8453 
8454 // Store Pointer
8455 instruct storeP(iRegP src, memory mem)
8456 %{
8457   match(Set mem (StoreP mem src));
8458   predicate(!needs_releasing_store(n));
8459 
8460   ins_cost(INSN_COST);
8461   format %{ "str  $src, $mem\t# ptr" %}
8462 
8463   ins_encode(aarch64_enc_str(src, mem));
8464 
8465   ins_pipe(istore_reg_mem);
8466 %}
8467 
8468 // Store Pointer
8469 instruct storeimmP0(immP0 zero, memory mem)
8470 %{
8471   match(Set mem (StoreP mem zero));
8472   predicate(!needs_releasing_store(n));
8473 
8474   ins_cost(INSN_COST);
8475   format %{ "str zr, $mem\t# ptr" %}
8476 
8477   ins_encode(aarch64_enc_str0(mem));
8478 
8479   ins_pipe(istore_mem);
8480 %}
8481 
8482 // Store Compressed Pointer
8483 instruct storeN(iRegN src, memory mem)
8484 %{
8485   match(Set mem (StoreN mem src));
8486   predicate(!needs_releasing_store(n));
8487 
8488   ins_cost(INSN_COST);
8489   format %{ "strw  $src, $mem\t# compressed ptr" %}
8490 
8491   ins_encode(aarch64_enc_strw(src, mem));
8492 
8493   ins_pipe(istore_reg_mem);
8494 %}
8495 
// Store compressed-pointer zero using rheapbase as the source register.
// Only legal when both compressed oops and compressed klass pointers are
// zero-based (no encoding base), because then rheapbase is known to hold
// zero and can be stored directly.
instruct storeImmN0(iRegIHeapbase heapbase, immN0 zero, memory mem)
%{
  match(Set mem (StoreN mem zero));
  predicate(Universe::narrow_oop_base() == NULL &&
            Universe::narrow_klass_base() == NULL &&
            (!needs_releasing_store(n)));

  ins_cost(INSN_COST);
  format %{ "strw  rheapbase, $mem\t# compressed ptr (rheapbase==0)" %}

  ins_encode(aarch64_enc_strw(heapbase, mem));

  ins_pipe(istore_reg_mem);
%}
8510 
8511 // Store Float
8512 instruct storeF(vRegF src, memory mem)
8513 %{
8514   match(Set mem (StoreF mem src));
8515   predicate(!needs_releasing_store(n));
8516 
8517   ins_cost(INSN_COST);
8518   format %{ "strs  $src, $mem\t# float" %}
8519 
8520   ins_encode( aarch64_enc_strs(src, mem) );
8521 
8522   ins_pipe(pipe_class_memory);
8523 %}
8524 
8525 // TODO
8526 // implement storeImmF0 and storeFImmPacked
8527 
8528 // Store Double
8529 instruct storeD(vRegD src, memory mem)
8530 %{
8531   match(Set mem (StoreD mem src));
8532   predicate(!needs_releasing_store(n));
8533 
8534   ins_cost(INSN_COST);
8535   format %{ "strd  $src, $mem\t# double" %}
8536 
8537   ins_encode( aarch64_enc_strd(src, mem) );
8538 
8539   ins_pipe(pipe_class_memory);
8540 %}
8541 
8542 // Store Compressed Klass Pointer
8543 instruct storeNKlass(iRegN src, memory mem)
8544 %{
8545   predicate(!needs_releasing_store(n));
8546   match(Set mem (StoreNKlass mem src));
8547 
8548   ins_cost(INSN_COST);
8549   format %{ "strw  $src, $mem\t# compressed klass ptr" %}
8550 
8551   ins_encode(aarch64_enc_strw(src, mem));
8552 
8553   ins_pipe(istore_reg_mem);
8554 %}
8555 
8556 // TODO
8557 // implement storeImmD0 and storeDImmPacked
8558 
8559 // prefetch instructions
8560 // Must be safe to execute with invalid address (cannot fault).
8561 
8562 instruct prefetchalloc( memory mem ) %{
8563   match(PrefetchAllocation mem);
8564 
8565   ins_cost(INSN_COST);
8566   format %{ "prfm $mem, PSTL1KEEP\t# Prefetch into level 1 cache write keep" %}
8567 
8568   ins_encode( aarch64_enc_prefetchw(mem) );
8569 
8570   ins_pipe(iload_prefetch);
8571 %}
8572 
8573 //  ---------------- volatile loads and stores ----------------
8574 
8575 // Load Byte (8 bit signed)
8576 instruct loadB_volatile(iRegINoSp dst, /* sync_memory*/indirect mem)
8577 %{
8578   match(Set dst (LoadB mem));
8579 
8580   ins_cost(VOLATILE_REF_COST);
8581   format %{ "ldarsb  $dst, $mem\t# byte" %}
8582 
8583   ins_encode(aarch64_enc_ldarsb(dst, mem));
8584 
8585   ins_pipe(pipe_serial);
8586 %}
8587 
8588 // Load Byte (8 bit signed) into long
// Volatile (acquiring) loads.  Each rule pairs an ideal Load node with
// an AArch64 load-acquire instruction (ldar and friends) and restricts
// the address to a plain register (indirect) since ldar takes no offset.

// Load Byte (8 bit signed) into long
instruct loadB2L_volatile(iRegLNoSp dst, /* sync_memory*/indirect mem)
%{
  match(Set dst (ConvI2L (LoadB mem)));

  ins_cost(VOLATILE_REF_COST);
  format %{ "ldarsb  $dst, $mem\t# byte" %}

  ins_encode(aarch64_enc_ldarsb(dst, mem));

  ins_pipe(pipe_serial);
%}

// Load Byte (8 bit unsigned)
instruct loadUB_volatile(iRegINoSp dst, /* sync_memory*/indirect mem)
%{
  match(Set dst (LoadUB mem));

  ins_cost(VOLATILE_REF_COST);
  format %{ "ldarb  $dst, $mem\t# byte" %}

  ins_encode(aarch64_enc_ldarb(dst, mem));

  ins_pipe(pipe_serial);
%}

// Load Byte (8 bit unsigned) into long
instruct loadUB2L_volatile(iRegLNoSp dst, /* sync_memory*/indirect mem)
%{
  match(Set dst (ConvI2L (LoadUB mem)));

  ins_cost(VOLATILE_REF_COST);
  format %{ "ldarb  $dst, $mem\t# byte" %}

  ins_encode(aarch64_enc_ldarb(dst, mem));

  ins_pipe(pipe_serial);
%}

// Load Short (16 bit signed)
instruct loadS_volatile(iRegINoSp dst, /* sync_memory*/indirect mem)
%{
  match(Set dst (LoadS mem));

  ins_cost(VOLATILE_REF_COST);
  format %{ "ldarshw  $dst, $mem\t# short" %}

  ins_encode(aarch64_enc_ldarshw(dst, mem));

  ins_pipe(pipe_serial);
%}

// Load Char/Short (16 bit unsigned)
instruct loadUS_volatile(iRegINoSp dst, /* sync_memory*/indirect mem)
%{
  match(Set dst (LoadUS mem));

  ins_cost(VOLATILE_REF_COST);
  format %{ "ldarhw  $dst, $mem\t# short" %}

  ins_encode(aarch64_enc_ldarhw(dst, mem));

  ins_pipe(pipe_serial);
%}

// Load Short/Char (16 bit unsigned) into long
instruct loadUS2L_volatile(iRegLNoSp dst, /* sync_memory*/indirect mem)
%{
  match(Set dst (ConvI2L (LoadUS mem)));

  ins_cost(VOLATILE_REF_COST);
  format %{ "ldarh  $dst, $mem\t# short" %}

  ins_encode(aarch64_enc_ldarh(dst, mem));

  ins_pipe(pipe_serial);
%}
8664 
// Load Short/Char (16 bit signed) into long
instruct loadS2L_volatile(iRegLNoSp dst, /* sync_memory*/indirect mem)
%{
  match(Set dst (ConvI2L (LoadS mem)));

  ins_cost(VOLATILE_REF_COST);
  // Fixed: the format previously said "ldarh" (zero-extending) although
  // the encoding emits ldarsh, the sign-extending load-acquire halfword.
  format %{ "ldarsh  $dst, $mem\t# short" %}

  ins_encode(aarch64_enc_ldarsh(dst, mem));

  ins_pipe(pipe_serial);
%}
8677 
// Load Integer (32 bit signed)
instruct loadI_volatile(iRegINoSp dst, /* sync_memory*/indirect mem)
%{
  match(Set dst (LoadI mem));

  ins_cost(VOLATILE_REF_COST);
  format %{ "ldarw  $dst, $mem\t# int" %}

  ins_encode(aarch64_enc_ldarw(dst, mem));

  ins_pipe(pipe_serial);
%}

// Load Integer (32 bit unsigned) into long
// Matches the (AndL (ConvI2L (LoadI ...)) 0xFFFFFFFF) idiom; ldarw
// zero-extends the 32-bit value so the mask needs no extra instruction.
instruct loadUI2L_volatile(iRegLNoSp dst, /* sync_memory*/indirect mem, immL_32bits mask)
%{
  match(Set dst (AndL (ConvI2L (LoadI mem)) mask));

  ins_cost(VOLATILE_REF_COST);
  format %{ "ldarw  $dst, $mem\t# int" %}

  ins_encode(aarch64_enc_ldarw(dst, mem));

  ins_pipe(pipe_serial);
%}
8703 
// Load Long (64 bit signed)
instruct loadL_volatile(iRegLNoSp dst, /* sync_memory*/indirect mem)
%{
  match(Set dst (LoadL mem));

  ins_cost(VOLATILE_REF_COST);
  // Fixed: format comment previously said "# int" for this 64-bit load.
  format %{ "ldar  $dst, $mem\t# long" %}

  ins_encode(aarch64_enc_ldar(dst, mem));

  ins_pipe(pipe_serial);
%}
8716 
// Load Pointer
instruct loadP_volatile(iRegPNoSp dst, /* sync_memory*/indirect mem)
%{
  match(Set dst (LoadP mem));

  ins_cost(VOLATILE_REF_COST);
  format %{ "ldar  $dst, $mem\t# ptr" %}

  ins_encode(aarch64_enc_ldar(dst, mem));

  ins_pipe(pipe_serial);
%}

// Load Compressed Pointer
instruct loadN_volatile(iRegNNoSp dst, /* sync_memory*/indirect mem)
%{
  match(Set dst (LoadN mem));

  ins_cost(VOLATILE_REF_COST);
  format %{ "ldarw  $dst, $mem\t# compressed ptr" %}

  ins_encode(aarch64_enc_ldarw(dst, mem));

  ins_pipe(pipe_serial);
%}

// Load Float
// FP volatile loads go through a scratch integer register in the
// encoding (fldars/fldard), since ldar only targets general registers.
instruct loadF_volatile(vRegF dst, /* sync_memory*/indirect mem)
%{
  match(Set dst (LoadF mem));

  ins_cost(VOLATILE_REF_COST);
  format %{ "ldars  $dst, $mem\t# float" %}

  ins_encode( aarch64_enc_fldars(dst, mem) );

  ins_pipe(pipe_serial);
%}

// Load Double
instruct loadD_volatile(vRegD dst, /* sync_memory*/indirect mem)
%{
  match(Set dst (LoadD mem));

  ins_cost(VOLATILE_REF_COST);
  format %{ "ldard  $dst, $mem\t# double" %}

  ins_encode( aarch64_enc_fldard(dst, mem) );

  ins_pipe(pipe_serial);
%}
8768 
// Volatile (releasing) stores: each rule pairs an ideal Store node with
// an AArch64 store-release instruction (stlr and friends).

// Store Byte
instruct storeB_volatile(iRegIorL2I src, /* sync_memory*/indirect mem)
%{
  match(Set mem (StoreB mem src));

  ins_cost(VOLATILE_REF_COST);
  format %{ "stlrb  $src, $mem\t# byte" %}

  ins_encode(aarch64_enc_stlrb(src, mem));

  ins_pipe(pipe_class_memory);
%}

// Store Char/Short
instruct storeC_volatile(iRegIorL2I src, /* sync_memory*/indirect mem)
%{
  match(Set mem (StoreC mem src));

  ins_cost(VOLATILE_REF_COST);
  format %{ "stlrh  $src, $mem\t# short" %}

  ins_encode(aarch64_enc_stlrh(src, mem));

  ins_pipe(pipe_class_memory);
%}
8794 
// Store Integer

instruct storeI_volatile(iRegIorL2I src, /* sync_memory*/indirect mem)
%{
  // Fixed: added the missing space after "mem" for consistency with
  // every sibling store rule (match(Set mem (StoreX mem src))).
  match(Set mem (StoreI mem src));

  ins_cost(VOLATILE_REF_COST);
  format %{ "stlrw  $src, $mem\t# int" %}

  ins_encode(aarch64_enc_stlrw(src, mem));

  ins_pipe(pipe_class_memory);
%}
8808 
// Store Long (64 bit signed)
instruct storeL_volatile(iRegL src, /* sync_memory*/indirect mem)
%{
  match(Set mem (StoreL mem src));

  ins_cost(VOLATILE_REF_COST);
  // Fixed: format comment previously said "# int" for this 64-bit store.
  format %{ "stlr  $src, $mem\t# long" %}

  ins_encode(aarch64_enc_stlr(src, mem));

  ins_pipe(pipe_class_memory);
%}
8821 
// Store Pointer
instruct storeP_volatile(iRegP src, /* sync_memory*/indirect mem)
%{
  match(Set mem (StoreP mem src));

  ins_cost(VOLATILE_REF_COST);
  format %{ "stlr  $src, $mem\t# ptr" %}

  ins_encode(aarch64_enc_stlr(src, mem));

  ins_pipe(pipe_class_memory);
%}

// Store Compressed Pointer
instruct storeN_volatile(iRegN src, /* sync_memory*/indirect mem)
%{
  match(Set mem (StoreN mem src));

  ins_cost(VOLATILE_REF_COST);
  format %{ "stlrw  $src, $mem\t# compressed ptr" %}

  ins_encode(aarch64_enc_stlrw(src, mem));

  ins_pipe(pipe_class_memory);
%}

// Store Float
// FP volatile stores bounce through a scratch integer register in the
// encoding (fstlrs/fstlrd) since stlr only takes general registers.
instruct storeF_volatile(vRegF src, /* sync_memory*/indirect mem)
%{
  match(Set mem (StoreF mem src));

  ins_cost(VOLATILE_REF_COST);
  format %{ "stlrs  $src, $mem\t# float" %}

  ins_encode( aarch64_enc_fstlrs(src, mem) );

  ins_pipe(pipe_class_memory);
%}

// TODO
// implement storeImmF0 and storeFImmPacked

// Store Double
instruct storeD_volatile(vRegD src, /* sync_memory*/indirect mem)
%{
  match(Set mem (StoreD mem src));

  ins_cost(VOLATILE_REF_COST);
  format %{ "stlrd  $src, $mem\t# double" %}

  ins_encode( aarch64_enc_fstlrd(src, mem) );

  ins_pipe(pipe_class_memory);
%}
8876 
8877 //  ---------------- end of volatile loads and stores ----------------
8878 
8879 // ============================================================================
8880 // BSWAP Instructions
8881 
// Reverse bytes of a 32-bit value (Integer.reverseBytes).
instruct bytes_reverse_int(iRegINoSp dst, iRegIorL2I src) %{
  match(Set dst (ReverseBytesI src));

  ins_cost(INSN_COST);
  format %{ "revw  $dst, $src" %}

  ins_encode %{
    __ revw(as_Register($dst$$reg), as_Register($src$$reg));
  %}

  ins_pipe(ialu_reg);
%}

// Reverse bytes of a 64-bit value (Long.reverseBytes).
instruct bytes_reverse_long(iRegLNoSp dst, iRegL src) %{
  match(Set dst (ReverseBytesL src));

  ins_cost(INSN_COST);
  format %{ "rev  $dst, $src" %}

  ins_encode %{
    __ rev(as_Register($dst$$reg), as_Register($src$$reg));
  %}

  ins_pipe(ialu_reg);
%}

// Reverse bytes of an unsigned 16-bit value (Character.reverseBytes).
instruct bytes_reverse_unsigned_short(iRegINoSp dst, iRegIorL2I src) %{
  match(Set dst (ReverseBytesUS src));

  ins_cost(INSN_COST);
  format %{ "rev16w  $dst, $src" %}

  ins_encode %{
    __ rev16w(as_Register($dst$$reg), as_Register($src$$reg));
  %}

  ins_pipe(ialu_reg);
%}

// Reverse bytes of a signed 16-bit value (Short.reverseBytes);
// the trailing sbfmw sign-extends bits 0..15 of the swapped result.
instruct bytes_reverse_short(iRegINoSp dst, iRegIorL2I src) %{
  match(Set dst (ReverseBytesS src));

  ins_cost(INSN_COST);
  format %{ "rev16w  $dst, $src\n\t"
            "sbfmw $dst, $dst, #0, #15" %}

  ins_encode %{
    __ rev16w(as_Register($dst$$reg), as_Register($src$$reg));
    __ sbfmw(as_Register($dst$$reg), as_Register($dst$$reg), 0U, 15U);
  %}

  ins_pipe(ialu_reg);
%}
8935 
8936 // ============================================================================
8937 // Zero Count Instructions
8938 
// Count leading zeros, 32-bit (Integer.numberOfLeadingZeros).
instruct countLeadingZerosI(iRegINoSp dst, iRegIorL2I src) %{
  match(Set dst (CountLeadingZerosI src));

  ins_cost(INSN_COST);
  format %{ "clzw  $dst, $src" %}
  ins_encode %{
    __ clzw(as_Register($dst$$reg), as_Register($src$$reg));
  %}

  ins_pipe(ialu_reg);
%}

// Count leading zeros, 64-bit (Long.numberOfLeadingZeros).
instruct countLeadingZerosL(iRegINoSp dst, iRegL src) %{
  match(Set dst (CountLeadingZerosL src));

  ins_cost(INSN_COST);
  format %{ "clz   $dst, $src" %}
  ins_encode %{
    __ clz(as_Register($dst$$reg), as_Register($src$$reg));
  %}

  ins_pipe(ialu_reg);
%}

// Count trailing zeros, 32-bit: bit-reverse then count leading zeros,
// since AArch64 has no direct trailing-zero-count instruction.
instruct countTrailingZerosI(iRegINoSp dst, iRegIorL2I src) %{
  match(Set dst (CountTrailingZerosI src));

  ins_cost(INSN_COST * 2);
  format %{ "rbitw  $dst, $src\n\t"
            "clzw   $dst, $dst" %}
  ins_encode %{
    __ rbitw(as_Register($dst$$reg), as_Register($src$$reg));
    __ clzw(as_Register($dst$$reg), as_Register($dst$$reg));
  %}

  ins_pipe(ialu_reg);
%}

// Count trailing zeros, 64-bit: same rbit + clz idiom.
instruct countTrailingZerosL(iRegINoSp dst, iRegL src) %{
  match(Set dst (CountTrailingZerosL src));

  ins_cost(INSN_COST * 2);
  format %{ "rbit   $dst, $src\n\t"
            "clz    $dst, $dst" %}
  ins_encode %{
    __ rbit(as_Register($dst$$reg), as_Register($src$$reg));
    __ clz(as_Register($dst$$reg), as_Register($dst$$reg));
  %}

  ins_pipe(ialu_reg);
%}
8990 
8991 //---------- Population Count Instructions -------------------------------------
8992 //
8993 
// Population count via the SIMD unit: move the value into a vector
// register, cnt counts bits per byte, addv sums the byte counts.
// NOTE(review): the encoding writes $src (movw zero-extends in place)
// without declaring an effect on src — TODO confirm this is safe given
// iRegIorL2I upper-bit conventions.
instruct popCountI(iRegINoSp dst, iRegIorL2I src, vRegF tmp) %{
  predicate(UsePopCountInstruction);
  match(Set dst (PopCountI src));
  effect(TEMP tmp);
  ins_cost(INSN_COST * 13);

  format %{ "movw   $src, $src\n\t"
            "mov    $tmp, $src\t# vector (1D)\n\t"
            "cnt    $tmp, $tmp\t# vector (8B)\n\t"
            "addv   $tmp, $tmp\t# vector (8B)\n\t"
            "mov    $dst, $tmp\t# vector (1D)" %}
  ins_encode %{
    __ movw($src$$Register, $src$$Register); // ensure top 32 bits 0
    __ mov($tmp$$FloatRegister, __ T1D, 0, $src$$Register);
    __ cnt($tmp$$FloatRegister, __ T8B, $tmp$$FloatRegister);
    __ addv($tmp$$FloatRegister, __ T8B, $tmp$$FloatRegister);
    __ mov($dst$$Register, $tmp$$FloatRegister, __ T1D, 0);
  %}

  ins_pipe(pipe_class_default);
%}

// Memory-operand variant: load directly into the vector register,
// avoiding the GPR round-trip.
instruct popCountI_mem(iRegINoSp dst, memory mem, vRegF tmp) %{
  predicate(UsePopCountInstruction);
  match(Set dst (PopCountI (LoadI mem)));
  effect(TEMP tmp);
  ins_cost(INSN_COST * 13);

  format %{ "ldrs   $tmp, $mem\n\t"
            "cnt    $tmp, $tmp\t# vector (8B)\n\t"
            "addv   $tmp, $tmp\t# vector (8B)\n\t"
            "mov    $dst, $tmp\t# vector (1D)" %}
  ins_encode %{
    FloatRegister tmp_reg = as_FloatRegister($tmp$$reg);
    loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldrs, tmp_reg, $mem->opcode(),
               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
    __ cnt($tmp$$FloatRegister, __ T8B, $tmp$$FloatRegister);
    __ addv($tmp$$FloatRegister, __ T8B, $tmp$$FloatRegister);
    __ mov($dst$$Register, $tmp$$FloatRegister, __ T1D, 0);
  %}

  ins_pipe(pipe_class_default);
%}

// Note: Long.bitCount(long) returns an int.
instruct popCountL(iRegINoSp dst, iRegL src, vRegD tmp) %{
  predicate(UsePopCountInstruction);
  match(Set dst (PopCountL src));
  effect(TEMP tmp);
  ins_cost(INSN_COST * 13);

  format %{ "mov    $tmp, $src\t# vector (1D)\n\t"
            "cnt    $tmp, $tmp\t# vector (8B)\n\t"
            "addv   $tmp, $tmp\t# vector (8B)\n\t"
            "mov    $dst, $tmp\t# vector (1D)" %}
  ins_encode %{
    __ mov($tmp$$FloatRegister, __ T1D, 0, $src$$Register);
    __ cnt($tmp$$FloatRegister, __ T8B, $tmp$$FloatRegister);
    __ addv($tmp$$FloatRegister, __ T8B, $tmp$$FloatRegister);
    __ mov($dst$$Register, $tmp$$FloatRegister, __ T1D, 0);
  %}

  ins_pipe(pipe_class_default);
%}

// Memory-operand variant of popCountL.
instruct popCountL_mem(iRegINoSp dst, memory mem, vRegD tmp) %{
  predicate(UsePopCountInstruction);
  match(Set dst (PopCountL (LoadL mem)));
  effect(TEMP tmp);
  ins_cost(INSN_COST * 13);

  format %{ "ldrd   $tmp, $mem\n\t"
            "cnt    $tmp, $tmp\t# vector (8B)\n\t"
            "addv   $tmp, $tmp\t# vector (8B)\n\t"
            "mov    $dst, $tmp\t# vector (1D)" %}
  ins_encode %{
    FloatRegister tmp_reg = as_FloatRegister($tmp$$reg);
    loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldrd, tmp_reg, $mem->opcode(),
               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
    __ cnt($tmp$$FloatRegister, __ T8B, $tmp$$FloatRegister);
    __ addv($tmp$$FloatRegister, __ T8B, $tmp$$FloatRegister);
    __ mov($dst$$Register, $tmp$$FloatRegister, __ T1D, 0);
  %}

  ins_pipe(pipe_class_default);
%}
9080 
9081 // ============================================================================
9082 // MemBar Instruction
9083 
// LoadFence: orders prior loads before subsequent loads and stores.
instruct load_fence() %{
  match(LoadFence);
  ins_cost(VOLATILE_REF_COST);

  format %{ "load_fence" %}

  ins_encode %{
    __ membar(Assembler::LoadLoad|Assembler::LoadStore);
  %}
  ins_pipe(pipe_serial);
%}

// Elide the acquire barrier when the preceding load already carries
// acquire semantics (see unnecessary_acquire in the .ad support code).
instruct unnecessary_membar_acquire() %{
  predicate(unnecessary_acquire(n));
  match(MemBarAcquire);
  ins_cost(0);

  format %{ "membar_acquire (elided)" %}

  ins_encode %{
    __ block_comment("membar_acquire (elided)");
  %}

  ins_pipe(pipe_class_empty);
%}

instruct membar_acquire() %{
  match(MemBarAcquire);
  ins_cost(VOLATILE_REF_COST);

  format %{ "membar_acquire\n\t"
            "dmb ish" %}

  ins_encode %{
    __ block_comment("membar_acquire");
    __ membar(Assembler::LoadLoad|Assembler::LoadStore);
  %}

  ins_pipe(pipe_serial);
%}


// Lock acquire barrier is a no-op here; only a block comment is emitted.
instruct membar_acquire_lock() %{
  match(MemBarAcquireLock);
  ins_cost(VOLATILE_REF_COST);

  format %{ "membar_acquire_lock (elided)" %}

  ins_encode %{
    __ block_comment("membar_acquire_lock (elided)");
  %}

  ins_pipe(pipe_serial);
%}

// StoreFence: orders prior loads and stores before subsequent stores.
instruct store_fence() %{
  match(StoreFence);
  ins_cost(VOLATILE_REF_COST);

  format %{ "store_fence" %}

  ins_encode %{
    __ membar(Assembler::LoadStore|Assembler::StoreStore);
  %}
  ins_pipe(pipe_serial);
%}

// Elide the release barrier when the following store already carries
// release semantics (see unnecessary_release in the .ad support code).
instruct unnecessary_membar_release() %{
  predicate(unnecessary_release(n));
  match(MemBarRelease);
  ins_cost(0);

  format %{ "membar_release (elided)" %}

  ins_encode %{
    __ block_comment("membar_release (elided)");
  %}
  ins_pipe(pipe_serial);
%}

instruct membar_release() %{
  match(MemBarRelease);
  ins_cost(VOLATILE_REF_COST);

  format %{ "membar_release\n\t"
            "dmb ish" %}

  ins_encode %{
    __ block_comment("membar_release");
    __ membar(Assembler::LoadStore|Assembler::StoreStore);
  %}
  ins_pipe(pipe_serial);
%}

instruct membar_storestore() %{
  match(MemBarStoreStore);
  ins_cost(VOLATILE_REF_COST);

  format %{ "MEMBAR-store-store" %}

  ins_encode %{
    __ membar(Assembler::StoreStore);
  %}
  ins_pipe(pipe_serial);
%}

// Lock release barrier is likewise elided; only a block comment.
instruct membar_release_lock() %{
  match(MemBarReleaseLock);
  ins_cost(VOLATILE_REF_COST);

  format %{ "membar_release_lock (elided)" %}

  ins_encode %{
    __ block_comment("membar_release_lock (elided)");
  %}

  ins_pipe(pipe_serial);
%}

// Elide the trailing volatile barrier when the surrounding access
// sequence already provides the required ordering.
instruct unnecessary_membar_volatile() %{
  predicate(unnecessary_volatile(n));
  match(MemBarVolatile);
  ins_cost(0);

  format %{ "membar_volatile (elided)" %}

  ins_encode %{
    __ block_comment("membar_volatile (elided)");
  %}

  ins_pipe(pipe_serial);
%}

// Full volatile barrier (StoreLoad); costed very high to steer the
// matcher towards the elided form whenever the predicate allows it.
instruct membar_volatile() %{
  match(MemBarVolatile);
  ins_cost(VOLATILE_REF_COST*100);

  format %{ "membar_volatile\n\t"
             "dmb ish"%}

  ins_encode %{
    __ block_comment("membar_volatile");
    __ membar(Assembler::StoreLoad);
  %}

  ins_pipe(pipe_serial);
%}
9231 
9232 // ============================================================================
9233 // Cast/Convert Instructions
9234 
// Reinterpret a long as a pointer; the mov is skipped when source and
// destination were allocated to the same register.
instruct castX2P(iRegPNoSp dst, iRegL src) %{
  match(Set dst (CastX2P src));

  ins_cost(INSN_COST);
  format %{ "mov $dst, $src\t# long -> ptr" %}

  ins_encode %{
    if ($dst$$reg != $src$$reg) {
      __ mov(as_Register($dst$$reg), as_Register($src$$reg));
    }
  %}

  ins_pipe(ialu_reg);
%}

// Reinterpret a pointer as a long; same register-coalescing shortcut.
instruct castP2X(iRegLNoSp dst, iRegP src) %{
  match(Set dst (CastP2X src));

  ins_cost(INSN_COST);
  format %{ "mov $dst, $src\t# ptr -> long" %}

  ins_encode %{
    if ($dst$$reg != $src$$reg) {
      __ mov(as_Register($dst$$reg), as_Register($src$$reg));
    }
  %}

  ins_pipe(ialu_reg);
%}

// Convert oop into int for vectors alignment masking
instruct convP2I(iRegINoSp dst, iRegP src) %{
  match(Set dst (ConvL2I (CastP2X src)));

  ins_cost(INSN_COST);
  format %{ "movw $dst, $src\t# ptr -> int" %}
  ins_encode %{
    __ movw($dst$$Register, $src$$Register);
  %}

  ins_pipe(ialu_reg);
%}
9277 
// Convert compressed oop into int for vectors alignment masking
// in case of 32bit oops (heap < 4Gb).
instruct convN2I(iRegINoSp dst, iRegN src)
%{
  predicate(Universe::narrow_oop_shift() == 0);
  match(Set dst (ConvL2I (CastP2X (DecodeN src))));

  ins_cost(INSN_COST);
  // Fixed: the format previously read "mov dst, $src" — the '$' on dst
  // was missing, and the mnemonic said "mov" while the encoding emits movw.
  format %{ "movw $dst, $src\t# compressed ptr -> int" %}
  ins_encode %{
    __ movw($dst$$Register, $src$$Register);
  %}

  ins_pipe(ialu_reg);
%}
9293 
9294 
// Convert oop pointer into compressed form
// May-be-null variant: encode_heap_oop handles null, and clobbers flags
// (hence KILL cr).
instruct encodeHeapOop(iRegNNoSp dst, iRegP src, rFlagsReg cr) %{
  predicate(n->bottom_type()->make_ptr()->ptr() != TypePtr::NotNull);
  match(Set dst (EncodeP src));
  effect(KILL cr);
  ins_cost(INSN_COST * 3);
  format %{ "encode_heap_oop $dst, $src" %}
  ins_encode %{
    Register s = $src$$Register;
    Register d = $dst$$Register;
    __ encode_heap_oop(d, s);
  %}
  ins_pipe(ialu_reg);
%}

// Not-null variant: no null check needed in the encoding.
instruct encodeHeapOop_not_null(iRegNNoSp dst, iRegP src, rFlagsReg cr) %{
  predicate(n->bottom_type()->make_ptr()->ptr() == TypePtr::NotNull);
  match(Set dst (EncodeP src));
  ins_cost(INSN_COST * 3);
  format %{ "encode_heap_oop_not_null $dst, $src" %}
  ins_encode %{
    __ encode_heap_oop_not_null($dst$$Register, $src$$Register);
  %}
  ins_pipe(ialu_reg);
%}

// Decompress a narrow oop that may be null (and is not a constant).
instruct decodeHeapOop(iRegPNoSp dst, iRegN src, rFlagsReg cr) %{
  predicate(n->bottom_type()->is_ptr()->ptr() != TypePtr::NotNull &&
            n->bottom_type()->is_ptr()->ptr() != TypePtr::Constant);
  match(Set dst (DecodeN src));
  ins_cost(INSN_COST * 3);
  format %{ "decode_heap_oop $dst, $src" %}
  ins_encode %{
    Register s = $src$$Register;
    Register d = $dst$$Register;
    __ decode_heap_oop(d, s);
  %}
  ins_pipe(ialu_reg);
%}

// Decompress a narrow oop known to be non-null (or a constant).
instruct decodeHeapOop_not_null(iRegPNoSp dst, iRegN src, rFlagsReg cr) %{
  predicate(n->bottom_type()->is_ptr()->ptr() == TypePtr::NotNull ||
            n->bottom_type()->is_ptr()->ptr() == TypePtr::Constant);
  match(Set dst (DecodeN src));
  ins_cost(INSN_COST * 3);
  format %{ "decode_heap_oop_not_null $dst, $src" %}
  ins_encode %{
    Register s = $src$$Register;
    Register d = $dst$$Register;
    __ decode_heap_oop_not_null(d, s);
  %}
  ins_pipe(ialu_reg);
%}
9348 
9349 // n.b. AArch64 implementations of encode_klass_not_null and
9350 // decode_klass_not_null do not modify the flags register so, unlike
9351 // Intel, we don't kill CR as a side effect here
9352 
// Compress a klass pointer (never null); does not clobber flags — see
// the note above about the AArch64 encode/decode_klass implementations.
instruct encodeKlass_not_null(iRegNNoSp dst, iRegP src) %{
  match(Set dst (EncodePKlass src));

  ins_cost(INSN_COST * 3);
  format %{ "encode_klass_not_null $dst,$src" %}

  ins_encode %{
    Register src_reg = as_Register($src$$reg);
    Register dst_reg = as_Register($dst$$reg);
    __ encode_klass_not_null(dst_reg, src_reg);
  %}

   ins_pipe(ialu_reg);
%}

// Decompress a narrow klass pointer; uses the single-register form of
// the macro when src and dst were allocated to the same register.
instruct decodeKlass_not_null(iRegPNoSp dst, iRegN src) %{
  match(Set dst (DecodeNKlass src));

  ins_cost(INSN_COST * 3);
  format %{ "decode_klass_not_null $dst,$src" %}

  ins_encode %{
    Register src_reg = as_Register($src$$reg);
    Register dst_reg = as_Register($dst$$reg);
    if (dst_reg != src_reg) {
      __ decode_klass_not_null(dst_reg, src_reg);
    } else {
      __ decode_klass_not_null(dst_reg);
    }
  %}

   ins_pipe(ialu_reg);
%}

// CheckCastPP is a type-system-only node: zero-size, empty encoding.
instruct checkCastPP(iRegPNoSp dst)
%{
  match(Set dst (CheckCastPP dst));

  size(0);
  format %{ "# checkcastPP of $dst" %}
  ins_encode(/* empty encoding */);
  ins_pipe(pipe_class_empty);
%}

// CastPP likewise emits no code.
instruct castPP(iRegPNoSp dst)
%{
  match(Set dst (CastPP dst));

  size(0);
  format %{ "# castPP of $dst" %}
  ins_encode(/* empty encoding */);
  ins_pipe(pipe_class_empty);
%}

// CastII likewise emits no code.
instruct castII(iRegI dst)
%{
  match(Set dst (CastII dst));

  size(0);
  format %{ "# castII of $dst" %}
  ins_encode(/* empty encoding */);
  ins_cost(0);
  ins_pipe(pipe_class_empty);
%}
9417 
9418 // ============================================================================
9419 // Atomic operation instructions
9420 //
9421 // Intel and SPARC both implement Ideal Node LoadPLocked and
9422 // Store{PIL}Conditional instructions using a normal load for the
9423 // LoadPLocked and a CAS for the Store{PIL}Conditional.
9424 //
9425 // The ideal code appears only to use LoadPLocked/StorePLocked as a
9426 // pair to lock object allocations from Eden space when not using
9427 // TLABs.
9428 //
9429 // There does not appear to be a Load{IL}Locked Ideal Node and the
9430 // Ideal code appears to use Store{IL}Conditional as an alias for CAS
9431 // and to use StoreIConditional only for 32-bit and StoreLConditional
9432 // only for 64-bit.
9433 //
9434 // We implement LoadPLocked and StorePLocked instructions using,
9435 // respectively the AArch64 hw load-exclusive and store-conditional
9436 // instructions. Whereas we must implement each of
9437 // Store{IL}Conditional using a CAS which employs a pair of
9438 // instructions comprising a load-exclusive followed by a
9439 // store-conditional.
9440 
9441 
9442 // Locked-load (linked load) of the current heap-top
9443 // used when updating the eden heap top
9444 // implemented using ldaxr on AArch64
9445 
// Linked load of the heap top, implemented with ldaxr (load-acquire
// exclusive); pairs with storePConditional below.
instruct loadPLocked(iRegPNoSp dst, indirect mem)
%{
  match(Set dst (LoadPLocked mem));

  ins_cost(VOLATILE_REF_COST);

  format %{ "ldaxr $dst, $mem\t# ptr linked acquire" %}

  ins_encode(aarch64_enc_ldaxr(dst, mem));

  ins_pipe(pipe_serial);
%}

// Conditional-store of the updated heap-top.
// Used during allocation of the shared heap.
// Sets flag (EQ) on success.
// implemented using stlxr on AArch64.
// NOTE(review): the two format strings below lack a "\n\t" separator —
// cosmetic only; TODO confirm against upstream before changing.
instruct storePConditional(memory heap_top_ptr, iRegP oldval, iRegP newval, rFlagsReg cr)
%{
  match(Set cr (StorePConditional heap_top_ptr (Binary oldval newval)));

  ins_cost(VOLATILE_REF_COST);

 // TODO
 // do we need to do a store-conditional release or can we just use a
 // plain store-conditional?

  format %{
    "stlxr rscratch1, $newval, $heap_top_ptr\t# ptr cond release"
    "cmpw rscratch1, zr\t# EQ on successful write"
  %}

  ins_encode(aarch64_enc_stlxr(newval, heap_top_ptr));

  ins_pipe(pipe_serial);
%}


// storeLConditional is used by PhaseMacroExpand::expand_lock_node
// when attempting to rebias a lock towards the current thread.  We
// must use the acquire form of cmpxchg in order to guarantee acquire
// semantics in this case.
instruct storeLConditional(indirect mem, iRegLNoSp oldval, iRegLNoSp newval, rFlagsReg cr)
%{
  match(Set cr (StoreLConditional mem (Binary oldval newval)));

  ins_cost(VOLATILE_REF_COST);

  format %{
    "cmpxchg rscratch1, $mem, $oldval, $newval, $mem\t# if $mem == $oldval then $mem <-- $newval"
    "cmpw rscratch1, zr\t# EQ on successful write"
  %}

  ins_encode(aarch64_enc_cmpxchg_acq(mem, oldval, newval));

  ins_pipe(pipe_slow);
%}

// storeIConditional also has acquire semantics, for no better reason
// than matching storeLConditional.  At the time of writing this
// comment storeIConditional was not used anywhere by AArch64.
instruct storeIConditional(indirect mem, iRegINoSp oldval, iRegINoSp newval, rFlagsReg cr)
%{
  match(Set cr (StoreIConditional mem (Binary oldval newval)));

  ins_cost(VOLATILE_REF_COST);

  format %{
    "cmpxchgw rscratch1, $mem, $oldval, $newval, $mem\t# if $mem == $oldval then $mem <-- $newval"
    "cmpw rscratch1, zr\t# EQ on successful write"
  %}

  ins_encode(aarch64_enc_cmpxchgw_acq(mem, oldval, newval));

  ins_pipe(pipe_slow);
%}
9523 
9524 // standard CompareAndSwapX when we are using barriers
9525 // these have higher priority than the rules selected by a predicate
9526 
9527 // XXX No flag versions for CompareAndSwap{I,L,P,N} because matcher
9528 // can't match them
9529 
// CompareAndSwap with full barriers: cmpxchg sets the flags, then
// cset materializes the boolean result in $res; flags are clobbered
// (KILL cr).

instruct compareAndSwapB(iRegINoSp res, indirect mem, iRegINoSp oldval, iRegINoSp newval, rFlagsReg cr) %{

  match(Set res (CompareAndSwapB mem (Binary oldval newval)));
  ins_cost(2 * VOLATILE_REF_COST);

  effect(KILL cr);

  format %{
    "cmpxchgb $mem, $oldval, $newval\t# (int) if $mem == $oldval then $mem <-- $newval"
    "cset $res, EQ\t# $res <-- (EQ ? 1 : 0)"
  %}

  ins_encode(aarch64_enc_cmpxchgb(mem, oldval, newval),
            aarch64_enc_cset_eq(res));

  ins_pipe(pipe_slow);
%}

instruct compareAndSwapS(iRegINoSp res, indirect mem, iRegINoSp oldval, iRegINoSp newval, rFlagsReg cr) %{

  match(Set res (CompareAndSwapS mem (Binary oldval newval)));
  ins_cost(2 * VOLATILE_REF_COST);

  effect(KILL cr);

  format %{
    "cmpxchgs $mem, $oldval, $newval\t# (int) if $mem == $oldval then $mem <-- $newval"
    "cset $res, EQ\t# $res <-- (EQ ? 1 : 0)"
  %}

  ins_encode(aarch64_enc_cmpxchgs(mem, oldval, newval),
            aarch64_enc_cset_eq(res));

  ins_pipe(pipe_slow);
%}

instruct compareAndSwapI(iRegINoSp res, indirect mem, iRegINoSp oldval, iRegINoSp newval, rFlagsReg cr) %{

  match(Set res (CompareAndSwapI mem (Binary oldval newval)));
  ins_cost(2 * VOLATILE_REF_COST);

  effect(KILL cr);

 format %{
    "cmpxchgw $mem, $oldval, $newval\t# (int) if $mem == $oldval then $mem <-- $newval"
    "cset $res, EQ\t# $res <-- (EQ ? 1 : 0)"
 %}

 ins_encode(aarch64_enc_cmpxchgw(mem, oldval, newval),
            aarch64_enc_cset_eq(res));

  ins_pipe(pipe_slow);
%}

instruct compareAndSwapL(iRegINoSp res, indirect mem, iRegLNoSp oldval, iRegLNoSp newval, rFlagsReg cr) %{

  match(Set res (CompareAndSwapL mem (Binary oldval newval)));
  ins_cost(2 * VOLATILE_REF_COST);

  effect(KILL cr);

 format %{
    "cmpxchg $mem, $oldval, $newval\t# (long) if $mem == $oldval then $mem <-- $newval"
    "cset $res, EQ\t# $res <-- (EQ ? 1 : 0)"
 %}

 ins_encode(aarch64_enc_cmpxchg(mem, oldval, newval),
            aarch64_enc_cset_eq(res));

  ins_pipe(pipe_slow);
%}

instruct compareAndSwapP(iRegINoSp res, indirect mem, iRegP oldval, iRegP newval, rFlagsReg cr) %{

  match(Set res (CompareAndSwapP mem (Binary oldval newval)));
  ins_cost(2 * VOLATILE_REF_COST);

  effect(KILL cr);

 format %{
    "cmpxchg $mem, $oldval, $newval\t# (ptr) if $mem == $oldval then $mem <-- $newval"
    "cset $res, EQ\t# $res <-- (EQ ? 1 : 0)"
 %}

 ins_encode(aarch64_enc_cmpxchg(mem, oldval, newval),
            aarch64_enc_cset_eq(res));

  ins_pipe(pipe_slow);
%}

instruct compareAndSwapN(iRegINoSp res, indirect mem, iRegNNoSp oldval, iRegNNoSp newval, rFlagsReg cr) %{

  match(Set res (CompareAndSwapN mem (Binary oldval newval)));
  ins_cost(2 * VOLATILE_REF_COST);

  effect(KILL cr);

 format %{
    "cmpxchgw $mem, $oldval, $newval\t# (narrow oop) if $mem == $oldval then $mem <-- $newval"
    "cset $res, EQ\t# $res <-- (EQ ? 1 : 0)"
 %}

 ins_encode(aarch64_enc_cmpxchgw(mem, oldval, newval),
            aarch64_enc_cset_eq(res));

  ins_pipe(pipe_slow);
%}
9637 
9638 // alternative CompareAndSwapX when we are eliding barriers
9639 
// Acquiring CAS variants: selected when needs_acquiring_load_exclusive
// shows the surrounding barriers can be elided; cheaper cost so the
// matcher prefers them over the barrier forms above.

instruct compareAndSwapIAcq(iRegINoSp res, indirect mem, iRegINoSp oldval, iRegINoSp newval, rFlagsReg cr) %{

  predicate(needs_acquiring_load_exclusive(n));
  match(Set res (CompareAndSwapI mem (Binary oldval newval)));
  ins_cost(VOLATILE_REF_COST);

  effect(KILL cr);

 format %{
    "cmpxchgw_acq $mem, $oldval, $newval\t# (int) if $mem == $oldval then $mem <-- $newval"
    "cset $res, EQ\t# $res <-- (EQ ? 1 : 0)"
 %}

 ins_encode(aarch64_enc_cmpxchgw_acq(mem, oldval, newval),
            aarch64_enc_cset_eq(res));

  ins_pipe(pipe_slow);
%}

instruct compareAndSwapLAcq(iRegINoSp res, indirect mem, iRegLNoSp oldval, iRegLNoSp newval, rFlagsReg cr) %{

  predicate(needs_acquiring_load_exclusive(n));
  match(Set res (CompareAndSwapL mem (Binary oldval newval)));
  ins_cost(VOLATILE_REF_COST);

  effect(KILL cr);

 format %{
    "cmpxchg_acq $mem, $oldval, $newval\t# (long) if $mem == $oldval then $mem <-- $newval"
    "cset $res, EQ\t# $res <-- (EQ ? 1 : 0)"
 %}

 ins_encode(aarch64_enc_cmpxchg_acq(mem, oldval, newval),
            aarch64_enc_cset_eq(res));

  ins_pipe(pipe_slow);
%}

instruct compareAndSwapPAcq(iRegINoSp res, indirect mem, iRegP oldval, iRegP newval, rFlagsReg cr) %{

  predicate(needs_acquiring_load_exclusive(n));
  match(Set res (CompareAndSwapP mem (Binary oldval newval)));
  ins_cost(VOLATILE_REF_COST);

  effect(KILL cr);

 format %{
    "cmpxchg_acq $mem, $oldval, $newval\t# (ptr) if $mem == $oldval then $mem <-- $newval"
    "cset $res, EQ\t# $res <-- (EQ ? 1 : 0)"
 %}

 ins_encode(aarch64_enc_cmpxchg_acq(mem, oldval, newval),
            aarch64_enc_cset_eq(res));

  ins_pipe(pipe_slow);
%}

instruct compareAndSwapNAcq(iRegINoSp res, indirect mem, iRegNNoSp oldval, iRegNNoSp newval, rFlagsReg cr) %{

  predicate(needs_acquiring_load_exclusive(n));
  match(Set res (CompareAndSwapN mem (Binary oldval newval)));
  ins_cost(VOLATILE_REF_COST);

  effect(KILL cr);

 format %{
    "cmpxchgw_acq $mem, $oldval, $newval\t# (narrow oop) if $mem == $oldval then $mem <-- $newval"
    "cset $res, EQ\t# $res <-- (EQ ? 1 : 0)"
 %}

 ins_encode(aarch64_enc_cmpxchgw_acq(mem, oldval, newval),
            aarch64_enc_cset_eq(res));

  ins_pipe(pipe_slow);
%}
9715 
9716 
9717 // ---------------------------------------------------------------------
9718 
9719 
9720 // BEGIN This section of the file is automatically generated. Do not edit --------------
9721 
9722 // Sundry CAS operations.  Note that release is always true,
9723 // regardless of the memory ordering of the CAS.  This is because we
9724 // need the volatile case to be sequentially consistent but there is
9725 // no trailing StoreLoad barrier emitted by C2.  Unfortunately we
9726 // can't check the type of memory ordering here, so we always emit a
9727 // STLXR.
9728 
9729 // This section is generated from aarch64_ad_cas.m4
9730 
9731 
9732 
// Strong byte compare-and-exchange: $res receives the PREVIOUS value at $mem,
// sign-extended to int.  Note this is a strong CAS (/*weak*/ false below);
// the format string used to say "weak" by mistake.  This rule is generated
// from aarch64_ad_cas.m4 -- keep that source in sync with this fix.
instruct compareAndExchangeB(iRegINoSp res, indirect mem, iRegI oldval, iRegI newval, rFlagsReg cr) %{
  match(Set res (CompareAndExchangeB mem (Binary oldval newval)));
  ins_cost(2 * VOLATILE_REF_COST);
  effect(TEMP_DEF res, KILL cr);
  format %{
    "cmpxchg $res = $mem, $oldval, $newval\t# (byte) if $mem == $oldval then $mem <-- $newval"
  %}
  ins_encode %{
    // release is always true: C2 emits no trailing StoreLoad barrier, so we
    // always use STLXR (see the section comment above).
    __ cmpxchg($mem$$Register, $oldval$$Register, $newval$$Register,
               Assembler::byte, /*acquire*/ false, /*release*/ true,
               /*weak*/ false, $res$$Register);
    __ sxtbw($res$$Register, $res$$Register); // sign-extend the loaded byte
  %}
  ins_pipe(pipe_slow);
%}
9748 
// Strong short compare-and-exchange: $res receives the PREVIOUS value at
// $mem, sign-extended to int.  This is a strong CAS (/*weak*/ false below);
// the format string used to say "weak" by mistake.  Generated from
// aarch64_ad_cas.m4 -- keep that source in sync with this fix.
instruct compareAndExchangeS(iRegINoSp res, indirect mem, iRegI oldval, iRegI newval, rFlagsReg cr) %{
  match(Set res (CompareAndExchangeS mem (Binary oldval newval)));
  ins_cost(2 * VOLATILE_REF_COST);
  effect(TEMP_DEF res, KILL cr);
  format %{
    "cmpxchg $res = $mem, $oldval, $newval\t# (short) if $mem == $oldval then $mem <-- $newval"
  %}
  ins_encode %{
    __ cmpxchg($mem$$Register, $oldval$$Register, $newval$$Register,
               Assembler::halfword, /*acquire*/ false, /*release*/ true,
               /*weak*/ false, $res$$Register);
    __ sxthw($res$$Register, $res$$Register); // sign-extend the loaded short
  %}
  ins_pipe(pipe_slow);
%}
9764 
// Strong int compare-and-exchange: $res receives the PREVIOUS value at $mem.
// This is a strong CAS (/*weak*/ false below); the format string used to say
// "weak" by mistake.  Generated from aarch64_ad_cas.m4 -- keep in sync.
instruct compareAndExchangeI(iRegINoSp res, indirect mem, iRegI oldval, iRegI newval, rFlagsReg cr) %{
  match(Set res (CompareAndExchangeI mem (Binary oldval newval)));
  ins_cost(2 * VOLATILE_REF_COST);
  effect(TEMP_DEF res, KILL cr);
  format %{
    "cmpxchg $res = $mem, $oldval, $newval\t# (int) if $mem == $oldval then $mem <-- $newval"
  %}
  ins_encode %{
    __ cmpxchg($mem$$Register, $oldval$$Register, $newval$$Register,
               Assembler::word, /*acquire*/ false, /*release*/ true,
               /*weak*/ false, $res$$Register);
  %}
  ins_pipe(pipe_slow);
%}
9779 
// Strong long compare-and-exchange: $res receives the PREVIOUS value at $mem.
// This is a strong CAS (/*weak*/ false below); the format string used to say
// "weak" by mistake.  Generated from aarch64_ad_cas.m4 -- keep in sync.
instruct compareAndExchangeL(iRegLNoSp res, indirect mem, iRegL oldval, iRegL newval, rFlagsReg cr) %{
  match(Set res (CompareAndExchangeL mem (Binary oldval newval)));
  ins_cost(2 * VOLATILE_REF_COST);
  effect(TEMP_DEF res, KILL cr);
  format %{
    "cmpxchg $res = $mem, $oldval, $newval\t# (long) if $mem == $oldval then $mem <-- $newval"
  %}
  ins_encode %{
    __ cmpxchg($mem$$Register, $oldval$$Register, $newval$$Register,
               Assembler::xword, /*acquire*/ false, /*release*/ true,
               /*weak*/ false, $res$$Register);
  %}
  ins_pipe(pipe_slow);
%}
9794 
// Strong narrow-oop compare-and-exchange: $res receives the PREVIOUS value at
// $mem.  This is a strong CAS (/*weak*/ false below); the format string used
// to say "weak" by mistake.  Generated from aarch64_ad_cas.m4 -- keep in sync.
instruct compareAndExchangeN(iRegNNoSp res, indirect mem, iRegN oldval, iRegN newval, rFlagsReg cr) %{
  match(Set res (CompareAndExchangeN mem (Binary oldval newval)));
  ins_cost(2 * VOLATILE_REF_COST);
  effect(TEMP_DEF res, KILL cr);
  format %{
    "cmpxchg $res = $mem, $oldval, $newval\t# (narrow oop) if $mem == $oldval then $mem <-- $newval"
  %}
  ins_encode %{
    __ cmpxchg($mem$$Register, $oldval$$Register, $newval$$Register,
               Assembler::word, /*acquire*/ false, /*release*/ true,
               /*weak*/ false, $res$$Register);
  %}
  ins_pipe(pipe_slow);
%}
9809 
// Strong pointer compare-and-exchange: $res receives the PREVIOUS value at
// $mem.  This is a strong CAS (/*weak*/ false below); the format string used
// to say "weak" by mistake.  Generated from aarch64_ad_cas.m4 -- keep in sync.
instruct compareAndExchangeP(iRegPNoSp res, indirect mem, iRegP oldval, iRegP newval, rFlagsReg cr) %{
  match(Set res (CompareAndExchangeP mem (Binary oldval newval)));
  ins_cost(2 * VOLATILE_REF_COST);
  effect(TEMP_DEF res, KILL cr);
  format %{
    "cmpxchg $res = $mem, $oldval, $newval\t# (ptr) if $mem == $oldval then $mem <-- $newval"
  %}
  ins_encode %{
    __ cmpxchg($mem$$Register, $oldval$$Register, $newval$$Register,
               Assembler::xword, /*acquire*/ false, /*release*/ true,
               /*weak*/ false, $res$$Register);
  %}
  ins_pipe(pipe_slow);
%}
9824 
// Weak byte CAS: may fail spuriously.  $res receives a 1/0 success flag
// (csetw of EQ); the old value is discarded (noreg).
instruct weakCompareAndSwapB(iRegINoSp res, indirect mem, iRegI oldval, iRegI newval, rFlagsReg cr) %{
  match(Set res (WeakCompareAndSwapB mem (Binary oldval newval)));
  ins_cost(2 * VOLATILE_REF_COST);
  effect(KILL cr);
  format %{
    "cmpxchg $res = $mem, $oldval, $newval\t# (byte, weak) if $mem == $oldval then $mem <-- $newval"
    "csetw $res, EQ\t# $res <-- (EQ ? 1 : 0)"
  %}
  ins_encode %{
    // release is always true: no trailing StoreLoad barrier is emitted by C2
    // (see the section comment above), so we always use STLXR.
    __ cmpxchg($mem$$Register, $oldval$$Register, $newval$$Register,
               Assembler::byte, /*acquire*/ false, /*release*/ true,
               /*weak*/ true, noreg);
    __ csetw($res$$Register, Assembler::EQ);
  %}
  ins_pipe(pipe_slow);
%}
9841 
// Weak short CAS: may fail spuriously.  $res receives a 1/0 success flag;
// the old value is discarded (noreg).
instruct weakCompareAndSwapS(iRegINoSp res, indirect mem, iRegI oldval, iRegI newval, rFlagsReg cr) %{
  match(Set res (WeakCompareAndSwapS mem (Binary oldval newval)));
  ins_cost(2 * VOLATILE_REF_COST);
  effect(KILL cr);
  format %{
    "cmpxchg $res = $mem, $oldval, $newval\t# (short, weak) if $mem == $oldval then $mem <-- $newval"
    "csetw $res, EQ\t# $res <-- (EQ ? 1 : 0)"
  %}
  ins_encode %{
    __ cmpxchg($mem$$Register, $oldval$$Register, $newval$$Register,
               Assembler::halfword, /*acquire*/ false, /*release*/ true,
               /*weak*/ true, noreg);
    __ csetw($res$$Register, Assembler::EQ);
  %}
  ins_pipe(pipe_slow);
%}
9858 
// Weak int CAS: may fail spuriously.  $res receives a 1/0 success flag;
// the old value is discarded (noreg).
instruct weakCompareAndSwapI(iRegINoSp res, indirect mem, iRegI oldval, iRegI newval, rFlagsReg cr) %{
  match(Set res (WeakCompareAndSwapI mem (Binary oldval newval)));
  ins_cost(2 * VOLATILE_REF_COST);
  effect(KILL cr);
  format %{
    "cmpxchg $res = $mem, $oldval, $newval\t# (int, weak) if $mem == $oldval then $mem <-- $newval"
    "csetw $res, EQ\t# $res <-- (EQ ? 1 : 0)"
  %}
  ins_encode %{
    __ cmpxchg($mem$$Register, $oldval$$Register, $newval$$Register,
               Assembler::word, /*acquire*/ false, /*release*/ true,
               /*weak*/ true, noreg);
    __ csetw($res$$Register, Assembler::EQ);
  %}
  ins_pipe(pipe_slow);
%}
9875 
// Weak long CAS: may fail spuriously.  $res receives a 1/0 success flag;
// the old value is discarded (noreg).
instruct weakCompareAndSwapL(iRegINoSp res, indirect mem, iRegL oldval, iRegL newval, rFlagsReg cr) %{
  match(Set res (WeakCompareAndSwapL mem (Binary oldval newval)));
  ins_cost(2 * VOLATILE_REF_COST);
  effect(KILL cr);
  format %{
    "cmpxchg $res = $mem, $oldval, $newval\t# (long, weak) if $mem == $oldval then $mem <-- $newval"
    "csetw $res, EQ\t# $res <-- (EQ ? 1 : 0)"
  %}
  ins_encode %{
    __ cmpxchg($mem$$Register, $oldval$$Register, $newval$$Register,
               Assembler::xword, /*acquire*/ false, /*release*/ true,
               /*weak*/ true, noreg);
    __ csetw($res$$Register, Assembler::EQ);
  %}
  ins_pipe(pipe_slow);
%}
9892 
// Weak narrow-oop CAS: may fail spuriously.  $res receives a 1/0 success
// flag; the old value is discarded (noreg).
instruct weakCompareAndSwapN(iRegINoSp res, indirect mem, iRegN oldval, iRegN newval, rFlagsReg cr) %{
  match(Set res (WeakCompareAndSwapN mem (Binary oldval newval)));
  ins_cost(2 * VOLATILE_REF_COST);
  effect(KILL cr);
  format %{
    "cmpxchg $res = $mem, $oldval, $newval\t# (narrow oop, weak) if $mem == $oldval then $mem <-- $newval"
    "csetw $res, EQ\t# $res <-- (EQ ? 1 : 0)"
  %}
  ins_encode %{
    __ cmpxchg($mem$$Register, $oldval$$Register, $newval$$Register,
               Assembler::word, /*acquire*/ false, /*release*/ true,
               /*weak*/ true, noreg);
    __ csetw($res$$Register, Assembler::EQ);
  %}
  ins_pipe(pipe_slow);
%}
9909 
// Weak pointer CAS: may fail spuriously.  $res receives a 1/0 success flag;
// the old value is discarded (noreg).
instruct weakCompareAndSwapP(iRegINoSp res, indirect mem, iRegP oldval, iRegP newval, rFlagsReg cr) %{
  match(Set res (WeakCompareAndSwapP mem (Binary oldval newval)));
  ins_cost(2 * VOLATILE_REF_COST);
  effect(KILL cr);
  format %{
    "cmpxchg $res = $mem, $oldval, $newval\t# (ptr, weak) if $mem == $oldval then $mem <-- $newval"
    "csetw $res, EQ\t# $res <-- (EQ ? 1 : 0)"
  %}
  ins_encode %{
    __ cmpxchg($mem$$Register, $oldval$$Register, $newval$$Register,
               Assembler::xword, /*acquire*/ false, /*release*/ true,
               /*weak*/ true, noreg);
    __ csetw($res$$Register, Assembler::EQ);
  %}
  ins_pipe(pipe_slow);
%}
9926 
9927 // END This section of the file is automatically generated. Do not edit --------------
9928 // ---------------------------------------------------------------------
9929 
// Atomic 32-bit exchange: swap $newv into [$mem], old value lands in $prev.
instruct get_and_setI(indirect mem, iRegI newv, iRegINoSp prev) %{
  match(Set prev (GetAndSetI mem newv));
  format %{ "atomic_xchgw  $prev, $newv, [$mem]" %}
  ins_encode %{
    __ atomic_xchgw($prev$$Register, $newv$$Register, as_Register($mem$$base));
  %}
  ins_pipe(pipe_serial);
%}
9938 
// Atomic 64-bit exchange: swap $newv into [$mem], old value lands in $prev.
instruct get_and_setL(indirect mem, iRegL newv, iRegLNoSp prev) %{
  match(Set prev (GetAndSetL mem newv));
  format %{ "atomic_xchg  $prev, $newv, [$mem]" %}
  ins_encode %{
    __ atomic_xchg($prev$$Register, $newv$$Register, as_Register($mem$$base));
  %}
  ins_pipe(pipe_serial);
%}
9947 
// Atomic narrow-oop exchange (32-bit compressed pointer, word-sized xchg).
instruct get_and_setN(indirect mem, iRegN newv, iRegINoSp prev) %{
  match(Set prev (GetAndSetN mem newv));
  format %{ "atomic_xchgw $prev, $newv, [$mem]" %}
  ins_encode %{
    __ atomic_xchgw($prev$$Register, $newv$$Register, as_Register($mem$$base));
  %}
  ins_pipe(pipe_serial);
%}
9956 
// Atomic pointer exchange: swap $newv into [$mem], old value lands in $prev.
instruct get_and_setP(indirect mem, iRegP newv, iRegPNoSp prev) %{
  match(Set prev (GetAndSetP mem newv));
  format %{ "atomic_xchg  $prev, $newv, [$mem]" %}
  ins_encode %{
    __ atomic_xchg($prev$$Register, $newv$$Register, as_Register($mem$$base));
  %}
  ins_pipe(pipe_serial);
%}
9965 
9966 
// Atomic 64-bit fetch-and-add (register addend): $newval gets the old value.
instruct get_and_addL(indirect mem, iRegLNoSp newval, iRegL incr) %{
  match(Set newval (GetAndAddL mem incr));
  ins_cost(INSN_COST * 10);
  format %{ "get_and_addL $newval, [$mem], $incr" %}
  ins_encode %{
    __ atomic_add($newval$$Register, $incr$$Register, as_Register($mem$$base));
  %}
  ins_pipe(pipe_serial);
%}
9976 
// As get_and_addL, but the fetched value is unused (predicate), so it is
// discarded (noreg).  Slightly cheaper cost so this form is preferred.
instruct get_and_addL_no_res(indirect mem, Universe dummy, iRegL incr) %{
  predicate(n->as_LoadStore()->result_not_used());
  match(Set dummy (GetAndAddL mem incr));
  ins_cost(INSN_COST * 9);
  format %{ "get_and_addL [$mem], $incr" %}
  ins_encode %{
    __ atomic_add(noreg, $incr$$Register, as_Register($mem$$base));
  %}
  ins_pipe(pipe_serial);
%}
9987 
// Atomic 64-bit fetch-and-add with an immediate addend (immLAddSub).
instruct get_and_addLi(indirect mem, iRegLNoSp newval, immLAddSub incr) %{
  match(Set newval (GetAndAddL mem incr));
  ins_cost(INSN_COST * 10);
  format %{ "get_and_addL $newval, [$mem], $incr" %}
  ins_encode %{
    __ atomic_add($newval$$Register, $incr$$constant, as_Register($mem$$base));
  %}
  ins_pipe(pipe_serial);
%}
9997 
// As get_and_addLi, but the fetched value is unused and discarded (noreg).
instruct get_and_addLi_no_res(indirect mem, Universe dummy, immLAddSub incr) %{
  predicate(n->as_LoadStore()->result_not_used());
  match(Set dummy (GetAndAddL mem incr));
  ins_cost(INSN_COST * 9);
  format %{ "get_and_addL [$mem], $incr" %}
  ins_encode %{
    __ atomic_add(noreg, $incr$$constant, as_Register($mem$$base));
  %}
  ins_pipe(pipe_serial);
%}
10008 
// Atomic 32-bit fetch-and-add (register addend): $newval gets the old value.
instruct get_and_addI(indirect mem, iRegINoSp newval, iRegIorL2I incr) %{
  match(Set newval (GetAndAddI mem incr));
  ins_cost(INSN_COST * 10);
  format %{ "get_and_addI $newval, [$mem], $incr" %}
  ins_encode %{
    __ atomic_addw($newval$$Register, $incr$$Register, as_Register($mem$$base));
  %}
  ins_pipe(pipe_serial);
%}
10018 
// As get_and_addI, but the fetched value is unused and discarded (noreg).
instruct get_and_addI_no_res(indirect mem, Universe dummy, iRegIorL2I incr) %{
  predicate(n->as_LoadStore()->result_not_used());
  match(Set dummy (GetAndAddI mem incr));
  ins_cost(INSN_COST * 9);
  format %{ "get_and_addI [$mem], $incr" %}
  ins_encode %{
    __ atomic_addw(noreg, $incr$$Register, as_Register($mem$$base));
  %}
  ins_pipe(pipe_serial);
%}
10029 
// Atomic 32-bit fetch-and-add with an immediate addend (immIAddSub).
instruct get_and_addIi(indirect mem, iRegINoSp newval, immIAddSub incr) %{
  match(Set newval (GetAndAddI mem incr));
  ins_cost(INSN_COST * 10);
  format %{ "get_and_addI $newval, [$mem], $incr" %}
  ins_encode %{
    __ atomic_addw($newval$$Register, $incr$$constant, as_Register($mem$$base));
  %}
  ins_pipe(pipe_serial);
%}
10039 
// As get_and_addIi, but the fetched value is unused and discarded (noreg).
instruct get_and_addIi_no_res(indirect mem, Universe dummy, immIAddSub incr) %{
  predicate(n->as_LoadStore()->result_not_used());
  match(Set dummy (GetAndAddI mem incr));
  ins_cost(INSN_COST * 9);
  format %{ "get_and_addI [$mem], $incr" %}
  ins_encode %{
    __ atomic_addw(noreg, $incr$$constant, as_Register($mem$$base));
  %}
  ins_pipe(pipe_serial);
%}
10050 
10051 // Manifest a CmpL result in an integer register.
10052 // (src1 < src2) ? -1 : ((src1 > src2) ? 1 : 0)
instruct cmpL3_reg_reg(iRegINoSp dst, iRegL src1, iRegL src2, rFlagsReg flags)
%{
  match(Set dst (CmpL3 src1 src2));
  effect(KILL flags);

  ins_cost(INSN_COST * 6);
  format %{
      "cmp $src1, $src2"
      "csetw $dst, ne"
      "cnegw $dst, lt"
  %}
  ins_encode %{
    __ cmp($src1$$Register, $src2$$Register);
    __ csetw($dst$$Register, Assembler::NE);                 // dst = (src1 != src2) ? 1 : 0
    __ cnegw($dst$$Register, $dst$$Register, Assembler::LT); // negate to -1 when src1 < src2
  %}

  ins_pipe(pipe_class_default);
%}
10073 
// Manifest a CmpL-against-immediate result in an integer register:
// (src1 < src2) ? -1 : ((src1 > src2) ? 1 : 0).
// (Fixes the previously inconsistent indentation in the encode body.)
instruct cmpL3_reg_imm(iRegINoSp dst, iRegL src1, immLAddSub src2, rFlagsReg flags)
%{
  match(Set dst (CmpL3 src1 src2));
  effect(KILL flags);

  ins_cost(INSN_COST * 6);
  format %{
      "cmp $src1, $src2"
      "csetw $dst, ne"
      "cnegw $dst, lt"
  %}
  ins_encode %{
    // immLAddSub only admits valid add/sub immediates, so negating a
    // negative constant below cannot overflow.
    int32_t con = (int32_t)$src2$$constant;
    if (con < 0) {
      __ adds(zr, $src1$$Register, -con); // compare via adds of the negated immediate
    } else {
      __ subs(zr, $src1$$Register, con);  // plain compare (subs against zr)
    }
    __ csetw($dst$$Register, Assembler::NE);                 // dst = (src1 != con) ? 1 : 0
    __ cnegw($dst$$Register, $dst$$Register, Assembler::LT); // negate to -1 when src1 < con
  %}

  ins_pipe(pipe_class_default);
%}
10098 
10099 // ============================================================================
10100 // Conditional Move Instructions
10101 
// n.b. we have identical rules for both a signed compare op (cmpOp)
// and an unsigned compare op (cmpOpU). it would be nice if we could
// define an op class which merged both inputs and use it to type the
// argument to a single rule. unfortunately this fails because the
// opclass does not live up to the COND_INTER interface of its
// component operands. When the generic code tries to negate the
// operand it ends up running the generic MachOper::negate method
// which throws a ShouldNotHappen. So, we have to provide two flavours
// of each rule, one for a cmpOp and a second for a cmpOpU (sigh).
10111 
// Conditional move, int, signed compare.  csel picks its second operand
// when the condition holds, hence $src2 is emitted before $src1.
instruct cmovI_reg_reg(cmpOp cmp, rFlagsReg cr, iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2) %{
  match(Set dst (CMoveI (Binary cmp cr) (Binary src1 src2)));

  ins_cost(INSN_COST * 2);
  format %{ "cselw $dst, $src2, $src1 $cmp\t# signed, int"  %}

  ins_encode %{
    __ cselw(as_Register($dst$$reg),
             as_Register($src2$$reg),
             as_Register($src1$$reg),
             (Assembler::Condition)$cmp$$cmpcode);
  %}

  ins_pipe(icond_reg_reg);
%}
10127 
// Conditional move, int, unsigned compare (cmpOpU flavour of the rule above).
instruct cmovUI_reg_reg(cmpOpU cmp, rFlagsRegU cr, iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2) %{
  match(Set dst (CMoveI (Binary cmp cr) (Binary src1 src2)));

  ins_cost(INSN_COST * 2);
  format %{ "cselw $dst, $src2, $src1 $cmp\t# unsigned, int"  %}

  ins_encode %{
    __ cselw(as_Register($dst$$reg),
             as_Register($src2$$reg),
             as_Register($src1$$reg),
             (Assembler::Condition)$cmp$$cmpcode);
  %}

  ins_pipe(icond_reg_reg);
%}
10143 
10144 // special cases where one arg is zero
10145 
10146 // n.b. this is selected in preference to the rule above because it
10147 // avoids loading constant 0 into a source register
10148 
10149 // TODO
10150 // we ought only to be able to cull one of these variants as the ideal
10151 // transforms ought always to order the zero consistently (to left/right?)
10152 
// Int cmove, signed, left operand zero: use zr instead of loading 0.
instruct cmovI_zero_reg(cmpOp cmp, rFlagsReg cr, iRegINoSp dst, immI0 zero, iRegIorL2I src) %{
  match(Set dst (CMoveI (Binary cmp cr) (Binary zero src)));

  ins_cost(INSN_COST * 2);
  format %{ "cselw $dst, $src, zr $cmp\t# signed, int"  %}

  ins_encode %{
    __ cselw(as_Register($dst$$reg),
             as_Register($src$$reg),
             zr,
             (Assembler::Condition)$cmp$$cmpcode);
  %}

  ins_pipe(icond_reg);
%}
10168 
// Int cmove, unsigned, left operand zero: use zr instead of loading 0.
instruct cmovUI_zero_reg(cmpOpU cmp, rFlagsRegU cr, iRegINoSp dst, immI0 zero, iRegIorL2I src) %{
  match(Set dst (CMoveI (Binary cmp cr) (Binary zero src)));

  ins_cost(INSN_COST * 2);
  format %{ "cselw $dst, $src, zr $cmp\t# unsigned, int"  %}

  ins_encode %{
    __ cselw(as_Register($dst$$reg),
             as_Register($src$$reg),
             zr,
             (Assembler::Condition)$cmp$$cmpcode);
  %}

  ins_pipe(icond_reg);
%}
10184 
// Int cmove, signed, right operand zero: use zr instead of loading 0.
instruct cmovI_reg_zero(cmpOp cmp, rFlagsReg cr, iRegINoSp dst, iRegIorL2I src, immI0 zero) %{
  match(Set dst (CMoveI (Binary cmp cr) (Binary src zero)));

  ins_cost(INSN_COST * 2);
  format %{ "cselw $dst, zr, $src $cmp\t# signed, int"  %}

  ins_encode %{
    __ cselw(as_Register($dst$$reg),
             zr,
             as_Register($src$$reg),
             (Assembler::Condition)$cmp$$cmpcode);
  %}

  ins_pipe(icond_reg);
%}
10200 
// Int cmove, unsigned, right operand zero: use zr instead of loading 0.
instruct cmovUI_reg_zero(cmpOpU cmp, rFlagsRegU cr, iRegINoSp dst, iRegIorL2I src, immI0 zero) %{
  match(Set dst (CMoveI (Binary cmp cr) (Binary src zero)));

  ins_cost(INSN_COST * 2);
  format %{ "cselw $dst, zr, $src $cmp\t# unsigned, int"  %}

  ins_encode %{
    __ cselw(as_Register($dst$$reg),
             zr,
             as_Register($src$$reg),
             (Assembler::Condition)$cmp$$cmpcode);
  %}

  ins_pipe(icond_reg);
%}
10216 
10217 // special case for creating a boolean 0 or 1
10218 
10219 // n.b. this is selected in preference to the rule above because it
10220 // avoids loading constants 0 and 1 into a source register
10221 
// Materialize a boolean without loading constants: csincw dst, zr, zr, cond
// yields (cond ? 0 : 1), i.e. cset of the negated condition.
instruct cmovI_reg_zero_one(cmpOp cmp, rFlagsReg cr, iRegINoSp dst, immI0 zero, immI_1 one) %{
  match(Set dst (CMoveI (Binary cmp cr) (Binary one zero)));

  ins_cost(INSN_COST * 2);
  format %{ "csincw $dst, zr, zr $cmp\t# signed, int"  %}

  ins_encode %{
    // equivalently
    // cset(as_Register($dst$$reg),
    //      negate_condition((Assembler::Condition)$cmp$$cmpcode));
    __ csincw(as_Register($dst$$reg),
             zr,
             zr,
             (Assembler::Condition)$cmp$$cmpcode);
  %}

  ins_pipe(icond_none);
%}
10240 
// Unsigned flavour of cmovI_reg_zero_one: boolean via csinc of zr, zr.
instruct cmovUI_reg_zero_one(cmpOpU cmp, rFlagsRegU cr, iRegINoSp dst, immI0 zero, immI_1 one) %{
  match(Set dst (CMoveI (Binary cmp cr) (Binary one zero)));

  ins_cost(INSN_COST * 2);
  format %{ "csincw $dst, zr, zr $cmp\t# unsigned, int"  %}

  ins_encode %{
    // equivalently
    // cset(as_Register($dst$$reg),
    //      negate_condition((Assembler::Condition)$cmp$$cmpcode));
    __ csincw(as_Register($dst$$reg),
             zr,
             zr,
             (Assembler::Condition)$cmp$$cmpcode);
  %}

  ins_pipe(icond_none);
%}
10259 
// Conditional move, long, signed compare ($src2 selected when $cmp holds).
instruct cmovL_reg_reg(cmpOp cmp, rFlagsReg cr, iRegLNoSp dst, iRegL src1, iRegL src2) %{
  match(Set dst (CMoveL (Binary cmp cr) (Binary src1 src2)));

  ins_cost(INSN_COST * 2);
  format %{ "csel $dst, $src2, $src1 $cmp\t# signed, long"  %}

  ins_encode %{
    __ csel(as_Register($dst$$reg),
            as_Register($src2$$reg),
            as_Register($src1$$reg),
            (Assembler::Condition)$cmp$$cmpcode);
  %}

  ins_pipe(icond_reg_reg);
%}
10275 
// Conditional move, long, unsigned compare.
instruct cmovUL_reg_reg(cmpOpU cmp, rFlagsRegU cr, iRegLNoSp dst, iRegL src1, iRegL src2) %{
  match(Set dst (CMoveL (Binary cmp cr) (Binary src1 src2)));

  ins_cost(INSN_COST * 2);
  format %{ "csel $dst, $src2, $src1 $cmp\t# unsigned, long"  %}

  ins_encode %{
    __ csel(as_Register($dst$$reg),
            as_Register($src2$$reg),
            as_Register($src1$$reg),
            (Assembler::Condition)$cmp$$cmpcode);
  %}

  ins_pipe(icond_reg_reg);
%}
10291 
10292 // special cases where one arg is zero
10293 
// Long cmove, signed, right operand zero: use zr instead of loading 0.
instruct cmovL_reg_zero(cmpOp cmp, rFlagsReg cr, iRegLNoSp dst, iRegL src, immL0 zero) %{
  match(Set dst (CMoveL (Binary cmp cr) (Binary src zero)));

  ins_cost(INSN_COST * 2);
  format %{ "csel $dst, zr, $src $cmp\t# signed, long"  %}

  ins_encode %{
    __ csel(as_Register($dst$$reg),
            zr,
            as_Register($src$$reg),
            (Assembler::Condition)$cmp$$cmpcode);
  %}

  ins_pipe(icond_reg);
%}
10309 
// Long cmove, unsigned, right operand zero: use zr instead of loading 0.
instruct cmovUL_reg_zero(cmpOpU cmp, rFlagsRegU cr, iRegLNoSp dst, iRegL src, immL0 zero) %{
  match(Set dst (CMoveL (Binary cmp cr) (Binary src zero)));

  ins_cost(INSN_COST * 2);
  format %{ "csel $dst, zr, $src $cmp\t# unsigned, long"  %}

  ins_encode %{
    __ csel(as_Register($dst$$reg),
            zr,
            as_Register($src$$reg),
            (Assembler::Condition)$cmp$$cmpcode);
  %}

  ins_pipe(icond_reg);
%}
10325 
// Long cmove, signed, left operand zero: use zr instead of loading 0.
instruct cmovL_zero_reg(cmpOp cmp, rFlagsReg cr, iRegLNoSp dst, immL0 zero, iRegL src) %{
  match(Set dst (CMoveL (Binary cmp cr) (Binary zero src)));

  ins_cost(INSN_COST * 2);
  format %{ "csel $dst, $src, zr $cmp\t# signed, long"  %}

  ins_encode %{
    __ csel(as_Register($dst$$reg),
            as_Register($src$$reg),
            zr,
            (Assembler::Condition)$cmp$$cmpcode);
  %}

  ins_pipe(icond_reg);
%}
10341 
// Long cmove, unsigned, left operand zero: use zr instead of loading 0.
instruct cmovUL_zero_reg(cmpOpU cmp, rFlagsRegU cr, iRegLNoSp dst, immL0 zero, iRegL src) %{
  match(Set dst (CMoveL (Binary cmp cr) (Binary zero src)));

  ins_cost(INSN_COST * 2);
  format %{ "csel $dst, $src, zr $cmp\t# unsigned, long"  %}

  ins_encode %{
    __ csel(as_Register($dst$$reg),
            as_Register($src$$reg),
            zr,
            (Assembler::Condition)$cmp$$cmpcode);
  %}

  ins_pipe(icond_reg);
%}
10357 
// Conditional move, pointer, signed compare.
instruct cmovP_reg_reg(cmpOp cmp, rFlagsReg cr, iRegPNoSp dst, iRegP src1, iRegP src2) %{
  match(Set dst (CMoveP (Binary cmp cr) (Binary src1 src2)));

  ins_cost(INSN_COST * 2);
  format %{ "csel $dst, $src2, $src1 $cmp\t# signed, ptr"  %}

  ins_encode %{
    __ csel(as_Register($dst$$reg),
            as_Register($src2$$reg),
            as_Register($src1$$reg),
            (Assembler::Condition)$cmp$$cmpcode);
  %}

  ins_pipe(icond_reg_reg);
%}
10373 
// Conditional move, pointer, unsigned compare.
instruct cmovUP_reg_reg(cmpOpU cmp, rFlagsRegU cr, iRegPNoSp dst, iRegP src1, iRegP src2) %{
  match(Set dst (CMoveP (Binary cmp cr) (Binary src1 src2)));

  ins_cost(INSN_COST * 2);
  format %{ "csel $dst, $src2, $src1 $cmp\t# unsigned, ptr"  %}

  ins_encode %{
    __ csel(as_Register($dst$$reg),
            as_Register($src2$$reg),
            as_Register($src1$$reg),
            (Assembler::Condition)$cmp$$cmpcode);
  %}

  ins_pipe(icond_reg_reg);
%}
10389 
10390 // special cases where one arg is zero
10391 
// Pointer cmove, signed, right operand null: use zr instead of loading 0.
instruct cmovP_reg_zero(cmpOp cmp, rFlagsReg cr, iRegPNoSp dst, iRegP src, immP0 zero) %{
  match(Set dst (CMoveP (Binary cmp cr) (Binary src zero)));

  ins_cost(INSN_COST * 2);
  format %{ "csel $dst, zr, $src $cmp\t# signed, ptr"  %}

  ins_encode %{
    __ csel(as_Register($dst$$reg),
            zr,
            as_Register($src$$reg),
            (Assembler::Condition)$cmp$$cmpcode);
  %}

  ins_pipe(icond_reg);
%}
10407 
// Pointer cmove, unsigned, right operand null: use zr instead of loading 0.
instruct cmovUP_reg_zero(cmpOpU cmp, rFlagsRegU cr, iRegPNoSp dst, iRegP src, immP0 zero) %{
  match(Set dst (CMoveP (Binary cmp cr) (Binary src zero)));

  ins_cost(INSN_COST * 2);
  format %{ "csel $dst, zr, $src $cmp\t# unsigned, ptr"  %}

  ins_encode %{
    __ csel(as_Register($dst$$reg),
            zr,
            as_Register($src$$reg),
            (Assembler::Condition)$cmp$$cmpcode);
  %}

  ins_pipe(icond_reg);
%}
10423 
// Pointer cmove, signed, left operand null: use zr instead of loading 0.
instruct cmovP_zero_reg(cmpOp cmp, rFlagsReg cr, iRegPNoSp dst, immP0 zero, iRegP src) %{
  match(Set dst (CMoveP (Binary cmp cr) (Binary zero src)));

  ins_cost(INSN_COST * 2);
  format %{ "csel $dst, $src, zr $cmp\t# signed, ptr"  %}

  ins_encode %{
    __ csel(as_Register($dst$$reg),
            as_Register($src$$reg),
            zr,
            (Assembler::Condition)$cmp$$cmpcode);
  %}

  ins_pipe(icond_reg);
%}
10439 
// Pointer cmove, unsigned, left operand null: use zr instead of loading 0.
instruct cmovUP_zero_reg(cmpOpU cmp, rFlagsRegU cr, iRegPNoSp dst, immP0 zero, iRegP src) %{
  match(Set dst (CMoveP (Binary cmp cr) (Binary zero src)));

  ins_cost(INSN_COST * 2);
  format %{ "csel $dst, $src, zr $cmp\t# unsigned, ptr"  %}

  ins_encode %{
    __ csel(as_Register($dst$$reg),
            as_Register($src$$reg),
            zr,
            (Assembler::Condition)$cmp$$cmpcode);
  %}

  ins_pipe(icond_reg);
%}
10455 
// Conditional move, compressed (narrow) pointer, signed compare.
instruct cmovN_reg_reg(cmpOp cmp, rFlagsReg cr, iRegNNoSp dst, iRegN src1, iRegN src2) %{
  match(Set dst (CMoveN (Binary cmp cr) (Binary src1 src2)));

  ins_cost(INSN_COST * 2);
  format %{ "cselw $dst, $src2, $src1 $cmp\t# signed, compressed ptr"  %}

  ins_encode %{
    __ cselw(as_Register($dst$$reg),
             as_Register($src2$$reg),
             as_Register($src1$$reg),
             (Assembler::Condition)$cmp$$cmpcode);
  %}

  ins_pipe(icond_reg_reg);
%}
10471 
// Conditional move, compressed (narrow) pointer, unsigned compare.
// The format comment previously said "signed"; this is the cmpOpU
// (unsigned) flavour, consistent with cmovUI/cmovUL/cmovUP above.
instruct cmovUN_reg_reg(cmpOpU cmp, rFlagsRegU cr, iRegNNoSp dst, iRegN src1, iRegN src2) %{
  match(Set dst (CMoveN (Binary cmp cr) (Binary src1 src2)));

  ins_cost(INSN_COST * 2);
  format %{ "cselw $dst, $src2, $src1 $cmp\t# unsigned, compressed ptr"  %}

  ins_encode %{
    __ cselw(as_Register($dst$$reg),
             as_Register($src2$$reg),
             as_Register($src1$$reg),
             (Assembler::Condition)$cmp$$cmpcode);
  %}

  ins_pipe(icond_reg_reg);
%}
10487 
10488 // special cases where one arg is zero
10489 
// Narrow-oop cmove, signed, right operand null: use zr instead of loading 0.
instruct cmovN_reg_zero(cmpOp cmp, rFlagsReg cr, iRegNNoSp dst, iRegN src, immN0 zero) %{
  match(Set dst (CMoveN (Binary cmp cr) (Binary src zero)));

  ins_cost(INSN_COST * 2);
  format %{ "cselw $dst, zr, $src $cmp\t# signed, compressed ptr"  %}

  ins_encode %{
    __ cselw(as_Register($dst$$reg),
             zr,
             as_Register($src$$reg),
             (Assembler::Condition)$cmp$$cmpcode);
  %}

  ins_pipe(icond_reg);
%}
10505 
// Narrow-oop cmove, unsigned, right operand null: use zr instead of loading 0.
instruct cmovUN_reg_zero(cmpOpU cmp, rFlagsRegU cr, iRegNNoSp dst, iRegN src, immN0 zero) %{
  match(Set dst (CMoveN (Binary cmp cr) (Binary src zero)));

  ins_cost(INSN_COST * 2);
  format %{ "cselw $dst, zr, $src $cmp\t# unsigned, compressed ptr"  %}

  ins_encode %{
    __ cselw(as_Register($dst$$reg),
             zr,
             as_Register($src$$reg),
             (Assembler::Condition)$cmp$$cmpcode);
  %}

  ins_pipe(icond_reg);
%}
10521 
// Narrow-oop cmove, signed, left operand null: use zr instead of loading 0.
instruct cmovN_zero_reg(cmpOp cmp, rFlagsReg cr, iRegNNoSp dst, immN0 zero, iRegN src) %{
  match(Set dst (CMoveN (Binary cmp cr) (Binary zero src)));

  ins_cost(INSN_COST * 2);
  format %{ "cselw $dst, $src, zr $cmp\t# signed, compressed ptr"  %}

  ins_encode %{
    __ cselw(as_Register($dst$$reg),
             as_Register($src$$reg),
             zr,
             (Assembler::Condition)$cmp$$cmpcode);
  %}

  ins_pipe(icond_reg);
%}
10537 
// Narrow-oop cmove, unsigned, left operand null: use zr instead of loading 0.
instruct cmovUN_zero_reg(cmpOpU cmp, rFlagsRegU cr, iRegNNoSp dst, immN0 zero, iRegN src) %{
  match(Set dst (CMoveN (Binary cmp cr) (Binary zero src)));

  ins_cost(INSN_COST * 2);
  format %{ "cselw $dst, $src, zr $cmp\t# unsigned, compressed ptr"  %}

  ins_encode %{
    __ cselw(as_Register($dst$$reg),
             as_Register($src$$reg),
             zr,
             (Assembler::Condition)$cmp$$cmpcode);
  %}

  ins_pipe(icond_reg);
%}
10553 
// Conditional move, float, signed compare.  fcsel picks its second operand
// when the condition holds, hence $src2 is emitted before $src1.
instruct cmovF_reg(cmpOp cmp, rFlagsReg cr, vRegF dst, vRegF src1,  vRegF src2)
%{
  match(Set dst (CMoveF (Binary cmp cr) (Binary src1 src2)));

  ins_cost(INSN_COST * 3);

  format %{ "fcsels $dst, $src1, $src2, $cmp\t# signed cmove float\n\t" %}
  ins_encode %{
    Assembler::Condition cond = (Assembler::Condition)$cmp$$cmpcode;
    __ fcsels(as_FloatRegister($dst$$reg),
              as_FloatRegister($src2$$reg),
              as_FloatRegister($src1$$reg),
              cond);
  %}

  ins_pipe(fp_cond_reg_reg_s);
%}
10571 
// Conditional move of a float, unsigned compare. Same operand swap as
// cmovF_reg: fcsel selects src2 when the condition holds.
instruct cmovUF_reg(cmpOpU cmp, rFlagsRegU cr, vRegF dst, vRegF src1,  vRegF src2)
%{
  match(Set dst (CMoveF (Binary cmp cr) (Binary src1 src2)));

  ins_cost(INSN_COST * 3);

  format %{ "fcsels $dst, $src1, $src2, $cmp\t# unsigned cmove float\n\t" %}
  ins_encode %{
    Assembler::Condition cond = (Assembler::Condition)$cmp$$cmpcode;
    __ fcsels(as_FloatRegister($dst$$reg),
              as_FloatRegister($src2$$reg),
              as_FloatRegister($src1$$reg),
              cond);
  %}

  ins_pipe(fp_cond_reg_reg_s);
%}
10589 
// Conditional move of a double, signed compare.
// fcsel d, n, m, cond: d = cond ? n : m, so passing (src2, src1) gives
// dst = cond ? src2 : src1.
instruct cmovD_reg(cmpOp cmp, rFlagsReg cr, vRegD dst, vRegD src1,  vRegD src2)
%{
  match(Set dst (CMoveD (Binary cmp cr) (Binary src1 src2)));

  ins_cost(INSN_COST * 3);

  // Fixed format comment: this is the double (fcseld) pattern, not float.
  format %{ "fcseld $dst, $src1, $src2, $cmp\t# signed cmove double\n\t" %}
  ins_encode %{
    Assembler::Condition cond = (Assembler::Condition)$cmp$$cmpcode;
    __ fcseld(as_FloatRegister($dst$$reg),
              as_FloatRegister($src2$$reg),
              as_FloatRegister($src1$$reg),
              cond);
  %}

  ins_pipe(fp_cond_reg_reg_d);
%}
10607 
// Conditional move of a double, unsigned compare. Same operand swap as
// cmovD_reg: fcsel selects src2 when the condition holds.
instruct cmovUD_reg(cmpOpU cmp, rFlagsRegU cr, vRegD dst, vRegD src1,  vRegD src2)
%{
  match(Set dst (CMoveD (Binary cmp cr) (Binary src1 src2)));

  ins_cost(INSN_COST * 3);

  // Fixed format comment: this is the double (fcseld) pattern, not float.
  format %{ "fcseld $dst, $src1, $src2, $cmp\t# unsigned cmove double\n\t" %}
  ins_encode %{
    Assembler::Condition cond = (Assembler::Condition)$cmp$$cmpcode;
    __ fcseld(as_FloatRegister($dst$$reg),
              as_FloatRegister($src2$$reg),
              as_FloatRegister($src1$$reg),
              cond);
  %}

  ins_pipe(fp_cond_reg_reg_d);
%}
10625 
10626 // ============================================================================
10627 // Arithmetic Instructions
10628 //
10629 
10630 // Integer Addition
10631 
10632 // TODO
10633 // these currently employ operations which do not set CR and hence are
10634 // not flagged as killing CR but we would like to isolate the cases
10635 // where we want to set flags from those where we don't. need to work
10636 // out how to do that.
10637 
// Integer (32-bit) add, register + register.
instruct addI_reg_reg(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2) %{
  match(Set dst (AddI src1 src2));

  ins_cost(INSN_COST);
  format %{ "addw  $dst, $src1, $src2" %}

  ins_encode %{
    __ addw(as_Register($dst$$reg),
            as_Register($src1$$reg),
            as_Register($src2$$reg));
  %}

  ins_pipe(ialu_reg_reg);
%}
10652 
// Integer add, register + add/sub-encodable immediate.
instruct addI_reg_imm(iRegINoSp dst, iRegIorL2I src1, immIAddSub src2) %{
  match(Set dst (AddI src1 src2));

  ins_cost(INSN_COST);
  format %{ "addw $dst, $src1, $src2" %}

  // use opcode to indicate that this is an add not a sub
  opcode(0x0);

  ins_encode(aarch64_enc_addsubw_imm(dst, src1, src2));

  ins_pipe(ialu_reg_imm);
%}
10666 
// Integer add of a narrowed long (ConvL2I) plus immediate. The narrowing
// costs nothing here because addw reads only the low 32 bits of $src1.
instruct addI_reg_imm_i2l(iRegINoSp dst, iRegL src1, immIAddSub src2) %{
  match(Set dst (AddI (ConvL2I src1) src2));

  ins_cost(INSN_COST);
  format %{ "addw $dst, $src1, $src2" %}

  // use opcode to indicate that this is an add not a sub
  opcode(0x0);

  ins_encode(aarch64_enc_addsubw_imm(dst, src1, src2));

  ins_pipe(ialu_reg_imm);
%}
10680 
10681 // Pointer Addition
// Pointer add: base pointer plus 64-bit offset, register + register.
instruct addP_reg_reg(iRegPNoSp dst, iRegP src1, iRegL src2) %{
  match(Set dst (AddP src1 src2));

  ins_cost(INSN_COST);
  format %{ "add $dst, $src1, $src2\t# ptr" %}

  ins_encode %{
    __ add(as_Register($dst$$reg),
           as_Register($src1$$reg),
           as_Register($src2$$reg));
  %}

  ins_pipe(ialu_reg_reg);
%}
10696 
// Pointer add with a sign-extended int offset: the ConvI2L is folded
// into the add via the sxtw extend, so no separate extend instruction.
instruct addP_reg_reg_ext(iRegPNoSp dst, iRegP src1, iRegIorL2I src2) %{
  match(Set dst (AddP src1 (ConvI2L src2)));

  ins_cost(1.9 * INSN_COST);
  format %{ "add $dst, $src1, $src2, sxtw\t# ptr" %}

  ins_encode %{
    __ add(as_Register($dst$$reg),
           as_Register($src1$$reg),
           as_Register($src2$$reg), ext::sxtw);
  %}

  ins_pipe(ialu_reg_reg);
%}
10711 
// Pointer add with a shifted (scaled) long index, folded into one lea
// using a base + scaled-register address form.
instruct addP_reg_reg_lsl(iRegPNoSp dst, iRegP src1, iRegL src2, immIScale scale) %{
  match(Set dst (AddP src1 (LShiftL src2 scale)));

  ins_cost(1.9 * INSN_COST);
  format %{ "add $dst, $src1, $src2, LShiftL $scale\t# ptr" %}

  ins_encode %{
    __ lea(as_Register($dst$$reg),
           Address(as_Register($src1$$reg), as_Register($src2$$reg),
                   Address::lsl($scale$$constant)));
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
10726 
// Pointer add with a sign-extended, scaled int index: both the ConvI2L
// and the shift are folded into the sxtw addressing form of lea.
instruct addP_reg_reg_ext_shift(iRegPNoSp dst, iRegP src1, iRegIorL2I src2, immIScale scale) %{
  match(Set dst (AddP src1 (LShiftL (ConvI2L src2) scale)));

  ins_cost(1.9 * INSN_COST);
  format %{ "add $dst, $src1, $src2, I2L $scale\t# ptr" %}

  ins_encode %{
    __ lea(as_Register($dst$$reg),
           Address(as_Register($src1$$reg), as_Register($src2$$reg),
                   Address::sxtw($scale$$constant)));
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
10741 
// (long)(int)src << scale, implemented as a single sbfiz: insert the
// sign-extended int at bit position (scale & 63). The inserted width is
// capped at 32 since only the low 32 bits of src are significant.
// NOTE(review): the cr operand appears unused by the encoding — confirm
// whether it is a deliberate (if vestigial) declaration.
instruct lshift_ext(iRegLNoSp dst, iRegIorL2I src, immI scale, rFlagsReg cr) %{
  match(Set dst (LShiftL (ConvI2L src) scale));

  ins_cost(INSN_COST);
  format %{ "sbfiz $dst, $src, $scale & 63, -$scale & 63\t" %}

  ins_encode %{
    __ sbfiz(as_Register($dst$$reg),
          as_Register($src$$reg),
          $scale$$constant & 63, MIN(32, (-$scale$$constant) & 63));
  %}

  ins_pipe(ialu_reg_shift);
%}
10756 
10757 // Pointer Immediate Addition
10758 // n.b. this needs to be more expensive than using an indirect memory
10759 // operand
// Pointer add of an add/sub-encodable immediate offset.
instruct addP_reg_imm(iRegPNoSp dst, iRegP src1, immLAddSub src2) %{
  match(Set dst (AddP src1 src2));

  ins_cost(INSN_COST);
  format %{ "add $dst, $src1, $src2\t# ptr" %}

  // use opcode to indicate that this is an add not a sub
  opcode(0x0);

  ins_encode( aarch64_enc_addsub_imm(dst, src1, src2) );

  ins_pipe(ialu_reg_imm);
%}
10773 
10774 // Long Addition
// Long (64-bit) add, register + register.
instruct addL_reg_reg(iRegLNoSp dst, iRegL src1, iRegL src2) %{

  match(Set dst (AddL src1 src2));

  ins_cost(INSN_COST);
  format %{ "add  $dst, $src1, $src2" %}

  ins_encode %{
    __ add(as_Register($dst$$reg),
           as_Register($src1$$reg),
           as_Register($src2$$reg));
  %}

  ins_pipe(ialu_reg_reg);
%}
10790 
// Long Immediate Addition. No constant pool entries required.
// Long add, register + add/sub-encodable immediate.
instruct addL_reg_imm(iRegLNoSp dst, iRegL src1, immLAddSub src2) %{
  match(Set dst (AddL src1 src2));

  ins_cost(INSN_COST);
  format %{ "add $dst, $src1, $src2" %}

  // use opcode to indicate that this is an add not a sub
  opcode(0x0);

  ins_encode( aarch64_enc_addsub_imm(dst, src1, src2) );

  ins_pipe(ialu_reg_imm);
%}
10805 
10806 // Integer Subtraction
// Integer (32-bit) subtract, register - register.
instruct subI_reg_reg(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2) %{
  match(Set dst (SubI src1 src2));

  ins_cost(INSN_COST);
  format %{ "subw  $dst, $src1, $src2" %}

  ins_encode %{
    __ subw(as_Register($dst$$reg),
            as_Register($src1$$reg),
            as_Register($src2$$reg));
  %}

  ins_pipe(ialu_reg_reg);
%}
10821 
10822 // Immediate Subtraction
// Integer subtract, register - add/sub-encodable immediate.
instruct subI_reg_imm(iRegINoSp dst, iRegIorL2I src1, immIAddSub src2) %{
  match(Set dst (SubI src1 src2));

  ins_cost(INSN_COST);
  format %{ "subw $dst, $src1, $src2" %}

  // use opcode to indicate that this is a sub not an add
  opcode(0x1);

  ins_encode(aarch64_enc_addsubw_imm(dst, src1, src2));

  ins_pipe(ialu_reg_imm);
%}
10836 
10837 // Long Subtraction
// Long (64-bit) subtract, register - register.
instruct subL_reg_reg(iRegLNoSp dst, iRegL src1, iRegL src2) %{

  match(Set dst (SubL src1 src2));

  ins_cost(INSN_COST);
  format %{ "sub  $dst, $src1, $src2" %}

  ins_encode %{
    __ sub(as_Register($dst$$reg),
           as_Register($src1$$reg),
           as_Register($src2$$reg));
  %}

  ins_pipe(ialu_reg_reg);
%}
10853 
// Long Immediate Subtraction. No constant pool entries required.
// Long subtract, register - add/sub-encodable immediate.
instruct subL_reg_imm(iRegLNoSp dst, iRegL src1, immLAddSub src2) %{
  match(Set dst (SubL src1 src2));

  ins_cost(INSN_COST);
  // Fixed format string: was "sub$dst, ..." (missing separator after the
  // mnemonic), which produced garbled disassembly comments.
  format %{ "sub  $dst, $src1, $src2" %}

  // use opcode to indicate that this is a sub not an add
  opcode(0x1);

  ins_encode( aarch64_enc_addsub_imm(dst, src1, src2) );

  ins_pipe(ialu_reg_imm);
%}
10868 
10869 // Integer Negation (special case for sub)
10870 
// Integer negation: SubI of zero - src matched to negw.
// NOTE(review): the cr operand appears unused by the encoding — confirm.
instruct negI_reg(iRegINoSp dst, iRegIorL2I src, immI0 zero, rFlagsReg cr) %{
  match(Set dst (SubI zero src));

  ins_cost(INSN_COST);
  format %{ "negw $dst, $src\t# int" %}

  ins_encode %{
    __ negw(as_Register($dst$$reg),
            as_Register($src$$reg));
  %}

  ins_pipe(ialu_reg);
%}
10884 
10885 // Long Negation
10886 
// Long negation: SubL of zero - src matched to neg.
// NOTE(review): the cr operand appears unused by the encoding — confirm.
instruct negL_reg(iRegLNoSp dst, iRegL src, immL0 zero, rFlagsReg cr) %{
  match(Set dst (SubL zero src));

  ins_cost(INSN_COST);
  format %{ "neg $dst, $src\t# long" %}

  ins_encode %{
    __ neg(as_Register($dst$$reg),
           as_Register($src$$reg));
  %}

  ins_pipe(ialu_reg);
%}
10900 
10901 // Integer Multiply
10902 
// Integer (32-bit) multiply, register * register.
instruct mulI(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2) %{
  match(Set dst (MulI src1 src2));

  ins_cost(INSN_COST * 3);
  format %{ "mulw  $dst, $src1, $src2" %}

  ins_encode %{
    __ mulw(as_Register($dst$$reg),
            as_Register($src1$$reg),
            as_Register($src2$$reg));
  %}

  ins_pipe(imul_reg_reg);
%}
10917 
// Long multiply of two sign-extended ints: MulL(ConvI2L, ConvI2L) is a
// single smull producing the full 64-bit product of the 32-bit inputs.
instruct smulI(iRegLNoSp dst, iRegIorL2I src1, iRegIorL2I src2) %{
  match(Set dst (MulL (ConvI2L src1) (ConvI2L src2)));

  ins_cost(INSN_COST * 3);
  format %{ "smull  $dst, $src1, $src2" %}

  ins_encode %{
    __ smull(as_Register($dst$$reg),
             as_Register($src1$$reg),
             as_Register($src2$$reg));
  %}

  ins_pipe(imul_reg_reg);
%}
10932 
10933 // Long Multiply
10934 
// Long (64-bit) multiply, register * register.
instruct mulL(iRegLNoSp dst, iRegL src1, iRegL src2) %{
  match(Set dst (MulL src1 src2));

  ins_cost(INSN_COST * 5);
  format %{ "mul  $dst, $src1, $src2" %}

  ins_encode %{
    __ mul(as_Register($dst$$reg),
           as_Register($src1$$reg),
           as_Register($src2$$reg));
  %}

  ins_pipe(lmul_reg_reg);
%}
10949 
// High 64 bits of the signed 128-bit product of two longs (MulHiL),
// computed with a single smulh.
instruct mulHiL_rReg(iRegLNoSp dst, iRegL src1, iRegL src2, rFlagsReg cr)
%{
  match(Set dst (MulHiL src1 src2));

  ins_cost(INSN_COST * 7);
  // Fixed format string: dropped the stray ',' before the comment field.
  format %{ "smulh  $dst, $src1, $src2\t# mulhi" %}

  ins_encode %{
    __ smulh(as_Register($dst$$reg),
             as_Register($src1$$reg),
             as_Register($src2$$reg));
  %}

  ins_pipe(lmul_reg_reg);
%}
10965 
10966 // Combined Integer Multiply & Add/Sub
10967 
// Combined integer multiply-add: src3 + src1*src2 as a single maddw.
instruct maddI(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2, iRegIorL2I src3) %{
  match(Set dst (AddI src3 (MulI src1 src2)));

  ins_cost(INSN_COST * 3);
  // Fixed format string: this pattern emits the 32-bit maddw, not madd.
  format %{ "maddw  $dst, $src1, $src2, $src3" %}

  ins_encode %{
    __ maddw(as_Register($dst$$reg),
             as_Register($src1$$reg),
             as_Register($src2$$reg),
             as_Register($src3$$reg));
  %}

  ins_pipe(imac_reg_reg);
%}
10983 
// Combined integer multiply-subtract: src3 - src1*src2 as a single msubw.
instruct msubI(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2, iRegIorL2I src3) %{
  match(Set dst (SubI src3 (MulI src1 src2)));

  ins_cost(INSN_COST * 3);
  // Fixed format string: this pattern emits the 32-bit msubw, not msub.
  format %{ "msubw  $dst, $src1, $src2, $src3" %}

  ins_encode %{
    __ msubw(as_Register($dst$$reg),
             as_Register($src1$$reg),
             as_Register($src2$$reg),
             as_Register($src3$$reg));
  %}

  ins_pipe(imac_reg_reg);
%}
10999 
11000 // Combined Long Multiply & Add/Sub
11001 
// Combined long multiply-add: src3 + src1*src2 as a single madd.
instruct maddL(iRegLNoSp dst, iRegL src1, iRegL src2, iRegL src3) %{
  match(Set dst (AddL src3 (MulL src1 src2)));

  ins_cost(INSN_COST * 5);
  format %{ "madd  $dst, $src1, $src2, $src3" %}

  ins_encode %{
    __ madd(as_Register($dst$$reg),
            as_Register($src1$$reg),
            as_Register($src2$$reg),
            as_Register($src3$$reg));
  %}

  ins_pipe(lmac_reg_reg);
%}
11017 
// Combined long multiply-subtract: src3 - src1*src2 as a single msub.
instruct msubL(iRegLNoSp dst, iRegL src1, iRegL src2, iRegL src3) %{
  match(Set dst (SubL src3 (MulL src1 src2)));

  ins_cost(INSN_COST * 5);
  format %{ "msub  $dst, $src1, $src2, $src3" %}

  ins_encode %{
    __ msub(as_Register($dst$$reg),
            as_Register($src1$$reg),
            as_Register($src2$$reg),
            as_Register($src3$$reg));
  %}

  ins_pipe(lmac_reg_reg);
%}
11033 
11034 // Integer Divide
11035 
// Integer (32-bit) signed divide via the aarch64_enc_divw encoding.
instruct divI(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2) %{
  match(Set dst (DivI src1 src2));

  ins_cost(INSN_COST * 19);
  format %{ "sdivw  $dst, $src1, $src2" %}

  ins_encode(aarch64_enc_divw(dst, src1, src2));
  ins_pipe(idiv_reg_reg);
%}
11045 
// Sign-bit extraction: (src1 >> 31) >>> 31 reduces to a single logical
// shift right by 31 (yields 0 or 1). Both shift amounts are pinned to 31
// by the immI_31 operands.
instruct signExtract(iRegINoSp dst, iRegIorL2I src1, immI_31 div1, immI_31 div2) %{
  match(Set dst (URShiftI (RShiftI src1 div1) div2));
  ins_cost(INSN_COST);
  format %{ "lsrw $dst, $src1, $div1" %}
  ins_encode %{
    __ lsrw(as_Register($dst$$reg), as_Register($src1$$reg), 31);
  %}
  ins_pipe(ialu_reg_shift);
%}
11055 
// Rounding adjustment used before signed division by a power of two:
// src + ((src >> 31) >>> 31) == src + sign-bit, folded into one addw
// with a shifted (LSR #31) second operand.
instruct div2Round(iRegINoSp dst, iRegIorL2I src, immI_31 div1, immI_31 div2) %{
  match(Set dst (AddI src (URShiftI (RShiftI src div1) div2)));
  ins_cost(INSN_COST);
  format %{ "addw $dst, $src, LSR $div1" %}

  ins_encode %{
    __ addw(as_Register($dst$$reg),
              as_Register($src$$reg),
              as_Register($src$$reg),
              Assembler::LSR, 31);
  %}
  ins_pipe(ialu_reg);
%}
11069 
11070 // Long Divide
11071 
// Long (64-bit) signed divide via the aarch64_enc_div encoding.
instruct divL(iRegLNoSp dst, iRegL src1, iRegL src2) %{
  match(Set dst (DivL src1 src2));

  ins_cost(INSN_COST * 35);
  format %{ "sdiv   $dst, $src1, $src2" %}

  ins_encode(aarch64_enc_div(dst, src1, src2));
  ins_pipe(ldiv_reg_reg);
%}
11081 
// Long sign-bit extraction: (src1 >> 63) >>> 63 reduces to one lsr by 63.
instruct signExtractL(iRegLNoSp dst, iRegL src1, immI_63 div1, immI_63 div2) %{
  match(Set dst (URShiftL (RShiftL src1 div1) div2));
  ins_cost(INSN_COST);
  format %{ "lsr $dst, $src1, $div1" %}
  ins_encode %{
    __ lsr(as_Register($dst$$reg), as_Register($src1$$reg), 63);
  %}
  ins_pipe(ialu_reg_shift);
%}
11091 
// Long rounding adjustment before signed division by a power of two:
// src + ((src >> 63) >>> 63) == src + sign-bit, folded into one add
// with a shifted (LSR #63) second operand.
instruct div2RoundL(iRegLNoSp dst, iRegL src, immI_63 div1, immI_63 div2) %{
  match(Set dst (AddL src (URShiftL (RShiftL src div1) div2)));
  ins_cost(INSN_COST);
  // Fixed format string: show the shifted-operand form, matching div2Round.
  format %{ "add $dst, $src, LSR $div1" %}

  ins_encode %{
    __ add(as_Register($dst$$reg),
              as_Register($src$$reg),
              as_Register($src$$reg),
              Assembler::LSR, 63);
  %}
  ins_pipe(ialu_reg);
%}
11105 
11106 // Integer Remainder
11107 
// Integer remainder: sdivw into rscratch1, then msubw recovers
// dst = src1 - (src1/src2)*src2.
instruct modI(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2) %{
  match(Set dst (ModI src1 src2));

  ins_cost(INSN_COST * 22);
  // Fixed format string: second line was malformed ("msubw($dst, ...").
  format %{ "sdivw  rscratch1, $src1, $src2\n\t"
            "msubw  $dst, rscratch1, $src2, $src1" %}

  ins_encode(aarch64_enc_modw(dst, src1, src2));
  ins_pipe(idiv_reg_reg);
%}
11118 
11119 // Long Remainder
11120 
// Long remainder: sdiv into rscratch1, then msub recovers
// dst = src1 - (src1/src2)*src2.
instruct modL(iRegLNoSp dst, iRegL src1, iRegL src2) %{
  match(Set dst (ModL src1 src2));

  ins_cost(INSN_COST * 38);
  // Fixed format string: second line was malformed ("msub($dst, ...").
  format %{ "sdiv   rscratch1, $src1, $src2\n\t"
            "msub   $dst, rscratch1, $src2, $src1" %}

  ins_encode(aarch64_enc_mod(dst, src1, src2));
  ins_pipe(ldiv_reg_reg);
%}
11131 
11132 // Integer Shifts
11133 
11134 // Shift Left Register
// Integer shift left, variable amount in a register (lslvw).
instruct lShiftI_reg_reg(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2) %{
  match(Set dst (LShiftI src1 src2));

  ins_cost(INSN_COST * 2);
  format %{ "lslvw  $dst, $src1, $src2" %}

  ins_encode %{
    __ lslvw(as_Register($dst$$reg),
             as_Register($src1$$reg),
             as_Register($src2$$reg));
  %}

  ins_pipe(ialu_reg_reg_vshift);
%}
11149 
11150 // Shift Left Immediate
// Integer shift left by constant; amount is masked to 0..31 to match
// Java shift semantics on a 32-bit value.
instruct lShiftI_reg_imm(iRegINoSp dst, iRegIorL2I src1, immI src2) %{
  match(Set dst (LShiftI src1 src2));

  ins_cost(INSN_COST);
  format %{ "lslw $dst, $src1, ($src2 & 0x1f)" %}

  ins_encode %{
    __ lslw(as_Register($dst$$reg),
            as_Register($src1$$reg),
            $src2$$constant & 0x1f);
  %}

  ins_pipe(ialu_reg_shift);
%}
11165 
11166 // Shift Right Logical Register
// Integer unsigned shift right, variable amount in a register (lsrvw).
instruct urShiftI_reg_reg(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2) %{
  match(Set dst (URShiftI src1 src2));

  ins_cost(INSN_COST * 2);
  format %{ "lsrvw  $dst, $src1, $src2" %}

  ins_encode %{
    __ lsrvw(as_Register($dst$$reg),
             as_Register($src1$$reg),
             as_Register($src2$$reg));
  %}

  ins_pipe(ialu_reg_reg_vshift);
%}
11181 
11182 // Shift Right Logical Immediate
// Integer unsigned shift right by constant; amount masked to 0..31.
instruct urShiftI_reg_imm(iRegINoSp dst, iRegIorL2I src1, immI src2) %{
  match(Set dst (URShiftI src1 src2));

  ins_cost(INSN_COST);
  format %{ "lsrw $dst, $src1, ($src2 & 0x1f)" %}

  ins_encode %{
    __ lsrw(as_Register($dst$$reg),
            as_Register($src1$$reg),
            $src2$$constant & 0x1f);
  %}

  ins_pipe(ialu_reg_shift);
%}
11197 
11198 // Shift Right Arithmetic Register
// Integer arithmetic shift right, variable amount in a register (asrvw).
instruct rShiftI_reg_reg(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2) %{
  match(Set dst (RShiftI src1 src2));

  ins_cost(INSN_COST * 2);
  format %{ "asrvw  $dst, $src1, $src2" %}

  ins_encode %{
    __ asrvw(as_Register($dst$$reg),
             as_Register($src1$$reg),
             as_Register($src2$$reg));
  %}

  ins_pipe(ialu_reg_reg_vshift);
%}
11213 
11214 // Shift Right Arithmetic Immediate
// Integer arithmetic shift right by constant; amount masked to 0..31.
instruct rShiftI_reg_imm(iRegINoSp dst, iRegIorL2I src1, immI src2) %{
  match(Set dst (RShiftI src1 src2));

  ins_cost(INSN_COST);
  format %{ "asrw $dst, $src1, ($src2 & 0x1f)" %}

  ins_encode %{
    __ asrw(as_Register($dst$$reg),
            as_Register($src1$$reg),
            $src2$$constant & 0x1f);
  %}

  ins_pipe(ialu_reg_shift);
%}
11229 
11230 // Combined Int Mask and Right Shift (using UBFM)
11231 // TODO
11232 
11233 // Long Shifts
11234 
11235 // Shift Left Register
// Long shift left, variable amount in a register (lslv).
instruct lShiftL_reg_reg(iRegLNoSp dst, iRegL src1, iRegIorL2I src2) %{
  match(Set dst (LShiftL src1 src2));

  ins_cost(INSN_COST * 2);
  format %{ "lslv  $dst, $src1, $src2" %}

  ins_encode %{
    __ lslv(as_Register($dst$$reg),
            as_Register($src1$$reg),
            as_Register($src2$$reg));
  %}

  ins_pipe(ialu_reg_reg_vshift);
%}
11250 
11251 // Shift Left Immediate
// Long shift left by constant; amount is masked to 0..63 to match
// Java shift semantics on a 64-bit value.
instruct lShiftL_reg_imm(iRegLNoSp dst, iRegL src1, immI src2) %{
  match(Set dst (LShiftL src1 src2));

  ins_cost(INSN_COST);
  format %{ "lsl $dst, $src1, ($src2 & 0x3f)" %}

  ins_encode %{
    __ lsl(as_Register($dst$$reg),
            as_Register($src1$$reg),
            $src2$$constant & 0x3f);
  %}

  ins_pipe(ialu_reg_shift);
%}
11266 
11267 // Shift Right Logical Register
// Long unsigned shift right, variable amount in a register (lsrv).
instruct urShiftL_reg_reg(iRegLNoSp dst, iRegL src1, iRegIorL2I src2) %{
  match(Set dst (URShiftL src1 src2));

  ins_cost(INSN_COST * 2);
  format %{ "lsrv  $dst, $src1, $src2" %}

  ins_encode %{
    __ lsrv(as_Register($dst$$reg),
            as_Register($src1$$reg),
            as_Register($src2$$reg));
  %}

  ins_pipe(ialu_reg_reg_vshift);
%}
11282 
11283 // Shift Right Logical Immediate
// Long unsigned shift right by constant; amount masked to 0..63.
instruct urShiftL_reg_imm(iRegLNoSp dst, iRegL src1, immI src2) %{
  match(Set dst (URShiftL src1 src2));

  ins_cost(INSN_COST);
  format %{ "lsr $dst, $src1, ($src2 & 0x3f)" %}

  ins_encode %{
    __ lsr(as_Register($dst$$reg),
           as_Register($src1$$reg),
           $src2$$constant & 0x3f);
  %}

  ins_pipe(ialu_reg_shift);
%}
11298 
11299 // A special-case pattern for card table stores.
// A special-case pattern for card table stores: unsigned right shift of a
// pointer reinterpreted as an integer (CastP2X), e.g. addr >>> card_shift.
instruct urShiftP_reg_imm(iRegLNoSp dst, iRegP src1, immI src2) %{
  match(Set dst (URShiftL (CastP2X src1) src2));

  ins_cost(INSN_COST);
  format %{ "lsr $dst, p2x($src1), ($src2 & 0x3f)" %}

  ins_encode %{
    __ lsr(as_Register($dst$$reg),
           as_Register($src1$$reg),
           $src2$$constant & 0x3f);
  %}

  ins_pipe(ialu_reg_shift);
%}
11314 
11315 // Shift Right Arithmetic Register
// Long arithmetic shift right, variable amount in a register (asrv).
instruct rShiftL_reg_reg(iRegLNoSp dst, iRegL src1, iRegIorL2I src2) %{
  match(Set dst (RShiftL src1 src2));

  ins_cost(INSN_COST * 2);
  format %{ "asrv  $dst, $src1, $src2" %}

  ins_encode %{
    __ asrv(as_Register($dst$$reg),
            as_Register($src1$$reg),
            as_Register($src2$$reg));
  %}

  ins_pipe(ialu_reg_reg_vshift);
%}
11330 
11331 // Shift Right Arithmetic Immediate
// Long arithmetic shift right by constant; amount masked to 0..63.
instruct rShiftL_reg_imm(iRegLNoSp dst, iRegL src1, immI src2) %{
  match(Set dst (RShiftL src1 src2));

  ins_cost(INSN_COST);
  format %{ "asr $dst, $src1, ($src2 & 0x3f)" %}

  ins_encode %{
    __ asr(as_Register($dst$$reg),
           as_Register($src1$$reg),
           $src2$$constant & 0x3f);
  %}

  ins_pipe(ialu_reg_shift);
%}
11346 
11347 // BEGIN This section of the file is automatically generated. Do not edit --------------
11348 
// Long bitwise NOT: XorL with -1, emitted as eon dst, src1, zr.
// (Auto-generated section — regenerate rather than hand-edit.)
instruct regL_not_reg(iRegLNoSp dst,
                         iRegL src1, immL_M1 m1,
                         rFlagsReg cr) %{
  match(Set dst (XorL src1 m1));
  ins_cost(INSN_COST);
  format %{ "eon  $dst, $src1, zr" %}

  ins_encode %{
    __ eon(as_Register($dst$$reg),
              as_Register($src1$$reg),
              zr,
              Assembler::LSL, 0);
  %}

  ins_pipe(ialu_reg);
%}
// Integer bitwise NOT: XorI with -1, emitted as eonw dst, src1, zr.
// (Auto-generated section — regenerate rather than hand-edit.)
instruct regI_not_reg(iRegINoSp dst,
                         iRegIorL2I src1, immI_M1 m1,
                         rFlagsReg cr) %{
  match(Set dst (XorI src1 m1));
  ins_cost(INSN_COST);
  format %{ "eonw  $dst, $src1, zr" %}

  ins_encode %{
    __ eonw(as_Register($dst$$reg),
              as_Register($src1$$reg),
              zr,
              Assembler::LSL, 0);
  %}

  ins_pipe(ialu_reg);
%}
11381 
// Integer and-not: src1 & ~src2 (AndI with XorI -1), emitted as bicw.
// (Auto-generated section — regenerate rather than hand-edit.)
instruct AndI_reg_not_reg(iRegINoSp dst,
                         iRegIorL2I src1, iRegIorL2I src2, immI_M1 m1,
                         rFlagsReg cr) %{
  match(Set dst (AndI src1 (XorI src2 m1)));
  ins_cost(INSN_COST);
  format %{ "bicw  $dst, $src1, $src2" %}

  ins_encode %{
    __ bicw(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSL, 0);
  %}

  ins_pipe(ialu_reg_reg);
%}
11398 
// Long and-not: src1 & ~src2 (AndL with XorL -1), emitted as bic.
// (Auto-generated section — regenerate rather than hand-edit.)
instruct AndL_reg_not_reg(iRegLNoSp dst,
                         iRegL src1, iRegL src2, immL_M1 m1,
                         rFlagsReg cr) %{
  match(Set dst (AndL src1 (XorL src2 m1)));
  ins_cost(INSN_COST);
  format %{ "bic  $dst, $src1, $src2" %}

  ins_encode %{
    __ bic(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSL, 0);
  %}

  ins_pipe(ialu_reg_reg);
%}
11415 
// Integer or-not: src1 | ~src2 (OrI with XorI -1), emitted as ornw.
// (Auto-generated section — regenerate rather than hand-edit.)
instruct OrI_reg_not_reg(iRegINoSp dst,
                         iRegIorL2I src1, iRegIorL2I src2, immI_M1 m1,
                         rFlagsReg cr) %{
  match(Set dst (OrI src1 (XorI src2 m1)));
  ins_cost(INSN_COST);
  format %{ "ornw  $dst, $src1, $src2" %}

  ins_encode %{
    __ ornw(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSL, 0);
  %}

  ins_pipe(ialu_reg_reg);
%}
11432 
// Long or-not: src1 | ~src2 (OrL with XorL -1), emitted as orn.
// (Auto-generated section — regenerate rather than hand-edit.)
instruct OrL_reg_not_reg(iRegLNoSp dst,
                         iRegL src1, iRegL src2, immL_M1 m1,
                         rFlagsReg cr) %{
  match(Set dst (OrL src1 (XorL src2 m1)));
  ins_cost(INSN_COST);
  format %{ "orn  $dst, $src1, $src2" %}

  ins_encode %{
    __ orn(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSL, 0);
  %}

  ins_pipe(ialu_reg_reg);
%}
11449 
// Integer xor-not: -1 ^ (src2 ^ src1) == ~(src1 ^ src2), emitted as eonw.
// (Auto-generated section — regenerate rather than hand-edit.)
instruct XorI_reg_not_reg(iRegINoSp dst,
                         iRegIorL2I src1, iRegIorL2I src2, immI_M1 m1,
                         rFlagsReg cr) %{
  match(Set dst (XorI m1 (XorI src2 src1)));
  ins_cost(INSN_COST);
  format %{ "eonw  $dst, $src1, $src2" %}

  ins_encode %{
    __ eonw(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSL, 0);
  %}

  ins_pipe(ialu_reg_reg);
%}
11466 
// Long xor-not: -1 ^ (src2 ^ src1) == ~(src1 ^ src2), emitted as eon.
// (Auto-generated section — regenerate rather than hand-edit.)
instruct XorL_reg_not_reg(iRegLNoSp dst,
                         iRegL src1, iRegL src2, immL_M1 m1,
                         rFlagsReg cr) %{
  match(Set dst (XorL m1 (XorL src2 src1)));
  ins_cost(INSN_COST);
  format %{ "eon  $dst, $src1, $src2" %}

  ins_encode %{
    __ eon(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSL, 0);
  %}

  ins_pipe(ialu_reg_reg);
%}
11483 
// Integer and-not with shifted operand: src1 & ~(src2 >>> src3),
// emitted as bicw with an LSR-shifted second register.
// (Auto-generated section — regenerate rather than hand-edit.)
instruct AndI_reg_URShift_not_reg(iRegINoSp dst,
                         iRegIorL2I src1, iRegIorL2I src2,
                         immI src3, immI_M1 src4, rFlagsReg cr) %{
  match(Set dst (AndI src1 (XorI(URShiftI src2 src3) src4)));
  ins_cost(1.9 * INSN_COST);
  format %{ "bicw  $dst, $src1, $src2, LSR $src3" %}

  ins_encode %{
    __ bicw(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSR,
              $src3$$constant & 0x1f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
11501 
// Long and-not with shifted operand: src1 & ~(src2 >>> src3),
// emitted as bic with an LSR-shifted second register.
// (Auto-generated section — regenerate rather than hand-edit.)
instruct AndL_reg_URShift_not_reg(iRegLNoSp dst,
                         iRegL src1, iRegL src2,
                         immI src3, immL_M1 src4, rFlagsReg cr) %{
  match(Set dst (AndL src1 (XorL(URShiftL src2 src3) src4)));
  ins_cost(1.9 * INSN_COST);
  format %{ "bic  $dst, $src1, $src2, LSR $src3" %}

  ins_encode %{
    __ bic(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSR,
              $src3$$constant & 0x3f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
11519 
// Integer and-not with shifted operand: src1 & ~(src2 >> src3),
// emitted as bicw with an ASR-shifted second register.
// (Auto-generated section — regenerate rather than hand-edit.)
instruct AndI_reg_RShift_not_reg(iRegINoSp dst,
                         iRegIorL2I src1, iRegIorL2I src2,
                         immI src3, immI_M1 src4, rFlagsReg cr) %{
  match(Set dst (AndI src1 (XorI(RShiftI src2 src3) src4)));
  ins_cost(1.9 * INSN_COST);
  format %{ "bicw  $dst, $src1, $src2, ASR $src3" %}

  ins_encode %{
    __ bicw(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::ASR,
              $src3$$constant & 0x1f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
11537 
// Long and-not with shifted operand: src1 & ~(src2 >> src3),
// emitted as bic with an ASR-shifted second register.
// (Auto-generated section — regenerate rather than hand-edit.)
instruct AndL_reg_RShift_not_reg(iRegLNoSp dst,
                         iRegL src1, iRegL src2,
                         immI src3, immL_M1 src4, rFlagsReg cr) %{
  match(Set dst (AndL src1 (XorL(RShiftL src2 src3) src4)));
  ins_cost(1.9 * INSN_COST);
  format %{ "bic  $dst, $src1, $src2, ASR $src3" %}

  ins_encode %{
    __ bic(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::ASR,
              $src3$$constant & 0x3f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
11555 
// Integer and-not with shifted operand: src1 & ~(src2 << src3),
// emitted as bicw with an LSL-shifted second register.
// (Auto-generated section — regenerate rather than hand-edit.)
instruct AndI_reg_LShift_not_reg(iRegINoSp dst,
                         iRegIorL2I src1, iRegIorL2I src2,
                         immI src3, immI_M1 src4, rFlagsReg cr) %{
  match(Set dst (AndI src1 (XorI(LShiftI src2 src3) src4)));
  ins_cost(1.9 * INSN_COST);
  format %{ "bicw  $dst, $src1, $src2, LSL $src3" %}

  ins_encode %{
    __ bicw(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSL,
              $src3$$constant & 0x1f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
11573 
// Long and-not with shifted operand: src1 & ~(src2 << src3),
// emitted as bic with an LSL-shifted second register.
// (Auto-generated section — regenerate rather than hand-edit.)
instruct AndL_reg_LShift_not_reg(iRegLNoSp dst,
                         iRegL src1, iRegL src2,
                         immI src3, immL_M1 src4, rFlagsReg cr) %{
  match(Set dst (AndL src1 (XorL(LShiftL src2 src3) src4)));
  ins_cost(1.9 * INSN_COST);
  format %{ "bic  $dst, $src1, $src2, LSL $src3" %}

  ins_encode %{
    __ bic(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSL,
              $src3$$constant & 0x3f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
11591 
// dst = src1 ^ ~(src2 >>> src3): one eonw with an LSR-shifted operand.
// src4 (immI_M1) is the -1 of the outer XorI, turning it into a NOT.
// Unused rFlagsReg operand removed: eonw does not write the flags.
instruct XorI_reg_URShift_not_reg(iRegINoSp dst,
                         iRegIorL2I src1, iRegIorL2I src2,
                         immI src3, immI_M1 src4) %{
  match(Set dst (XorI src4 (XorI(URShiftI src2 src3) src1)));
  ins_cost(1.9 * INSN_COST);
  format %{ "eonw  $dst, $src1, $src2, LSR $src3" %}

  ins_encode %{
    __ eonw(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSR,
              $src3$$constant & 0x1f);  // JLS: int shift count mod 32
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
11609 
// dst = src1 ^ ~(src2 >>> src3): one eon with an LSR-shifted operand.
// src4 (immL_M1) is the -1 of the outer XorL, turning it into a NOT.
// Unused rFlagsReg operand removed: eon does not write the flags.
instruct XorL_reg_URShift_not_reg(iRegLNoSp dst,
                         iRegL src1, iRegL src2,
                         immI src3, immL_M1 src4) %{
  match(Set dst (XorL src4 (XorL(URShiftL src2 src3) src1)));
  ins_cost(1.9 * INSN_COST);
  format %{ "eon  $dst, $src1, $src2, LSR $src3" %}

  ins_encode %{
    __ eon(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSR,
              $src3$$constant & 0x3f);  // JLS: long shift count mod 64
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
11627 
// dst = src1 ^ ~(src2 >> src3): one eonw with an ASR-shifted operand.
// src4 (immI_M1) is the -1 of the outer XorI, turning it into a NOT.
// Unused rFlagsReg operand removed: eonw does not write the flags.
instruct XorI_reg_RShift_not_reg(iRegINoSp dst,
                         iRegIorL2I src1, iRegIorL2I src2,
                         immI src3, immI_M1 src4) %{
  match(Set dst (XorI src4 (XorI(RShiftI src2 src3) src1)));
  ins_cost(1.9 * INSN_COST);
  format %{ "eonw  $dst, $src1, $src2, ASR $src3" %}

  ins_encode %{
    __ eonw(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::ASR,
              $src3$$constant & 0x1f);  // JLS: int shift count mod 32
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
11645 
// dst = src1 ^ ~(src2 >> src3): one eon with an ASR-shifted operand.
// src4 (immL_M1) is the -1 of the outer XorL, turning it into a NOT.
// Unused rFlagsReg operand removed: eon does not write the flags.
instruct XorL_reg_RShift_not_reg(iRegLNoSp dst,
                         iRegL src1, iRegL src2,
                         immI src3, immL_M1 src4) %{
  match(Set dst (XorL src4 (XorL(RShiftL src2 src3) src1)));
  ins_cost(1.9 * INSN_COST);
  format %{ "eon  $dst, $src1, $src2, ASR $src3" %}

  ins_encode %{
    __ eon(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::ASR,
              $src3$$constant & 0x3f);  // JLS: long shift count mod 64
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
11663 
// dst = src1 ^ ~(src2 << src3): one eonw with an LSL-shifted operand.
// src4 (immI_M1) is the -1 of the outer XorI, turning it into a NOT.
// Unused rFlagsReg operand removed: eonw does not write the flags.
instruct XorI_reg_LShift_not_reg(iRegINoSp dst,
                         iRegIorL2I src1, iRegIorL2I src2,
                         immI src3, immI_M1 src4) %{
  match(Set dst (XorI src4 (XorI(LShiftI src2 src3) src1)));
  ins_cost(1.9 * INSN_COST);
  format %{ "eonw  $dst, $src1, $src2, LSL $src3" %}

  ins_encode %{
    __ eonw(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSL,
              $src3$$constant & 0x1f);  // JLS: int shift count mod 32
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
11681 
// dst = src1 ^ ~(src2 << src3): one eon with an LSL-shifted operand.
// src4 (immL_M1) is the -1 of the outer XorL, turning it into a NOT.
// Unused rFlagsReg operand removed: eon does not write the flags.
instruct XorL_reg_LShift_not_reg(iRegLNoSp dst,
                         iRegL src1, iRegL src2,
                         immI src3, immL_M1 src4) %{
  match(Set dst (XorL src4 (XorL(LShiftL src2 src3) src1)));
  ins_cost(1.9 * INSN_COST);
  format %{ "eon  $dst, $src1, $src2, LSL $src3" %}

  ins_encode %{
    __ eon(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSL,
              $src3$$constant & 0x3f);  // JLS: long shift count mod 64
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
11699 
// dst = src1 | ~(src2 >>> src3): one ornw with an LSR-shifted operand.
// src4 (immI_M1) pins the XorI operand to -1, i.e. a bitwise NOT.
// Unused rFlagsReg operand removed: ornw does not write the flags.
instruct OrI_reg_URShift_not_reg(iRegINoSp dst,
                         iRegIorL2I src1, iRegIorL2I src2,
                         immI src3, immI_M1 src4) %{
  match(Set dst (OrI src1 (XorI(URShiftI src2 src3) src4)));
  ins_cost(1.9 * INSN_COST);
  format %{ "ornw  $dst, $src1, $src2, LSR $src3" %}

  ins_encode %{
    __ ornw(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSR,
              $src3$$constant & 0x1f);  // JLS: int shift count mod 32
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
11717 
// dst = src1 | ~(src2 >>> src3): one orn with an LSR-shifted operand.
// src4 (immL_M1) pins the XorL operand to -1, i.e. a bitwise NOT.
// Unused rFlagsReg operand removed: orn does not write the flags.
instruct OrL_reg_URShift_not_reg(iRegLNoSp dst,
                         iRegL src1, iRegL src2,
                         immI src3, immL_M1 src4) %{
  match(Set dst (OrL src1 (XorL(URShiftL src2 src3) src4)));
  ins_cost(1.9 * INSN_COST);
  format %{ "orn  $dst, $src1, $src2, LSR $src3" %}

  ins_encode %{
    __ orn(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSR,
              $src3$$constant & 0x3f);  // JLS: long shift count mod 64
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
11735 
// dst = src1 | ~(src2 >> src3): one ornw with an ASR-shifted operand.
// src4 (immI_M1) pins the XorI operand to -1, i.e. a bitwise NOT.
// Unused rFlagsReg operand removed: ornw does not write the flags.
instruct OrI_reg_RShift_not_reg(iRegINoSp dst,
                         iRegIorL2I src1, iRegIorL2I src2,
                         immI src3, immI_M1 src4) %{
  match(Set dst (OrI src1 (XorI(RShiftI src2 src3) src4)));
  ins_cost(1.9 * INSN_COST);
  format %{ "ornw  $dst, $src1, $src2, ASR $src3" %}

  ins_encode %{
    __ ornw(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::ASR,
              $src3$$constant & 0x1f);  // JLS: int shift count mod 32
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
11753 
// dst = src1 | ~(src2 >> src3): one orn with an ASR-shifted operand.
// src4 (immL_M1) pins the XorL operand to -1, i.e. a bitwise NOT.
// Unused rFlagsReg operand removed: orn does not write the flags.
instruct OrL_reg_RShift_not_reg(iRegLNoSp dst,
                         iRegL src1, iRegL src2,
                         immI src3, immL_M1 src4) %{
  match(Set dst (OrL src1 (XorL(RShiftL src2 src3) src4)));
  ins_cost(1.9 * INSN_COST);
  format %{ "orn  $dst, $src1, $src2, ASR $src3" %}

  ins_encode %{
    __ orn(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::ASR,
              $src3$$constant & 0x3f);  // JLS: long shift count mod 64
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
11771 
// dst = src1 | ~(src2 << src3): one ornw with an LSL-shifted operand.
// src4 (immI_M1) pins the XorI operand to -1, i.e. a bitwise NOT.
// Unused rFlagsReg operand removed: ornw does not write the flags.
instruct OrI_reg_LShift_not_reg(iRegINoSp dst,
                         iRegIorL2I src1, iRegIorL2I src2,
                         immI src3, immI_M1 src4) %{
  match(Set dst (OrI src1 (XorI(LShiftI src2 src3) src4)));
  ins_cost(1.9 * INSN_COST);
  format %{ "ornw  $dst, $src1, $src2, LSL $src3" %}

  ins_encode %{
    __ ornw(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSL,
              $src3$$constant & 0x1f);  // JLS: int shift count mod 32
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
11789 
// dst = src1 | ~(src2 << src3): one orn with an LSL-shifted operand.
// src4 (immL_M1) pins the XorL operand to -1, i.e. a bitwise NOT.
// Unused rFlagsReg operand removed: orn does not write the flags.
instruct OrL_reg_LShift_not_reg(iRegLNoSp dst,
                         iRegL src1, iRegL src2,
                         immI src3, immL_M1 src4) %{
  match(Set dst (OrL src1 (XorL(LShiftL src2 src3) src4)));
  ins_cost(1.9 * INSN_COST);
  format %{ "orn  $dst, $src1, $src2, LSL $src3" %}

  ins_encode %{
    __ orn(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSL,
              $src3$$constant & 0x3f);  // JLS: long shift count mod 64
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
11807 
// dst = src1 & (src2 >>> src3), folded into one andw with LSR-shifted operand.
// Unused rFlagsReg operand removed: the non-flag-setting andw is emitted.
instruct AndI_reg_URShift_reg(iRegINoSp dst,
                         iRegIorL2I src1, iRegIorL2I src2,
                         immI src3) %{
  match(Set dst (AndI src1 (URShiftI src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "andw  $dst, $src1, $src2, LSR $src3" %}

  ins_encode %{
    __ andw(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSR,
              $src3$$constant & 0x1f);  // JLS: int shift count mod 32
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
11826 
// dst = src1 & (src2 >>> src3), folded into one andr with LSR-shifted operand.
// Unused rFlagsReg operand removed: the non-flag-setting andr is emitted.
instruct AndL_reg_URShift_reg(iRegLNoSp dst,
                         iRegL src1, iRegL src2,
                         immI src3) %{
  match(Set dst (AndL src1 (URShiftL src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "andr  $dst, $src1, $src2, LSR $src3" %}

  ins_encode %{
    __ andr(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSR,
              $src3$$constant & 0x3f);  // JLS: long shift count mod 64
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
11845 
// dst = src1 & (src2 >> src3), folded into one andw with ASR-shifted operand.
// Unused rFlagsReg operand removed: the non-flag-setting andw is emitted.
instruct AndI_reg_RShift_reg(iRegINoSp dst,
                         iRegIorL2I src1, iRegIorL2I src2,
                         immI src3) %{
  match(Set dst (AndI src1 (RShiftI src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "andw  $dst, $src1, $src2, ASR $src3" %}

  ins_encode %{
    __ andw(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::ASR,
              $src3$$constant & 0x1f);  // JLS: int shift count mod 32
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
11864 
// dst = src1 & (src2 >> src3), folded into one andr with ASR-shifted operand.
// Unused rFlagsReg operand removed: the non-flag-setting andr is emitted.
instruct AndL_reg_RShift_reg(iRegLNoSp dst,
                         iRegL src1, iRegL src2,
                         immI src3) %{
  match(Set dst (AndL src1 (RShiftL src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "andr  $dst, $src1, $src2, ASR $src3" %}

  ins_encode %{
    __ andr(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::ASR,
              $src3$$constant & 0x3f);  // JLS: long shift count mod 64
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
11883 
// dst = src1 & (src2 << src3), folded into one andw with LSL-shifted operand.
// Unused rFlagsReg operand removed: the non-flag-setting andw is emitted.
instruct AndI_reg_LShift_reg(iRegINoSp dst,
                         iRegIorL2I src1, iRegIorL2I src2,
                         immI src3) %{
  match(Set dst (AndI src1 (LShiftI src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "andw  $dst, $src1, $src2, LSL $src3" %}

  ins_encode %{
    __ andw(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSL,
              $src3$$constant & 0x1f);  // JLS: int shift count mod 32
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
11902 
// dst = src1 & (src2 << src3), folded into one andr with LSL-shifted operand.
// Unused rFlagsReg operand removed: the non-flag-setting andr is emitted.
instruct AndL_reg_LShift_reg(iRegLNoSp dst,
                         iRegL src1, iRegL src2,
                         immI src3) %{
  match(Set dst (AndL src1 (LShiftL src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "andr  $dst, $src1, $src2, LSL $src3" %}

  ins_encode %{
    __ andr(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSL,
              $src3$$constant & 0x3f);  // JLS: long shift count mod 64
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
11921 
// dst = src1 ^ (src2 >>> src3), folded into one eorw with LSR-shifted operand.
// Unused rFlagsReg operand removed: eorw does not write the flags.
instruct XorI_reg_URShift_reg(iRegINoSp dst,
                         iRegIorL2I src1, iRegIorL2I src2,
                         immI src3) %{
  match(Set dst (XorI src1 (URShiftI src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "eorw  $dst, $src1, $src2, LSR $src3" %}

  ins_encode %{
    __ eorw(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSR,
              $src3$$constant & 0x1f);  // JLS: int shift count mod 32
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
11940 
// dst = src1 ^ (src2 >>> src3), folded into one eor with LSR-shifted operand.
// Unused rFlagsReg operand removed: eor does not write the flags.
instruct XorL_reg_URShift_reg(iRegLNoSp dst,
                         iRegL src1, iRegL src2,
                         immI src3) %{
  match(Set dst (XorL src1 (URShiftL src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "eor  $dst, $src1, $src2, LSR $src3" %}

  ins_encode %{
    __ eor(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSR,
              $src3$$constant & 0x3f);  // JLS: long shift count mod 64
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
11959 
// dst = src1 ^ (src2 >> src3), folded into one eorw with ASR-shifted operand.
// Unused rFlagsReg operand removed: eorw does not write the flags.
instruct XorI_reg_RShift_reg(iRegINoSp dst,
                         iRegIorL2I src1, iRegIorL2I src2,
                         immI src3) %{
  match(Set dst (XorI src1 (RShiftI src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "eorw  $dst, $src1, $src2, ASR $src3" %}

  ins_encode %{
    __ eorw(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::ASR,
              $src3$$constant & 0x1f);  // JLS: int shift count mod 32
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
11978 
// dst = src1 ^ (src2 >> src3), folded into one eor with ASR-shifted operand.
// Unused rFlagsReg operand removed: eor does not write the flags.
instruct XorL_reg_RShift_reg(iRegLNoSp dst,
                         iRegL src1, iRegL src2,
                         immI src3) %{
  match(Set dst (XorL src1 (RShiftL src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "eor  $dst, $src1, $src2, ASR $src3" %}

  ins_encode %{
    __ eor(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::ASR,
              $src3$$constant & 0x3f);  // JLS: long shift count mod 64
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
11997 
// dst = src1 ^ (src2 << src3), folded into one eorw with LSL-shifted operand.
// Unused rFlagsReg operand removed: eorw does not write the flags.
instruct XorI_reg_LShift_reg(iRegINoSp dst,
                         iRegIorL2I src1, iRegIorL2I src2,
                         immI src3) %{
  match(Set dst (XorI src1 (LShiftI src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "eorw  $dst, $src1, $src2, LSL $src3" %}

  ins_encode %{
    __ eorw(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSL,
              $src3$$constant & 0x1f);  // JLS: int shift count mod 32
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
12016 
// dst = src1 ^ (src2 << src3), folded into one eor with LSL-shifted operand.
// Unused rFlagsReg operand removed: eor does not write the flags.
instruct XorL_reg_LShift_reg(iRegLNoSp dst,
                         iRegL src1, iRegL src2,
                         immI src3) %{
  match(Set dst (XorL src1 (LShiftL src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "eor  $dst, $src1, $src2, LSL $src3" %}

  ins_encode %{
    __ eor(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSL,
              $src3$$constant & 0x3f);  // JLS: long shift count mod 64
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
12035 
// dst = src1 | (src2 >>> src3), folded into one orrw with LSR-shifted operand.
// Unused rFlagsReg operand removed: orrw does not write the flags.
instruct OrI_reg_URShift_reg(iRegINoSp dst,
                         iRegIorL2I src1, iRegIorL2I src2,
                         immI src3) %{
  match(Set dst (OrI src1 (URShiftI src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "orrw  $dst, $src1, $src2, LSR $src3" %}

  ins_encode %{
    __ orrw(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSR,
              $src3$$constant & 0x1f);  // JLS: int shift count mod 32
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
12054 
// dst = src1 | (src2 >>> src3), folded into one orr with LSR-shifted operand.
// Unused rFlagsReg operand removed: orr does not write the flags.
instruct OrL_reg_URShift_reg(iRegLNoSp dst,
                         iRegL src1, iRegL src2,
                         immI src3) %{
  match(Set dst (OrL src1 (URShiftL src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "orr  $dst, $src1, $src2, LSR $src3" %}

  ins_encode %{
    __ orr(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSR,
              $src3$$constant & 0x3f);  // JLS: long shift count mod 64
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
12073 
// dst = src1 | (src2 >> src3), folded into one orrw with ASR-shifted operand.
// Unused rFlagsReg operand removed: orrw does not write the flags.
instruct OrI_reg_RShift_reg(iRegINoSp dst,
                         iRegIorL2I src1, iRegIorL2I src2,
                         immI src3) %{
  match(Set dst (OrI src1 (RShiftI src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "orrw  $dst, $src1, $src2, ASR $src3" %}

  ins_encode %{
    __ orrw(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::ASR,
              $src3$$constant & 0x1f);  // JLS: int shift count mod 32
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
12092 
// dst = src1 | (src2 >> src3), folded into one orr with ASR-shifted operand.
// Unused rFlagsReg operand removed: orr does not write the flags.
instruct OrL_reg_RShift_reg(iRegLNoSp dst,
                         iRegL src1, iRegL src2,
                         immI src3) %{
  match(Set dst (OrL src1 (RShiftL src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "orr  $dst, $src1, $src2, ASR $src3" %}

  ins_encode %{
    __ orr(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::ASR,
              $src3$$constant & 0x3f);  // JLS: long shift count mod 64
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
12111 
// dst = src1 | (src2 << src3), folded into one orrw with LSL-shifted operand.
// Unused rFlagsReg operand removed: orrw does not write the flags.
instruct OrI_reg_LShift_reg(iRegINoSp dst,
                         iRegIorL2I src1, iRegIorL2I src2,
                         immI src3) %{
  match(Set dst (OrI src1 (LShiftI src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "orrw  $dst, $src1, $src2, LSL $src3" %}

  ins_encode %{
    __ orrw(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSL,
              $src3$$constant & 0x1f);  // JLS: int shift count mod 32
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
12130 
// dst = src1 | (src2 << src3), folded into one orr with LSL-shifted operand.
// Unused rFlagsReg operand removed: orr does not write the flags.
instruct OrL_reg_LShift_reg(iRegLNoSp dst,
                         iRegL src1, iRegL src2,
                         immI src3) %{
  match(Set dst (OrL src1 (LShiftL src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "orr  $dst, $src1, $src2, LSL $src3" %}

  ins_encode %{
    __ orr(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSL,
              $src3$$constant & 0x3f);  // JLS: long shift count mod 64
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
12149 
// dst = src1 + (src2 >>> src3), folded into one addw with LSR-shifted operand.
// Unused rFlagsReg operand removed: the non-flag-setting addw is emitted.
instruct AddI_reg_URShift_reg(iRegINoSp dst,
                         iRegIorL2I src1, iRegIorL2I src2,
                         immI src3) %{
  match(Set dst (AddI src1 (URShiftI src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "addw  $dst, $src1, $src2, LSR $src3" %}

  ins_encode %{
    __ addw(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSR,
              $src3$$constant & 0x1f);  // JLS: int shift count mod 32
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
12168 
// dst = src1 + (src2 >>> src3), folded into one add with LSR-shifted operand.
// Unused rFlagsReg operand removed: the non-flag-setting add is emitted.
instruct AddL_reg_URShift_reg(iRegLNoSp dst,
                         iRegL src1, iRegL src2,
                         immI src3) %{
  match(Set dst (AddL src1 (URShiftL src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "add  $dst, $src1, $src2, LSR $src3" %}

  ins_encode %{
    __ add(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSR,
              $src3$$constant & 0x3f);  // JLS: long shift count mod 64
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
12187 
// dst = src1 + (src2 >> src3), folded into one addw with ASR-shifted operand.
// Unused rFlagsReg operand removed: the non-flag-setting addw is emitted.
instruct AddI_reg_RShift_reg(iRegINoSp dst,
                         iRegIorL2I src1, iRegIorL2I src2,
                         immI src3) %{
  match(Set dst (AddI src1 (RShiftI src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "addw  $dst, $src1, $src2, ASR $src3" %}

  ins_encode %{
    __ addw(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::ASR,
              $src3$$constant & 0x1f);  // JLS: int shift count mod 32
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
12206 
// dst = src1 + (src2 >> src3), folded into one add with ASR-shifted operand.
// Unused rFlagsReg operand removed: the non-flag-setting add is emitted.
instruct AddL_reg_RShift_reg(iRegLNoSp dst,
                         iRegL src1, iRegL src2,
                         immI src3) %{
  match(Set dst (AddL src1 (RShiftL src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "add  $dst, $src1, $src2, ASR $src3" %}

  ins_encode %{
    __ add(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::ASR,
              $src3$$constant & 0x3f);  // JLS: long shift count mod 64
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
12225 
// dst = src1 + (src2 << src3), folded into one addw with LSL-shifted operand.
// Unused rFlagsReg operand removed: the non-flag-setting addw is emitted.
instruct AddI_reg_LShift_reg(iRegINoSp dst,
                         iRegIorL2I src1, iRegIorL2I src2,
                         immI src3) %{
  match(Set dst (AddI src1 (LShiftI src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "addw  $dst, $src1, $src2, LSL $src3" %}

  ins_encode %{
    __ addw(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSL,
              $src3$$constant & 0x1f);  // JLS: int shift count mod 32
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
12244 
// dst = src1 + (src2 << src3), folded into one add with LSL-shifted operand.
// Unused rFlagsReg operand removed: the non-flag-setting add is emitted.
instruct AddL_reg_LShift_reg(iRegLNoSp dst,
                         iRegL src1, iRegL src2,
                         immI src3) %{
  match(Set dst (AddL src1 (LShiftL src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "add  $dst, $src1, $src2, LSL $src3" %}

  ins_encode %{
    __ add(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSL,
              $src3$$constant & 0x3f);  // JLS: long shift count mod 64
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
12263 
// dst = src1 - (src2 >>> src3), folded into one subw with LSR-shifted operand.
// Unused rFlagsReg operand removed: the non-flag-setting subw is emitted.
instruct SubI_reg_URShift_reg(iRegINoSp dst,
                         iRegIorL2I src1, iRegIorL2I src2,
                         immI src3) %{
  match(Set dst (SubI src1 (URShiftI src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "subw  $dst, $src1, $src2, LSR $src3" %}

  ins_encode %{
    __ subw(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSR,
              $src3$$constant & 0x1f);  // JLS: int shift count mod 32
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
12282 
// dst = src1 - (src2 >>> src3), folded into one sub with LSR-shifted operand.
// Unused rFlagsReg operand removed: the non-flag-setting sub is emitted.
instruct SubL_reg_URShift_reg(iRegLNoSp dst,
                         iRegL src1, iRegL src2,
                         immI src3) %{
  match(Set dst (SubL src1 (URShiftL src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "sub  $dst, $src1, $src2, LSR $src3" %}

  ins_encode %{
    __ sub(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSR,
              $src3$$constant & 0x3f);  // JLS: long shift count mod 64
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
12301 
// dst = src1 - (src2 >> src3), folded into one subw with ASR-shifted operand.
// Unused rFlagsReg operand removed: the non-flag-setting subw is emitted.
instruct SubI_reg_RShift_reg(iRegINoSp dst,
                         iRegIorL2I src1, iRegIorL2I src2,
                         immI src3) %{
  match(Set dst (SubI src1 (RShiftI src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "subw  $dst, $src1, $src2, ASR $src3" %}

  ins_encode %{
    __ subw(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::ASR,
              $src3$$constant & 0x1f);  // JLS: int shift count mod 32
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
12320 
// dst = src1 - (src2 >> src3), folded into one sub with ASR-shifted operand.
// Unused rFlagsReg operand removed: the non-flag-setting sub is emitted.
instruct SubL_reg_RShift_reg(iRegLNoSp dst,
                         iRegL src1, iRegL src2,
                         immI src3) %{
  match(Set dst (SubL src1 (RShiftL src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "sub  $dst, $src1, $src2, ASR $src3" %}

  ins_encode %{
    __ sub(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::ASR,
              $src3$$constant & 0x3f);  // JLS: long shift count mod 64
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
12339 
// dst = src1 - (src2 << src3), folded into one subw with LSL-shifted operand.
// Unused rFlagsReg operand removed: the non-flag-setting subw is emitted.
instruct SubI_reg_LShift_reg(iRegINoSp dst,
                         iRegIorL2I src1, iRegIorL2I src2,
                         immI src3) %{
  match(Set dst (SubI src1 (LShiftI src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "subw  $dst, $src1, $src2, LSL $src3" %}

  ins_encode %{
    __ subw(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSL,
              $src3$$constant & 0x1f);  // JLS: int shift count mod 32
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
12358 
// dst = src1 - (src2 << src3), folded into one sub with LSL-shifted operand.
// Unused rFlagsReg operand removed: the non-flag-setting sub is emitted.
instruct SubL_reg_LShift_reg(iRegLNoSp dst,
                         iRegL src1, iRegL src2,
                         immI src3) %{
  match(Set dst (SubL src1 (LShiftL src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "sub  $dst, $src1, $src2, LSL $src3" %}

  ins_encode %{
    __ sub(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSL,
              $src3$$constant & 0x3f);  // JLS: long shift count mod 64
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
12377 
12378 
12379 
// Shift Left followed by Shift Right.
// This idiom is used by the compiler for the i2b bytecode etc.
// (x << lshift) >> rshift collapses to one signed bitfield move (SBFM):
// extract the top (64 - lshift) bits as a signed field and rotate it into
// position.
instruct sbfmL(iRegLNoSp dst, iRegL src, immI lshift_count, immI rshift_count)
%{
  match(Set dst (RShiftL (LShiftL src lshift_count) rshift_count));
  // Make sure we are not going to exceed what sbfm can do.
  predicate((unsigned int)n->in(2)->get_int() <= 63
            && (unsigned int)n->in(1)->in(2)->get_int() <= 63);

  ins_cost(INSN_COST * 2);
  format %{ "sbfm  $dst, $src, $rshift_count - $lshift_count, #63 - $lshift_count" %}
  ins_encode %{
    int lshift = $lshift_count$$constant, rshift = $rshift_count$$constant;
    // SBFM operands: imms (s) = index of the field's top source bit,
    // immr (r) = right-rotate amount placing the field at the target position.
    int s = 63 - lshift;
    int r = (rshift - lshift) & 63;
    __ sbfm(as_Register($dst$$reg),
            as_Register($src$$reg),
            r, s);
  %}

  ins_pipe(ialu_reg_shift);
%}
12402 
// Shift Left followed by Shift Right.
// This idiom is used by the compiler for the i2b bytecode etc.
// 32-bit variant of sbfmL: (x << lshift) >> rshift becomes one SBFMW.
instruct sbfmwI(iRegINoSp dst, iRegIorL2I src, immI lshift_count, immI rshift_count)
%{
  match(Set dst (RShiftI (LShiftI src lshift_count) rshift_count));
  // Make sure we are not going to exceed what sbfmw can do.
  predicate((unsigned int)n->in(2)->get_int() <= 31
            && (unsigned int)n->in(1)->in(2)->get_int() <= 31);

  ins_cost(INSN_COST * 2);
  format %{ "sbfmw  $dst, $src, $rshift_count - $lshift_count, #31 - $lshift_count" %}
  ins_encode %{
    int lshift = $lshift_count$$constant, rshift = $rshift_count$$constant;
    // imms (s) = top source bit of the field, immr (r) = rotate amount;
    // see sbfmL for the derivation, with 31 in place of 63.
    int s = 31 - lshift;
    int r = (rshift - lshift) & 31;
    __ sbfmw(as_Register($dst$$reg),
            as_Register($src$$reg),
            r, s);
  %}

  ins_pipe(ialu_reg_shift);
%}
12425 
// Shift Left followed by Shift Right.
// This idiom is used by the compiler for the i2b bytecode etc.
// Unsigned counterpart of sbfmL: (x << lshift) >>> rshift becomes one UBFM
// (zero-extending bitfield move).
instruct ubfmL(iRegLNoSp dst, iRegL src, immI lshift_count, immI rshift_count)
%{
  match(Set dst (URShiftL (LShiftL src lshift_count) rshift_count));
  // Make sure we are not going to exceed what ubfm can do.
  predicate((unsigned int)n->in(2)->get_int() <= 63
            && (unsigned int)n->in(1)->in(2)->get_int() <= 63);

  ins_cost(INSN_COST * 2);
  format %{ "ubfm  $dst, $src, $rshift_count - $lshift_count, #63 - $lshift_count" %}
  ins_encode %{
    int lshift = $lshift_count$$constant, rshift = $rshift_count$$constant;
    // imms (s) = top source bit of the field, immr (r) = rotate amount;
    // same encoding derivation as sbfmL but with zero extension.
    int s = 63 - lshift;
    int r = (rshift - lshift) & 63;
    __ ubfm(as_Register($dst$$reg),
            as_Register($src$$reg),
            r, s);
  %}

  ins_pipe(ialu_reg_shift);
%}
12448 
// Shift Left followed by Shift Right.
// This idiom is used by the compiler for the i2b bytecode etc.
// 32-bit unsigned variant: (x << lshift) >>> rshift becomes one UBFMW.
instruct ubfmwI(iRegINoSp dst, iRegIorL2I src, immI lshift_count, immI rshift_count)
%{
  match(Set dst (URShiftI (LShiftI src lshift_count) rshift_count));
  // Make sure we are not going to exceed what ubfmw can do.
  predicate((unsigned int)n->in(2)->get_int() <= 31
            && (unsigned int)n->in(1)->in(2)->get_int() <= 31);

  ins_cost(INSN_COST * 2);
  format %{ "ubfmw  $dst, $src, $rshift_count - $lshift_count, #31 - $lshift_count" %}
  ins_encode %{
    int lshift = $lshift_count$$constant, rshift = $rshift_count$$constant;
    // imms (s) = top source bit of the field, immr (r) = rotate amount;
    // see ubfmL, with 31 in place of 63.
    int s = 31 - lshift;
    int r = (rshift - lshift) & 31;
    __ ubfmw(as_Register($dst$$reg),
            as_Register($src$$reg),
            r, s);
  %}

  ins_pipe(ialu_reg_shift);
%}
// Bitfield extract with shift & mask

// dst = (src >>> rshift) & mask, with mask of the form (1 << width) - 1,
// collapses to one UBFXW (unsigned bitfield extract).
instruct ubfxwI(iRegINoSp dst, iRegIorL2I src, immI rshift, immI_bitmask mask)
%{
  match(Set dst (AndI (URShiftI src rshift) mask));

  ins_cost(INSN_COST);
  format %{ "ubfxw $dst, $src, $rshift, $mask" %}
  ins_encode %{
    int rshift = $rshift$$constant;
    long mask = $mask$$constant;
    // immI_bitmask guarantees mask + 1 is a power of two, so exact_log2
    // yields the exact field width in bits.
    int width = exact_log2(mask+1);
    __ ubfxw(as_Register($dst$$reg),
            as_Register($src$$reg), rshift, width);
  %}
  ins_pipe(ialu_reg_shift);
%}
// 64-bit variant of ubfxwI: dst = (src >>> rshift) & mask as one UBFX.
instruct ubfxL(iRegLNoSp dst, iRegL src, immI rshift, immL_bitmask mask)
%{
  match(Set dst (AndL (URShiftL src rshift) mask));

  ins_cost(INSN_COST);
  format %{ "ubfx $dst, $src, $rshift, $mask" %}
  ins_encode %{
    int rshift = $rshift$$constant;
    // NOTE(review): `long` is only 32 bits on LLP64 targets, but this holds
    // a 64-bit immL_bitmask value — verify on such platforms (cf. julong).
    long mask = $mask$$constant;
    // immL_bitmask guarantees mask + 1 is a power of two, so exact_log2
    // yields the exact field width in bits.
    int width = exact_log2(mask+1);
    __ ubfx(as_Register($dst$$reg),
            as_Register($src$$reg), rshift, width);
  %}
  ins_pipe(ialu_reg_shift);
%}
12503 
12504 // We can use ubfx when extending an And with a mask when we know mask
12505 // is positive.  We know that because immI_bitmask guarantees it.
// Same shift-and-mask extract as ubfxwI, but the int result is then
// converted to long (ConvI2L).  Emitting ubfx with the 64-bit
// destination covers the zero-extension, so no extra instruction is
// needed.
instruct ubfxIConvI2L(iRegLNoSp dst, iRegIorL2I src, immI rshift, immI_bitmask mask)
%{
  match(Set dst (ConvI2L (AndI (URShiftI src rshift) mask)));

  ins_cost(INSN_COST * 2);
  format %{ "ubfx $dst, $src, $rshift, $mask" %}
  ins_encode %{
    int rshift = $rshift$$constant;
    long mask = $mask$$constant;
    // mask is an int-ranged immI_bitmask, so the 32-bit exact_log2 is
    // sufficient here.
    int width = exact_log2(mask+1);
    __ ubfx(as_Register($dst$$reg),
            as_Register($src$$reg), rshift, width);
  %}
  ins_pipe(ialu_reg_shift);
%}
12521 
12522 // We can use ubfiz when masking by a positive number and then left shifting the result.
12523 // We know that the mask is positive because immI_bitmask guarantees it.
// Match (src & mask) << lshift on ints and emit a single 32-bit
// unsigned bitfield insert-in-zero.
instruct ubfizwI(iRegINoSp dst, iRegIorL2I src, immI lshift, immI_bitmask mask)
%{
  match(Set dst (LShiftI (AndI src mask) lshift));
  // n->in(2) is lshift, n->in(1)->in(2) is mask.  The shift must be a
  // valid 32-bit amount and the shifted field (width + lshift) must
  // still fit in 32 bits.
  predicate((unsigned int)n->in(2)->get_int() <= 31 &&
    (exact_log2(n->in(1)->in(2)->get_int()+1) + (unsigned int)n->in(2)->get_int()) <= (31+1));

  ins_cost(INSN_COST);
  format %{ "ubfizw $dst, $src, $lshift, $mask" %}
  ins_encode %{
    int lshift = $lshift$$constant;
    long mask = $mask$$constant;
    // mask is 2^width - 1 (guaranteed by immI_bitmask).
    int width = exact_log2(mask+1);
    __ ubfizw(as_Register($dst$$reg),
          as_Register($src$$reg), lshift, width);
  %}
  ins_pipe(ialu_reg_shift);
%}
12541 // We can use ubfiz when masking by a positive number and then left shifting the result.
12542 // We know that the mask is positive because immL_bitmask guarantees it.
// Match (src & mask) << lshift on longs and emit a single 64-bit
// unsigned bitfield insert-in-zero.
// NOTE(review): this precedes the "END ... automatically generated"
// marker below; if this rule is generator-produced, mirror the
// exact_log2_long change in the generator source as well.
instruct ubfizL(iRegLNoSp dst, iRegL src, immI lshift, immL_bitmask mask)
%{
  match(Set dst (LShiftL (AndL src mask) lshift));
  // n->in(2) is lshift, n->in(1)->in(2) is mask.  The shift must be a
  // valid 64-bit amount and the shifted field (width + lshift) must
  // still fit in 64 bits.
  predicate((unsigned int)n->in(2)->get_int() <= 63 &&
    (exact_log2_long(n->in(1)->in(2)->get_long()+1) + (unsigned int)n->in(2)->get_int()) <= (63+1));

  ins_cost(INSN_COST);
  format %{ "ubfiz $dst, $src, $lshift, $mask" %}
  ins_encode %{
    int lshift = $lshift$$constant;
    long mask = $mask$$constant;
    // Use the 64-bit log2 to match the predicate above: an immL_bitmask
    // may have more than 31 significant bits, which the 32-bit
    // exact_log2 cannot represent.
    int width = exact_log2_long(mask+1);
    __ ubfiz(as_Register($dst$$reg),
          as_Register($src$$reg), lshift, width);
  %}
  ins_pipe(ialu_reg_shift);
%}
12560 
12561 // If there is a convert I to L block between and AndI and a LShiftL, we can also match ubfiz
// As ubfizwI, but with a ConvI2L between the AndI and the LShiftL;
// the 64-bit ubfiz covers the widening as part of the insert.
instruct ubfizIConvI2L(iRegLNoSp dst, iRegIorL2I src, immI lshift, immI_bitmask mask)
%{
  match(Set dst (LShiftL (ConvI2L(AndI src mask)) lshift));
  // n->in(2) is lshift, n->in(1)->in(1)->in(2) is the int mask; the
  // shifted field (width + lshift) must fit in the 32 bits the int
  // source provides.
  predicate((unsigned int)n->in(2)->get_int() <= 31 &&
    (exact_log2((unsigned int)n->in(1)->in(1)->in(2)->get_int()+1) + (unsigned int)n->in(2)->get_int()) <= 32);

  ins_cost(INSN_COST);
  format %{ "ubfiz $dst, $src, $lshift, $mask" %}
  ins_encode %{
    int lshift = $lshift$$constant;
    long mask = $mask$$constant;
    // mask is an int-ranged immI_bitmask, so 32-bit exact_log2 suffices.
    int width = exact_log2(mask+1);
    __ ubfiz(as_Register($dst$$reg),
             as_Register($src$$reg), lshift, width);
  %}
  ins_pipe(ialu_reg_shift);
%}
12579 
12580 // Rotations
12581 
// Match (src1 << lshift) | (src2 >>> rshift) on longs when the two shift
// amounts are complementary (sum to 0 mod 64, per the predicate) and
// emit a single 64-bit EXTR (double-register funnel shift).
instruct extrOrL(iRegLNoSp dst, iRegL src1, iRegL src2, immI lshift, immI rshift, rFlagsReg cr)
%{
  match(Set dst (OrL (LShiftL src1 lshift) (URShiftL src2 rshift)));
  predicate(0 == ((n->in(1)->in(2)->get_int() + n->in(2)->in(2)->get_int()) & 63));

  ins_cost(INSN_COST);
  format %{ "extr $dst, $src1, $src2, #$rshift" %}

  ins_encode %{
    __ extr(as_Register($dst$$reg), as_Register($src1$$reg), as_Register($src2$$reg),
            $rshift$$constant & 63);
  %}
  ins_pipe(ialu_reg_reg_extr);
%}
12596 
// Match (src1 << lshift) | (src2 >>> rshift) on ints when the two shift
// amounts are complementary (sum to 0 mod 32, per the predicate) and
// emit a single 32-bit EXTR (double-register funnel shift).
instruct extrOrI(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2, immI lshift, immI rshift, rFlagsReg cr)
%{
  match(Set dst (OrI (LShiftI src1 lshift) (URShiftI src2 rshift)));
  predicate(0 == ((n->in(1)->in(2)->get_int() + n->in(2)->in(2)->get_int()) & 31));

  ins_cost(INSN_COST);
  // Debug format names the 32-bit form (extrw) actually emitted below,
  // matching the naming convention of e.g. ubfxwI.
  format %{ "extrw $dst, $src1, $src2, #$rshift" %}

  ins_encode %{
    __ extrw(as_Register($dst$$reg), as_Register($src1$$reg), as_Register($src2$$reg),
            $rshift$$constant & 31);
  %}
  ins_pipe(ialu_reg_reg_extr);
%}
12611 
// As extrOrL, but with AddL combining the two shifted halves; with
// complementary shift amounts (predicate) the bit ranges are disjoint,
// so Add and Or produce the same value and EXTR applies.
instruct extrAddL(iRegLNoSp dst, iRegL src1, iRegL src2, immI lshift, immI rshift, rFlagsReg cr)
%{
  match(Set dst (AddL (LShiftL src1 lshift) (URShiftL src2 rshift)));
  predicate(0 == ((n->in(1)->in(2)->get_int() + n->in(2)->in(2)->get_int()) & 63));

  ins_cost(INSN_COST);
  format %{ "extr $dst, $src1, $src2, #$rshift" %}

  ins_encode %{
    __ extr(as_Register($dst$$reg), as_Register($src1$$reg), as_Register($src2$$reg),
            $rshift$$constant & 63);
  %}
  ins_pipe(ialu_reg_reg_extr);
%}
12626 
// As extrOrI, but with AddI combining the two shifted halves; with
// complementary shift amounts (predicate) the bit ranges are disjoint,
// so Add and Or produce the same value and EXTR applies.
instruct extrAddI(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2, immI lshift, immI rshift, rFlagsReg cr)
%{
  match(Set dst (AddI (LShiftI src1 lshift) (URShiftI src2 rshift)));
  predicate(0 == ((n->in(1)->in(2)->get_int() + n->in(2)->in(2)->get_int()) & 31));

  ins_cost(INSN_COST);
  // Debug format names the 32-bit form (extrw) actually emitted below,
  // matching the naming convention of e.g. ubfxwI.
  format %{ "extrw $dst, $src1, $src2, #$rshift" %}

  ins_encode %{
    __ extrw(as_Register($dst$$reg), as_Register($src1$$reg), as_Register($src2$$reg),
            $rshift$$constant & 31);
  %}
  ins_pipe(ialu_reg_reg_extr);
%}
12641 
12642 
12643 // rol expander
12644 
// Expander only (no match rule): used by the rolL_rReg_Var_* rules to
// implement a 64-bit variable rotate-left.
instruct rolL_rReg(iRegLNoSp dst, iRegL src, iRegI shift, rFlagsReg cr)
%{
  effect(DEF dst, USE src, USE shift);

  format %{ "rol    $dst, $src, $shift" %}
  ins_cost(INSN_COST * 3);
  ins_encode %{
    // rol x, s == ror x, -s (mod 64): negate the shift count into
    // rscratch1 and use the variable rotate-right instruction.
    __ subw(rscratch1, zr, as_Register($shift$$reg));
    __ rorv(as_Register($dst$$reg), as_Register($src$$reg),
            rscratch1);
    %}
  ins_pipe(ialu_reg_reg_vshift);
%}
12658 
12659 // rol expander
12660 
// Expander only (no match rule): used by the rolI_rReg_Var_* rules to
// implement a 32-bit variable rotate-left.
instruct rolI_rReg(iRegINoSp dst, iRegI src, iRegI shift, rFlagsReg cr)
%{
  effect(DEF dst, USE src, USE shift);

  format %{ "rol    $dst, $src, $shift" %}
  ins_cost(INSN_COST * 3);
  ins_encode %{
    // rol w, s == ror w, -s (mod 32): negate the shift count into
    // rscratch1 and use the variable rotate-right instruction.
    __ subw(rscratch1, zr, as_Register($shift$$reg));
    __ rorvw(as_Register($dst$$reg), as_Register($src$$reg),
            rscratch1);
    %}
  ins_pipe(ialu_reg_reg_vshift);
%}
12674 
// Rotate-left matching rules.  The compiler expresses a variable rol as
// (x << s) | (x >>> (c - s)) with c either the type width (64/32) or 0;
// the two forms are equivalent because shift counts are taken modulo the
// type width.  Each rule just expands to the rol expander above.
instruct rolL_rReg_Var_C_64(iRegLNoSp dst, iRegL src, iRegI shift, immI_64 c_64, rFlagsReg cr)
%{
  match(Set dst (OrL (LShiftL src shift) (URShiftL src (SubI c_64 shift))));

  expand %{
    rolL_rReg(dst, src, shift, cr);
  %}
%}

instruct rolL_rReg_Var_C0(iRegLNoSp dst, iRegL src, iRegI shift, immI0 c0, rFlagsReg cr)
%{
  match(Set dst (OrL (LShiftL src shift) (URShiftL src (SubI c0 shift))));

  expand %{
    rolL_rReg(dst, src, shift, cr);
  %}
%}

instruct rolI_rReg_Var_C_32(iRegINoSp dst, iRegI src, iRegI shift, immI_32 c_32, rFlagsReg cr)
%{
  match(Set dst (OrI (LShiftI src shift) (URShiftI src (SubI c_32 shift))));

  expand %{
    rolI_rReg(dst, src, shift, cr);
  %}
%}

instruct rolI_rReg_Var_C0(iRegINoSp dst, iRegI src, iRegI shift, immI0 c0, rFlagsReg cr)
%{
  match(Set dst (OrI (LShiftI src shift) (URShiftI src (SubI c0 shift))));

  expand %{
    rolI_rReg(dst, src, shift, cr);
  %}
%}
12710 
12711 // ror expander
12712 
// Expander only (no match rule): used by the rorL_rReg_Var_* rules;
// a rotate-right maps directly onto the variable rotate instruction,
// so no shift negation is needed (hence the lower cost vs rol).
instruct rorL_rReg(iRegLNoSp dst, iRegL src, iRegI shift, rFlagsReg cr)
%{
  effect(DEF dst, USE src, USE shift);

  format %{ "ror    $dst, $src, $shift" %}
  ins_cost(INSN_COST);
  ins_encode %{
    __ rorv(as_Register($dst$$reg), as_Register($src$$reg),
            as_Register($shift$$reg));
    %}
  ins_pipe(ialu_reg_reg_vshift);
%}

// ror expander (32-bit form)

instruct rorI_rReg(iRegINoSp dst, iRegI src, iRegI shift, rFlagsReg cr)
%{
  effect(DEF dst, USE src, USE shift);

  format %{ "ror    $dst, $src, $shift" %}
  ins_cost(INSN_COST);
  ins_encode %{
    __ rorvw(as_Register($dst$$reg), as_Register($src$$reg),
            as_Register($shift$$reg));
    %}
  ins_pipe(ialu_reg_reg_vshift);
%}
12740 
// Rotate-right matching rules, mirroring the rol variants: the compiler
// expresses a variable ror as (x >>> s) | (x << (c - s)) with c either
// the type width (64/32) or 0.  Each rule expands to the ror expander.
instruct rorL_rReg_Var_C_64(iRegLNoSp dst, iRegL src, iRegI shift, immI_64 c_64, rFlagsReg cr)
%{
  match(Set dst (OrL (URShiftL src shift) (LShiftL src (SubI c_64 shift))));

  expand %{
    rorL_rReg(dst, src, shift, cr);
  %}
%}

instruct rorL_rReg_Var_C0(iRegLNoSp dst, iRegL src, iRegI shift, immI0 c0, rFlagsReg cr)
%{
  match(Set dst (OrL (URShiftL src shift) (LShiftL src (SubI c0 shift))));

  expand %{
    rorL_rReg(dst, src, shift, cr);
  %}
%}

instruct rorI_rReg_Var_C_32(iRegINoSp dst, iRegI src, iRegI shift, immI_32 c_32, rFlagsReg cr)
%{
  match(Set dst (OrI (URShiftI src shift) (LShiftI src (SubI c_32 shift))));

  expand %{
    rorI_rReg(dst, src, shift, cr);
  %}
%}

instruct rorI_rReg_Var_C0(iRegINoSp dst, iRegI src, iRegI shift, immI0 c0, rFlagsReg cr)
%{
  match(Set dst (OrI (URShiftI src shift) (LShiftI src (SubI c0 shift))));

  expand %{
    rorI_rReg(dst, src, shift, cr);
  %}
%}
12776 
12777 // Add/subtract (extended)
12778 
// long +/- (long)int: fold the ConvI2L into the sxtw extended-register
// form of add/sub.
instruct AddExtI(iRegLNoSp dst, iRegL src1, iRegIorL2I src2, rFlagsReg cr)
%{
  match(Set dst (AddL src1 (ConvI2L src2)));
  ins_cost(INSN_COST);
  format %{ "add  $dst, $src1, $src2, sxtw" %}

   ins_encode %{
     __ add(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::sxtw);
   %}
  ins_pipe(ialu_reg_reg);
%};

instruct SubExtI(iRegLNoSp dst, iRegL src1, iRegIorL2I src2, rFlagsReg cr)
%{
  match(Set dst (SubL src1 (ConvI2L src2)));
  ins_cost(INSN_COST);
  format %{ "sub  $dst, $src1, $src2, sxtw" %}

   ins_encode %{
     __ sub(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::sxtw);
   %}
  ins_pipe(ialu_reg_reg);
%};
12804 
12805 
// Additions where the second operand is a (x << k) >> k extension idiom.
// The equal left/right immediate shift counts select the extend mode:
// for ints, 16/16 = sxth, 24/24 = sxtb (URShift variant = uxtb); for
// longs, 48/48 = sxth, 32/32 = sxtw, 56/56 = sxtb (URShift = uxtb).
// Each folds into one extended-register add.

// int + sxth(int): (src2 << 16) >> 16 is a signed 16-bit extend.
instruct AddExtI_sxth(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2, immI_16 lshift, immI_16 rshift, rFlagsReg cr)
%{
  match(Set dst (AddI src1 (RShiftI (LShiftI src2 lshift) rshift)));
  ins_cost(INSN_COST);
  format %{ "add  $dst, $src1, $src2, sxth" %}

   ins_encode %{
     __ add(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::sxth);
   %}
  ins_pipe(ialu_reg_reg);
%}

// int + sxtb(int): (src2 << 24) >> 24 is a signed 8-bit extend.
instruct AddExtI_sxtb(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2, immI_24 lshift, immI_24 rshift, rFlagsReg cr)
%{
  match(Set dst (AddI src1 (RShiftI (LShiftI src2 lshift) rshift)));
  ins_cost(INSN_COST);
  format %{ "add  $dst, $src1, $src2, sxtb" %}

   ins_encode %{
     __ add(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::sxtb);
   %}
  ins_pipe(ialu_reg_reg);
%}

// int + uxtb(int): unsigned right shift makes the 8-bit extend unsigned.
instruct AddExtI_uxtb(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2, immI_24 lshift, immI_24 rshift, rFlagsReg cr)
%{
  match(Set dst (AddI src1 (URShiftI (LShiftI src2 lshift) rshift)));
  ins_cost(INSN_COST);
  format %{ "add  $dst, $src1, $src2, uxtb" %}

   ins_encode %{
     __ add(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::uxtb);
   %}
  ins_pipe(ialu_reg_reg);
%}

// long + sxth(long): (src2 << 48) >> 48 is a signed 16-bit extend.
instruct AddExtL_sxth(iRegLNoSp dst, iRegL src1, iRegL src2, immI_48 lshift, immI_48 rshift, rFlagsReg cr)
%{
  match(Set dst (AddL src1 (RShiftL (LShiftL src2 lshift) rshift)));
  ins_cost(INSN_COST);
  format %{ "add  $dst, $src1, $src2, sxth" %}

   ins_encode %{
     __ add(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::sxth);
   %}
  ins_pipe(ialu_reg_reg);
%}

// long + sxtw(long): (src2 << 32) >> 32 is a signed 32-bit extend.
instruct AddExtL_sxtw(iRegLNoSp dst, iRegL src1, iRegL src2, immI_32 lshift, immI_32 rshift, rFlagsReg cr)
%{
  match(Set dst (AddL src1 (RShiftL (LShiftL src2 lshift) rshift)));
  ins_cost(INSN_COST);
  format %{ "add  $dst, $src1, $src2, sxtw" %}

   ins_encode %{
     __ add(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::sxtw);
   %}
  ins_pipe(ialu_reg_reg);
%}

// long + sxtb(long): (src2 << 56) >> 56 is a signed 8-bit extend.
instruct AddExtL_sxtb(iRegLNoSp dst, iRegL src1, iRegL src2, immI_56 lshift, immI_56 rshift, rFlagsReg cr)
%{
  match(Set dst (AddL src1 (RShiftL (LShiftL src2 lshift) rshift)));
  ins_cost(INSN_COST);
  format %{ "add  $dst, $src1, $src2, sxtb" %}

   ins_encode %{
     __ add(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::sxtb);
   %}
  ins_pipe(ialu_reg_reg);
%}

// long + uxtb(long): unsigned right shift makes the 8-bit extend unsigned.
instruct AddExtL_uxtb(iRegLNoSp dst, iRegL src1, iRegL src2, immI_56 lshift, immI_56 rshift, rFlagsReg cr)
%{
  match(Set dst (AddL src1 (URShiftL (LShiftL src2 lshift) rshift)));
  ins_cost(INSN_COST);
  format %{ "add  $dst, $src1, $src2, uxtb" %}

   ins_encode %{
     __ add(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::uxtb);
   %}
  ins_pipe(ialu_reg_reg);
%}
12896 
12897 
// Add/subtract where the zero-extension of the second operand is
// expressed as an And with an all-ones mask: 0xff = uxtb, 0xffff = uxth,
// 0xffffffff = uxtw.  Each folds into one extended-register add/sub
// (w-form for int patterns, x-form for long patterns).

instruct AddExtI_uxtb_and(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2, immI_255 mask, rFlagsReg cr)
%{
  match(Set dst (AddI src1 (AndI src2 mask)));
  ins_cost(INSN_COST);
  format %{ "addw  $dst, $src1, $src2, uxtb" %}

   ins_encode %{
     __ addw(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::uxtb);
   %}
  ins_pipe(ialu_reg_reg);
%}

instruct AddExtI_uxth_and(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2, immI_65535 mask, rFlagsReg cr)
%{
  match(Set dst (AddI src1 (AndI src2 mask)));
  ins_cost(INSN_COST);
  format %{ "addw  $dst, $src1, $src2, uxth" %}

   ins_encode %{
     __ addw(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::uxth);
   %}
  ins_pipe(ialu_reg_reg);
%}

instruct AddExtL_uxtb_and(iRegLNoSp dst, iRegL src1, iRegL src2, immL_255 mask, rFlagsReg cr)
%{
  match(Set dst (AddL src1 (AndL src2 mask)));
  ins_cost(INSN_COST);
  format %{ "add  $dst, $src1, $src2, uxtb" %}

   ins_encode %{
     __ add(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::uxtb);
   %}
  ins_pipe(ialu_reg_reg);
%}

instruct AddExtL_uxth_and(iRegLNoSp dst, iRegL src1, iRegL src2, immL_65535 mask, rFlagsReg cr)
%{
  match(Set dst (AddL src1 (AndL src2 mask)));
  ins_cost(INSN_COST);
  format %{ "add  $dst, $src1, $src2, uxth" %}

   ins_encode %{
     __ add(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::uxth);
   %}
  ins_pipe(ialu_reg_reg);
%}

instruct AddExtL_uxtw_and(iRegLNoSp dst, iRegL src1, iRegL src2, immL_4294967295 mask, rFlagsReg cr)
%{
  match(Set dst (AddL src1 (AndL src2 mask)));
  ins_cost(INSN_COST);
  format %{ "add  $dst, $src1, $src2, uxtw" %}

   ins_encode %{
     __ add(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::uxtw);
   %}
  ins_pipe(ialu_reg_reg);
%}

instruct SubExtI_uxtb_and(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2, immI_255 mask, rFlagsReg cr)
%{
  match(Set dst (SubI src1 (AndI src2 mask)));
  ins_cost(INSN_COST);
  format %{ "subw  $dst, $src1, $src2, uxtb" %}

   ins_encode %{
     __ subw(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::uxtb);
   %}
  ins_pipe(ialu_reg_reg);
%}

instruct SubExtI_uxth_and(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2, immI_65535 mask, rFlagsReg cr)
%{
  match(Set dst (SubI src1 (AndI src2 mask)));
  ins_cost(INSN_COST);
  format %{ "subw  $dst, $src1, $src2, uxth" %}

   ins_encode %{
     __ subw(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::uxth);
   %}
  ins_pipe(ialu_reg_reg);
%}

instruct SubExtL_uxtb_and(iRegLNoSp dst, iRegL src1, iRegL src2, immL_255 mask, rFlagsReg cr)
%{
  match(Set dst (SubL src1 (AndL src2 mask)));
  ins_cost(INSN_COST);
  format %{ "sub  $dst, $src1, $src2, uxtb" %}

   ins_encode %{
     __ sub(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::uxtb);
   %}
  ins_pipe(ialu_reg_reg);
%}

instruct SubExtL_uxth_and(iRegLNoSp dst, iRegL src1, iRegL src2, immL_65535 mask, rFlagsReg cr)
%{
  match(Set dst (SubL src1 (AndL src2 mask)));
  ins_cost(INSN_COST);
  format %{ "sub  $dst, $src1, $src2, uxth" %}

   ins_encode %{
     __ sub(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::uxth);
   %}
  ins_pipe(ialu_reg_reg);
%}

instruct SubExtL_uxtw_and(iRegLNoSp dst, iRegL src1, iRegL src2, immL_4294967295 mask, rFlagsReg cr)
%{
  match(Set dst (SubL src1 (AndL src2 mask)));
  ins_cost(INSN_COST);
  format %{ "sub  $dst, $src1, $src2, uxtw" %}

   ins_encode %{
     __ sub(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::uxtw);
   %}
  ins_pipe(ialu_reg_reg);
%}
13027 
13028 
// Add/subtract of a sign-extended value that is additionally left-shifted
// by an immIExt amount: matches the (x << k) >> k extension idiom followed
// by LShift, folding everything into the shifted extended-register form
// of add/sub (extend mode chosen by the k/k pair, as above).

instruct AddExtL_sxtb_shift(iRegLNoSp dst, iRegL src1, iRegL src2, immIExt lshift2, immI_56 lshift1, immI_56 rshift1, rFlagsReg cr)
%{
  match(Set dst (AddL src1 (LShiftL (RShiftL (LShiftL src2 lshift1) rshift1) lshift2)));
  ins_cost(1.9 * INSN_COST);
  format %{ "add  $dst, $src1, $src2, sxtb #lshift2" %}

   ins_encode %{
     __ add(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::sxtb, ($lshift2$$constant));
   %}
  ins_pipe(ialu_reg_reg_shift);
%}

instruct AddExtL_sxth_shift(iRegLNoSp dst, iRegL src1, iRegL src2, immIExt lshift2, immI_48 lshift1, immI_48 rshift1, rFlagsReg cr)
%{
  match(Set dst (AddL src1 (LShiftL (RShiftL (LShiftL src2 lshift1) rshift1) lshift2)));
  ins_cost(1.9 * INSN_COST);
  format %{ "add  $dst, $src1, $src2, sxth #lshift2" %}

   ins_encode %{
     __ add(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::sxth, ($lshift2$$constant));
   %}
  ins_pipe(ialu_reg_reg_shift);
%}

instruct AddExtL_sxtw_shift(iRegLNoSp dst, iRegL src1, iRegL src2, immIExt lshift2, immI_32 lshift1, immI_32 rshift1, rFlagsReg cr)
%{
  match(Set dst (AddL src1 (LShiftL (RShiftL (LShiftL src2 lshift1) rshift1) lshift2)));
  ins_cost(1.9 * INSN_COST);
  format %{ "add  $dst, $src1, $src2, sxtw #lshift2" %}

   ins_encode %{
     __ add(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::sxtw, ($lshift2$$constant));
   %}
  ins_pipe(ialu_reg_reg_shift);
%}

instruct SubExtL_sxtb_shift(iRegLNoSp dst, iRegL src1, iRegL src2, immIExt lshift2, immI_56 lshift1, immI_56 rshift1, rFlagsReg cr)
%{
  match(Set dst (SubL src1 (LShiftL (RShiftL (LShiftL src2 lshift1) rshift1) lshift2)));
  ins_cost(1.9 * INSN_COST);
  format %{ "sub  $dst, $src1, $src2, sxtb #lshift2" %}

   ins_encode %{
     __ sub(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::sxtb, ($lshift2$$constant));
   %}
  ins_pipe(ialu_reg_reg_shift);
%}

instruct SubExtL_sxth_shift(iRegLNoSp dst, iRegL src1, iRegL src2, immIExt lshift2, immI_48 lshift1, immI_48 rshift1, rFlagsReg cr)
%{
  match(Set dst (SubL src1 (LShiftL (RShiftL (LShiftL src2 lshift1) rshift1) lshift2)));
  ins_cost(1.9 * INSN_COST);
  format %{ "sub  $dst, $src1, $src2, sxth #lshift2" %}

   ins_encode %{
     __ sub(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::sxth, ($lshift2$$constant));
   %}
  ins_pipe(ialu_reg_reg_shift);
%}

instruct SubExtL_sxtw_shift(iRegLNoSp dst, iRegL src1, iRegL src2, immIExt lshift2, immI_32 lshift1, immI_32 rshift1, rFlagsReg cr)
%{
  match(Set dst (SubL src1 (LShiftL (RShiftL (LShiftL src2 lshift1) rshift1) lshift2)));
  ins_cost(1.9 * INSN_COST);
  format %{ "sub  $dst, $src1, $src2, sxtw #lshift2" %}

   ins_encode %{
     __ sub(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::sxtw, ($lshift2$$constant));
   %}
  ins_pipe(ialu_reg_reg_shift);
%}

instruct AddExtI_sxtb_shift(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2, immIExt lshift2, immI_24 lshift1, immI_24 rshift1, rFlagsReg cr)
%{
  match(Set dst (AddI src1 (LShiftI (RShiftI (LShiftI src2 lshift1) rshift1) lshift2)));
  ins_cost(1.9 * INSN_COST);
  format %{ "addw  $dst, $src1, $src2, sxtb #lshift2" %}

   ins_encode %{
     __ addw(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::sxtb, ($lshift2$$constant));
   %}
  ins_pipe(ialu_reg_reg_shift);
%}

instruct AddExtI_sxth_shift(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2, immIExt lshift2, immI_16 lshift1, immI_16 rshift1, rFlagsReg cr)
%{
  match(Set dst (AddI src1 (LShiftI (RShiftI (LShiftI src2 lshift1) rshift1) lshift2)));
  ins_cost(1.9 * INSN_COST);
  format %{ "addw  $dst, $src1, $src2, sxth #lshift2" %}

   ins_encode %{
     __ addw(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::sxth, ($lshift2$$constant));
   %}
  ins_pipe(ialu_reg_reg_shift);
%}

instruct SubExtI_sxtb_shift(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2, immIExt lshift2, immI_24 lshift1, immI_24 rshift1, rFlagsReg cr)
%{
  match(Set dst (SubI src1 (LShiftI (RShiftI (LShiftI src2 lshift1) rshift1) lshift2)));
  ins_cost(1.9 * INSN_COST);
  format %{ "subw  $dst, $src1, $src2, sxtb #lshift2" %}

   ins_encode %{
     __ subw(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::sxtb, ($lshift2$$constant));
   %}
  ins_pipe(ialu_reg_reg_shift);
%}

instruct SubExtI_sxth_shift(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2, immIExt lshift2, immI_16 lshift1, immI_16 rshift1, rFlagsReg cr)
%{
  match(Set dst (SubI src1 (LShiftI (RShiftI (LShiftI src2 lshift1) rshift1) lshift2)));
  ins_cost(1.9 * INSN_COST);
  format %{ "subw  $dst, $src1, $src2, sxth #lshift2" %}

   ins_encode %{
     __ subw(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::sxth, ($lshift2$$constant));
   %}
  ins_pipe(ialu_reg_reg_shift);
%}
13158 
13159 
// long +/- ((long)int << lshift): fold the ConvI2L and the shift into
// the shifted sxtw extended-register form of add/sub.
instruct AddExtI_shift(iRegLNoSp dst, iRegL src1, iRegIorL2I src2, immIExt lshift, rFlagsReg cr)
%{
  match(Set dst (AddL src1 (LShiftL (ConvI2L src2) lshift)));
  ins_cost(1.9 * INSN_COST);
  format %{ "add  $dst, $src1, $src2, sxtw #lshift" %}

   ins_encode %{
     __ add(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::sxtw, ($lshift$$constant));
   %}
  ins_pipe(ialu_reg_reg_shift);
%};

instruct SubExtI_shift(iRegLNoSp dst, iRegL src1, iRegIorL2I src2, immIExt lshift, rFlagsReg cr)
%{
  match(Set dst (SubL src1 (LShiftL (ConvI2L src2) lshift)));
  ins_cost(1.9 * INSN_COST);
  format %{ "sub  $dst, $src1, $src2, sxtw #lshift" %}

   ins_encode %{
     __ sub(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::sxtw, ($lshift$$constant));
   %}
  ins_pipe(ialu_reg_reg_shift);
%};
13185 
13186 
// Add/subtract of a zero-extended (And with all-ones mask: 0xff = uxtb,
// 0xffff = uxth, 0xffffffff = uxtw) value that is additionally
// left-shifted by an immIExt amount, folded into the shifted
// extended-register form of add/sub.

instruct AddExtL_uxtb_and_shift(iRegLNoSp dst, iRegL src1, iRegL src2, immL_255 mask, immIExt lshift, rFlagsReg cr)
%{
  match(Set dst (AddL src1 (LShiftL (AndL src2 mask) lshift)));
  ins_cost(1.9 * INSN_COST);
  format %{ "add  $dst, $src1, $src2, uxtb #lshift" %}

   ins_encode %{
     __ add(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::uxtb, ($lshift$$constant));
   %}
  ins_pipe(ialu_reg_reg_shift);
%}

instruct AddExtL_uxth_and_shift(iRegLNoSp dst, iRegL src1, iRegL src2, immL_65535 mask, immIExt lshift, rFlagsReg cr)
%{
  match(Set dst (AddL src1 (LShiftL (AndL src2 mask) lshift)));
  ins_cost(1.9 * INSN_COST);
  format %{ "add  $dst, $src1, $src2, uxth #lshift" %}

   ins_encode %{
     __ add(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::uxth, ($lshift$$constant));
   %}
  ins_pipe(ialu_reg_reg_shift);
%}

instruct AddExtL_uxtw_and_shift(iRegLNoSp dst, iRegL src1, iRegL src2, immL_4294967295 mask, immIExt lshift, rFlagsReg cr)
%{
  match(Set dst (AddL src1 (LShiftL (AndL src2 mask) lshift)));
  ins_cost(1.9 * INSN_COST);
  format %{ "add  $dst, $src1, $src2, uxtw #lshift" %}

   ins_encode %{
     __ add(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::uxtw, ($lshift$$constant));
   %}
  ins_pipe(ialu_reg_reg_shift);
%}

instruct SubExtL_uxtb_and_shift(iRegLNoSp dst, iRegL src1, iRegL src2, immL_255 mask, immIExt lshift, rFlagsReg cr)
%{
  match(Set dst (SubL src1 (LShiftL (AndL src2 mask) lshift)));
  ins_cost(1.9 * INSN_COST);
  format %{ "sub  $dst, $src1, $src2, uxtb #lshift" %}

   ins_encode %{
     __ sub(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::uxtb, ($lshift$$constant));
   %}
  ins_pipe(ialu_reg_reg_shift);
%}

instruct SubExtL_uxth_and_shift(iRegLNoSp dst, iRegL src1, iRegL src2, immL_65535 mask, immIExt lshift, rFlagsReg cr)
%{
  match(Set dst (SubL src1 (LShiftL (AndL src2 mask) lshift)));
  ins_cost(1.9 * INSN_COST);
  format %{ "sub  $dst, $src1, $src2, uxth #lshift" %}

   ins_encode %{
     __ sub(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::uxth, ($lshift$$constant));
   %}
  ins_pipe(ialu_reg_reg_shift);
%}

instruct SubExtL_uxtw_and_shift(iRegLNoSp dst, iRegL src1, iRegL src2, immL_4294967295 mask, immIExt lshift, rFlagsReg cr)
%{
  match(Set dst (SubL src1 (LShiftL (AndL src2 mask) lshift)));
  ins_cost(1.9 * INSN_COST);
  format %{ "sub  $dst, $src1, $src2, uxtw #lshift" %}

   ins_encode %{
     __ sub(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::uxtw, ($lshift$$constant));
   %}
  ins_pipe(ialu_reg_reg_shift);
%}

instruct AddExtI_uxtb_and_shift(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2, immI_255 mask, immIExt lshift, rFlagsReg cr)
%{
  match(Set dst (AddI src1 (LShiftI (AndI src2 mask) lshift)));
  ins_cost(1.9 * INSN_COST);
  format %{ "addw  $dst, $src1, $src2, uxtb #lshift" %}

   ins_encode %{
     __ addw(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::uxtb, ($lshift$$constant));
   %}
  ins_pipe(ialu_reg_reg_shift);
%}

instruct AddExtI_uxth_and_shift(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2, immI_65535 mask, immIExt lshift, rFlagsReg cr)
%{
  match(Set dst (AddI src1 (LShiftI (AndI src2 mask) lshift)));
  ins_cost(1.9 * INSN_COST);
  format %{ "addw  $dst, $src1, $src2, uxth #lshift" %}

   ins_encode %{
     __ addw(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::uxth, ($lshift$$constant));
   %}
  ins_pipe(ialu_reg_reg_shift);
%}

instruct SubExtI_uxtb_and_shift(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2, immI_255 mask, immIExt lshift, rFlagsReg cr)
%{
  match(Set dst (SubI src1 (LShiftI (AndI src2 mask) lshift)));
  ins_cost(1.9 * INSN_COST);
  format %{ "subw  $dst, $src1, $src2, uxtb #lshift" %}

   ins_encode %{
     __ subw(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::uxtb, ($lshift$$constant));
   %}
  ins_pipe(ialu_reg_reg_shift);
%}

instruct SubExtI_uxth_and_shift(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2, immI_65535 mask, immIExt lshift, rFlagsReg cr)
%{
  match(Set dst (SubI src1 (LShiftI (AndI src2 mask) lshift)));
  ins_cost(1.9 * INSN_COST);
  format %{ "subw  $dst, $src1, $src2, uxth #lshift" %}

   ins_encode %{
     __ subw(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::uxth, ($lshift$$constant));
   %}
  ins_pipe(ialu_reg_reg_shift);
%}
13316 // END This section of the file is automatically generated. Do not edit --------------
13317 
13318 // ============================================================================
13319 // Floating Point Arithmetic Instructions
13320 
// Scalar floating-point add/subtract: AddF/SubF map to the
// single-precision fadds/fsubs, AddD/SubD to the double-precision
// faddd/fsubd, all operating on FP/SIMD registers.
instruct addF_reg_reg(vRegF dst, vRegF src1, vRegF src2) %{
  match(Set dst (AddF src1 src2));

  ins_cost(INSN_COST * 5);
  format %{ "fadds   $dst, $src1, $src2" %}

  ins_encode %{
    __ fadds(as_FloatRegister($dst$$reg),
             as_FloatRegister($src1$$reg),
             as_FloatRegister($src2$$reg));
  %}

  ins_pipe(fp_dop_reg_reg_s);
%}

instruct addD_reg_reg(vRegD dst, vRegD src1, vRegD src2) %{
  match(Set dst (AddD src1 src2));

  ins_cost(INSN_COST * 5);
  format %{ "faddd   $dst, $src1, $src2" %}

  ins_encode %{
    __ faddd(as_FloatRegister($dst$$reg),
             as_FloatRegister($src1$$reg),
             as_FloatRegister($src2$$reg));
  %}

  ins_pipe(fp_dop_reg_reg_d);
%}

instruct subF_reg_reg(vRegF dst, vRegF src1, vRegF src2) %{
  match(Set dst (SubF src1 src2));

  ins_cost(INSN_COST * 5);
  format %{ "fsubs   $dst, $src1, $src2" %}

  ins_encode %{
    __ fsubs(as_FloatRegister($dst$$reg),
             as_FloatRegister($src1$$reg),
             as_FloatRegister($src2$$reg));
  %}

  ins_pipe(fp_dop_reg_reg_s);
%}

instruct subD_reg_reg(vRegD dst, vRegD src1, vRegD src2) %{
  match(Set dst (SubD src1 src2));

  ins_cost(INSN_COST * 5);
  format %{ "fsubd   $dst, $src1, $src2" %}

  ins_encode %{
    __ fsubd(as_FloatRegister($dst$$reg),
             as_FloatRegister($src1$$reg),
             as_FloatRegister($src2$$reg));
  %}

  ins_pipe(fp_dop_reg_reg_d);
%}
13380 
13381 instruct mulF_reg_reg(vRegF dst, vRegF src1, vRegF src2) %{
13382   match(Set dst (MulF src1 src2));
13383 
13384   ins_cost(INSN_COST * 6);
13385   format %{ "fmuls   $dst, $src1, $src2" %}
13386 
13387   ins_encode %{
13388     __ fmuls(as_FloatRegister($dst$$reg),
13389              as_FloatRegister($src1$$reg),
13390              as_FloatRegister($src2$$reg));
13391   %}
13392 
13393   ins_pipe(fp_dop_reg_reg_s);
13394 %}
13395 
13396 instruct mulD_reg_reg(vRegD dst, vRegD src1, vRegD src2) %{
13397   match(Set dst (MulD src1 src2));
13398 
13399   ins_cost(INSN_COST * 6);
13400   format %{ "fmuld   $dst, $src1, $src2" %}
13401 
13402   ins_encode %{
13403     __ fmuld(as_FloatRegister($dst$$reg),
13404              as_FloatRegister($src1$$reg),
13405              as_FloatRegister($src2$$reg));
13406   %}
13407 
13408   ins_pipe(fp_dop_reg_reg_d);
13409 %}
13410 
// Fused multiply-add family. All rules are guarded by UseFMA, i.e. they
// only fire when the JIT is allowed to contract a*b+c into one rounding.

// src1 * src2 + src3
instruct maddF_reg_reg(vRegF dst, vRegF src1, vRegF src2, vRegF src3) %{
  predicate(UseFMA);
  match(Set dst (FmaF src3 (Binary src1 src2)));

  format %{ "fmadds   $dst, $src1, $src2, $src3" %}

  ins_encode %{
    __ fmadds(as_FloatRegister($dst$$reg),
             as_FloatRegister($src1$$reg),
             as_FloatRegister($src2$$reg),
             as_FloatRegister($src3$$reg));
  %}

  ins_pipe(pipe_class_default);
%}

// src1 * src2 + src3
instruct maddD_reg_reg(vRegD dst, vRegD src1, vRegD src2, vRegD src3) %{
  predicate(UseFMA);
  match(Set dst (FmaD src3 (Binary src1 src2)));

  format %{ "fmaddd   $dst, $src1, $src2, $src3" %}

  ins_encode %{
    __ fmaddd(as_FloatRegister($dst$$reg),
             as_FloatRegister($src1$$reg),
             as_FloatRegister($src2$$reg),
             as_FloatRegister($src3$$reg));
  %}

  ins_pipe(pipe_class_default);
%}

// -src1 * src2 + src3
// Two match rules: the negation can appear on either multiplicand.
instruct msubF_reg_reg(vRegF dst, vRegF src1, vRegF src2, vRegF src3) %{
  predicate(UseFMA);
  match(Set dst (FmaF src3 (Binary (NegF src1) src2)));
  match(Set dst (FmaF src3 (Binary src1 (NegF src2))));

  format %{ "fmsubs   $dst, $src1, $src2, $src3" %}

  ins_encode %{
    __ fmsubs(as_FloatRegister($dst$$reg),
              as_FloatRegister($src1$$reg),
              as_FloatRegister($src2$$reg),
              as_FloatRegister($src3$$reg));
  %}

  ins_pipe(pipe_class_default);
%}

// -src1 * src2 + src3
instruct msubD_reg_reg(vRegD dst, vRegD src1, vRegD src2, vRegD src3) %{
  predicate(UseFMA);
  match(Set dst (FmaD src3 (Binary (NegD src1) src2)));
  match(Set dst (FmaD src3 (Binary src1 (NegD src2))));

  format %{ "fmsubd   $dst, $src1, $src2, $src3" %}

  ins_encode %{
    __ fmsubd(as_FloatRegister($dst$$reg),
              as_FloatRegister($src1$$reg),
              as_FloatRegister($src2$$reg),
              as_FloatRegister($src3$$reg));
  %}

  ins_pipe(pipe_class_default);
%}

// -src1 * src2 - src3
instruct mnaddF_reg_reg(vRegF dst, vRegF src1, vRegF src2, vRegF src3) %{
  predicate(UseFMA);
  match(Set dst (FmaF (NegF src3) (Binary (NegF src1) src2)));
  match(Set dst (FmaF (NegF src3) (Binary src1 (NegF src2))));

  format %{ "fnmadds  $dst, $src1, $src2, $src3" %}

  ins_encode %{
    __ fnmadds(as_FloatRegister($dst$$reg),
               as_FloatRegister($src1$$reg),
               as_FloatRegister($src2$$reg),
               as_FloatRegister($src3$$reg));
  %}

  ins_pipe(pipe_class_default);
%}

// -src1 * src2 - src3
instruct mnaddD_reg_reg(vRegD dst, vRegD src1, vRegD src2, vRegD src3) %{
  predicate(UseFMA);
  match(Set dst (FmaD (NegD src3) (Binary (NegD src1) src2)));
  match(Set dst (FmaD (NegD src3) (Binary src1 (NegD src2))));

  format %{ "fnmaddd   $dst, $src1, $src2, $src3" %}

  ins_encode %{
    __ fnmaddd(as_FloatRegister($dst$$reg),
               as_FloatRegister($src1$$reg),
               as_FloatRegister($src2$$reg),
               as_FloatRegister($src3$$reg));
  %}

  ins_pipe(pipe_class_default);
%}

// src1 * src2 - src3
// NOTE(review): the 'zero' operand does not appear in the encoding; it
// looks vestigial — confirm whether the matcher still requires it.
instruct mnsubF_reg_reg(vRegF dst, vRegF src1, vRegF src2, vRegF src3, immF0 zero) %{
  predicate(UseFMA);
  match(Set dst (FmaF (NegF src3) (Binary src1 src2)));

  format %{ "fnmsubs  $dst, $src1, $src2, $src3" %}

  ins_encode %{
    __ fnmsubs(as_FloatRegister($dst$$reg),
               as_FloatRegister($src1$$reg),
               as_FloatRegister($src2$$reg),
               as_FloatRegister($src3$$reg));
  %}

  ins_pipe(pipe_class_default);
%}

// src1 * src2 - src3
// NOTE(review): 'zero' operand unused in the encoding, as above.
instruct mnsubD_reg_reg(vRegD dst, vRegD src1, vRegD src2, vRegD src3, immD0 zero) %{
  predicate(UseFMA);
  match(Set dst (FmaD (NegD src3) (Binary src1 src2)));

  format %{ "fnmsubd   $dst, $src1, $src2, $src3" %}

  ins_encode %{
  // n.b. the assembler entry point for the double form is named fnmsub
  // (no trailing 'd'); the emitted instruction is fnmsub (double).
    __ fnmsub(as_FloatRegister($dst$$reg),
              as_FloatRegister($src1$$reg),
              as_FloatRegister($src2$$reg),
              as_FloatRegister($src3$$reg));
  %}

  ins_pipe(pipe_class_default);
%}
13551 
13552 
// Float divide: dst = src1 / src2. High cost — division is slow.
instruct divF_reg_reg(vRegF dst, vRegF src1, vRegF src2) %{
  match(Set dst (DivF src1  src2));

  ins_cost(INSN_COST * 18);
  format %{ "fdivs   $dst, $src1, $src2" %}

  ins_encode %{
    __ fdivs(as_FloatRegister($dst$$reg),
             as_FloatRegister($src1$$reg),
             as_FloatRegister($src2$$reg));
  %}

  ins_pipe(fp_div_s);
%}

// Double divide: dst = src1 / src2. Even higher cost than float divide.
instruct divD_reg_reg(vRegD dst, vRegD src1, vRegD src2) %{
  match(Set dst (DivD src1  src2));

  ins_cost(INSN_COST * 32);
  format %{ "fdivd   $dst, $src1, $src2" %}

  ins_encode %{
    __ fdivd(as_FloatRegister($dst$$reg),
             as_FloatRegister($src1$$reg),
             as_FloatRegister($src2$$reg));
  %}

  ins_pipe(fp_div_d);
%}
13582 
// Float negate: dst = -src.
instruct negF_reg_reg(vRegF dst, vRegF src) %{
  match(Set dst (NegF src));

  ins_cost(INSN_COST * 3);
  // Fix: the format previously printed "fneg", but the encoding emits
  // fnegs; now consistent with negD_reg_reg's "fnegd".
  format %{ "fnegs   $dst, $src" %}

  ins_encode %{
    __ fnegs(as_FloatRegister($dst$$reg),
             as_FloatRegister($src$$reg));
  %}

  ins_pipe(fp_uop_s);
%}
13596 
// Double negate: dst = -src.
instruct negD_reg_reg(vRegD dst, vRegD src) %{
  match(Set dst (NegD src));

  ins_cost(INSN_COST * 3);
  format %{ "fnegd   $dst, $src" %}

  ins_encode %{
    __ fnegd(as_FloatRegister($dst$$reg),
             as_FloatRegister($src$$reg));
  %}

  ins_pipe(fp_uop_d);
%}

// Float absolute value: dst = |src|.
instruct absF_reg(vRegF dst, vRegF src) %{
  match(Set dst (AbsF src));

  ins_cost(INSN_COST * 3);
  format %{ "fabss   $dst, $src" %}
  ins_encode %{
    __ fabss(as_FloatRegister($dst$$reg),
             as_FloatRegister($src$$reg));
  %}

  ins_pipe(fp_uop_s);
%}

// Double absolute value: dst = |src|.
instruct absD_reg(vRegD dst, vRegD src) %{
  match(Set dst (AbsD src));

  ins_cost(INSN_COST * 3);
  format %{ "fabsd   $dst, $src" %}
  ins_encode %{
    __ fabsd(as_FloatRegister($dst$$reg),
             as_FloatRegister($src$$reg));
  %}

  ins_pipe(fp_uop_d);
%}
13636 
// Double square root: dst = sqrt(src).
instruct sqrtD_reg(vRegD dst, vRegD src) %{
  match(Set dst (SqrtD src));

  ins_cost(INSN_COST * 50);
  format %{ "fsqrtd  $dst, $src" %}
  ins_encode %{
    __ fsqrtd(as_FloatRegister($dst$$reg),
             as_FloatRegister($src$$reg));
  %}

  // Fix: pipe class was fp_div_s (swapped with sqrtF_reg); the
  // double-precision op belongs on the double divide/sqrt pipe.
  ins_pipe(fp_div_d);
%}
13649 
// Float square root, matched via the (float)sqrt((double)f) idiom the
// ideal graph produces; fsqrts gives the correctly rounded single result.
instruct sqrtF_reg(vRegF dst, vRegF src) %{
  match(Set dst (ConvD2F (SqrtD (ConvF2D src))));

  ins_cost(INSN_COST * 50);
  format %{ "fsqrts  $dst, $src" %}
  ins_encode %{
    __ fsqrts(as_FloatRegister($dst$$reg),
             as_FloatRegister($src$$reg));
  %}

  // Fix: pipe class was fp_div_d (swapped with sqrtD_reg); this is the
  // single-precision op.
  ins_pipe(fp_div_s);
%}
13662 
13663 // ============================================================================
13664 // Logical Instructions
13665 
13666 // Integer Logical Instructions
13667 
13668 // And Instructions
13669 
13670 
// Int bitwise AND, register-register.
// NOTE(review): the 'cr' operand has no effect() clause and is not used
// by the encoding — confirm whether it is intentional.
instruct andI_reg_reg(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2, rFlagsReg cr) %{
  match(Set dst (AndI src1 src2));

  format %{ "andw  $dst, $src1, $src2\t# int" %}

  ins_cost(INSN_COST);
  ins_encode %{
    __ andw(as_Register($dst$$reg),
            as_Register($src1$$reg),
            as_Register($src2$$reg));
  %}

  ins_pipe(ialu_reg_reg);
%}
13685 
// Int bitwise AND with a logical immediate.
instruct andI_reg_imm(iRegINoSp dst, iRegIorL2I src1, immILog src2, rFlagsReg cr) %{
  match(Set dst (AndI src1 src2));

  // Fix: format previously read "andsw", but the encoding emits andw —
  // the flags are NOT set by this rule.
  format %{ "andw  $dst, $src1, $src2\t# int" %}

  ins_cost(INSN_COST);
  ins_encode %{
    __ andw(as_Register($dst$$reg),
            as_Register($src1$$reg),
            (unsigned long)($src2$$constant));
  %}

  ins_pipe(ialu_reg_imm);
%}
13700 
13701 // Or Instructions
13702 
// Int bitwise OR, register-register.
instruct orI_reg_reg(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2) %{
  match(Set dst (OrI src1 src2));

  format %{ "orrw  $dst, $src1, $src2\t# int" %}

  ins_cost(INSN_COST);
  ins_encode %{
    __ orrw(as_Register($dst$$reg),
            as_Register($src1$$reg),
            as_Register($src2$$reg));
  %}

  ins_pipe(ialu_reg_reg);
%}

// Int bitwise OR with a logical immediate.
instruct orI_reg_imm(iRegINoSp dst, iRegIorL2I src1, immILog src2) %{
  match(Set dst (OrI src1 src2));

  format %{ "orrw  $dst, $src1, $src2\t# int" %}

  ins_cost(INSN_COST);
  ins_encode %{
    __ orrw(as_Register($dst$$reg),
            as_Register($src1$$reg),
            (unsigned long)($src2$$constant));
  %}

  ins_pipe(ialu_reg_imm);
%}

// Xor Instructions

// Int bitwise XOR, register-register.
instruct xorI_reg_reg(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2) %{
  match(Set dst (XorI src1 src2));

  format %{ "eorw  $dst, $src1, $src2\t# int" %}

  ins_cost(INSN_COST);
  ins_encode %{
    __ eorw(as_Register($dst$$reg),
            as_Register($src1$$reg),
            as_Register($src2$$reg));
  %}

  ins_pipe(ialu_reg_reg);
%}

// Int bitwise XOR with a logical immediate.
instruct xorI_reg_imm(iRegINoSp dst, iRegIorL2I src1, immILog src2) %{
  match(Set dst (XorI src1 src2));

  format %{ "eorw  $dst, $src1, $src2\t# int" %}

  ins_cost(INSN_COST);
  ins_encode %{
    __ eorw(as_Register($dst$$reg),
            as_Register($src1$$reg),
            (unsigned long)($src2$$constant));
  %}

  ins_pipe(ialu_reg_imm);
%}
13764 
13765 // Long Logical Instructions
13766 // TODO
13767 
// Long bitwise AND, register-register.
instruct andL_reg_reg(iRegLNoSp dst, iRegL src1, iRegL src2, rFlagsReg cr) %{
  match(Set dst (AndL src1 src2));

  // Fix: format comment said "# int" for a 64-bit operation.
  format %{ "and  $dst, $src1, $src2\t# long" %}

  ins_cost(INSN_COST);
  ins_encode %{
    __ andr(as_Register($dst$$reg),
            as_Register($src1$$reg),
            as_Register($src2$$reg));
  %}

  ins_pipe(ialu_reg_reg);
%}
13782 
// Long bitwise AND with a logical immediate.
instruct andL_reg_imm(iRegLNoSp dst, iRegL src1, immLLog src2, rFlagsReg cr) %{
  match(Set dst (AndL src1 src2));

  // Fix: format comment said "# int" for a 64-bit operation.
  format %{ "and  $dst, $src1, $src2\t# long" %}

  ins_cost(INSN_COST);
  ins_encode %{
    __ andr(as_Register($dst$$reg),
            as_Register($src1$$reg),
            (unsigned long)($src2$$constant));
  %}

  ins_pipe(ialu_reg_imm);
%}
13797 
13798 // Or Instructions
13799 
// Long bitwise OR, register-register.
instruct orL_reg_reg(iRegLNoSp dst, iRegL src1, iRegL src2) %{
  match(Set dst (OrL src1 src2));

  // Fix: format comment said "# int" for a 64-bit operation.
  format %{ "orr  $dst, $src1, $src2\t# long" %}

  ins_cost(INSN_COST);
  ins_encode %{
    __ orr(as_Register($dst$$reg),
           as_Register($src1$$reg),
           as_Register($src2$$reg));
  %}

  ins_pipe(ialu_reg_reg);
%}
13814 
// Long bitwise OR with a logical immediate.
instruct orL_reg_imm(iRegLNoSp dst, iRegL src1, immLLog src2) %{
  match(Set dst (OrL src1 src2));

  // Fix: format comment said "# int" for a 64-bit operation.
  format %{ "orr  $dst, $src1, $src2\t# long" %}

  ins_cost(INSN_COST);
  ins_encode %{
    __ orr(as_Register($dst$$reg),
           as_Register($src1$$reg),
           (unsigned long)($src2$$constant));
  %}

  ins_pipe(ialu_reg_imm);
%}
13829 
13830 // Xor Instructions
13831 
// Long bitwise XOR, register-register.
instruct xorL_reg_reg(iRegLNoSp dst, iRegL src1, iRegL src2) %{
  match(Set dst (XorL src1 src2));

  // Fix: format comment said "# int" for a 64-bit operation.
  format %{ "eor  $dst, $src1, $src2\t# long" %}

  ins_cost(INSN_COST);
  ins_encode %{
    __ eor(as_Register($dst$$reg),
           as_Register($src1$$reg),
           as_Register($src2$$reg));
  %}

  ins_pipe(ialu_reg_reg);
%}
13846 
// Long bitwise XOR with a logical immediate.
instruct xorL_reg_imm(iRegLNoSp dst, iRegL src1, immLLog src2) %{
  match(Set dst (XorL src1 src2));

  ins_cost(INSN_COST);
  // Fix: format comment said "# int" for a 64-bit operation.
  format %{ "eor  $dst, $src1, $src2\t# long" %}

  ins_encode %{
    __ eor(as_Register($dst$$reg),
           as_Register($src1$$reg),
           (unsigned long)($src2$$constant));
  %}

  ins_pipe(ialu_reg_imm);
%}
13861 
// Sign-extend int to long: sbfm with lsb=0, width=32 is sxtw.
instruct convI2L_reg_reg(iRegLNoSp dst, iRegIorL2I src)
%{
  match(Set dst (ConvI2L src));

  ins_cost(INSN_COST);
  format %{ "sxtw  $dst, $src\t# i2l" %}
  ins_encode %{
    __ sbfm($dst$$Register, $src$$Register, 0, 31);
  %}
  ins_pipe(ialu_reg_shift);
%}

// this pattern occurs in bigmath arithmetic
// Zero-extend int to long: (long)i & 0xFFFFFFFF via ubfm 0,31.
instruct convUI2L_reg_reg(iRegLNoSp dst, iRegIorL2I src, immL_32bits mask)
%{
  match(Set dst (AndL (ConvI2L src) mask));

  ins_cost(INSN_COST);
  format %{ "ubfm  $dst, $src, 0, 31\t# ui2l" %}
  ins_encode %{
    __ ubfm($dst$$Register, $src$$Register, 0, 31);
  %}

  ins_pipe(ialu_reg_shift);
%}

// Truncate long to int: a 32-bit register move keeps the low word.
instruct convL2I_reg(iRegINoSp dst, iRegL src) %{
  match(Set dst (ConvL2I src));

  ins_cost(INSN_COST);
  format %{ "movw  $dst, $src \t// l2i" %}

  ins_encode %{
    __ movw(as_Register($dst$$reg), as_Register($src$$reg));
  %}

  ins_pipe(ialu_reg);
%}

// Int to boolean (0/1): dst = (src != 0). Clobbers the flags.
instruct convI2B(iRegINoSp dst, iRegIorL2I src, rFlagsReg cr)
%{
  match(Set dst (Conv2B src));
  effect(KILL cr);

  format %{
    "cmpw $src, zr\n\t"
    "cset $dst, ne"
  %}

  ins_encode %{
    __ cmpw(as_Register($src$$reg), zr);
    __ cset(as_Register($dst$$reg), Assembler::NE);
  %}

  ins_pipe(ialu_reg);
%}

// Pointer to boolean (0/1): dst = (src != NULL). Clobbers the flags.
instruct convP2B(iRegINoSp dst, iRegP src, rFlagsReg cr)
%{
  match(Set dst (Conv2B src));
  effect(KILL cr);

  format %{
    "cmp  $src, zr\n\t"
    "cset $dst, ne"
  %}

  ins_encode %{
    __ cmp(as_Register($src$$reg), zr);
    __ cset(as_Register($dst$$reg), Assembler::NE);
  %}

  ins_pipe(ialu_reg);
%}
13936 
// Double to float narrowing conversion.
instruct convD2F_reg(vRegF dst, vRegD src) %{
  match(Set dst (ConvD2F src));

  ins_cost(INSN_COST * 5);
  format %{ "fcvtd  $dst, $src \t// d2f" %}

  ins_encode %{
    __ fcvtd(as_FloatRegister($dst$$reg), as_FloatRegister($src$$reg));
  %}

  ins_pipe(fp_d2f);
%}

// Float to double widening conversion.
instruct convF2D_reg(vRegD dst, vRegF src) %{
  match(Set dst (ConvF2D src));

  ins_cost(INSN_COST * 5);
  format %{ "fcvts  $dst, $src \t// f2d" %}

  ins_encode %{
    __ fcvts(as_FloatRegister($dst$$reg), as_FloatRegister($src$$reg));
  %}

  ins_pipe(fp_f2d);
%}

// Float to int: fcvtzsw converts with round-toward-zero, saturating.
instruct convF2I_reg_reg(iRegINoSp dst, vRegF src) %{
  match(Set dst (ConvF2I src));

  ins_cost(INSN_COST * 5);
  format %{ "fcvtzsw  $dst, $src \t// f2i" %}

  ins_encode %{
    __ fcvtzsw(as_Register($dst$$reg), as_FloatRegister($src$$reg));
  %}

  ins_pipe(fp_f2i);
%}

// Float to long: fcvtzs converts with round-toward-zero, saturating.
instruct convF2L_reg_reg(iRegLNoSp dst, vRegF src) %{
  match(Set dst (ConvF2L src));

  ins_cost(INSN_COST * 5);
  format %{ "fcvtzs  $dst, $src \t// f2l" %}

  ins_encode %{
    __ fcvtzs(as_Register($dst$$reg), as_FloatRegister($src$$reg));
  %}

  ins_pipe(fp_f2l);
%}

// Signed int to float.
instruct convI2F_reg_reg(vRegF dst, iRegIorL2I src) %{
  match(Set dst (ConvI2F src));

  ins_cost(INSN_COST * 5);
  format %{ "scvtfws  $dst, $src \t// i2f" %}

  ins_encode %{
    __ scvtfws(as_FloatRegister($dst$$reg), as_Register($src$$reg));
  %}

  ins_pipe(fp_i2f);
%}

// Signed long to float.
instruct convL2F_reg_reg(vRegF dst, iRegL src) %{
  match(Set dst (ConvL2F src));

  ins_cost(INSN_COST * 5);
  format %{ "scvtfs  $dst, $src \t// l2f" %}

  ins_encode %{
    __ scvtfs(as_FloatRegister($dst$$reg), as_Register($src$$reg));
  %}

  ins_pipe(fp_l2f);
%}

// Double to int, round toward zero.
instruct convD2I_reg_reg(iRegINoSp dst, vRegD src) %{
  match(Set dst (ConvD2I src));

  ins_cost(INSN_COST * 5);
  format %{ "fcvtzdw  $dst, $src \t// d2i" %}

  ins_encode %{
    __ fcvtzdw(as_Register($dst$$reg), as_FloatRegister($src$$reg));
  %}

  ins_pipe(fp_d2i);
%}

// Double to long, round toward zero.
instruct convD2L_reg_reg(iRegLNoSp dst, vRegD src) %{
  match(Set dst (ConvD2L src));

  ins_cost(INSN_COST * 5);
  format %{ "fcvtzd  $dst, $src \t// d2l" %}

  ins_encode %{
    __ fcvtzd(as_Register($dst$$reg), as_FloatRegister($src$$reg));
  %}

  ins_pipe(fp_d2l);
%}

// Signed int to double.
instruct convI2D_reg_reg(vRegD dst, iRegIorL2I src) %{
  match(Set dst (ConvI2D src));

  ins_cost(INSN_COST * 5);
  format %{ "scvtfwd  $dst, $src \t// i2d" %}

  ins_encode %{
    __ scvtfwd(as_FloatRegister($dst$$reg), as_Register($src$$reg));
  %}

  ins_pipe(fp_i2d);
%}

// Signed long to double.
instruct convL2D_reg_reg(vRegD dst, iRegL src) %{
  match(Set dst (ConvL2D src));

  ins_cost(INSN_COST * 5);
  format %{ "scvtfd  $dst, $src \t// l2d" %}

  ins_encode %{
    __ scvtfd(as_FloatRegister($dst$$reg), as_Register($src$$reg));
  %}

  ins_pipe(fp_l2d);
%}
14066 
14067 // stack <-> reg and reg <-> reg shuffles with no conversion
14068 
// Reinterpret float bits on the stack as an int in a GP register.
instruct MoveF2I_stack_reg(iRegINoSp dst, stackSlotF src) %{

  match(Set dst (MoveF2I src));

  effect(DEF dst, USE src);

  ins_cost(4 * INSN_COST);

  format %{ "ldrw $dst, $src\t# MoveF2I_stack_reg" %}

  ins_encode %{
    __ ldrw($dst$$Register, Address(sp, $src$$disp));
  %}

  ins_pipe(iload_reg_reg);

%}

// Reinterpret int bits on the stack as a float in an FP register.
instruct MoveI2F_stack_reg(vRegF dst, stackSlotI src) %{

  match(Set dst (MoveI2F src));

  effect(DEF dst, USE src);

  ins_cost(4 * INSN_COST);

  format %{ "ldrs $dst, $src\t# MoveI2F_stack_reg" %}

  ins_encode %{
    __ ldrs(as_FloatRegister($dst$$reg), Address(sp, $src$$disp));
  %}

  ins_pipe(pipe_class_memory);

%}

// Reinterpret double bits on the stack as a long in a GP register.
instruct MoveD2L_stack_reg(iRegLNoSp dst, stackSlotD src) %{

  match(Set dst (MoveD2L src));

  effect(DEF dst, USE src);

  ins_cost(4 * INSN_COST);

  format %{ "ldr $dst, $src\t# MoveD2L_stack_reg" %}

  ins_encode %{
    __ ldr($dst$$Register, Address(sp, $src$$disp));
  %}

  ins_pipe(iload_reg_reg);

%}

// Reinterpret long bits on the stack as a double in an FP register.
instruct MoveL2D_stack_reg(vRegD dst, stackSlotL src) %{

  match(Set dst (MoveL2D src));

  effect(DEF dst, USE src);

  ins_cost(4 * INSN_COST);

  format %{ "ldrd $dst, $src\t# MoveL2D_stack_reg" %}

  ins_encode %{
    __ ldrd(as_FloatRegister($dst$$reg), Address(sp, $src$$disp));
  %}

  ins_pipe(pipe_class_memory);

%}

// Store float register bits to an int stack slot (no conversion).
instruct MoveF2I_reg_stack(stackSlotI dst, vRegF src) %{

  match(Set dst (MoveF2I src));

  effect(DEF dst, USE src);

  ins_cost(INSN_COST);

  format %{ "strs $src, $dst\t# MoveF2I_reg_stack" %}

  ins_encode %{
    __ strs(as_FloatRegister($src$$reg), Address(sp, $dst$$disp));
  %}

  ins_pipe(pipe_class_memory);

%}

// Store int register bits to a float stack slot (no conversion).
instruct MoveI2F_reg_stack(stackSlotF dst, iRegI src) %{

  match(Set dst (MoveI2F src));

  effect(DEF dst, USE src);

  ins_cost(INSN_COST);

  format %{ "strw $src, $dst\t# MoveI2F_reg_stack" %}

  ins_encode %{
    __ strw($src$$Register, Address(sp, $dst$$disp));
  %}

  ins_pipe(istore_reg_reg);

%}
14176 
// Store double register bits to a long stack slot (no conversion).
instruct MoveD2L_reg_stack(stackSlotL dst, vRegD src) %{

  match(Set dst (MoveD2L src));

  effect(DEF dst, USE src);

  ins_cost(INSN_COST);

  // Fix: format printed operands in the wrong order ("strd $dst, $src");
  // the store writes $src into stack slot $dst, matching the encoding
  // and the sibling MoveF2I_reg_stack format.
  format %{ "strd $src, $dst\t# MoveD2L_reg_stack" %}

  ins_encode %{
    __ strd(as_FloatRegister($src$$reg), Address(sp, $dst$$disp));
  %}

  ins_pipe(pipe_class_memory);

%}
14194 
// Store long register bits to a double stack slot (no conversion).
instruct MoveL2D_reg_stack(stackSlotD dst, iRegL src) %{

  match(Set dst (MoveL2D src));

  effect(DEF dst, USE src);

  ins_cost(INSN_COST);

  format %{ "str $src, $dst\t# MoveL2D_reg_stack" %}

  ins_encode %{
    __ str($src$$Register, Address(sp, $dst$$disp));
  %}

  ins_pipe(istore_reg_reg);

%}

// Bit-move float register to int register via fmov (no memory traffic).
instruct MoveF2I_reg_reg(iRegINoSp dst, vRegF src) %{

  match(Set dst (MoveF2I src));

  effect(DEF dst, USE src);

  ins_cost(INSN_COST);

  format %{ "fmovs $dst, $src\t# MoveF2I_reg_reg" %}

  ins_encode %{
    __ fmovs($dst$$Register, as_FloatRegister($src$$reg));
  %}

  ins_pipe(fp_f2i);

%}

// Bit-move int register to float register via fmov.
instruct MoveI2F_reg_reg(vRegF dst, iRegI src) %{

  match(Set dst (MoveI2F src));

  effect(DEF dst, USE src);

  ins_cost(INSN_COST);

  format %{ "fmovs $dst, $src\t# MoveI2F_reg_reg" %}

  ins_encode %{
    __ fmovs(as_FloatRegister($dst$$reg), $src$$Register);
  %}

  ins_pipe(fp_i2f);

%}

// Bit-move double register to long register via fmov.
instruct MoveD2L_reg_reg(iRegLNoSp dst, vRegD src) %{

  match(Set dst (MoveD2L src));

  effect(DEF dst, USE src);

  ins_cost(INSN_COST);

  format %{ "fmovd $dst, $src\t# MoveD2L_reg_reg" %}

  ins_encode %{
    __ fmovd($dst$$Register, as_FloatRegister($src$$reg));
  %}

  ins_pipe(fp_d2l);

%}

// Bit-move long register to double register via fmov.
instruct MoveL2D_reg_reg(vRegD dst, iRegL src) %{

  match(Set dst (MoveL2D src));

  effect(DEF dst, USE src);

  ins_cost(INSN_COST);

  format %{ "fmovd $dst, $src\t# MoveL2D_reg_reg" %}

  ins_encode %{
    __ fmovd(as_FloatRegister($dst$$reg), $src$$Register);
  %}

  ins_pipe(fp_l2d);

%}
14284 
14285 // ============================================================================
14286 // clearing of an array
14287 
// Zero cnt words starting at base. Requires fixed registers r10/r11
// because zero_words clobbers them; both are killed by the rule.
instruct clearArray_reg_reg(iRegL_R11 cnt, iRegP_R10 base, Universe dummy, rFlagsReg cr)
%{
  match(Set dummy (ClearArray cnt base));
  effect(USE_KILL cnt, USE_KILL base);

  ins_cost(4 * INSN_COST);
  format %{ "ClearArray $cnt, $base" %}

  ins_encode %{
    __ zero_words($base$$Register, $cnt$$Register);
  %}

  ins_pipe(pipe_class_memory);
%}

// Constant-count variant: only fires when the word count is below the
// block-zeroing threshold, so an inline zeroing sequence is profitable.
instruct clearArray_imm_reg(immL cnt, iRegP_R10 base, Universe dummy, rFlagsReg cr)
%{
  predicate((u_int64_t)n->in(2)->get_long()
            < (u_int64_t)(BlockZeroingLowLimit >> LogBytesPerWord));
  match(Set dummy (ClearArray cnt base));
  effect(USE_KILL base);

  ins_cost(4 * INSN_COST);
  format %{ "ClearArray $cnt, $base" %}

  ins_encode %{
    __ zero_words($base$$Register, (u_int64_t)$cnt$$constant);
  %}

  ins_pipe(pipe_class_memory);
%}
14319 
14320 // ============================================================================
14321 // Overflow Math Instructions
14322 
// Int add overflow check: cmnw computes op1 + op2 and sets flags
// (V set on signed overflow) without writing a result register.
instruct overflowAddI_reg_reg(rFlagsReg cr, iRegIorL2I op1, iRegIorL2I op2)
%{
  match(Set cr (OverflowAddI op1 op2));

  format %{ "cmnw  $op1, $op2\t# overflow check int" %}
  ins_cost(INSN_COST);
  ins_encode %{
    __ cmnw($op1$$Register, $op2$$Register);
  %}

  ins_pipe(icmp_reg_reg);
%}

// Int add overflow check, immediate operand.
instruct overflowAddI_reg_imm(rFlagsReg cr, iRegIorL2I op1, immIAddSub op2)
%{
  match(Set cr (OverflowAddI op1 op2));

  format %{ "cmnw  $op1, $op2\t# overflow check int" %}
  ins_cost(INSN_COST);
  ins_encode %{
    __ cmnw($op1$$Register, $op2$$constant);
  %}

  ins_pipe(icmp_reg_imm);
%}

// Long add overflow check.
instruct overflowAddL_reg_reg(rFlagsReg cr, iRegL op1, iRegL op2)
%{
  match(Set cr (OverflowAddL op1 op2));

  format %{ "cmn   $op1, $op2\t# overflow check long" %}
  ins_cost(INSN_COST);
  ins_encode %{
    __ cmn($op1$$Register, $op2$$Register);
  %}

  ins_pipe(icmp_reg_reg);
%}

// Long add overflow check, immediate operand.
instruct overflowAddL_reg_imm(rFlagsReg cr, iRegL op1, immLAddSub op2)
%{
  match(Set cr (OverflowAddL op1 op2));

  format %{ "cmn   $op1, $op2\t# overflow check long" %}
  ins_cost(INSN_COST);
  ins_encode %{
    __ cmn($op1$$Register, $op2$$constant);
  %}

  ins_pipe(icmp_reg_imm);
%}

// Int subtract overflow check: cmpw sets V on signed overflow.
instruct overflowSubI_reg_reg(rFlagsReg cr, iRegIorL2I op1, iRegIorL2I op2)
%{
  match(Set cr (OverflowSubI op1 op2));

  format %{ "cmpw  $op1, $op2\t# overflow check int" %}
  ins_cost(INSN_COST);
  ins_encode %{
    __ cmpw($op1$$Register, $op2$$Register);
  %}

  ins_pipe(icmp_reg_reg);
%}

// Int subtract overflow check, immediate operand.
instruct overflowSubI_reg_imm(rFlagsReg cr, iRegIorL2I op1, immIAddSub op2)
%{
  match(Set cr (OverflowSubI op1 op2));

  format %{ "cmpw  $op1, $op2\t# overflow check int" %}
  ins_cost(INSN_COST);
  ins_encode %{
    __ cmpw($op1$$Register, $op2$$constant);
  %}

  ins_pipe(icmp_reg_imm);
%}

// Long subtract overflow check.
instruct overflowSubL_reg_reg(rFlagsReg cr, iRegL op1, iRegL op2)
%{
  match(Set cr (OverflowSubL op1 op2));

  format %{ "cmp   $op1, $op2\t# overflow check long" %}
  ins_cost(INSN_COST);
  ins_encode %{
    __ cmp($op1$$Register, $op2$$Register);
  %}

  ins_pipe(icmp_reg_reg);
%}

// Long subtract overflow check, immediate operand.
instruct overflowSubL_reg_imm(rFlagsReg cr, iRegL op1, immLAddSub op2)
%{
  match(Set cr (OverflowSubL op1 op2));

  format %{ "cmp   $op1, $op2\t# overflow check long" %}
  ins_cost(INSN_COST);
  ins_encode %{
    __ cmp($op1$$Register, $op2$$constant);
  %}

  ins_pipe(icmp_reg_imm);
%}

// Int negate overflow check: matched as 0 - op1.
instruct overflowNegI_reg(rFlagsReg cr, immI0 zero, iRegIorL2I op1)
%{
  match(Set cr (OverflowSubI zero op1));

  format %{ "cmpw  zr, $op1\t# overflow check int" %}
  ins_cost(INSN_COST);
  ins_encode %{
    __ cmpw(zr, $op1$$Register);
  %}

  ins_pipe(icmp_reg_imm);
%}

// Long negate overflow check: matched as 0 - op1.
// NOTE(review): 'zero' is declared immI0 though the operation is long;
// it only anchors the match — confirm immL0 is not required here.
instruct overflowNegL_reg(rFlagsReg cr, immI0 zero, iRegL op1)
%{
  match(Set cr (OverflowSubL zero op1));

  format %{ "cmp   zr, $op1\t# overflow check long" %}
  ins_cost(INSN_COST);
  ins_encode %{
    __ cmp(zr, $op1$$Register);
  %}

  ins_pipe(icmp_reg_imm);
%}

// Int multiply overflow check: widen to 64 bits with smull, then compare
// the full product against its own sign-extended low word; a mismatch
// means the product does not fit in 32 bits. The result is materialized
// into the V flag via the 0x80000000 - 1 trick below.
instruct overflowMulI_reg(rFlagsReg cr, iRegIorL2I op1, iRegIorL2I op2)
%{
  match(Set cr (OverflowMulI op1 op2));

  format %{ "smull rscratch1, $op1, $op2\t# overflow check int\n\t"
            "cmp   rscratch1, rscratch1, sxtw\n\t"
            "movw  rscratch1, #0x80000000\n\t"
            "cselw rscratch1, rscratch1, zr, NE\n\t"
            "cmpw  rscratch1, #1" %}
  ins_cost(5 * INSN_COST);
  ins_encode %{
    __ smull(rscratch1, $op1$$Register, $op2$$Register);
    __ subs(zr, rscratch1, rscratch1, ext::sxtw);      // NE => overflow
    __ movw(rscratch1, 0x80000000);                    // Develop 0 (EQ),
    __ cselw(rscratch1, rscratch1, zr, Assembler::NE); // or 0x80000000 (NE)
    __ cmpw(rscratch1, 1);                             // 0x80000000 - 1 => VS
  %}

  ins_pipe(pipe_slow);
%}

// Int multiply overflow check fused with a branch: when the If consumer
// only tests overflow/no_overflow, skip materializing the V flag and
// branch directly on the NE/EQ result of the widened comparison.
instruct overflowMulI_reg_branch(cmpOp cmp, iRegIorL2I op1, iRegIorL2I op2, label labl, rFlagsReg cr)
%{
  match(If cmp (OverflowMulI op1 op2));
  predicate(n->in(1)->as_Bool()->_test._test == BoolTest::overflow
            || n->in(1)->as_Bool()->_test._test == BoolTest::no_overflow);
  effect(USE labl, KILL cr);

  format %{ "smull rscratch1, $op1, $op2\t# overflow check int\n\t"
            "cmp   rscratch1, rscratch1, sxtw\n\t"
            "b$cmp   $labl" %}
  ins_cost(3 * INSN_COST); // Branch is rare so treat as INSN_COST
  ins_encode %{
    Label* L = $labl$$label;
    Assembler::Condition cond = (Assembler::Condition)$cmp$$cmpcode;
    __ smull(rscratch1, $op1$$Register, $op2$$Register);
    __ subs(zr, rscratch1, rscratch1, ext::sxtw);      // NE => overflow
    __ br(cond == Assembler::VS ? Assembler::NE : Assembler::EQ, *L);
  %}

  ins_pipe(pipe_serial);
%}
14495 
// Long multiply overflow check.  mul gives the low 64 bits and smulh
// the high 64 bits of the 128-bit product; the product fits in 64
// bits iff the high half is a pure sign extension of the low half,
// i.e. equals (low >> 63) arithmetically.  The tail converts the
// NE outcome into the V flag, as in overflowMulI_reg.
instruct overflowMulL_reg(rFlagsReg cr, iRegL op1, iRegL op2)
%{
  match(Set cr (OverflowMulL op1 op2));

  format %{ "mul   rscratch1, $op1, $op2\t#overflow check long\n\t"
            "smulh rscratch2, $op1, $op2\n\t"
            "cmp   rscratch2, rscratch1, ASR #63\n\t"
            "movw  rscratch1, #0x80000000\n\t"
            "cselw rscratch1, rscratch1, zr, NE\n\t"
            "cmpw  rscratch1, #1" %}
  ins_cost(6 * INSN_COST);
  ins_encode %{
    __ mul(rscratch1, $op1$$Register, $op2$$Register);   // Result bits 0..63
    __ smulh(rscratch2, $op1$$Register, $op2$$Register); // Result bits 64..127
    __ cmp(rscratch2, rscratch1, Assembler::ASR, 63);    // Top is pure sign ext
    __ movw(rscratch1, 0x80000000);                    // Develop 0 (EQ),
    __ cselw(rscratch1, rscratch1, zr, Assembler::NE); // or 0x80000000 (NE)
    __ cmpw(rscratch1, 1);                             // 0x80000000 - 1 => VS
  %}

  ins_pipe(pipe_slow);
%}

// Fused long multiply-overflow check and branch; same shortcut as
// overflowMulI_reg_branch when the If tests only overflow/no_overflow.
instruct overflowMulL_reg_branch(cmpOp cmp, iRegL op1, iRegL op2, label labl, rFlagsReg cr)
%{
  match(If cmp (OverflowMulL op1 op2));
  predicate(n->in(1)->as_Bool()->_test._test == BoolTest::overflow
            || n->in(1)->as_Bool()->_test._test == BoolTest::no_overflow);
  effect(USE labl, KILL cr);

  format %{ "mul   rscratch1, $op1, $op2\t#overflow check long\n\t"
            "smulh rscratch2, $op1, $op2\n\t"
            "cmp   rscratch2, rscratch1, ASR #63\n\t"
            "b$cmp $labl" %}
  ins_cost(4 * INSN_COST); // Branch is rare so treat as INSN_COST
  ins_encode %{
    Label* L = $labl$$label;
    Assembler::Condition cond = (Assembler::Condition)$cmp$$cmpcode;
    __ mul(rscratch1, $op1$$Register, $op2$$Register);   // Result bits 0..63
    __ smulh(rscratch2, $op1$$Register, $op2$$Register); // Result bits 64..127
    __ cmp(rscratch2, rscratch1, Assembler::ASR, 63);    // Top is pure sign ext
    __ br(cond == Assembler::VS ? Assembler::NE : Assembler::EQ, *L);
  %}

  ins_pipe(pipe_serial);
%}
14542 
14543 // ============================================================================
14544 // Compare Instructions
14545 
// Signed int compare, register-register.
instruct compI_reg_reg(rFlagsReg cr, iRegI op1, iRegI op2)
%{
  match(Set cr (CmpI op1 op2));

  effect(DEF cr, USE op1, USE op2);

  ins_cost(INSN_COST);
  format %{ "cmpw  $op1, $op2" %}

  ins_encode(aarch64_enc_cmpw(op1, op2));

  ins_pipe(icmp_reg_reg);
%}

// Signed int compare against zero.
instruct compI_reg_immI0(rFlagsReg cr, iRegI op1, immI0 zero)
%{
  match(Set cr (CmpI op1 zero));

  effect(DEF cr, USE op1);

  ins_cost(INSN_COST);
  format %{ "cmpw $op1, 0" %}

  ins_encode(aarch64_enc_cmpw_imm_addsub(op1, zero));

  ins_pipe(icmp_reg_imm);
%}

// Signed int compare against an immediate that fits the add/sub
// immediate encoding: a single instruction.
instruct compI_reg_immIAddSub(rFlagsReg cr, iRegI op1, immIAddSub op2)
%{
  match(Set cr (CmpI op1 op2));

  effect(DEF cr, USE op1);

  ins_cost(INSN_COST);
  format %{ "cmpw  $op1, $op2" %}

  ins_encode(aarch64_enc_cmpw_imm_addsub(op1, op2));

  ins_pipe(icmp_reg_imm);
%}

// Signed int compare against an arbitrary immediate; costed as two
// instructions (the constant may need to be materialized first).
instruct compI_reg_immI(rFlagsReg cr, iRegI op1, immI op2)
%{
  match(Set cr (CmpI op1 op2));

  effect(DEF cr, USE op1);

  ins_cost(INSN_COST * 2);
  format %{ "cmpw  $op1, $op2" %}

  ins_encode(aarch64_enc_cmpw_imm(op1, op2));

  ins_pipe(icmp_reg_imm);
%}

// Unsigned compare Instructions; really, same as signed compare
// except it should only be used to feed an If or a CMovI which takes a
// cmpOpU.

instruct compU_reg_reg(rFlagsRegU cr, iRegI op1, iRegI op2)
%{
  match(Set cr (CmpU op1 op2));

  effect(DEF cr, USE op1, USE op2);

  ins_cost(INSN_COST);
  format %{ "cmpw  $op1, $op2\t# unsigned" %}

  ins_encode(aarch64_enc_cmpw(op1, op2));

  ins_pipe(icmp_reg_reg);
%}

// Unsigned int compare against zero.
instruct compU_reg_immI0(rFlagsRegU cr, iRegI op1, immI0 zero)
%{
  match(Set cr (CmpU op1 zero));

  effect(DEF cr, USE op1);

  ins_cost(INSN_COST);
  format %{ "cmpw $op1, #0\t# unsigned" %}

  ins_encode(aarch64_enc_cmpw_imm_addsub(op1, zero));

  ins_pipe(icmp_reg_imm);
%}

// Unsigned int compare against an add/sub-encodable immediate.
instruct compU_reg_immIAddSub(rFlagsRegU cr, iRegI op1, immIAddSub op2)
%{
  match(Set cr (CmpU op1 op2));

  effect(DEF cr, USE op1);

  ins_cost(INSN_COST);
  format %{ "cmpw  $op1, $op2\t# unsigned" %}

  ins_encode(aarch64_enc_cmpw_imm_addsub(op1, op2));

  ins_pipe(icmp_reg_imm);
%}

// Unsigned int compare against an arbitrary immediate (two insns).
instruct compU_reg_immI(rFlagsRegU cr, iRegI op1, immI op2)
%{
  match(Set cr (CmpU op1 op2));

  effect(DEF cr, USE op1);

  ins_cost(INSN_COST * 2);
  format %{ "cmpw  $op1, $op2\t# unsigned" %}

  ins_encode(aarch64_enc_cmpw_imm(op1, op2));

  ins_pipe(icmp_reg_imm);
%}
14661 
// Signed long compare, register-register.
instruct compL_reg_reg(rFlagsReg cr, iRegL op1, iRegL op2)
%{
  match(Set cr (CmpL op1 op2));

  effect(DEF cr, USE op1, USE op2);

  ins_cost(INSN_COST);
  format %{ "cmp  $op1, $op2" %}

  ins_encode(aarch64_enc_cmp(op1, op2));

  ins_pipe(icmp_reg_reg);
%}

// Signed long compare against zero.
instruct compL_reg_immL0(rFlagsReg cr, iRegL op1, immL0 zero)
%{
  match(Set cr (CmpL op1 zero));

  effect(DEF cr, USE op1);

  ins_cost(INSN_COST);
  format %{ "tst  $op1" %}

  ins_encode(aarch64_enc_cmp_imm_addsub(op1, zero));

  ins_pipe(icmp_reg_imm);
%}

// Signed long compare against an add/sub-encodable immediate.
instruct compL_reg_immLAddSub(rFlagsReg cr, iRegL op1, immLAddSub op2)
%{
  match(Set cr (CmpL op1 op2));

  effect(DEF cr, USE op1);

  ins_cost(INSN_COST);
  format %{ "cmp  $op1, $op2" %}

  ins_encode(aarch64_enc_cmp_imm_addsub(op1, op2));

  ins_pipe(icmp_reg_imm);
%}

// Signed long compare against an arbitrary immediate (two insns).
instruct compL_reg_immL(rFlagsReg cr, iRegL op1, immL op2)
%{
  match(Set cr (CmpL op1 op2));

  effect(DEF cr, USE op1);

  ins_cost(INSN_COST * 2);
  format %{ "cmp  $op1, $op2" %}

  ins_encode(aarch64_enc_cmp_imm(op1, op2));

  ins_pipe(icmp_reg_imm);
%}

// Unsigned long compares: identical encodings to the signed forms but
// produce rFlagsRegU so consumers use unsigned condition codes.

instruct compUL_reg_reg(rFlagsRegU cr, iRegL op1, iRegL op2)
%{
  match(Set cr (CmpUL op1 op2));

  effect(DEF cr, USE op1, USE op2);

  ins_cost(INSN_COST);
  format %{ "cmp  $op1, $op2" %}

  ins_encode(aarch64_enc_cmp(op1, op2));

  ins_pipe(icmp_reg_reg);
%}

// Unsigned long compare against zero.
instruct compUL_reg_immL0(rFlagsRegU cr, iRegL op1, immL0 zero)
%{
  match(Set cr (CmpUL op1 zero));

  effect(DEF cr, USE op1);

  ins_cost(INSN_COST);
  format %{ "tst  $op1" %}

  ins_encode(aarch64_enc_cmp_imm_addsub(op1, zero));

  ins_pipe(icmp_reg_imm);
%}

// Unsigned long compare against an add/sub-encodable immediate.
instruct compUL_reg_immLAddSub(rFlagsRegU cr, iRegL op1, immLAddSub op2)
%{
  match(Set cr (CmpUL op1 op2));

  effect(DEF cr, USE op1);

  ins_cost(INSN_COST);
  format %{ "cmp  $op1, $op2" %}

  ins_encode(aarch64_enc_cmp_imm_addsub(op1, op2));

  ins_pipe(icmp_reg_imm);
%}

// Unsigned long compare against an arbitrary immediate (two insns).
instruct compUL_reg_immL(rFlagsRegU cr, iRegL op1, immL op2)
%{
  match(Set cr (CmpUL op1 op2));

  effect(DEF cr, USE op1);

  ins_cost(INSN_COST * 2);
  format %{ "cmp  $op1, $op2" %}

  ins_encode(aarch64_enc_cmp_imm(op1, op2));

  ins_pipe(icmp_reg_imm);
%}
14773 
// Pointer compare, register-register.  Pointer comparisons are
// unsigned, hence rFlagsRegU.
instruct compP_reg_reg(rFlagsRegU cr, iRegP op1, iRegP op2)
%{
  match(Set cr (CmpP op1 op2));

  effect(DEF cr, USE op1, USE op2);

  ins_cost(INSN_COST);
  format %{ "cmp  $op1, $op2\t // ptr" %}

  ins_encode(aarch64_enc_cmpp(op1, op2));

  ins_pipe(icmp_reg_reg);
%}

// Compressed-oop compare, register-register.
instruct compN_reg_reg(rFlagsRegU cr, iRegN op1, iRegN op2)
%{
  match(Set cr (CmpN op1 op2));

  effect(DEF cr, USE op1, USE op2);

  ins_cost(INSN_COST);
  format %{ "cmp  $op1, $op2\t // compressed ptr" %}

  ins_encode(aarch64_enc_cmpn(op1, op2));

  ins_pipe(icmp_reg_reg);
%}

// Pointer null test.
instruct testP_reg(rFlagsRegU cr, iRegP op1, immP0 zero)
%{
  match(Set cr (CmpP op1 zero));

  effect(DEF cr, USE op1, USE zero);

  ins_cost(INSN_COST);
  format %{ "cmp  $op1, 0\t // ptr" %}

  ins_encode(aarch64_enc_testp(op1));

  ins_pipe(icmp_reg_imm);
%}

// Compressed-oop null test.
instruct testN_reg(rFlagsRegU cr, iRegN op1, immN0 zero)
%{
  match(Set cr (CmpN op1 zero));

  effect(DEF cr, USE op1, USE zero);

  ins_cost(INSN_COST);
  format %{ "cmp  $op1, 0\t // compressed ptr" %}

  ins_encode(aarch64_enc_testn(op1));

  ins_pipe(icmp_reg_imm);
%}
14829 
14830 // FP comparisons
14831 //
14832 // n.b. CmpF/CmpD set a normal flags reg which then gets compared
14833 // using normal cmpOp. See declaration of rFlagsReg for details.
14834 
// Float compare, register-register.
instruct compF_reg_reg(rFlagsReg cr, vRegF src1, vRegF src2)
%{
  match(Set cr (CmpF src1 src2));

  ins_cost(3 * INSN_COST);
  format %{ "fcmps $src1, $src2" %}

  ins_encode %{
    __ fcmps(as_FloatRegister($src1$$reg), as_FloatRegister($src2$$reg));
  %}

  ins_pipe(pipe_class_compare);
%}
14848 
// Float compare against constant zero, using the immediate form of
// FCMP.
instruct compF_reg_zero(rFlagsReg cr, vRegF src1, immF0 src2)
%{
  match(Set cr (CmpF src1 src2));

  ins_cost(3 * INSN_COST);
  format %{ "fcmps $src1, 0.0" %}

  ins_encode %{
    // Plain 0.0: the previous "0.0D" suffix is not standard C++ and
    // is rejected by modern compilers.
    __ fcmps(as_FloatRegister($src1$$reg), 0.0);
  %}

  ins_pipe(pipe_class_compare);
%}
14862 // FROM HERE
14863 
// Double compare, register-register.
instruct compD_reg_reg(rFlagsReg cr, vRegD src1, vRegD src2)
%{
  match(Set cr (CmpD src1 src2));

  ins_cost(3 * INSN_COST);
  format %{ "fcmpd $src1, $src2" %}

  ins_encode %{
    __ fcmpd(as_FloatRegister($src1$$reg), as_FloatRegister($src2$$reg));
  %}

  ins_pipe(pipe_class_compare);
%}
14877 
// Double compare against constant zero, using the immediate form of
// FCMP.
instruct compD_reg_zero(rFlagsReg cr, vRegD src1, immD0 src2)
%{
  match(Set cr (CmpD src1 src2));

  ins_cost(3 * INSN_COST);
  format %{ "fcmpd $src1, 0.0" %}

  ins_encode %{
    // Plain 0.0: the previous "0.0D" suffix is not standard C++ and
    // is rejected by modern compilers.
    __ fcmpd(as_FloatRegister($src1$$reg), 0.0);
  %}

  ins_pipe(pipe_class_compare);
%}
14891 
// Three-way float compare: dst := -1, 0 or +1 (unordered/NaN compares
// as less).  Branch-free: csinv produces 0 (EQ) or -1, csneg then
// keeps -1 (LT, which includes unordered) or negates it to +1.
// NOTE(review): removed an unused "Label done" and its bind() — no
// branch ever targeted it, so no code is affected.  Also balanced the
// parentheses in the format string.
instruct compF3_reg_reg(iRegINoSp dst, vRegF src1, vRegF src2, rFlagsReg cr)
%{
  match(Set dst (CmpF3 src1 src2));
  effect(KILL cr);

  ins_cost(5 * INSN_COST);
  format %{ "fcmps $src1, $src2\n\t"
            "csinvw($dst, zr, zr, eq)\n\t"
            "csnegw($dst, $dst, $dst, lt)"
  %}

  ins_encode %{
    FloatRegister s1 = as_FloatRegister($src1$$reg);
    FloatRegister s2 = as_FloatRegister($src2$$reg);
    Register d = as_Register($dst$$reg);
    __ fcmps(s1, s2);
    // installs 0 if EQ else -1
    __ csinvw(d, zr, zr, Assembler::EQ);
    // keeps -1 if less or unordered else installs 1
    __ csnegw(d, d, d, Assembler::LT);
  %}

  ins_pipe(pipe_class_default);

%}
14919 
// Three-way double compare: dst := -1, 0 or +1 (unordered/NaN
// compares as less).  Branch-free; see compF3_reg_reg for the
// csinv/csneg trick.
// NOTE(review): removed an unused "Label done" and its bind() — no
// branch ever targeted it, so no code is affected.  Also balanced the
// parentheses in the format string.
instruct compD3_reg_reg(iRegINoSp dst, vRegD src1, vRegD src2, rFlagsReg cr)
%{
  match(Set dst (CmpD3 src1 src2));
  effect(KILL cr);

  ins_cost(5 * INSN_COST);
  format %{ "fcmpd $src1, $src2\n\t"
            "csinvw($dst, zr, zr, eq)\n\t"
            "csnegw($dst, $dst, $dst, lt)"
  %}

  ins_encode %{
    FloatRegister s1 = as_FloatRegister($src1$$reg);
    FloatRegister s2 = as_FloatRegister($src2$$reg);
    Register d = as_Register($dst$$reg);
    __ fcmpd(s1, s2);
    // installs 0 if EQ else -1
    __ csinvw(d, zr, zr, Assembler::EQ);
    // keeps -1 if less or unordered else installs 1
    __ csnegw(d, d, d, Assembler::LT);
  %}
  ins_pipe(pipe_class_default);

%}
14946 
// Three-way float compare against constant zero; same branch-free
// sequence as compF3_reg_reg, with the immediate form of FCMP.
// NOTE(review): replaced the non-standard "0.0D" literal suffix with
// plain 0.0, removed an unused "Label done"/bind() pair, and balanced
// the parentheses in the format string.
instruct compF3_reg_immF0(iRegINoSp dst, vRegF src1, immF0 zero, rFlagsReg cr)
%{
  match(Set dst (CmpF3 src1 zero));
  effect(KILL cr);

  ins_cost(5 * INSN_COST);
  format %{ "fcmps $src1, 0.0\n\t"
            "csinvw($dst, zr, zr, eq)\n\t"
            "csnegw($dst, $dst, $dst, lt)"
  %}

  ins_encode %{
    FloatRegister s1 = as_FloatRegister($src1$$reg);
    Register d = as_Register($dst$$reg);
    __ fcmps(s1, 0.0);
    // installs 0 if EQ else -1
    __ csinvw(d, zr, zr, Assembler::EQ);
    // keeps -1 if less or unordered else installs 1
    __ csnegw(d, d, d, Assembler::LT);
  %}

  ins_pipe(pipe_class_default);

%}
14973 
// Three-way double compare against constant zero; same branch-free
// sequence as compD3_reg_reg, with the immediate form of FCMP.
// NOTE(review): replaced the non-standard "0.0D" literal suffix with
// plain 0.0, removed an unused "Label done"/bind() pair, and balanced
// the parentheses in the format string.
instruct compD3_reg_immD0(iRegINoSp dst, vRegD src1, immD0 zero, rFlagsReg cr)
%{
  match(Set dst (CmpD3 src1 zero));
  effect(KILL cr);

  ins_cost(5 * INSN_COST);
  format %{ "fcmpd $src1, 0.0\n\t"
            "csinvw($dst, zr, zr, eq)\n\t"
            "csnegw($dst, $dst, $dst, lt)"
  %}

  ins_encode %{
    FloatRegister s1 = as_FloatRegister($src1$$reg);
    Register d = as_Register($dst$$reg);
    __ fcmpd(s1, 0.0);
    // installs 0 if EQ else -1
    __ csinvw(d, zr, zr, Assembler::EQ);
    // keeps -1 if less or unordered else installs 1
    __ csnegw(d, d, d, Assembler::LT);
  %}
  ins_pipe(pipe_class_default);

%}
14999 
// CmpLTMask: dst := (p < q) ? -1 : 0.  Set a 0/1 via csetw on LT,
// then negate to smear it into a full-width mask.
instruct cmpLTMask_reg_reg(iRegINoSp dst, iRegIorL2I p, iRegIorL2I q, rFlagsReg cr)
%{
  match(Set dst (CmpLTMask p q));
  effect(KILL cr);

  ins_cost(3 * INSN_COST);

  format %{ "cmpw $p, $q\t# cmpLTMask\n\t"
            "csetw $dst, lt\n\t"
            "subw $dst, zr, $dst"
  %}

  ins_encode %{
    __ cmpw(as_Register($p$$reg), as_Register($q$$reg));
    __ csetw(as_Register($dst$$reg), Assembler::LT);
    __ subw(as_Register($dst$$reg), zr, as_Register($dst$$reg));
  %}

  ins_pipe(ialu_reg_reg);
%}

// Special case against zero: (src < 0) ? -1 : 0 is just the sign bit
// smeared across the word — a single arithmetic shift right by 31.
instruct cmpLTMask_reg_zero(iRegINoSp dst, iRegIorL2I src, immI0 zero, rFlagsReg cr)
%{
  match(Set dst (CmpLTMask src zero));
  effect(KILL cr);

  ins_cost(INSN_COST);

  format %{ "asrw $dst, $src, #31\t# cmpLTMask0" %}

  ins_encode %{
    __ asrw(as_Register($dst$$reg), as_Register($src$$reg), 31);
  %}

  ins_pipe(ialu_reg_shift);
%}
15036 
15037 // ============================================================================
15038 // Max and Min
15039 
// Signed int minimum: compare, then conditional-select src1 on LT.
instruct minI_rReg(iRegINoSp dst, iRegI src1, iRegI src2, rFlagsReg cr)
%{
  match(Set dst (MinI src1 src2));

  effect(DEF dst, USE src1, USE src2, KILL cr);
  size(8);

  ins_cost(INSN_COST * 3);
  format %{
    "cmpw $src1 $src2\t signed int\n\t"
    "cselw $dst, $src1, $src2 lt\t"
  %}

  ins_encode %{
    __ cmpw(as_Register($src1$$reg),
            as_Register($src2$$reg));
    __ cselw(as_Register($dst$$reg),
             as_Register($src1$$reg),
             as_Register($src2$$reg),
             Assembler::LT);
  %}

  ins_pipe(ialu_reg_reg);
%}
15064 // FROM HERE
15065 
// Signed int maximum: compare, then conditional-select src1 on GT.
instruct maxI_rReg(iRegINoSp dst, iRegI src1, iRegI src2, rFlagsReg cr)
%{
  match(Set dst (MaxI src1 src2));

  effect(DEF dst, USE src1, USE src2, KILL cr);
  size(8);

  ins_cost(INSN_COST * 3);
  format %{
    "cmpw $src1 $src2\t signed int\n\t"
    "cselw $dst, $src1, $src2 gt\t"
  %}

  ins_encode %{
    __ cmpw(as_Register($src1$$reg),
            as_Register($src2$$reg));
    __ cselw(as_Register($dst$$reg),
             as_Register($src1$$reg),
             as_Register($src2$$reg),
             Assembler::GT);
  %}

  ins_pipe(ialu_reg_reg);
%}
15090 
15091 // ============================================================================
15092 // Branch Instructions
15093 
15094 // Direct Branch.
// Unconditional direct branch (Goto).
instruct branch(label lbl)
%{
  match(Goto);

  effect(USE lbl);

  ins_cost(BRANCH_COST);
  format %{ "b  $lbl" %}

  ins_encode(aarch64_enc_b(lbl));

  ins_pipe(pipe_branch);
%}

// Conditional Near Branch
instruct branchCon(cmpOp cmp, rFlagsReg cr, label lbl)
%{
  // Same match rule as `branchConFar'.
  match(If cmp cr);

  effect(USE lbl);

  ins_cost(BRANCH_COST);
  // If set to 1 this indicates that the current instruction is a
  // short variant of a long branch. This avoids using this
  // instruction in first-pass matching. It will then only be used in
  // the `Shorten_branches' pass.
  // ins_short_branch(1);
  format %{ "b$cmp  $lbl" %}

  ins_encode(aarch64_enc_br_con(cmp, lbl));

  ins_pipe(pipe_branch_cond);
%}

// Conditional Near Branch Unsigned
instruct branchConU(cmpOpU cmp, rFlagsRegU cr, label lbl)
%{
  // Same match rule as `branchConFar'.
  match(If cmp cr);

  effect(USE lbl);

  ins_cost(BRANCH_COST);
  // If set to 1 this indicates that the current instruction is a
  // short variant of a long branch. This avoids using this
  // instruction in first-pass matching. It will then only be used in
  // the `Shorten_branches' pass.
  // ins_short_branch(1);
  format %{ "b$cmp  $lbl\t# unsigned" %}

  ins_encode(aarch64_enc_br_conU(cmp, lbl));

  ins_pipe(pipe_branch_cond);
%}
15150 
15151 // Make use of CBZ and CBNZ.  These instructions, as well as being
15152 // shorter than (cmp; branch), have the additional benefit of not
15153 // killing the flags.
15154 
// Int ==/!= 0 branch via CBZW/CBNZW (flags untouched).
instruct cmpI_imm0_branch(cmpOpEqNe cmp, iRegIorL2I op1, immI0 op2, label labl, rFlagsReg cr) %{
  match(If cmp (CmpI op1 op2));
  effect(USE labl);

  ins_cost(BRANCH_COST);
  format %{ "cbw$cmp   $op1, $labl" %}
  ins_encode %{
    Label* L = $labl$$label;
    Assembler::Condition cond = (Assembler::Condition)$cmp$$cmpcode;
    if (cond == Assembler::EQ)
      __ cbzw($op1$$Register, *L);
    else
      __ cbnzw($op1$$Register, *L);
  %}
  ins_pipe(pipe_cmp_branch);
%}

// Long ==/!= 0 branch via CBZ/CBNZ.
instruct cmpL_imm0_branch(cmpOpEqNe cmp, iRegL op1, immL0 op2, label labl, rFlagsReg cr) %{
  match(If cmp (CmpL op1 op2));
  effect(USE labl);

  ins_cost(BRANCH_COST);
  format %{ "cb$cmp   $op1, $labl" %}
  ins_encode %{
    Label* L = $labl$$label;
    Assembler::Condition cond = (Assembler::Condition)$cmp$$cmpcode;
    if (cond == Assembler::EQ)
      __ cbz($op1$$Register, *L);
    else
      __ cbnz($op1$$Register, *L);
  %}
  ins_pipe(pipe_cmp_branch);
%}

// Pointer ==/!= null branch via CBZ/CBNZ.
instruct cmpP_imm0_branch(cmpOpEqNe cmp, iRegP op1, immP0 op2, label labl, rFlagsReg cr) %{
  match(If cmp (CmpP op1 op2));
  effect(USE labl);

  ins_cost(BRANCH_COST);
  format %{ "cb$cmp   $op1, $labl" %}
  ins_encode %{
    Label* L = $labl$$label;
    Assembler::Condition cond = (Assembler::Condition)$cmp$$cmpcode;
    if (cond == Assembler::EQ)
      __ cbz($op1$$Register, *L);
    else
      __ cbnz($op1$$Register, *L);
  %}
  ins_pipe(pipe_cmp_branch);
%}

// Compressed-oop ==/!= null branch via CBZW/CBNZW.
instruct cmpN_imm0_branch(cmpOpEqNe cmp, iRegN op1, immN0 op2, label labl, rFlagsReg cr) %{
  match(If cmp (CmpN op1 op2));
  effect(USE labl);

  ins_cost(BRANCH_COST);
  format %{ "cbw$cmp   $op1, $labl" %}
  ins_encode %{
    Label* L = $labl$$label;
    Assembler::Condition cond = (Assembler::Condition)$cmp$$cmpcode;
    if (cond == Assembler::EQ)
      __ cbzw($op1$$Register, *L);
    else
      __ cbnzw($op1$$Register, *L);
  %}
  ins_pipe(pipe_cmp_branch);
%}

// Null test of a decoded narrow oop: the decoded pointer is null iff
// the narrow oop is zero, so test the narrow register directly and
// skip the decode.
instruct cmpP_narrowOop_imm0_branch(cmpOpEqNe cmp, iRegN oop, immP0 zero, label labl, rFlagsReg cr) %{
  match(If cmp (CmpP (DecodeN oop) zero));
  effect(USE labl);

  ins_cost(BRANCH_COST);
  format %{ "cb$cmp   $oop, $labl" %}
  ins_encode %{
    Label* L = $labl$$label;
    Assembler::Condition cond = (Assembler::Condition)$cmp$$cmpcode;
    if (cond == Assembler::EQ)
      __ cbzw($oop$$Register, *L);
    else
      __ cbnzw($oop$$Register, *L);
  %}
  ins_pipe(pipe_cmp_branch);
%}

// Unsigned int compare with zero and branch: EQ/LS conditions take
// the cbzw form, the remaining ones cbnzw.
instruct cmpUI_imm0_branch(cmpOpUEqNeLtGe cmp, iRegIorL2I op1, immI0 op2, label labl, rFlagsRegU cr) %{
  match(If cmp (CmpU op1 op2));
  effect(USE labl);

  ins_cost(BRANCH_COST);
  format %{ "cbw$cmp   $op1, $labl" %}
  ins_encode %{
    Label* L = $labl$$label;
    Assembler::Condition cond = (Assembler::Condition)$cmp$$cmpcode;
    if (cond == Assembler::EQ || cond == Assembler::LS)
      __ cbzw($op1$$Register, *L);
    else
      __ cbnzw($op1$$Register, *L);
  %}
  ins_pipe(pipe_cmp_branch);
%}

// Unsigned long compare with zero and branch; same mapping as above.
instruct cmpUL_imm0_branch(cmpOpUEqNeLtGe cmp, iRegL op1, immL0 op2, label labl, rFlagsRegU cr) %{
  match(If cmp (CmpUL op1 op2));
  effect(USE labl);

  ins_cost(BRANCH_COST);
  format %{ "cb$cmp   $op1, $labl" %}
  ins_encode %{
    Label* L = $labl$$label;
    Assembler::Condition cond = (Assembler::Condition)$cmp$$cmpcode;
    if (cond == Assembler::EQ || cond == Assembler::LS)
      __ cbz($op1$$Register, *L);
    else
      __ cbnz($op1$$Register, *L);
  %}
  ins_pipe(pipe_cmp_branch);
%}
15273 
15274 // Test bit and Branch
15275 
15276 // Patterns for short (< 32KiB) variants
// Sign test of a long: (x < 0) / (x >= 0) is just a test of bit 63.
// tbr presumably emits TBZ/TBNZ from cond — NOTE(review): confirm in
// MacroAssembler.
instruct cmpL_branch_sign(cmpOpLtGe cmp, iRegL op1, immL0 op2, label labl) %{
  match(If cmp (CmpL op1 op2));
  effect(USE labl);

  ins_cost(BRANCH_COST);
  format %{ "cb$cmp   $op1, $labl # long" %}
  ins_encode %{
    Label* L = $labl$$label;
    Assembler::Condition cond =
      ((Assembler::Condition)$cmp$$cmpcode == Assembler::LT) ? Assembler::NE : Assembler::EQ;
    __ tbr(cond, $op1$$Register, 63, *L);
  %}
  ins_pipe(pipe_cmp_branch);
  ins_short_branch(1);
%}

// Sign test of an int: test bit 31.
instruct cmpI_branch_sign(cmpOpLtGe cmp, iRegIorL2I op1, immI0 op2, label labl) %{
  match(If cmp (CmpI op1 op2));
  effect(USE labl);

  ins_cost(BRANCH_COST);
  format %{ "cb$cmp   $op1, $labl # int" %}
  ins_encode %{
    Label* L = $labl$$label;
    Assembler::Condition cond =
      ((Assembler::Condition)$cmp$$cmpcode == Assembler::LT) ? Assembler::NE : Assembler::EQ;
    __ tbr(cond, $op1$$Register, 31, *L);
  %}
  ins_pipe(pipe_cmp_branch);
  ins_short_branch(1);
%}

// Single-bit test of a long: (x & (1 << bit)) ==/!= 0, matched only
// when the AND mask is a power of two (see predicate).
instruct cmpL_branch_bit(cmpOpEqNe cmp, iRegL op1, immL op2, immL0 op3, label labl) %{
  match(If cmp (CmpL (AndL op1 op2) op3));
  predicate(is_power_of_2(n->in(2)->in(1)->in(2)->get_long()));
  effect(USE labl);

  ins_cost(BRANCH_COST);
  format %{ "tb$cmp   $op1, $op2, $labl" %}
  ins_encode %{
    Label* L = $labl$$label;
    Assembler::Condition cond = (Assembler::Condition)$cmp$$cmpcode;
    int bit = exact_log2($op2$$constant);
    __ tbr(cond, $op1$$Register, bit, *L);
  %}
  ins_pipe(pipe_cmp_branch);
  ins_short_branch(1);
%}

// Single-bit test of an int; as above with a 32-bit mask.
instruct cmpI_branch_bit(cmpOpEqNe cmp, iRegIorL2I op1, immI op2, immI0 op3, label labl) %{
  match(If cmp (CmpI (AndI op1 op2) op3));
  predicate(is_power_of_2(n->in(2)->in(1)->in(2)->get_int()));
  effect(USE labl);

  ins_cost(BRANCH_COST);
  format %{ "tb$cmp   $op1, $op2, $labl" %}
  ins_encode %{
    Label* L = $labl$$label;
    Assembler::Condition cond = (Assembler::Condition)$cmp$$cmpcode;
    int bit = exact_log2($op2$$constant);
    __ tbr(cond, $op1$$Register, bit, *L);
  %}
  ins_pipe(pipe_cmp_branch);
  ins_short_branch(1);
%}

// And far variants: identical patterns without ins_short_branch, with
// tbr's far flag set for targets beyond TBZ/TBNZ range.
instruct far_cmpL_branch_sign(cmpOpLtGe cmp, iRegL op1, immL0 op2, label labl) %{
  match(If cmp (CmpL op1 op2));
  effect(USE labl);

  ins_cost(BRANCH_COST);
  format %{ "cb$cmp   $op1, $labl # long" %}
  ins_encode %{
    Label* L = $labl$$label;
    Assembler::Condition cond =
      ((Assembler::Condition)$cmp$$cmpcode == Assembler::LT) ? Assembler::NE : Assembler::EQ;
    __ tbr(cond, $op1$$Register, 63, *L, /*far*/true);
  %}
  ins_pipe(pipe_cmp_branch);
%}

instruct far_cmpI_branch_sign(cmpOpLtGe cmp, iRegIorL2I op1, immI0 op2, label labl) %{
  match(If cmp (CmpI op1 op2));
  effect(USE labl);

  ins_cost(BRANCH_COST);
  format %{ "cb$cmp   $op1, $labl # int" %}
  ins_encode %{
    Label* L = $labl$$label;
    Assembler::Condition cond =
      ((Assembler::Condition)$cmp$$cmpcode == Assembler::LT) ? Assembler::NE : Assembler::EQ;
    __ tbr(cond, $op1$$Register, 31, *L, /*far*/true);
  %}
  ins_pipe(pipe_cmp_branch);
%}

instruct far_cmpL_branch_bit(cmpOpEqNe cmp, iRegL op1, immL op2, immL0 op3, label labl) %{
  match(If cmp (CmpL (AndL op1 op2) op3));
  predicate(is_power_of_2(n->in(2)->in(1)->in(2)->get_long()));
  effect(USE labl);

  ins_cost(BRANCH_COST);
  format %{ "tb$cmp   $op1, $op2, $labl" %}
  ins_encode %{
    Label* L = $labl$$label;
    Assembler::Condition cond = (Assembler::Condition)$cmp$$cmpcode;
    int bit = exact_log2($op2$$constant);
    __ tbr(cond, $op1$$Register, bit, *L, /*far*/true);
  %}
  ins_pipe(pipe_cmp_branch);
%}

instruct far_cmpI_branch_bit(cmpOpEqNe cmp, iRegIorL2I op1, immI op2, immI0 op3, label labl) %{
  match(If cmp (CmpI (AndI op1 op2) op3));
  predicate(is_power_of_2(n->in(2)->in(1)->in(2)->get_int()));
  effect(USE labl);

  ins_cost(BRANCH_COST);
  format %{ "tb$cmp   $op1, $op2, $labl" %}
  ins_encode %{
    Label* L = $labl$$label;
    Assembler::Condition cond = (Assembler::Condition)$cmp$$cmpcode;
    int bit = exact_log2($op2$$constant);
    __ tbr(cond, $op1$$Register, bit, *L, /*far*/true);
  %}
  ins_pipe(pipe_cmp_branch);
%}
15405 
15406 // Test bits
15407 
// (x & mask) compared with 0, with mask encodable as a 64-bit logical
// immediate (see predicate): a single TST.
instruct cmpL_and(cmpOp cmp, iRegL op1, immL op2, immL0 op3, rFlagsReg cr) %{
  match(Set cr (CmpL (AndL op1 op2) op3));
  predicate(Assembler::operand_valid_for_logical_immediate
            (/*is_32*/false, n->in(1)->in(2)->get_long()));

  ins_cost(INSN_COST);
  format %{ "tst $op1, $op2 # long" %}
  ins_encode %{
    __ tst($op1$$Register, $op2$$constant);
  %}
  ins_pipe(ialu_reg_reg);
%}
15420 
// (x & mask) compared with 0, 32-bit form, mask encodable as a 32-bit
// logical immediate: a single TSTW.
// NOTE(review): format now prints "tstw" to match the emitted 32-bit
// instruction (and the cmpI_and_reg variant below, which already
// printed "tstw").
instruct cmpI_and(cmpOp cmp, iRegIorL2I op1, immI op2, immI0 op3, rFlagsReg cr) %{
  match(Set cr (CmpI (AndI op1 op2) op3));
  predicate(Assembler::operand_valid_for_logical_immediate
            (/*is_32*/true, n->in(1)->in(2)->get_int()));

  ins_cost(INSN_COST);
  format %{ "tstw $op1, $op2 # int" %}
  ins_encode %{
    __ tstw($op1$$Register, $op2$$constant);
  %}
  ins_pipe(ialu_reg_reg);
%}
15433 
// (x & y) compared with 0, register-register forms: TST / TSTW.
instruct cmpL_and_reg(cmpOp cmp, iRegL op1, iRegL op2, immL0 op3, rFlagsReg cr) %{
  match(Set cr (CmpL (AndL op1 op2) op3));

  ins_cost(INSN_COST);
  format %{ "tst $op1, $op2 # long" %}
  ins_encode %{
    __ tst($op1$$Register, $op2$$Register);
  %}
  ins_pipe(ialu_reg_reg);
%}

instruct cmpI_and_reg(cmpOp cmp, iRegIorL2I op1, iRegIorL2I op2, immI0 op3, rFlagsReg cr) %{
  match(Set cr (CmpI (AndI op1 op2) op3));

  ins_cost(INSN_COST);
  format %{ "tstw $op1, $op2 # int" %}
  ins_encode %{
    __ tstw($op1$$Register, $op2$$Register);
  %}
  ins_pipe(ialu_reg_reg);
%}
15455 
15456 
15457 // Conditional Far Branch
15458 // Conditional Far Branch Unsigned
15459 // TODO: fixme
15460 
15461 // counted loop end branch near
// Signed conditional branch that closes a counted loop; the label operand
// is only used (branch target), never defined, by this node.
instruct branchLoopEnd(cmpOp cmp, rFlagsReg cr, label lbl)
%{
  match(CountedLoopEnd cmp cr);

  effect(USE lbl);

  ins_cost(BRANCH_COST);
  // short variant.
  // ins_short_branch(1);
  format %{ "b$cmp $lbl \t// counted loop end" %}

  ins_encode(aarch64_enc_br_con(cmp, lbl));

  ins_pipe(pipe_branch);
%}
15477 
15478 // counted loop end branch near Unsigned
// Unsigned conditional branch that closes a counted loop; mirrors
// branchLoopEnd but matches the unsigned condition/flags operands.
instruct branchLoopEndU(cmpOpU cmp, rFlagsRegU cr, label lbl)
%{
  match(CountedLoopEnd cmp cr);

  effect(USE lbl);

  ins_cost(BRANCH_COST);
  // short variant.
  // ins_short_branch(1);
  format %{ "b$cmp $lbl \t// counted loop end unsigned" %}

  ins_encode(aarch64_enc_br_conU(cmp, lbl));

  ins_pipe(pipe_branch);
%}
15494 
15495 // counted loop end branch far
15496 // counted loop end branch far unsigned
15497 // TODO: fixme
15498 
15499 // ============================================================================
15500 // inlined locking and unlocking
15501 
// Inlined monitor enter.  Sets the flags register to report success or
// failure of the fast path; tmp and tmp2 are scratch and clobbered.
instruct cmpFastLock(rFlagsReg cr, iRegP object, iRegP box, iRegPNoSp tmp, iRegPNoSp tmp2)
%{
  match(Set cr (FastLock object box));
  effect(TEMP tmp, TEMP tmp2);

  // TODO
  // identify correct cost
  ins_cost(5 * INSN_COST);
  format %{ "fastlock $object,$box\t! kills $tmp,$tmp2" %}

  ins_encode(aarch64_enc_fast_lock(object, box, tmp, tmp2));

  ins_pipe(pipe_serial);
%}
15516 
// Inlined monitor exit, the counterpart of cmpFastLock; tmp and tmp2 are
// scratch and clobbered.
instruct cmpFastUnlock(rFlagsReg cr, iRegP object, iRegP box, iRegPNoSp tmp, iRegPNoSp tmp2)
%{
  match(Set cr (FastUnlock object box));
  effect(TEMP tmp, TEMP tmp2);

  ins_cost(5 * INSN_COST);
  format %{ "fastunlock $object,$box\t! kills $tmp, $tmp2" %}

  ins_encode(aarch64_enc_fast_unlock(object, box, tmp, tmp2));

  ins_pipe(pipe_serial);
%}
15529 
15530 
15531 // ============================================================================
15532 // Safepoint Instructions
15533 
15534 // TODO
15535 // provide a near and far version of this code
15536 
// Safepoint poll: issue a load from the polling page held in $poll with a
// poll_type relocation so the signal handler can identify it.
instruct safePoint(iRegP poll)
%{
  match(SafePoint poll);

  format %{
    "ldrw zr, [$poll]\t# Safepoint: poll for GC"
  %}
  ins_encode %{
    __ read_polling_page(as_Register($poll$$reg), relocInfo::poll_type);
  %}
  ins_pipe(pipe_serial); // ins_pipe(iload_reg_mem);
%}
15549 
15550 
15551 // ============================================================================
15552 // Procedure Call/Return Instructions
15553 
15554 // Call Java Static Instruction
15555 
// Direct call to a statically-bound Java method, followed by the standard
// call epilog encoding.
instruct CallStaticJavaDirect(method meth)
%{
  match(CallStaticJava);

  effect(USE meth);

  ins_cost(CALL_COST);

  format %{ "call,static $meth \t// ==> " %}

  ins_encode( aarch64_enc_java_static_call(meth),
              aarch64_enc_call_epilog );

  ins_pipe(pipe_class_call);
%}
15571 
15572 // TO HERE
15573 
15574 // Call Java Dynamic Instruction
// Dynamically-bound (virtual/interface) Java call via the inline-cache
// calling sequence, followed by the standard call epilog encoding.
instruct CallDynamicJavaDirect(method meth)
%{
  match(CallDynamicJava);

  effect(USE meth);

  ins_cost(CALL_COST);

  format %{ "CALL,dynamic $meth \t// ==> " %}

  ins_encode( aarch64_enc_java_dynamic_call(meth),
               aarch64_enc_call_epilog );

  ins_pipe(pipe_class_call);
%}
15590 
15591 // Call Runtime Instruction
15592 
// Call from compiled Java code into the VM runtime.
instruct CallRuntimeDirect(method meth)
%{
  match(CallRuntime);

  effect(USE meth);

  ins_cost(CALL_COST);

  format %{ "CALL, runtime $meth" %}

  ins_encode( aarch64_enc_java_to_runtime(meth) );

  ins_pipe(pipe_class_call);
%}
15607 
15608 // Call Runtime Instruction
15609 
// Call to a runtime leaf routine (no safepoint, no Java frame walk);
// shares the java_to_runtime call encoding.
instruct CallLeafDirect(method meth)
%{
  match(CallLeaf);

  effect(USE meth);

  ins_cost(CALL_COST);

  format %{ "CALL, runtime leaf $meth" %}

  ins_encode( aarch64_enc_java_to_runtime(meth) );

  ins_pipe(pipe_class_call);
%}
15624 
15625 // Call Runtime Instruction
15626 
// Call to a runtime leaf routine that does not touch floating point state;
// uses the same call encoding as CallLeafDirect.
instruct CallLeafNoFPDirect(method meth)
%{
  match(CallLeafNoFP);

  effect(USE meth);

  ins_cost(CALL_COST);

  format %{ "CALL, runtime leaf nofp $meth" %}

  ins_encode( aarch64_enc_java_to_runtime(meth) );

  ins_pipe(pipe_class_call);
%}
15641 
15642 // Tail Call; Jump from runtime stub to Java code.
15643 // Also known as an 'interprocedural jump'.
15644 // Target of jump will eventually return to caller.
15645 // TailJump below removes the return address.
// Indirect tail call: jump through jump_target with the method oop held in
// the inline-cache register; control eventually returns to our caller.
instruct TailCalljmpInd(iRegPNoSp jump_target, inline_cache_RegP method_oop)
%{
  match(TailCall jump_target method_oop);

  ins_cost(CALL_COST);

  format %{ "br $jump_target\t# $method_oop holds method oop" %}

  ins_encode(aarch64_enc_tail_call(jump_target));

  ins_pipe(pipe_class_call);
%}
15658 
// Indirect tail jump used for exception forwarding: jump through
// jump_target with the exception oop pinned in r0.
instruct TailjmpInd(iRegPNoSp jump_target, iRegP_R0 ex_oop)
%{
  match(TailJump jump_target ex_oop);

  ins_cost(CALL_COST);

  format %{ "br $jump_target\t# $ex_oop holds exception oop" %}

  ins_encode(aarch64_enc_tail_jmp(jump_target));

  ins_pipe(pipe_class_call);
%}
15671 
15672 // Create exception oop: created by stack-crawling runtime code.
15673 // Created exception is now available to this handler, and is setup
15674 // just prior to jumping to this handler. No code emitted.
15675 // TODO check
15676 // should ex_oop be in r0? intel uses rax, ppc cannot use r0 so uses rarg1
// Bind the incoming exception oop (already placed in r0 by the runtime
// stack-crawling code) to a register operand.  Emits no instructions.
instruct CreateException(iRegP_R0 ex_oop)
%{
  match(Set ex_oop (CreateEx));

  format %{ " -- \t// exception oop; no code emitted" %}

  size(0);

  ins_encode( /*empty*/ );

  ins_pipe(pipe_class_empty);
%}
15689 
15690 // Rethrow exception: The exception oop will come in the first
15691 // argument position. Then JUMP (not call) to the rethrow stub code.
// Jump (not call) to the rethrow stub; the exception oop arrives in the
// first argument register per the calling convention.
instruct RethrowException() %{
  match(Rethrow);
  ins_cost(CALL_COST);

  format %{ "b rethrow_stub" %}

  ins_encode( aarch64_enc_rethrow() );

  ins_pipe(pipe_class_call);
%}
15702 
15703 
15704 // Return Instruction
15705 // epilog node loads ret address into lr as part of frame pop
// Method return; the epilog has already restored lr, so this is a bare ret.
instruct Ret()
%{
  match(Return);

  format %{ "ret\t// return register" %}

  ins_encode( aarch64_enc_ret() );

  ins_pipe(pipe_branch);
%}
15716 
15717 // Die now.
// Emit a trapping instruction for paths that must never execute (Halt).
instruct ShouldNotReachHere() %{
  match(Halt);

  ins_cost(CALL_COST);
  format %{ "ShouldNotReachHere" %}

  ins_encode %{
    // +1 so NativeInstruction::is_sigill_zombie_not_entrant() doesn't
    // return true
    __ dpcs1(0xdead + 1);
  %}

  ins_pipe(pipe_class_default);
%}
15732 
15733 // ============================================================================
15734 // Partial Subtype Check
15735 //
15736 // superklass array for an instance of the superklass.  Set a hidden
15737 // internal cache on a hit (cache is checked with exposed code in
15738 // gen_subtype_check()).  Return NZ for a miss or zero for a hit.  The
15739 // encoding ALSO sets flags.
15740 
// Slow-path subtype check over the secondary-supers array.  Result is zero
// on a hit (opcode 0x1 forces zeroing), non-zero on a miss; flags are also
// set by the encoding.
instruct partialSubtypeCheck(iRegP_R4 sub, iRegP_R0 super, iRegP_R2 temp, iRegP_R5 result, rFlagsReg cr)
%{
  match(Set result (PartialSubtypeCheck sub super));
  effect(KILL cr, KILL temp);

  ins_cost(1100);  // slightly larger than the next version
  format %{ "partialSubtypeCheck $result, $sub, $super" %}

  ins_encode(aarch64_enc_partial_subtype_check(sub, super, temp, result));

  opcode(0x1); // Force zero of result reg on hit

  ins_pipe(pipe_class_memory);
%}
15755 
// Variant matched when the PartialSubtypeCheck result is only compared
// against zero: only the flags are needed, so the result register is not
// forced to zero on a hit (opcode 0x0) and is simply killed.
instruct partialSubtypeCheckVsZero(iRegP_R4 sub, iRegP_R0 super, iRegP_R2 temp, iRegP_R5 result, immP0 zero, rFlagsReg cr)
%{
  match(Set cr (CmpP (PartialSubtypeCheck sub super) zero));
  effect(KILL temp, KILL result);

  ins_cost(1100);  // slightly larger than the next version
  format %{ "partialSubtypeCheck $result, $sub, $super == 0" %}

  ins_encode(aarch64_enc_partial_subtype_check(sub, super, temp, result));

  opcode(0x0); // Don't zero result reg on hit

  ins_pipe(pipe_class_memory);
%}
15770 
// String.compareTo intrinsic for two UTF-16 strings (encoding UU).
// Both temps are killed, as are the USE_KILL inputs and the flags.
instruct string_compareU(iRegP_R1 str1, iRegI_R2 cnt1, iRegP_R3 str2, iRegI_R4 cnt2,
                        iRegI_R0 result, iRegP_R10 tmp1, iRegL_R11 tmp2, rFlagsReg cr)
%{
  predicate(((StrCompNode*)n)->encoding() == StrIntrinsicNode::UU);
  match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
  effect(KILL tmp1, KILL tmp2, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);

  // List both killed temps, matching the UL/LU variants below.
  format %{ "String Compare $str1,$cnt1,$str2,$cnt2 -> $result   # KILL $tmp1, $tmp2" %}
  ins_encode %{
    // Count is in 8-bit bytes; non-Compact chars are 16 bits.
    __ string_compare($str1$$Register, $str2$$Register,
                      $cnt1$$Register, $cnt2$$Register, $result$$Register,
                      $tmp1$$Register, $tmp2$$Register,
                      fnoreg, fnoreg, fnoreg, StrIntrinsicNode::UU);
  %}
  ins_pipe(pipe_class_memory);
%}
15788 
// String.compareTo intrinsic for two Latin-1 strings (encoding LL).
// Both temps are killed, as are the USE_KILL inputs and the flags.
instruct string_compareL(iRegP_R1 str1, iRegI_R2 cnt1, iRegP_R3 str2, iRegI_R4 cnt2,
                        iRegI_R0 result, iRegP_R10 tmp1, iRegL_R11 tmp2, rFlagsReg cr)
%{
  predicate(((StrCompNode*)n)->encoding() == StrIntrinsicNode::LL);
  match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
  effect(KILL tmp1, KILL tmp2, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);

  // List both killed temps, matching the UL/LU variants below.
  format %{ "String Compare $str1,$cnt1,$str2,$cnt2 -> $result   # KILL $tmp1, $tmp2" %}
  ins_encode %{
    __ string_compare($str1$$Register, $str2$$Register,
                      $cnt1$$Register, $cnt2$$Register, $result$$Register,
                      $tmp1$$Register, $tmp2$$Register,
                      fnoreg, fnoreg, fnoreg, StrIntrinsicNode::LL);
  %}
  ins_pipe(pipe_class_memory);
%}
15805 
// String.compareTo intrinsic, UTF-16 vs Latin-1 (encoding UL); uses three
// vector temps in addition to the integer temps.
instruct string_compareUL(iRegP_R1 str1, iRegI_R2 cnt1, iRegP_R3 str2, iRegI_R4 cnt2,
                        iRegI_R0 result, iRegP_R10 tmp1, iRegL_R11 tmp2,
                        vRegD_V0 vtmp1, vRegD_V1 vtmp2, vRegD_V2 vtmp3, rFlagsReg cr)
%{
  predicate(((StrCompNode*)n)->encoding() == StrIntrinsicNode::UL);
  match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
  effect(KILL tmp1, KILL tmp2, KILL vtmp1, KILL vtmp2, KILL vtmp3,
         USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);

  format %{ "String Compare $str1,$cnt1,$str2,$cnt2 -> $result   # KILL $tmp1, $tmp2, $vtmp1, $vtmp2, $vtmp3" %}
  ins_encode %{
    __ string_compare($str1$$Register, $str2$$Register,
                      $cnt1$$Register, $cnt2$$Register, $result$$Register,
                      $tmp1$$Register, $tmp2$$Register,
                      $vtmp1$$FloatRegister, $vtmp2$$FloatRegister,
                      $vtmp3$$FloatRegister, StrIntrinsicNode::UL);
  %}
  ins_pipe(pipe_class_memory);
%}
15825 
// String.compareTo intrinsic, Latin-1 vs UTF-16 (encoding LU); mirror of
// string_compareUL with the operand encodings swapped.
instruct string_compareLU(iRegP_R1 str1, iRegI_R2 cnt1, iRegP_R3 str2, iRegI_R4 cnt2,
                        iRegI_R0 result, iRegP_R10 tmp1, iRegL_R11 tmp2,
                        vRegD_V0 vtmp1, vRegD_V1 vtmp2, vRegD_V2 vtmp3, rFlagsReg cr)
%{
  predicate(((StrCompNode*)n)->encoding() == StrIntrinsicNode::LU);
  match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
  effect(KILL tmp1, KILL tmp2, KILL vtmp1, KILL vtmp2, KILL vtmp3,
         USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);

  format %{ "String Compare $str1,$cnt1,$str2,$cnt2 -> $result   # KILL $tmp1, $tmp2, $vtmp1, $vtmp2, $vtmp3" %}
  ins_encode %{
    __ string_compare($str1$$Register, $str2$$Register,
                      $cnt1$$Register, $cnt2$$Register, $result$$Register,
                      $tmp1$$Register, $tmp2$$Register,
                      $vtmp1$$FloatRegister, $vtmp2$$FloatRegister,
                      $vtmp3$$FloatRegister,StrIntrinsicNode::LU);
  %}
  ins_pipe(pipe_class_memory);
%}
15845 
// String.indexOf intrinsic, both strings UTF-16 (UU); the -1 constant
// marks a runtime (non-constant) needle length.
instruct string_indexofUU(iRegP_R1 str1, iRegI_R4 cnt1, iRegP_R3 str2, iRegI_R2 cnt2,
       iRegI_R0 result, iRegINoSp tmp1, iRegINoSp tmp2, iRegINoSp tmp3,
       iRegINoSp tmp4, iRegINoSp tmp5, iRegINoSp tmp6, rFlagsReg cr)
%{
  predicate(((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UU);
  match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 cnt2)));
  effect(USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2,
         TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, TEMP tmp5, TEMP tmp6, KILL cr);
  format %{ "String IndexOf $str1,$cnt1,$str2,$cnt2 -> $result (UU)" %}

  ins_encode %{
    __ string_indexof($str1$$Register, $str2$$Register,
                      $cnt1$$Register, $cnt2$$Register,
                      $tmp1$$Register, $tmp2$$Register,
                      $tmp3$$Register, $tmp4$$Register,
                      $tmp5$$Register, $tmp6$$Register,
                      -1, $result$$Register, StrIntrinsicNode::UU);
  %}
  ins_pipe(pipe_class_memory);
%}
15866 
// String.indexOf intrinsic, both strings Latin-1 (LL); -1 marks a runtime
// (non-constant) needle length.
instruct string_indexofLL(iRegP_R1 str1, iRegI_R4 cnt1, iRegP_R3 str2, iRegI_R2 cnt2,
       iRegI_R0 result, iRegINoSp tmp1, iRegINoSp tmp2, iRegINoSp tmp3,
       iRegINoSp tmp4, iRegINoSp tmp5, iRegINoSp tmp6, rFlagsReg cr)
%{
  predicate(((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::LL);
  match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 cnt2)));
  effect(USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2,
         TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, TEMP tmp5, TEMP tmp6, KILL cr);
  format %{ "String IndexOf $str1,$cnt1,$str2,$cnt2 -> $result (LL)" %}

  ins_encode %{
    __ string_indexof($str1$$Register, $str2$$Register,
                      $cnt1$$Register, $cnt2$$Register,
                      $tmp1$$Register, $tmp2$$Register,
                      $tmp3$$Register, $tmp4$$Register,
                      $tmp5$$Register, $tmp6$$Register,
                      -1, $result$$Register, StrIntrinsicNode::LL);
  %}
  ins_pipe(pipe_class_memory);
%}
15887 
// String.indexOf intrinsic, UTF-16 haystack / Latin-1 needle (UL); -1
// marks a runtime (non-constant) needle length.
instruct string_indexofUL(iRegP_R1 str1, iRegI_R4 cnt1, iRegP_R3 str2, iRegI_R2 cnt2,
       iRegI_R0 result, iRegINoSp tmp1, iRegINoSp tmp2, iRegINoSp tmp3,
       iRegINoSp tmp4, iRegINoSp tmp5, iRegINoSp tmp6, rFlagsReg cr)
%{
  predicate(((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UL);
  match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 cnt2)));
  effect(USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2,
         TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, TEMP tmp5, TEMP tmp6, KILL cr);
  format %{ "String IndexOf $str1,$cnt1,$str2,$cnt2 -> $result (UL)" %}

  ins_encode %{
    __ string_indexof($str1$$Register, $str2$$Register,
                      $cnt1$$Register, $cnt2$$Register,
                      $tmp1$$Register, $tmp2$$Register,
                      $tmp3$$Register, $tmp4$$Register,
                      $tmp5$$Register, $tmp6$$Register,
                      -1, $result$$Register, StrIntrinsicNode::UL);
  %}
  ins_pipe(pipe_class_memory);
%}
15908 
// String.indexOf intrinsic, UU, with a small compile-time-constant needle
// length (immI_le_4); the constant is passed to the stub, so cnt2 and two
// temps are replaced by zr.
instruct string_indexof_conUU(iRegP_R1 str1, iRegI_R4 cnt1, iRegP_R3 str2,
                 immI_le_4 int_cnt2, iRegI_R0 result, iRegINoSp tmp1, iRegINoSp tmp2,
                 iRegINoSp tmp3, iRegINoSp tmp4, rFlagsReg cr)
%{
  predicate(((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UU);
  match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 int_cnt2)));
  effect(USE_KILL str1, USE_KILL str2, USE_KILL cnt1,
         TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, KILL cr);
  format %{ "String IndexOf $str1,$cnt1,$str2,$int_cnt2 -> $result (UU)" %}

  ins_encode %{
    int icnt2 = (int)$int_cnt2$$constant;
    __ string_indexof($str1$$Register, $str2$$Register,
                      $cnt1$$Register, zr,
                      $tmp1$$Register, $tmp2$$Register,
                      $tmp3$$Register, $tmp4$$Register, zr, zr,
                      icnt2, $result$$Register, StrIntrinsicNode::UU);
  %}
  ins_pipe(pipe_class_memory);
%}
15929 
// String.indexOf intrinsic, LL, with a small compile-time-constant needle
// length (immI_le_4); mirrors string_indexof_conUU.
instruct string_indexof_conLL(iRegP_R1 str1, iRegI_R4 cnt1, iRegP_R3 str2,
                 immI_le_4 int_cnt2, iRegI_R0 result, iRegINoSp tmp1, iRegINoSp tmp2,
                 iRegINoSp tmp3, iRegINoSp tmp4, rFlagsReg cr)
%{
  predicate(((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::LL);
  match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 int_cnt2)));
  effect(USE_KILL str1, USE_KILL str2, USE_KILL cnt1,
         TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, KILL cr);
  format %{ "String IndexOf $str1,$cnt1,$str2,$int_cnt2 -> $result (LL)" %}

  ins_encode %{
    int icnt2 = (int)$int_cnt2$$constant;
    __ string_indexof($str1$$Register, $str2$$Register,
                      $cnt1$$Register, zr,
                      $tmp1$$Register, $tmp2$$Register,
                      $tmp3$$Register, $tmp4$$Register, zr, zr,
                      icnt2, $result$$Register, StrIntrinsicNode::LL);
  %}
  ins_pipe(pipe_class_memory);
%}
15950 
// String.indexOf intrinsic, UL, restricted to a constant needle length of
// exactly one (immI_1).
instruct string_indexof_conUL(iRegP_R1 str1, iRegI_R4 cnt1, iRegP_R3 str2,
                 immI_1 int_cnt2, iRegI_R0 result, iRegINoSp tmp1, iRegINoSp tmp2,
                 iRegINoSp tmp3, iRegINoSp tmp4, rFlagsReg cr)
%{
  predicate(((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UL);
  match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 int_cnt2)));
  effect(USE_KILL str1, USE_KILL str2, USE_KILL cnt1,
         TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, KILL cr);
  format %{ "String IndexOf $str1,$cnt1,$str2,$int_cnt2 -> $result (UL)" %}

  ins_encode %{
    int icnt2 = (int)$int_cnt2$$constant;
    __ string_indexof($str1$$Register, $str2$$Register,
                      $cnt1$$Register, zr,
                      $tmp1$$Register, $tmp2$$Register,
                      $tmp3$$Register, $tmp4$$Register, zr, zr,
                      icnt2, $result$$Register, StrIntrinsicNode::UL);
  %}
  ins_pipe(pipe_class_memory);
%}
15971 
// StringUTF16.indexOf(char) intrinsic: find a single UTF-16 code unit ch
// in str1[0..cnt1).
instruct string_indexofU_char(iRegP_R1 str1, iRegI_R2 cnt1, iRegI_R3 ch,
                              iRegI_R0 result, iRegINoSp tmp1, iRegINoSp tmp2,
                              iRegINoSp tmp3, rFlagsReg cr)
%{
  match(Set result (StrIndexOfChar (Binary str1 cnt1) ch));
  effect(USE_KILL str1, USE_KILL cnt1, USE_KILL ch,
         TEMP tmp1, TEMP tmp2, TEMP tmp3, KILL cr);

  format %{ "String IndexOf char[] $str1,$cnt1,$ch -> $result" %}

  ins_encode %{
    __ string_indexof_char($str1$$Register, $cnt1$$Register, $ch$$Register,
                           $result$$Register, $tmp1$$Register, $tmp2$$Register,
                           $tmp3$$Register);
  %}
  ins_pipe(pipe_class_memory);
%}
15989 
// String.equals intrinsic for Latin-1 strings (LL); the trailing 1 is the
// element size in bytes passed to the stub.
instruct string_equalsL(iRegP_R1 str1, iRegP_R3 str2, iRegI_R4 cnt,
                        iRegI_R0 result, rFlagsReg cr)
%{
  predicate(((StrEqualsNode*)n)->encoding() == StrIntrinsicNode::LL);
  match(Set result (StrEquals (Binary str1 str2) cnt));
  effect(USE_KILL str1, USE_KILL str2, USE_KILL cnt, KILL cr);

  format %{ "String Equals $str1,$str2,$cnt -> $result" %}
  ins_encode %{
    // Count is in 8-bit bytes; non-Compact chars are 16 bits.
    __ string_equals($str1$$Register, $str2$$Register,
                     $result$$Register, $cnt$$Register, 1);
  %}
  ins_pipe(pipe_class_memory);
%}
16005 
// String.equals intrinsic for UTF-16 strings (UU); the trailing 2 is the
// element size in bytes passed to the stub.
instruct string_equalsU(iRegP_R1 str1, iRegP_R3 str2, iRegI_R4 cnt,
                        iRegI_R0 result, rFlagsReg cr)
%{
  predicate(((StrEqualsNode*)n)->encoding() == StrIntrinsicNode::UU);
  match(Set result (StrEquals (Binary str1 str2) cnt));
  effect(USE_KILL str1, USE_KILL str2, USE_KILL cnt, KILL cr);

  format %{ "String Equals $str1,$str2,$cnt -> $result" %}
  ins_encode %{
    // Count is in 8-bit bytes; non-Compact chars are 16 bits.
    __ string_equals($str1$$Register, $str2$$Register,
                     $result$$Register, $cnt$$Register, 2);
  %}
  ins_pipe(pipe_class_memory);
%}
16021 
// Arrays.equals intrinsic for byte[] (LL encoding, element size 1).
instruct array_equalsB(iRegP_R1 ary1, iRegP_R2 ary2, iRegI_R0 result,
                       iRegP_R3 tmp1, iRegP_R4 tmp2, iRegP_R5 tmp3,
                       iRegP_R10 tmp, rFlagsReg cr)
%{
  predicate(((AryEqNode*)n)->encoding() == StrIntrinsicNode::LL);
  match(Set result (AryEq ary1 ary2));
  effect(KILL tmp, USE_KILL ary1, USE_KILL ary2, TEMP tmp1, TEMP tmp2, TEMP tmp3, KILL cr);

  // '$' restored on ary2 so the operand name is substituted rather than
  // printed literally.
  format %{ "Array Equals $ary1,$ary2 -> $result    // KILL $tmp" %}
  ins_encode %{
    __ arrays_equals($ary1$$Register, $ary2$$Register,
                     $tmp1$$Register, $tmp2$$Register, $tmp3$$Register,
                     $result$$Register, $tmp$$Register, 1);
  %}
  ins_pipe(pipe_class_memory);
%}
16038 
// Arrays.equals intrinsic for char[] (UU encoding, element size 2).
instruct array_equalsC(iRegP_R1 ary1, iRegP_R2 ary2, iRegI_R0 result,
                       iRegP_R3 tmp1, iRegP_R4 tmp2, iRegP_R5 tmp3,
                       iRegP_R10 tmp, rFlagsReg cr)
%{
  predicate(((AryEqNode*)n)->encoding() == StrIntrinsicNode::UU);
  match(Set result (AryEq ary1 ary2));
  effect(KILL tmp, USE_KILL ary1, USE_KILL ary2, TEMP tmp1, TEMP tmp2, TEMP tmp3, KILL cr);

  // '$' restored on ary2 so the operand name is substituted rather than
  // printed literally.
  format %{ "Array Equals $ary1,$ary2 -> $result    // KILL $tmp" %}
  ins_encode %{
    __ arrays_equals($ary1$$Register, $ary2$$Register,
                     $tmp1$$Register, $tmp2$$Register, $tmp3$$Register,
                     $result$$Register, $tmp$$Register, 2);
  %}
  ins_pipe(pipe_class_memory);
%}
16055 
// StringCoding.hasNegatives intrinsic: test whether any byte in
// ary1[0..len) has its sign bit set.
instruct has_negatives(iRegP_R1 ary1, iRegI_R2 len, iRegI_R0 result, rFlagsReg cr)
%{
  match(Set result (HasNegatives ary1 len));
  effect(USE_KILL ary1, USE_KILL len, KILL cr);
  format %{ "has negatives byte[] $ary1,$len -> $result" %}
  ins_encode %{
    __ has_negatives($ary1$$Register, $len$$Register, $result$$Register);
  %}
  ins_pipe( pipe_slow );
%}
16066 
16067 // fast char[] to byte[] compression
16068 instruct string_compress(iRegP_R2 src, iRegP_R1 dst, iRegI_R3 len,
16069                          vRegD_V0 tmp1, vRegD_V1 tmp2,
16070                          vRegD_V2 tmp3, vRegD_V3 tmp4,
16071                          iRegI_R0 result, rFlagsReg cr)
16072 %{
16073   match(Set result (StrCompressedCopy src (Binary dst len)));
16074   effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, USE_KILL src, USE_KILL dst, USE_KILL len, KILL cr);
16075 
16076   format %{ "String Compress $src,$dst -> $result    // KILL R1, R2, R3, R4" %}
16077   ins_encode %{
16078     __ char_array_compress($src$$Register, $dst$$Register, $len$$Register,
16079                            $tmp1$$FloatRegister, $tmp2$$FloatRegister,
16080                            $tmp3$$FloatRegister, $tmp4$$FloatRegister,
16081                            $result$$Register);
16082   %}
16083   ins_pipe( pipe_slow );
16084 %}
16085 
16086 // fast byte[] to char[] inflation
16087 instruct string_inflate(Universe dummy, iRegP_R0 src, iRegP_R1 dst, iRegI_R2 len,
16088                         vRegD_V0 tmp1, vRegD_V1 tmp2, vRegD_V2 tmp3, iRegP_R3 tmp4, rFlagsReg cr)
16089 %{
16090   match(Set dummy (StrInflatedCopy src (Binary dst len)));
16091   effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, USE_KILL src, USE_KILL dst, USE_KILL len, KILL cr);
16092 
16093   format %{ "String Inflate $src,$dst    // KILL $tmp1, $tmp2" %}
16094   ins_encode %{
16095     __ byte_array_inflate($src$$Register, $dst$$Register, $len$$Register,
16096                           $tmp1$$FloatRegister, $tmp2$$FloatRegister, $tmp3$$FloatRegister, $tmp4$$Register);
16097   %}
16098   ins_pipe(pipe_class_memory);
16099 %}
16100 
16101 // encode char[] to byte[] in ISO_8859_1
16102 instruct encode_iso_array(iRegP_R2 src, iRegP_R1 dst, iRegI_R3 len,
16103                           vRegD_V0 Vtmp1, vRegD_V1 Vtmp2,
16104                           vRegD_V2 Vtmp3, vRegD_V3 Vtmp4,
16105                           iRegI_R0 result, rFlagsReg cr)
16106 %{
16107   match(Set result (EncodeISOArray src (Binary dst len)));
16108   effect(USE_KILL src, USE_KILL dst, USE_KILL len,
16109          KILL Vtmp1, KILL Vtmp2, KILL Vtmp3, KILL Vtmp4, KILL cr);
16110 
16111   format %{ "Encode array $src,$dst,$len -> $result" %}
16112   ins_encode %{
16113     __ encode_iso_array($src$$Register, $dst$$Register, $len$$Register,
16114          $result$$Register, $Vtmp1$$FloatRegister,  $Vtmp2$$FloatRegister,
16115          $Vtmp3$$FloatRegister,  $Vtmp4$$FloatRegister);
16116   %}
16117   ins_pipe( pipe_class_memory );
16118 %}
16119 
16120 // ============================================================================
16121 // This name is KNOWN by the ADLC and cannot be changed.
16122 // The ADLC forces a 'TypeRawPtr::BOTTOM' output type
16123 // for this guy.
// Bind ThreadLocal to the dedicated thread register; emits no code since
// the thread pointer already lives there.
instruct tlsLoadP(thread_RegP dst)
%{
  match(Set dst (ThreadLocal));

  ins_cost(0);

  format %{ " -- \t// $dst=Thread::current(), empty" %}

  size(0);

  ins_encode( /*empty*/ );

  ins_pipe(pipe_class_empty);
%}
16138 
16139 // ====================VECTOR INSTRUCTIONS=====================================
16140 
16141 // Load vector (32 bits)
// 32-bit vector load via ldrs into the low lane of a D register.
instruct loadV4(vecD dst, vmem4 mem)
%{
  predicate(n->as_LoadVector()->memory_size() == 4);
  match(Set dst (LoadVector mem));
  ins_cost(4 * INSN_COST);
  format %{ "ldrs   $dst,$mem\t# vector (32 bits)" %}
  ins_encode( aarch64_enc_ldrvS(dst, mem) );
  ins_pipe(vload_reg_mem64);
%}
16151 
16152 // Load vector (64 bits)
// 64-bit vector load via ldrd into a D register.
instruct loadV8(vecD dst, vmem8 mem)
%{
  predicate(n->as_LoadVector()->memory_size() == 8);
  match(Set dst (LoadVector mem));
  ins_cost(4 * INSN_COST);
  format %{ "ldrd   $dst,$mem\t# vector (64 bits)" %}
  ins_encode( aarch64_enc_ldrvD(dst, mem) );
  ins_pipe(vload_reg_mem64);
%}
16162 
16163 // Load Vector (128 bits)
// 128-bit vector load via ldrq into a Q register.
instruct loadV16(vecX dst, vmem16 mem)
%{
  predicate(n->as_LoadVector()->memory_size() == 16);
  match(Set dst (LoadVector mem));
  ins_cost(4 * INSN_COST);
  format %{ "ldrq   $dst,$mem\t# vector (128 bits)" %}
  ins_encode( aarch64_enc_ldrvQ(dst, mem) );
  ins_pipe(vload_reg_mem128);
%}
16173 
16174 // Store Vector (32 bits)
// 32-bit vector store via strs from the low lane of a D register.
instruct storeV4(vecD src, vmem4 mem)
%{
  predicate(n->as_StoreVector()->memory_size() == 4);
  match(Set mem (StoreVector mem src));
  ins_cost(4 * INSN_COST);
  format %{ "strs   $mem,$src\t# vector (32 bits)" %}
  ins_encode( aarch64_enc_strvS(src, mem) );
  ins_pipe(vstore_reg_mem64);
%}
16184 
16185 // Store Vector (64 bits)
// 64-bit vector store via strd from a D register.
instruct storeV8(vecD src, vmem8 mem)
%{
  predicate(n->as_StoreVector()->memory_size() == 8);
  match(Set mem (StoreVector mem src));
  ins_cost(4 * INSN_COST);
  format %{ "strd   $mem,$src\t# vector (64 bits)" %}
  ins_encode( aarch64_enc_strvD(src, mem) );
  ins_pipe(vstore_reg_mem64);
%}
16195 
16196 // Store Vector (128 bits)
// 128-bit vector store via strq from a Q register.
instruct storeV16(vecX src, vmem16 mem)
%{
  predicate(n->as_StoreVector()->memory_size() == 16);
  match(Set mem (StoreVector mem src));
  ins_cost(4 * INSN_COST);
  format %{ "strq   $mem,$src\t# vector (128 bits)" %}
  ins_encode( aarch64_enc_strvQ(src, mem) );
  ins_pipe(vstore_reg_mem128);
%}
16206 
// Broadcast a byte from a general register into a 64-bit vector (also
// covers 4-byte vectors, per the predicate).
instruct replicate8B(vecD dst, iRegIorL2I src)
%{
  predicate(n->as_Vector()->length() == 4 ||
            n->as_Vector()->length() == 8);
  match(Set dst (ReplicateB src));
  ins_cost(INSN_COST);
  format %{ "dup  $dst, $src\t# vector (8B)" %}
  ins_encode %{
    __ dup(as_FloatRegister($dst$$reg), __ T8B, as_Register($src$$reg));
  %}
  ins_pipe(vdup_reg_reg64);
%}
16219 
// Broadcast a byte from a general register into a 128-bit vector.
instruct replicate16B(vecX dst, iRegIorL2I src)
%{
  predicate(n->as_Vector()->length() == 16);
  match(Set dst (ReplicateB src));
  ins_cost(INSN_COST);
  format %{ "dup  $dst, $src\t# vector (16B)" %}
  ins_encode %{
    __ dup(as_FloatRegister($dst$$reg), __ T16B, as_Register($src$$reg));
  %}
  ins_pipe(vdup_reg_reg128);
%}
16231 
// Broadcast an immediate byte (masked to 8 bits) into a 64-bit vector.
instruct replicate8B_imm(vecD dst, immI con)
%{
  predicate(n->as_Vector()->length() == 4 ||
            n->as_Vector()->length() == 8);
  match(Set dst (ReplicateB con));
  ins_cost(INSN_COST);
  format %{ "movi  $dst, $con\t# vector(8B)" %}
  ins_encode %{
    __ mov(as_FloatRegister($dst$$reg), __ T8B, $con$$constant & 0xff);
  %}
  ins_pipe(vmovi_reg_imm64);
%}
16244 
// Broadcast an immediate byte (masked to 8 bits) into a 128-bit vector.
instruct replicate16B_imm(vecX dst, immI con)
%{
  predicate(n->as_Vector()->length() == 16);
  match(Set dst (ReplicateB con));
  ins_cost(INSN_COST);
  format %{ "movi  $dst, $con\t# vector(16B)" %}
  ins_encode %{
    __ mov(as_FloatRegister($dst$$reg), __ T16B, $con$$constant & 0xff);
  %}
  ins_pipe(vmovi_reg_imm128);
%}
16256 
// Broadcast a short from a general register into a 64-bit vector (also
// covers 2-element vectors, per the predicate).
instruct replicate4S(vecD dst, iRegIorL2I src)
%{
  predicate(n->as_Vector()->length() == 2 ||
            n->as_Vector()->length() == 4);
  match(Set dst (ReplicateS src));
  ins_cost(INSN_COST);
  format %{ "dup  $dst, $src\t# vector (4S)" %}
  ins_encode %{
    __ dup(as_FloatRegister($dst$$reg), __ T4H, as_Register($src$$reg));
  %}
  ins_pipe(vdup_reg_reg64);
%}
16269 
// Broadcast a short from a general register into a 128-bit vector.
instruct replicate8S(vecX dst, iRegIorL2I src)
%{
  predicate(n->as_Vector()->length() == 8);
  match(Set dst (ReplicateS src));
  ins_cost(INSN_COST);
  format %{ "dup  $dst, $src\t# vector (8S)" %}
  ins_encode %{
    __ dup(as_FloatRegister($dst$$reg), __ T8H, as_Register($src$$reg));
  %}
  ins_pipe(vdup_reg_reg128);
%}
16281 
// Broadcast an immediate short (masked to 16 bits) into a 64-bit vector.
instruct replicate4S_imm(vecD dst, immI con)
%{
  predicate(n->as_Vector()->length() == 2 ||
            n->as_Vector()->length() == 4);
  match(Set dst (ReplicateS con));
  ins_cost(INSN_COST);
  format %{ "movi  $dst, $con\t# vector(4H)" %}
  ins_encode %{
    __ mov(as_FloatRegister($dst$$reg), __ T4H, $con$$constant & 0xffff);
  %}
  ins_pipe(vmovi_reg_imm64);
%}
16294 
16295 instruct replicate8S_imm(vecX dst, immI con)
16296 %{
16297   predicate(n->as_Vector()->length() == 8);
16298   match(Set dst (ReplicateS con));
16299   ins_cost(INSN_COST);
16300   format %{ "movi  $dst, $con\t# vector(8H)" %}
16301   ins_encode %{
16302     __ mov(as_FloatRegister($dst$$reg), __ T8H, $con$$constant & 0xffff);
16303   %}
16304   ins_pipe(vmovi_reg_imm128);
16305 %}
16306 
16307 instruct replicate2I(vecD dst, iRegIorL2I src)
16308 %{
16309   predicate(n->as_Vector()->length() == 2);
16310   match(Set dst (ReplicateI src));
16311   ins_cost(INSN_COST);
16312   format %{ "dup  $dst, $src\t# vector (2I)" %}
16313   ins_encode %{
16314     __ dup(as_FloatRegister($dst$$reg), __ T2S, as_Register($src$$reg));
16315   %}
16316   ins_pipe(vdup_reg_reg64);
16317 %}
16318 
16319 instruct replicate4I(vecX dst, iRegIorL2I src)
16320 %{
16321   predicate(n->as_Vector()->length() == 4);
16322   match(Set dst (ReplicateI src));
16323   ins_cost(INSN_COST);
16324   format %{ "dup  $dst, $src\t# vector (4I)" %}
16325   ins_encode %{
16326     __ dup(as_FloatRegister($dst$$reg), __ T4S, as_Register($src$$reg));
16327   %}
16328   ins_pipe(vdup_reg_reg128);
16329 %}
16330 
16331 instruct replicate2I_imm(vecD dst, immI con)
16332 %{
16333   predicate(n->as_Vector()->length() == 2);
16334   match(Set dst (ReplicateI con));
16335   ins_cost(INSN_COST);
16336   format %{ "movi  $dst, $con\t# vector(2I)" %}
16337   ins_encode %{
16338     __ mov(as_FloatRegister($dst$$reg), __ T2S, $con$$constant);
16339   %}
16340   ins_pipe(vmovi_reg_imm64);
16341 %}
16342 
16343 instruct replicate4I_imm(vecX dst, immI con)
16344 %{
16345   predicate(n->as_Vector()->length() == 4);
16346   match(Set dst (ReplicateI con));
16347   ins_cost(INSN_COST);
16348   format %{ "movi  $dst, $con\t# vector(4I)" %}
16349   ins_encode %{
16350     __ mov(as_FloatRegister($dst$$reg), __ T4S, $con$$constant);
16351   %}
16352   ins_pipe(vmovi_reg_imm128);
16353 %}
16354 
16355 instruct replicate2L(vecX dst, iRegL src)
16356 %{
16357   predicate(n->as_Vector()->length() == 2);
16358   match(Set dst (ReplicateL src));
16359   ins_cost(INSN_COST);
16360   format %{ "dup  $dst, $src\t# vector (2L)" %}
16361   ins_encode %{
16362     __ dup(as_FloatRegister($dst$$reg), __ T2D, as_Register($src$$reg));
16363   %}
16364   ins_pipe(vdup_reg_reg128);
16365 %}
16366 
// Produce a 128-bit all-zero vector.  Matches ReplicateI of the constant 0
// (a zero 4I vector is bit-identical to a zero 2L vector).  The register is
// zeroed with EOR dst, dst, dst rather than MOVI, so the debug format now
// reflects the instruction actually emitted (it previously claimed "movi").
instruct replicate2L_zero(vecX dst, immI0 zero)
%{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (ReplicateI zero));
  ins_cost(INSN_COST);
  format %{ "eor  $dst, $dst, $dst\t# vector(4I) zero" %}
  ins_encode %{
    __ eor(as_FloatRegister($dst$$reg), __ T16B,
           as_FloatRegister($dst$$reg),
           as_FloatRegister($dst$$reg));
  %}
  ins_pipe(vmovi_reg_imm128);
%}
16380 
// Replicate a scalar float (lane 0 of an FP register) into the 2 word
// lanes of a 64-bit vector.
instruct replicate2F(vecD dst, vRegF src)
%{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (ReplicateF src));
  ins_cost(INSN_COST);
  format %{ "dup  $dst, $src\t# vector (2F)" %}
  ins_encode %{
    __ dup(as_FloatRegister($dst$$reg), __ T2S,
           as_FloatRegister($src$$reg));
  %}
  ins_pipe(vdup_reg_freg64);
%}

// Replicate a scalar float into the 4 word lanes of a 128-bit vector.
instruct replicate4F(vecX dst, vRegF src)
%{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (ReplicateF src));
  ins_cost(INSN_COST);
  format %{ "dup  $dst, $src\t# vector (4F)" %}
  ins_encode %{
    __ dup(as_FloatRegister($dst$$reg), __ T4S,
           as_FloatRegister($src$$reg));
  %}
  ins_pipe(vdup_reg_freg128);
%}

// Replicate a scalar double into both doubleword lanes of a 128-bit vector.
instruct replicate2D(vecX dst, vRegD src)
%{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (ReplicateD src));
  ins_cost(INSN_COST);
  format %{ "dup  $dst, $src\t# vector (2D)" %}
  ins_encode %{
    __ dup(as_FloatRegister($dst$$reg), __ T2D,
           as_FloatRegister($src$$reg));
  %}
  ins_pipe(vdup_reg_dreg128);
%}
16419 
16420 // ====================REDUCTION ARITHMETIC====================================
16421 
// Add-reduction of a 2-lane int vector: dst = src1 + src2[0] + src2[1].
// Both lanes are extracted to GPRs with umov and summed with addw.
instruct reduce_add2I(iRegINoSp dst, iRegIorL2I src1, vecD src2, iRegINoSp tmp, iRegINoSp tmp2)
%{
  match(Set dst (AddReductionVI src1 src2));
  ins_cost(INSN_COST);
  effect(TEMP tmp, TEMP tmp2);
  format %{ "umov  $tmp, $src2, S, 0\n\t"
            "umov  $tmp2, $src2, S, 1\n\t"
            "addw  $dst, $src1, $tmp\n\t"
            "addw  $dst, $dst, $tmp2\t add reduction2i"
  %}
  ins_encode %{
    __ umov($tmp$$Register, as_FloatRegister($src2$$reg), __ S, 0);
    __ umov($tmp2$$Register, as_FloatRegister($src2$$reg), __ S, 1);
    __ addw($dst$$Register, $src1$$Register, $tmp$$Register);
    __ addw($dst$$Register, $dst$$Register, $tmp2$$Register);
  %}
  ins_pipe(pipe_class_default);
%}

// Add-reduction of a 4-lane int vector.  Uses the cross-lane ADDV to sum
// all four lanes in one SIMD instruction, then adds the scalar src1.
instruct reduce_add4I(iRegINoSp dst, iRegIorL2I src1, vecX src2, vecX tmp, iRegINoSp tmp2)
%{
  match(Set dst (AddReductionVI src1 src2));
  ins_cost(INSN_COST);
  effect(TEMP tmp, TEMP tmp2);
  format %{ "addv  $tmp, T4S, $src2\n\t"
            "umov  $tmp2, $tmp, S, 0\n\t"
            "addw  $dst, $tmp2, $src1\t add reduction4i"
  %}
  ins_encode %{
    __ addv(as_FloatRegister($tmp$$reg), __ T4S,
            as_FloatRegister($src2$$reg));
    __ umov($tmp2$$Register, as_FloatRegister($tmp$$reg), __ S, 0);
    __ addw($dst$$Register, $tmp2$$Register, $src1$$Register);
  %}
  ins_pipe(pipe_class_default);
%}
16458 
// Multiply-reduction of a 2-lane int vector: dst = src1 * src2[0] * src2[1].
// Each lane is moved to a GPR with umov and folded in with a scalar mul.
// Fix: removed a stray trailing "\n\t" after the final format line, which
// produced a dangling blank continuation in -XX:+PrintOptoAssembly output.
instruct reduce_mul2I(iRegINoSp dst, iRegIorL2I src1, vecD src2, iRegINoSp tmp)
%{
  match(Set dst (MulReductionVI src1 src2));
  ins_cost(INSN_COST);
  // dst is written before all inputs are consumed, so it must be TEMP.
  effect(TEMP tmp, TEMP dst);
  format %{ "umov  $tmp, $src2, S, 0\n\t"
            "mul   $dst, $tmp, $src1\n\t"
            "umov  $tmp, $src2, S, 1\n\t"
            "mul   $dst, $tmp, $dst\t mul reduction2i"
  %}
  ins_encode %{
    __ umov($tmp$$Register, as_FloatRegister($src2$$reg), __ S, 0);
    __ mul($dst$$Register, $tmp$$Register, $src1$$Register);
    __ umov($tmp$$Register, as_FloatRegister($src2$$reg), __ S, 1);
    __ mul($dst$$Register, $tmp$$Register, $dst$$Register);
  %}
  ins_pipe(pipe_class_default);
%}
16477 
// Multiply-reduction of a 4-lane int vector.  First the upper 64 bits of
// src2 are copied down (ins D 0,1) and multiplied pairwise with the lower
// half, halving the problem to 2 lanes; the remaining two partial products
// are then folded in through GPRs with umov/mul, together with scalar src1.
// Fix: removed a stray trailing "\n\t" after the final format line.
instruct reduce_mul4I(iRegINoSp dst, iRegIorL2I src1, vecX src2, vecX tmp, iRegINoSp tmp2)
%{
  match(Set dst (MulReductionVI src1 src2));
  ins_cost(INSN_COST);
  effect(TEMP tmp, TEMP tmp2, TEMP dst);
  format %{ "ins   $tmp, $src2, 0, 1\n\t"
            "mul   $tmp, $tmp, $src2\n\t"
            "umov  $tmp2, $tmp, S, 0\n\t"
            "mul   $dst, $tmp2, $src1\n\t"
            "umov  $tmp2, $tmp, S, 1\n\t"
            "mul   $dst, $tmp2, $dst\t mul reduction4i"
  %}
  ins_encode %{
    __ ins(as_FloatRegister($tmp$$reg), __ D,
           as_FloatRegister($src2$$reg), 0, 1);
    __ mulv(as_FloatRegister($tmp$$reg), __ T2S,
           as_FloatRegister($tmp$$reg), as_FloatRegister($src2$$reg));
    __ umov($tmp2$$Register, as_FloatRegister($tmp$$reg), __ S, 0);
    __ mul($dst$$Register, $tmp2$$Register, $src1$$Register);
    __ umov($tmp2$$Register, as_FloatRegister($tmp$$reg), __ S, 1);
    __ mul($dst$$Register, $tmp2$$Register, $dst$$Register);
  %}
  ins_pipe(pipe_class_default);
%}
16502 
// Add-reduction of a 2-lane float vector: dst = src1 + src2[0] + src2[1].
// Lanes are accumulated one scalar fadds at a time, preserving strict
// left-to-right addition order (FP addition is not associative).
// "ins $tmp, S, $src2, 0, 1" copies lane 1 of src2 into lane 0 of tmp.
instruct reduce_add2F(vRegF dst, vRegF src1, vecD src2, vecD tmp)
%{
  match(Set dst (AddReductionVF src1 src2));
  ins_cost(INSN_COST);
  effect(TEMP tmp, TEMP dst);
  format %{ "fadds $dst, $src1, $src2\n\t"
            "ins   $tmp, S, $src2, 0, 1\n\t"
            "fadds $dst, $dst, $tmp\t add reduction2f"
  %}
  ins_encode %{
    __ fadds(as_FloatRegister($dst$$reg),
             as_FloatRegister($src1$$reg), as_FloatRegister($src2$$reg));
    __ ins(as_FloatRegister($tmp$$reg), __ S,
           as_FloatRegister($src2$$reg), 0, 1);
    __ fadds(as_FloatRegister($dst$$reg),
             as_FloatRegister($dst$$reg), as_FloatRegister($tmp$$reg));
  %}
  ins_pipe(pipe_class_default);
%}

// Add-reduction of a 4-lane float vector; same sequential-scalar scheme as
// reduce_add2F, extracting lanes 1..3 in turn and accumulating into dst.
instruct reduce_add4F(vRegF dst, vRegF src1, vecX src2, vecX tmp)
%{
  match(Set dst (AddReductionVF src1 src2));
  ins_cost(INSN_COST);
  effect(TEMP tmp, TEMP dst);
  format %{ "fadds $dst, $src1, $src2\n\t"
            "ins   $tmp, S, $src2, 0, 1\n\t"
            "fadds $dst, $dst, $tmp\n\t"
            "ins   $tmp, S, $src2, 0, 2\n\t"
            "fadds $dst, $dst, $tmp\n\t"
            "ins   $tmp, S, $src2, 0, 3\n\t"
            "fadds $dst, $dst, $tmp\t add reduction4f"
  %}
  ins_encode %{
    __ fadds(as_FloatRegister($dst$$reg),
             as_FloatRegister($src1$$reg), as_FloatRegister($src2$$reg));
    __ ins(as_FloatRegister($tmp$$reg), __ S,
           as_FloatRegister($src2$$reg), 0, 1);
    __ fadds(as_FloatRegister($dst$$reg),
             as_FloatRegister($dst$$reg), as_FloatRegister($tmp$$reg));
    __ ins(as_FloatRegister($tmp$$reg), __ S,
           as_FloatRegister($src2$$reg), 0, 2);
    __ fadds(as_FloatRegister($dst$$reg),
             as_FloatRegister($dst$$reg), as_FloatRegister($tmp$$reg));
    __ ins(as_FloatRegister($tmp$$reg), __ S,
           as_FloatRegister($src2$$reg), 0, 3);
    __ fadds(as_FloatRegister($dst$$reg),
             as_FloatRegister($dst$$reg), as_FloatRegister($tmp$$reg));
  %}
  ins_pipe(pipe_class_default);
%}
16554 
// Multiply-reduction of a 2-lane float vector: dst = src1 * src2[0] * src2[1],
// computed with sequential scalar fmuls to keep strict evaluation order.
// Fix: the format tag read "add reduction4f" (copy-pasted from the FP add
// reduction) — corrected to "mul reduction2f" to match the actual rule.
instruct reduce_mul2F(vRegF dst, vRegF src1, vecD src2, vecD tmp)
%{
  match(Set dst (MulReductionVF src1 src2));
  ins_cost(INSN_COST);
  effect(TEMP tmp, TEMP dst);
  format %{ "fmuls $dst, $src1, $src2\n\t"
            "ins   $tmp, S, $src2, 0, 1\n\t"
            "fmuls $dst, $dst, $tmp\t mul reduction2f"
  %}
  ins_encode %{
    __ fmuls(as_FloatRegister($dst$$reg),
             as_FloatRegister($src1$$reg), as_FloatRegister($src2$$reg));
    __ ins(as_FloatRegister($tmp$$reg), __ S,
           as_FloatRegister($src2$$reg), 0, 1);
    __ fmuls(as_FloatRegister($dst$$reg),
             as_FloatRegister($dst$$reg), as_FloatRegister($tmp$$reg));
  %}
  ins_pipe(pipe_class_default);
%}
16574 
// Multiply-reduction of a 4-lane float vector; lanes 1..3 are extracted in
// turn and folded into dst with scalar fmuls, preserving evaluation order.
// Fix: the format tag read "add reduction4f" — corrected to "mul reduction4f".
instruct reduce_mul4F(vRegF dst, vRegF src1, vecX src2, vecX tmp)
%{
  match(Set dst (MulReductionVF src1 src2));
  ins_cost(INSN_COST);
  effect(TEMP tmp, TEMP dst);
  format %{ "fmuls $dst, $src1, $src2\n\t"
            "ins   $tmp, S, $src2, 0, 1\n\t"
            "fmuls $dst, $dst, $tmp\n\t"
            "ins   $tmp, S, $src2, 0, 2\n\t"
            "fmuls $dst, $dst, $tmp\n\t"
            "ins   $tmp, S, $src2, 0, 3\n\t"
            "fmuls $dst, $dst, $tmp\t mul reduction4f"
  %}
  ins_encode %{
    __ fmuls(as_FloatRegister($dst$$reg),
             as_FloatRegister($src1$$reg), as_FloatRegister($src2$$reg));
    __ ins(as_FloatRegister($tmp$$reg), __ S,
           as_FloatRegister($src2$$reg), 0, 1);
    __ fmuls(as_FloatRegister($dst$$reg),
             as_FloatRegister($dst$$reg), as_FloatRegister($tmp$$reg));
    __ ins(as_FloatRegister($tmp$$reg), __ S,
           as_FloatRegister($src2$$reg), 0, 2);
    __ fmuls(as_FloatRegister($dst$$reg),
             as_FloatRegister($dst$$reg), as_FloatRegister($tmp$$reg));
    __ ins(as_FloatRegister($tmp$$reg), __ S,
           as_FloatRegister($src2$$reg), 0, 3);
    __ fmuls(as_FloatRegister($dst$$reg),
             as_FloatRegister($dst$$reg), as_FloatRegister($tmp$$reg));
  %}
  ins_pipe(pipe_class_default);
%}
16606 
// Add-reduction of a 2-lane double vector: dst = src1 + src2[0] + src2[1].
// Sequential scalar faddd keeps strict FP addition order; "ins D 0, 1"
// copies lane 1 of src2 into lane 0 of tmp for the second add.
instruct reduce_add2D(vRegD dst, vRegD src1, vecX src2, vecX tmp)
%{
  match(Set dst (AddReductionVD src1 src2));
  ins_cost(INSN_COST);
  effect(TEMP tmp, TEMP dst);
  format %{ "faddd $dst, $src1, $src2\n\t"
            "ins   $tmp, D, $src2, 0, 1\n\t"
            "faddd $dst, $dst, $tmp\t add reduction2d"
  %}
  ins_encode %{
    __ faddd(as_FloatRegister($dst$$reg),
             as_FloatRegister($src1$$reg), as_FloatRegister($src2$$reg));
    __ ins(as_FloatRegister($tmp$$reg), __ D,
           as_FloatRegister($src2$$reg), 0, 1);
    __ faddd(as_FloatRegister($dst$$reg),
             as_FloatRegister($dst$$reg), as_FloatRegister($tmp$$reg));
  %}
  ins_pipe(pipe_class_default);
%}
16626 
// Multiply-reduction of a 2-lane double vector:
// dst = src1 * src2[0] * src2[1], via sequential scalar fmuld.
// Fix: the format tag read "add reduction2d" (copy-pasted from the FP add
// reduction) — corrected to "mul reduction2d".
instruct reduce_mul2D(vRegD dst, vRegD src1, vecX src2, vecX tmp)
%{
  match(Set dst (MulReductionVD src1 src2));
  ins_cost(INSN_COST);
  effect(TEMP tmp, TEMP dst);
  format %{ "fmuld $dst, $src1, $src2\n\t"
            "ins   $tmp, D, $src2, 0, 1\n\t"
            "fmuld $dst, $dst, $tmp\t mul reduction2d"
  %}
  ins_encode %{
    __ fmuld(as_FloatRegister($dst$$reg),
             as_FloatRegister($src1$$reg), as_FloatRegister($src2$$reg));
    __ ins(as_FloatRegister($tmp$$reg), __ D,
           as_FloatRegister($src2$$reg), 0, 1);
    __ fmuld(as_FloatRegister($dst$$reg),
             as_FloatRegister($dst$$reg), as_FloatRegister($tmp$$reg));
  %}
  ins_pipe(pipe_class_default);
%}
16646 
16647 // ====================VECTOR ARITHMETIC=======================================
16648 
16649 // --------------------------------- ADD --------------------------------------
16650 
// Lane-wise integer vector add, 8 (or 4) byte lanes, 64-bit vectors.
instruct vadd8B(vecD dst, vecD src1, vecD src2)
%{
  predicate(n->as_Vector()->length() == 4 ||
            n->as_Vector()->length() == 8);
  match(Set dst (AddVB src1 src2));
  ins_cost(INSN_COST);
  format %{ "addv  $dst,$src1,$src2\t# vector (8B)" %}
  ins_encode %{
    __ addv(as_FloatRegister($dst$$reg), __ T8B,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vdop64);
%}

// Lane-wise integer vector add, 16 byte lanes, 128-bit vectors.
instruct vadd16B(vecX dst, vecX src1, vecX src2)
%{
  predicate(n->as_Vector()->length() == 16);
  match(Set dst (AddVB src1 src2));
  ins_cost(INSN_COST);
  format %{ "addv  $dst,$src1,$src2\t# vector (16B)" %}
  ins_encode %{
    __ addv(as_FloatRegister($dst$$reg), __ T16B,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vdop128);
%}

// Lane-wise short vector add, 4 (or 2) halfword lanes, 64-bit vectors.
instruct vadd4S(vecD dst, vecD src1, vecD src2)
%{
  predicate(n->as_Vector()->length() == 2 ||
            n->as_Vector()->length() == 4);
  match(Set dst (AddVS src1 src2));
  ins_cost(INSN_COST);
  format %{ "addv  $dst,$src1,$src2\t# vector (4H)" %}
  ins_encode %{
    __ addv(as_FloatRegister($dst$$reg), __ T4H,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vdop64);
%}

// Lane-wise short vector add, 8 halfword lanes, 128-bit vectors.
instruct vadd8S(vecX dst, vecX src1, vecX src2)
%{
  predicate(n->as_Vector()->length() == 8);
  match(Set dst (AddVS src1 src2));
  ins_cost(INSN_COST);
  format %{ "addv  $dst,$src1,$src2\t# vector (8H)" %}
  ins_encode %{
    __ addv(as_FloatRegister($dst$$reg), __ T8H,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vdop128);
%}

// Lane-wise int vector add, 2 word lanes, 64-bit vectors.
instruct vadd2I(vecD dst, vecD src1, vecD src2)
%{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (AddVI src1 src2));
  ins_cost(INSN_COST);
  format %{ "addv  $dst,$src1,$src2\t# vector (2S)" %}
  ins_encode %{
    __ addv(as_FloatRegister($dst$$reg), __ T2S,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vdop64);
%}

// Lane-wise int vector add, 4 word lanes, 128-bit vectors.
instruct vadd4I(vecX dst, vecX src1, vecX src2)
%{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (AddVI src1 src2));
  ins_cost(INSN_COST);
  format %{ "addv  $dst,$src1,$src2\t# vector (4S)" %}
  ins_encode %{
    __ addv(as_FloatRegister($dst$$reg), __ T4S,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vdop128);
%}

// Lane-wise long vector add, 2 doubleword lanes, 128-bit vectors.
instruct vadd2L(vecX dst, vecX src1, vecX src2)
%{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (AddVL src1 src2));
  ins_cost(INSN_COST);
  format %{ "addv  $dst,$src1,$src2\t# vector (2L)" %}
  ins_encode %{
    __ addv(as_FloatRegister($dst$$reg), __ T2D,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vdop128);
%}

// Lane-wise float vector add, 2 lanes, 64-bit vectors.
instruct vadd2F(vecD dst, vecD src1, vecD src2)
%{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (AddVF src1 src2));
  ins_cost(INSN_COST);
  format %{ "fadd  $dst,$src1,$src2\t# vector (2S)" %}
  ins_encode %{
    __ fadd(as_FloatRegister($dst$$reg), __ T2S,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vdop_fp64);
%}

// Lane-wise float vector add, 4 lanes, 128-bit vectors.
instruct vadd4F(vecX dst, vecX src1, vecX src2)
%{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (AddVF src1 src2));
  ins_cost(INSN_COST);
  format %{ "fadd  $dst,$src1,$src2\t# vector (4S)" %}
  ins_encode %{
    __ fadd(as_FloatRegister($dst$$reg), __ T4S,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vdop_fp128);
%}
16778 
// Lane-wise double vector add, 2 doubleword lanes, 128-bit vectors.
instruct vadd2D(vecX dst, vecX src1, vecX src2)
%{
  // Fix: predicate was missing; added for consistency with the sibling
  // 2D rules (vsub2D, vmul2D).  A vecX of doubles always has 2 lanes,
  // so this does not reject any previously-matched shape.
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (AddVD src1 src2));
  ins_cost(INSN_COST);
  format %{ "fadd  $dst,$src1,$src2\t# vector (2D)" %}
  ins_encode %{
    __ fadd(as_FloatRegister($dst$$reg), __ T2D,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vdop_fp128);
%}
16791 
16792 // --------------------------------- SUB --------------------------------------
16793 
// Lane-wise integer vector subtract, 8 (or 4) byte lanes, 64-bit vectors.
instruct vsub8B(vecD dst, vecD src1, vecD src2)
%{
  predicate(n->as_Vector()->length() == 4 ||
            n->as_Vector()->length() == 8);
  match(Set dst (SubVB src1 src2));
  ins_cost(INSN_COST);
  format %{ "subv  $dst,$src1,$src2\t# vector (8B)" %}
  ins_encode %{
    __ subv(as_FloatRegister($dst$$reg), __ T8B,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vdop64);
%}

// Lane-wise integer vector subtract, 16 byte lanes, 128-bit vectors.
instruct vsub16B(vecX dst, vecX src1, vecX src2)
%{
  predicate(n->as_Vector()->length() == 16);
  match(Set dst (SubVB src1 src2));
  ins_cost(INSN_COST);
  format %{ "subv  $dst,$src1,$src2\t# vector (16B)" %}
  ins_encode %{
    __ subv(as_FloatRegister($dst$$reg), __ T16B,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vdop128);
%}

// Lane-wise short vector subtract, 4 (or 2) halfword lanes, 64-bit vectors.
instruct vsub4S(vecD dst, vecD src1, vecD src2)
%{
  predicate(n->as_Vector()->length() == 2 ||
            n->as_Vector()->length() == 4);
  match(Set dst (SubVS src1 src2));
  ins_cost(INSN_COST);
  format %{ "subv  $dst,$src1,$src2\t# vector (4H)" %}
  ins_encode %{
    __ subv(as_FloatRegister($dst$$reg), __ T4H,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vdop64);
%}

// Lane-wise short vector subtract, 8 halfword lanes, 128-bit vectors.
instruct vsub8S(vecX dst, vecX src1, vecX src2)
%{
  predicate(n->as_Vector()->length() == 8);
  match(Set dst (SubVS src1 src2));
  ins_cost(INSN_COST);
  format %{ "subv  $dst,$src1,$src2\t# vector (8H)" %}
  ins_encode %{
    __ subv(as_FloatRegister($dst$$reg), __ T8H,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vdop128);
%}

// Lane-wise int vector subtract, 2 word lanes, 64-bit vectors.
instruct vsub2I(vecD dst, vecD src1, vecD src2)
%{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (SubVI src1 src2));
  ins_cost(INSN_COST);
  format %{ "subv  $dst,$src1,$src2\t# vector (2S)" %}
  ins_encode %{
    __ subv(as_FloatRegister($dst$$reg), __ T2S,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vdop64);
%}

// Lane-wise int vector subtract, 4 word lanes, 128-bit vectors.
instruct vsub4I(vecX dst, vecX src1, vecX src2)
%{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (SubVI src1 src2));
  ins_cost(INSN_COST);
  format %{ "subv  $dst,$src1,$src2\t# vector (4S)" %}
  ins_encode %{
    __ subv(as_FloatRegister($dst$$reg), __ T4S,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vdop128);
%}

// Lane-wise long vector subtract, 2 doubleword lanes, 128-bit vectors.
instruct vsub2L(vecX dst, vecX src1, vecX src2)
%{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (SubVL src1 src2));
  ins_cost(INSN_COST);
  format %{ "subv  $dst,$src1,$src2\t# vector (2L)" %}
  ins_encode %{
    __ subv(as_FloatRegister($dst$$reg), __ T2D,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vdop128);
%}

// Lane-wise float vector subtract, 2 lanes, 64-bit vectors.
instruct vsub2F(vecD dst, vecD src1, vecD src2)
%{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (SubVF src1 src2));
  ins_cost(INSN_COST);
  format %{ "fsub  $dst,$src1,$src2\t# vector (2S)" %}
  ins_encode %{
    __ fsub(as_FloatRegister($dst$$reg), __ T2S,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vdop_fp64);
%}

// Lane-wise float vector subtract, 4 lanes, 128-bit vectors.
instruct vsub4F(vecX dst, vecX src1, vecX src2)
%{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (SubVF src1 src2));
  ins_cost(INSN_COST);
  format %{ "fsub  $dst,$src1,$src2\t# vector (4S)" %}
  ins_encode %{
    __ fsub(as_FloatRegister($dst$$reg), __ T4S,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vdop_fp128);
%}

// Lane-wise double vector subtract, 2 doubleword lanes, 128-bit vectors.
instruct vsub2D(vecX dst, vecX src1, vecX src2)
%{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (SubVD src1 src2));
  ins_cost(INSN_COST);
  format %{ "fsub  $dst,$src1,$src2\t# vector (2D)" %}
  ins_encode %{
    __ fsub(as_FloatRegister($dst$$reg), __ T2D,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vdop_fp128);
%}
16935 
16936 // --------------------------------- MUL --------------------------------------
16937 
// Lane-wise short vector multiply, 4 (or 2) halfword lanes, 64-bit vectors.
// (No byte-element multiply rules appear in this section.)
instruct vmul4S(vecD dst, vecD src1, vecD src2)
%{
  predicate(n->as_Vector()->length() == 2 ||
            n->as_Vector()->length() == 4);
  match(Set dst (MulVS src1 src2));
  ins_cost(INSN_COST);
  format %{ "mulv  $dst,$src1,$src2\t# vector (4H)" %}
  ins_encode %{
    __ mulv(as_FloatRegister($dst$$reg), __ T4H,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vmul64);
%}

// Lane-wise short vector multiply, 8 halfword lanes, 128-bit vectors.
instruct vmul8S(vecX dst, vecX src1, vecX src2)
%{
  predicate(n->as_Vector()->length() == 8);
  match(Set dst (MulVS src1 src2));
  ins_cost(INSN_COST);
  format %{ "mulv  $dst,$src1,$src2\t# vector (8H)" %}
  ins_encode %{
    __ mulv(as_FloatRegister($dst$$reg), __ T8H,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vmul128);
%}

// Lane-wise int vector multiply, 2 word lanes, 64-bit vectors.
instruct vmul2I(vecD dst, vecD src1, vecD src2)
%{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (MulVI src1 src2));
  ins_cost(INSN_COST);
  format %{ "mulv  $dst,$src1,$src2\t# vector (2S)" %}
  ins_encode %{
    __ mulv(as_FloatRegister($dst$$reg), __ T2S,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vmul64);
%}

// Lane-wise int vector multiply, 4 word lanes, 128-bit vectors.
instruct vmul4I(vecX dst, vecX src1, vecX src2)
%{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (MulVI src1 src2));
  ins_cost(INSN_COST);
  format %{ "mulv  $dst,$src1,$src2\t# vector (4S)" %}
  ins_encode %{
    __ mulv(as_FloatRegister($dst$$reg), __ T4S,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vmul128);
%}

// Lane-wise float vector multiply, 2 lanes, 64-bit vectors.
instruct vmul2F(vecD dst, vecD src1, vecD src2)
%{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (MulVF src1 src2));
  ins_cost(INSN_COST);
  format %{ "fmul  $dst,$src1,$src2\t# vector (2S)" %}
  ins_encode %{
    __ fmul(as_FloatRegister($dst$$reg), __ T2S,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vmuldiv_fp64);
%}

// Lane-wise float vector multiply, 4 lanes, 128-bit vectors.
instruct vmul4F(vecX dst, vecX src1, vecX src2)
%{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (MulVF src1 src2));
  ins_cost(INSN_COST);
  format %{ "fmul  $dst,$src1,$src2\t# vector (4S)" %}
  ins_encode %{
    __ fmul(as_FloatRegister($dst$$reg), __ T4S,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vmuldiv_fp128);
%}

// Lane-wise double vector multiply, 2 doubleword lanes, 128-bit vectors.
instruct vmul2D(vecX dst, vecX src1, vecX src2)
%{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (MulVD src1 src2));
  ins_cost(INSN_COST);
  format %{ "fmul  $dst,$src1,$src2\t# vector (2D)" %}
  ins_encode %{
    __ fmul(as_FloatRegister($dst$$reg), __ T2D,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vmuldiv_fp128);
%}
17036 
17037 // --------------------------------- MLA --------------------------------------
17038 
// Integer multiply-accumulate: matches AddVS(dst, MulVS(src1, src2)) so the
// add and multiply fuse into a single MLA instruction; dst is read-modify-write.
instruct vmla4S(vecD dst, vecD src1, vecD src2)
%{
  predicate(n->as_Vector()->length() == 2 ||
            n->as_Vector()->length() == 4);
  match(Set dst (AddVS dst (MulVS src1 src2)));
  ins_cost(INSN_COST);
  format %{ "mlav  $dst,$src1,$src2\t# vector (4H)" %}
  ins_encode %{
    __ mlav(as_FloatRegister($dst$$reg), __ T4H,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vmla64);
%}

// Short multiply-accumulate, 8 halfword lanes, 128-bit vectors.
instruct vmla8S(vecX dst, vecX src1, vecX src2)
%{
  predicate(n->as_Vector()->length() == 8);
  match(Set dst (AddVS dst (MulVS src1 src2)));
  ins_cost(INSN_COST);
  format %{ "mlav  $dst,$src1,$src2\t# vector (8H)" %}
  ins_encode %{
    __ mlav(as_FloatRegister($dst$$reg), __ T8H,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vmla128);
%}

// Int multiply-accumulate, 2 word lanes, 64-bit vectors.
instruct vmla2I(vecD dst, vecD src1, vecD src2)
%{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (AddVI dst (MulVI src1 src2)));
  ins_cost(INSN_COST);
  format %{ "mlav  $dst,$src1,$src2\t# vector (2S)" %}
  ins_encode %{
    __ mlav(as_FloatRegister($dst$$reg), __ T2S,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vmla64);
%}

// Int multiply-accumulate, 4 word lanes, 128-bit vectors.
instruct vmla4I(vecX dst, vecX src1, vecX src2)
%{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (AddVI dst (MulVI src1 src2)));
  ins_cost(INSN_COST);
  format %{ "mlav  $dst,$src1,$src2\t# vector (4S)" %}
  ins_encode %{
    __ mlav(as_FloatRegister($dst$$reg), __ T4S,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vmla128);
%}

// dst + src1 * src2
// FP fused multiply-add via the FmaVF node; only matched when UseFMA is on,
// since FMLA rounds once (fused) rather than after the multiply and the add.
instruct vmla2F(vecD dst, vecD src1, vecD src2) %{
  predicate(UseFMA && n->as_Vector()->length() == 2);
  match(Set dst (FmaVF  dst (Binary src1 src2)));
  format %{ "fmla  $dst,$src1,$src2\t# vector (2S)" %}
  ins_cost(INSN_COST);
  ins_encode %{
    __ fmla(as_FloatRegister($dst$$reg), __ T2S,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vmuldiv_fp64);
%}

// dst + src1 * src2
instruct vmla4F(vecX dst, vecX src1, vecX src2) %{
  predicate(UseFMA && n->as_Vector()->length() == 4);
  match(Set dst (FmaVF  dst (Binary src1 src2)));
  format %{ "fmla  $dst,$src1,$src2\t# vector (4S)" %}
  ins_cost(INSN_COST);
  ins_encode %{
    __ fmla(as_FloatRegister($dst$$reg), __ T4S,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vmuldiv_fp128);
%}

// dst + src1 * src2
instruct vmla2D(vecX dst, vecX src1, vecX src2) %{
  predicate(UseFMA && n->as_Vector()->length() == 2);
  match(Set dst (FmaVD  dst (Binary src1 src2)));
  format %{ "fmla  $dst,$src1,$src2\t# vector (2D)" %}
  ins_cost(INSN_COST);
  ins_encode %{
    __ fmla(as_FloatRegister($dst$$reg), __ T2D,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vmuldiv_fp128);
%}
17137 
17138 // --------------------------------- MLS --------------------------------------
17139 
17140 instruct vmls4S(vecD dst, vecD src1, vecD src2)
17141 %{
17142   predicate(n->as_Vector()->length() == 2 ||
17143             n->as_Vector()->length() == 4);
17144   match(Set dst (SubVS dst (MulVS src1 src2)));
17145   ins_cost(INSN_COST);
17146   format %{ "mlsv  $dst,$src1,$src2\t# vector (4H)" %}
17147   ins_encode %{
17148     __ mlsv(as_FloatRegister($dst$$reg), __ T4H,
17149             as_FloatRegister($src1$$reg),
17150             as_FloatRegister($src2$$reg));
17151   %}
17152   ins_pipe(vmla64);
17153 %}
17154 
17155 instruct vmls8S(vecX dst, vecX src1, vecX src2)
17156 %{
17157   predicate(n->as_Vector()->length() == 8);
17158   match(Set dst (SubVS dst (MulVS src1 src2)));
17159   ins_cost(INSN_COST);
17160   format %{ "mlsv  $dst,$src1,$src2\t# vector (8H)" %}
17161   ins_encode %{
17162     __ mlsv(as_FloatRegister($dst$$reg), __ T8H,
17163             as_FloatRegister($src1$$reg),
17164             as_FloatRegister($src2$$reg));
17165   %}
17166   ins_pipe(vmla128);
17167 %}
17168 
17169 instruct vmls2I(vecD dst, vecD src1, vecD src2)
17170 %{
17171   predicate(n->as_Vector()->length() == 2);
17172   match(Set dst (SubVI dst (MulVI src1 src2)));
17173   ins_cost(INSN_COST);
17174   format %{ "mlsv  $dst,$src1,$src2\t# vector (2S)" %}
17175   ins_encode %{
17176     __ mlsv(as_FloatRegister($dst$$reg), __ T2S,
17177             as_FloatRegister($src1$$reg),
17178             as_FloatRegister($src2$$reg));
17179   %}
17180   ins_pipe(vmla64);
17181 %}
17182 
17183 instruct vmls4I(vecX dst, vecX src1, vecX src2)
17184 %{
17185   predicate(n->as_Vector()->length() == 4);
17186   match(Set dst (SubVI dst (MulVI src1 src2)));
17187   ins_cost(INSN_COST);
17188   format %{ "mlsv  $dst,$src1,$src2\t# vector (4S)" %}
17189   ins_encode %{
17190     __ mlsv(as_FloatRegister($dst$$reg), __ T4S,
17191             as_FloatRegister($src1$$reg),
17192             as_FloatRegister($src2$$reg));
17193   %}
17194   ins_pipe(vmla128);
17195 %}
17196 
// dst - src1 * src2
// Fused multiply-subtract, 2 packed floats. Two match rules are given
// because negation commutes over the product: Fma(dst, -a, b) and
// Fma(dst, a, -b) both reduce to dst - a*b, which FMLS computes directly.
instruct vmls2F(vecD dst, vecD src1, vecD src2) %{
  predicate(UseFMA && n->as_Vector()->length() == 2);
  match(Set dst (FmaVF  dst (Binary (NegVF src1) src2)));
  match(Set dst (FmaVF  dst (Binary src1 (NegVF src2))));
  format %{ "fmls  $dst,$src1,$src2\t# vector (2S)" %}
  ins_cost(INSN_COST);
  ins_encode %{
    __ fmls(as_FloatRegister($dst$$reg), __ T2S,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vmuldiv_fp64);
%}

// dst - src1 * src2
// Fused multiply-subtract, 4 packed floats; see vmls2F for the dual match.
instruct vmls4F(vecX dst, vecX src1, vecX src2) %{
  predicate(UseFMA && n->as_Vector()->length() == 4);
  match(Set dst (FmaVF  dst (Binary (NegVF src1) src2)));
  match(Set dst (FmaVF  dst (Binary src1 (NegVF src2))));
  format %{ "fmls  $dst,$src1,$src2\t# vector (4S)" %}
  ins_cost(INSN_COST);
  ins_encode %{
    __ fmls(as_FloatRegister($dst$$reg), __ T4S,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vmuldiv_fp128);
%}

// dst - src1 * src2
// Fused multiply-subtract, 2 packed doubles; see vmls2F for the dual match.
instruct vmls2D(vecX dst, vecX src1, vecX src2) %{
  predicate(UseFMA && n->as_Vector()->length() == 2);
  match(Set dst (FmaVD  dst (Binary (NegVD src1) src2)));
  match(Set dst (FmaVD  dst (Binary src1 (NegVD src2))));
  format %{ "fmls  $dst,$src1,$src2\t# vector (2D)" %}
  ins_cost(INSN_COST);
  ins_encode %{
    __ fmls(as_FloatRegister($dst$$reg), __ T2D,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vmuldiv_fp128);
%}
17241 
17242 // --------------------------------- DIV --------------------------------------
17243 
17244 instruct vdiv2F(vecD dst, vecD src1, vecD src2)
17245 %{
17246   predicate(n->as_Vector()->length() == 2);
17247   match(Set dst (DivVF src1 src2));
17248   ins_cost(INSN_COST);
17249   format %{ "fdiv  $dst,$src1,$src2\t# vector (2S)" %}
17250   ins_encode %{
17251     __ fdiv(as_FloatRegister($dst$$reg), __ T2S,
17252             as_FloatRegister($src1$$reg),
17253             as_FloatRegister($src2$$reg));
17254   %}
17255   ins_pipe(vmuldiv_fp64);
17256 %}
17257 
17258 instruct vdiv4F(vecX dst, vecX src1, vecX src2)
17259 %{
17260   predicate(n->as_Vector()->length() == 4);
17261   match(Set dst (DivVF src1 src2));
17262   ins_cost(INSN_COST);
17263   format %{ "fdiv  $dst,$src1,$src2\t# vector (4S)" %}
17264   ins_encode %{
17265     __ fdiv(as_FloatRegister($dst$$reg), __ T4S,
17266             as_FloatRegister($src1$$reg),
17267             as_FloatRegister($src2$$reg));
17268   %}
17269   ins_pipe(vmuldiv_fp128);
17270 %}
17271 
17272 instruct vdiv2D(vecX dst, vecX src1, vecX src2)
17273 %{
17274   predicate(n->as_Vector()->length() == 2);
17275   match(Set dst (DivVD src1 src2));
17276   ins_cost(INSN_COST);
17277   format %{ "fdiv  $dst,$src1,$src2\t# vector (2D)" %}
17278   ins_encode %{
17279     __ fdiv(as_FloatRegister($dst$$reg), __ T2D,
17280             as_FloatRegister($src1$$reg),
17281             as_FloatRegister($src2$$reg));
17282   %}
17283   ins_pipe(vmuldiv_fp128);
17284 %}
17285 
17286 // --------------------------------- SQRT -------------------------------------
17287 
17288 instruct vsqrt2D(vecX dst, vecX src)
17289 %{
17290   predicate(n->as_Vector()->length() == 2);
17291   match(Set dst (SqrtVD src));
17292   format %{ "fsqrt  $dst, $src\t# vector (2D)" %}
17293   ins_encode %{
17294     __ fsqrt(as_FloatRegister($dst$$reg), __ T2D,
17295              as_FloatRegister($src$$reg));
17296   %}
17297   ins_pipe(vsqrt_fp128);
17298 %}
17299 
17300 // --------------------------------- ABS --------------------------------------
17301 
17302 instruct vabs2F(vecD dst, vecD src)
17303 %{
17304   predicate(n->as_Vector()->length() == 2);
17305   match(Set dst (AbsVF src));
17306   ins_cost(INSN_COST * 3);
17307   format %{ "fabs  $dst,$src\t# vector (2S)" %}
17308   ins_encode %{
17309     __ fabs(as_FloatRegister($dst$$reg), __ T2S,
17310             as_FloatRegister($src$$reg));
17311   %}
17312   ins_pipe(vunop_fp64);
17313 %}
17314 
17315 instruct vabs4F(vecX dst, vecX src)
17316 %{
17317   predicate(n->as_Vector()->length() == 4);
17318   match(Set dst (AbsVF src));
17319   ins_cost(INSN_COST * 3);
17320   format %{ "fabs  $dst,$src\t# vector (4S)" %}
17321   ins_encode %{
17322     __ fabs(as_FloatRegister($dst$$reg), __ T4S,
17323             as_FloatRegister($src$$reg));
17324   %}
17325   ins_pipe(vunop_fp128);
17326 %}
17327 
17328 instruct vabs2D(vecX dst, vecX src)
17329 %{
17330   predicate(n->as_Vector()->length() == 2);
17331   match(Set dst (AbsVD src));
17332   ins_cost(INSN_COST * 3);
17333   format %{ "fabs  $dst,$src\t# vector (2D)" %}
17334   ins_encode %{
17335     __ fabs(as_FloatRegister($dst$$reg), __ T2D,
17336             as_FloatRegister($src$$reg));
17337   %}
17338   ins_pipe(vunop_fp128);
17339 %}
17340 
17341 // --------------------------------- NEG --------------------------------------
17342 
17343 instruct vneg2F(vecD dst, vecD src)
17344 %{
17345   predicate(n->as_Vector()->length() == 2);
17346   match(Set dst (NegVF src));
17347   ins_cost(INSN_COST * 3);
17348   format %{ "fneg  $dst,$src\t# vector (2S)" %}
17349   ins_encode %{
17350     __ fneg(as_FloatRegister($dst$$reg), __ T2S,
17351             as_FloatRegister($src$$reg));
17352   %}
17353   ins_pipe(vunop_fp64);
17354 %}
17355 
17356 instruct vneg4F(vecX dst, vecX src)
17357 %{
17358   predicate(n->as_Vector()->length() == 4);
17359   match(Set dst (NegVF src));
17360   ins_cost(INSN_COST * 3);
17361   format %{ "fneg  $dst,$src\t# vector (4S)" %}
17362   ins_encode %{
17363     __ fneg(as_FloatRegister($dst$$reg), __ T4S,
17364             as_FloatRegister($src$$reg));
17365   %}
17366   ins_pipe(vunop_fp128);
17367 %}
17368 
17369 instruct vneg2D(vecX dst, vecX src)
17370 %{
17371   predicate(n->as_Vector()->length() == 2);
17372   match(Set dst (NegVD src));
17373   ins_cost(INSN_COST * 3);
17374   format %{ "fneg  $dst,$src\t# vector (2D)" %}
17375   ins_encode %{
17376     __ fneg(as_FloatRegister($dst$$reg), __ T2D,
17377             as_FloatRegister($src$$reg));
17378   %}
17379   ins_pipe(vunop_fp128);
17380 %}
17381 
17382 // --------------------------------- AND --------------------------------------
17383 
17384 instruct vand8B(vecD dst, vecD src1, vecD src2)
17385 %{
17386   predicate(n->as_Vector()->length_in_bytes() == 4 ||
17387             n->as_Vector()->length_in_bytes() == 8);
17388   match(Set dst (AndV src1 src2));
17389   ins_cost(INSN_COST);
17390   format %{ "and  $dst,$src1,$src2\t# vector (8B)" %}
17391   ins_encode %{
17392     __ andr(as_FloatRegister($dst$$reg), __ T8B,
17393             as_FloatRegister($src1$$reg),
17394             as_FloatRegister($src2$$reg));
17395   %}
17396   ins_pipe(vlogical64);
17397 %}
17398 
17399 instruct vand16B(vecX dst, vecX src1, vecX src2)
17400 %{
17401   predicate(n->as_Vector()->length_in_bytes() == 16);
17402   match(Set dst (AndV src1 src2));
17403   ins_cost(INSN_COST);
17404   format %{ "and  $dst,$src1,$src2\t# vector (16B)" %}
17405   ins_encode %{
17406     __ andr(as_FloatRegister($dst$$reg), __ T16B,
17407             as_FloatRegister($src1$$reg),
17408             as_FloatRegister($src2$$reg));
17409   %}
17410   ins_pipe(vlogical128);
17411 %}
17412 
17413 // --------------------------------- OR ---------------------------------------
17414 
// Bitwise OR, up to 8 bytes (also covers 4-byte subword vectors).
instruct vor8B(vecD dst, vecD src1, vecD src2)
%{
  predicate(n->as_Vector()->length_in_bytes() == 4 ||
            n->as_Vector()->length_in_bytes() == 8);
  match(Set dst (OrV src1 src2));
  ins_cost(INSN_COST);
  // Format fixed to "orr": this rule emits an ORR, not an AND
  // (matches the 16B variant's format below).
  format %{ "orr  $dst,$src1,$src2\t# vector (8B)" %}
  ins_encode %{
    __ orr(as_FloatRegister($dst$$reg), __ T8B,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vlogical64);
%}
17429 
// Bitwise OR, full 16-byte vector.
instruct vor16B(vecX dst, vecX src1, vecX src2)
%{
  predicate(n->as_Vector()->length_in_bytes() == 16);
  match(Set dst (OrV src1 src2));
  ins_cost(INSN_COST);
  format %{ "orr  $dst,$src1,$src2\t# vector (16B)" %}
  ins_encode %{
    __ orr(as_FloatRegister($dst$$reg), __ T16B,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vlogical128);
%}
17443 
17444 // --------------------------------- XOR --------------------------------------
17445 
// Bitwise XOR, up to 8 bytes (also covers 4-byte subword vectors).
instruct vxor8B(vecD dst, vecD src1, vecD src2)
%{
  predicate(n->as_Vector()->length_in_bytes() == 4 ||
            n->as_Vector()->length_in_bytes() == 8);
  match(Set dst (XorV src1 src2));
  ins_cost(INSN_COST);
  // Format uses the real AArch64 mnemonic "eor" ("xor" is not an A64
  // mnemonic), consistent with the AND/OR rules printing and/orr.
  format %{ "eor  $dst,$src1,$src2\t# vector (8B)" %}
  ins_encode %{
    __ eor(as_FloatRegister($dst$$reg), __ T8B,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vlogical64);
%}
17460 
// Bitwise XOR, full 16-byte vector.
instruct vxor16B(vecX dst, vecX src1, vecX src2)
%{
  predicate(n->as_Vector()->length_in_bytes() == 16);
  match(Set dst (XorV src1 src2));
  ins_cost(INSN_COST);
  // Format uses the real AArch64 mnemonic "eor"; see vxor8B.
  format %{ "eor  $dst,$src1,$src2\t# vector (16B)" %}
  ins_encode %{
    __ eor(as_FloatRegister($dst$$reg), __ T16B,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vlogical128);
%}
17474 
17475 // ------------------------------ Shift ---------------------------------------
17476 
17477 instruct vshiftcntL(vecX dst, iRegIorL2I cnt) %{
17478   match(Set dst (LShiftCntV cnt));
17479   format %{ "dup  $dst, $cnt\t# shift count (vecX)" %}
17480   ins_encode %{
17481     __ dup(as_FloatRegister($dst$$reg), __ T16B, as_Register($cnt$$reg));
17482   %}
17483   ins_pipe(vdup_reg_reg128);
17484 %}
17485 
17486 // Right shifts on aarch64 SIMD are implemented as left shift by -ve amount
17487 instruct vshiftcntR(vecX dst, iRegIorL2I cnt) %{
17488   match(Set dst (RShiftCntV cnt));
17489   format %{ "dup  $dst, $cnt\t# shift count (vecX)\n\tneg  $dst, $dst\t T16B" %}
17490   ins_encode %{
17491     __ dup(as_FloatRegister($dst$$reg), __ T16B, as_Register($cnt$$reg));
17492     __ negr(as_FloatRegister($dst$$reg), __ T16B, as_FloatRegister($dst$$reg));
17493   %}
17494   ins_pipe(vdup_reg_reg128);
17495 %}
17496 
// Byte-element shifts by a register vector. SSHL covers both left and
// (signed) right shifts because right shifts use a negated count
// (see vshiftcntR); USHL likewise covers unsigned right shifts.

instruct vsll8B(vecD dst, vecD src, vecX shift) %{
  predicate(n->as_Vector()->length() == 4 ||
            n->as_Vector()->length() == 8);
  match(Set dst (LShiftVB src shift));
  match(Set dst (RShiftVB src shift));
  ins_cost(INSN_COST);
  format %{ "sshl  $dst,$src,$shift\t# vector (8B)" %}
  ins_encode %{
    __ sshl(as_FloatRegister($dst$$reg), __ T8B,
            as_FloatRegister($src$$reg),
            as_FloatRegister($shift$$reg));
  %}
  ins_pipe(vshift64);
%}

instruct vsll16B(vecX dst, vecX src, vecX shift) %{
  predicate(n->as_Vector()->length() == 16);
  match(Set dst (LShiftVB src shift));
  match(Set dst (RShiftVB src shift));
  ins_cost(INSN_COST);
  format %{ "sshl  $dst,$src,$shift\t# vector (16B)" %}
  ins_encode %{
    __ sshl(as_FloatRegister($dst$$reg), __ T16B,
            as_FloatRegister($src$$reg),
            as_FloatRegister($shift$$reg));
  %}
  ins_pipe(vshift128);
%}

// Unsigned (logical) right shift; count is negated upstream.
instruct vsrl8B(vecD dst, vecD src, vecX shift) %{
  predicate(n->as_Vector()->length() == 4 ||
            n->as_Vector()->length() == 8);
  match(Set dst (URShiftVB src shift));
  ins_cost(INSN_COST);
  format %{ "ushl  $dst,$src,$shift\t# vector (8B)" %}
  ins_encode %{
    __ ushl(as_FloatRegister($dst$$reg), __ T8B,
            as_FloatRegister($src$$reg),
            as_FloatRegister($shift$$reg));
  %}
  ins_pipe(vshift64);
%}

instruct vsrl16B(vecX dst, vecX src, vecX shift) %{
  predicate(n->as_Vector()->length() == 16);
  match(Set dst (URShiftVB src shift));
  ins_cost(INSN_COST);
  format %{ "ushl  $dst,$src,$shift\t# vector (16B)" %}
  ins_encode %{
    __ ushl(as_FloatRegister($dst$$reg), __ T16B,
            as_FloatRegister($src$$reg),
            as_FloatRegister($shift$$reg));
  %}
  ins_pipe(vshift128);
%}
17552 
// Byte-element shifts by an immediate. Java shift semantics are preserved
// for out-of-range counts: a left or logical-right shift by >= 8 yields
// zero (emitted as eor dst,src,src), while an arithmetic right shift
// by >= 8 is clamped to 7 so the sign bit fills the lane.

instruct vsll8B_imm(vecD dst, vecD src, immI shift) %{
  predicate(n->as_Vector()->length() == 4 ||
            n->as_Vector()->length() == 8);
  match(Set dst (LShiftVB src shift));
  ins_cost(INSN_COST);
  format %{ "shl    $dst, $src, $shift\t# vector (8B)" %}
  ins_encode %{
    int sh = (int)$shift$$constant;
    if (sh >= 8) {
      // Shift count exceeds lane width: result is all zeroes.
      __ eor(as_FloatRegister($dst$$reg), __ T8B,
             as_FloatRegister($src$$reg),
             as_FloatRegister($src$$reg));
    } else {
      __ shl(as_FloatRegister($dst$$reg), __ T8B,
             as_FloatRegister($src$$reg), sh);
    }
  %}
  ins_pipe(vshift64_imm);
%}

instruct vsll16B_imm(vecX dst, vecX src, immI shift) %{
  predicate(n->as_Vector()->length() == 16);
  match(Set dst (LShiftVB src shift));
  ins_cost(INSN_COST);
  format %{ "shl    $dst, $src, $shift\t# vector (16B)" %}
  ins_encode %{
    int sh = (int)$shift$$constant;
    if (sh >= 8) {
      // Shift count exceeds lane width: result is all zeroes.
      __ eor(as_FloatRegister($dst$$reg), __ T16B,
             as_FloatRegister($src$$reg),
             as_FloatRegister($src$$reg));
    } else {
      __ shl(as_FloatRegister($dst$$reg), __ T16B,
             as_FloatRegister($src$$reg), sh);
    }
  %}
  ins_pipe(vshift128_imm);
%}

instruct vsra8B_imm(vecD dst, vecD src, immI shift) %{
  predicate(n->as_Vector()->length() == 4 ||
            n->as_Vector()->length() == 8);
  match(Set dst (RShiftVB src shift));
  ins_cost(INSN_COST);
  format %{ "sshr    $dst, $src, $shift\t# vector (8B)" %}
  ins_encode %{
    int sh = (int)$shift$$constant;
    // Clamp to 7 so every result bit is a copy of the sign bit.
    if (sh >= 8) sh = 7;
    __ sshr(as_FloatRegister($dst$$reg), __ T8B,
           as_FloatRegister($src$$reg), sh);
  %}
  ins_pipe(vshift64_imm);
%}

instruct vsra16B_imm(vecX dst, vecX src, immI shift) %{
  predicate(n->as_Vector()->length() == 16);
  match(Set dst (RShiftVB src shift));
  ins_cost(INSN_COST);
  format %{ "sshr    $dst, $src, $shift\t# vector (16B)" %}
  ins_encode %{
    int sh = (int)$shift$$constant;
    // Clamp to 7 so every result bit is a copy of the sign bit.
    if (sh >= 8) sh = 7;
    __ sshr(as_FloatRegister($dst$$reg), __ T16B,
           as_FloatRegister($src$$reg), sh);
  %}
  ins_pipe(vshift128_imm);
%}

instruct vsrl8B_imm(vecD dst, vecD src, immI shift) %{
  predicate(n->as_Vector()->length() == 4 ||
            n->as_Vector()->length() == 8);
  match(Set dst (URShiftVB src shift));
  ins_cost(INSN_COST);
  format %{ "ushr    $dst, $src, $shift\t# vector (8B)" %}
  ins_encode %{
    int sh = (int)$shift$$constant;
    if (sh >= 8) {
      // Shift count exceeds lane width: result is all zeroes.
      __ eor(as_FloatRegister($dst$$reg), __ T8B,
             as_FloatRegister($src$$reg),
             as_FloatRegister($src$$reg));
    } else {
      __ ushr(as_FloatRegister($dst$$reg), __ T8B,
             as_FloatRegister($src$$reg), sh);
    }
  %}
  ins_pipe(vshift64_imm);
%}

instruct vsrl16B_imm(vecX dst, vecX src, immI shift) %{
  predicate(n->as_Vector()->length() == 16);
  match(Set dst (URShiftVB src shift));
  ins_cost(INSN_COST);
  format %{ "ushr    $dst, $src, $shift\t# vector (16B)" %}
  ins_encode %{
    int sh = (int)$shift$$constant;
    if (sh >= 8) {
      // Shift count exceeds lane width: result is all zeroes.
      __ eor(as_FloatRegister($dst$$reg), __ T16B,
             as_FloatRegister($src$$reg),
             as_FloatRegister($src$$reg));
    } else {
      __ ushr(as_FloatRegister($dst$$reg), __ T16B,
             as_FloatRegister($src$$reg), sh);
    }
  %}
  ins_pipe(vshift128_imm);
%}
17659 
// Short (16-bit) element shifts by a register vector; same SSHL/USHL
// scheme as the byte forms (right-shift counts are negated upstream).

instruct vsll4S(vecD dst, vecD src, vecX shift) %{
  predicate(n->as_Vector()->length() == 2 ||
            n->as_Vector()->length() == 4);
  match(Set dst (LShiftVS src shift));
  match(Set dst (RShiftVS src shift));
  ins_cost(INSN_COST);
  format %{ "sshl  $dst,$src,$shift\t# vector (4H)" %}
  ins_encode %{
    __ sshl(as_FloatRegister($dst$$reg), __ T4H,
            as_FloatRegister($src$$reg),
            as_FloatRegister($shift$$reg));
  %}
  ins_pipe(vshift64);
%}

instruct vsll8S(vecX dst, vecX src, vecX shift) %{
  predicate(n->as_Vector()->length() == 8);
  match(Set dst (LShiftVS src shift));
  match(Set dst (RShiftVS src shift));
  ins_cost(INSN_COST);
  format %{ "sshl  $dst,$src,$shift\t# vector (8H)" %}
  ins_encode %{
    __ sshl(as_FloatRegister($dst$$reg), __ T8H,
            as_FloatRegister($src$$reg),
            as_FloatRegister($shift$$reg));
  %}
  ins_pipe(vshift128);
%}

instruct vsrl4S(vecD dst, vecD src, vecX shift) %{
  predicate(n->as_Vector()->length() == 2 ||
            n->as_Vector()->length() == 4);
  match(Set dst (URShiftVS src shift));
  ins_cost(INSN_COST);
  format %{ "ushl  $dst,$src,$shift\t# vector (4H)" %}
  ins_encode %{
    __ ushl(as_FloatRegister($dst$$reg), __ T4H,
            as_FloatRegister($src$$reg),
            as_FloatRegister($shift$$reg));
  %}
  ins_pipe(vshift64);
%}

instruct vsrl8S(vecX dst, vecX src, vecX shift) %{
  predicate(n->as_Vector()->length() == 8);
  match(Set dst (URShiftVS src shift));
  ins_cost(INSN_COST);
  format %{ "ushl  $dst,$src,$shift\t# vector (8H)" %}
  ins_encode %{
    __ ushl(as_FloatRegister($dst$$reg), __ T8H,
            as_FloatRegister($src$$reg),
            as_FloatRegister($shift$$reg));
  %}
  ins_pipe(vshift128);
%}
17715 
// Short (16-bit) element shifts by an immediate; out-of-range counts are
// handled as in the byte forms (zero for >=16 on left/logical-right,
// clamp to 15 on arithmetic right).

instruct vsll4S_imm(vecD dst, vecD src, immI shift) %{
  predicate(n->as_Vector()->length() == 2 ||
            n->as_Vector()->length() == 4);
  match(Set dst (LShiftVS src shift));
  ins_cost(INSN_COST);
  format %{ "shl    $dst, $src, $shift\t# vector (4H)" %}
  ins_encode %{
    int sh = (int)$shift$$constant;
    if (sh >= 16) {
      // Shift count exceeds lane width: result is all zeroes.
      __ eor(as_FloatRegister($dst$$reg), __ T8B,
             as_FloatRegister($src$$reg),
             as_FloatRegister($src$$reg));
    } else {
      __ shl(as_FloatRegister($dst$$reg), __ T4H,
             as_FloatRegister($src$$reg), sh);
    }
  %}
  ins_pipe(vshift64_imm);
%}

instruct vsll8S_imm(vecX dst, vecX src, immI shift) %{
  predicate(n->as_Vector()->length() == 8);
  match(Set dst (LShiftVS src shift));
  ins_cost(INSN_COST);
  format %{ "shl    $dst, $src, $shift\t# vector (8H)" %}
  ins_encode %{
    int sh = (int)$shift$$constant;
    if (sh >= 16) {
      // Shift count exceeds lane width: result is all zeroes.
      __ eor(as_FloatRegister($dst$$reg), __ T16B,
             as_FloatRegister($src$$reg),
             as_FloatRegister($src$$reg));
    } else {
      __ shl(as_FloatRegister($dst$$reg), __ T8H,
             as_FloatRegister($src$$reg), sh);
    }
  %}
  ins_pipe(vshift128_imm);
%}

instruct vsra4S_imm(vecD dst, vecD src, immI shift) %{
  predicate(n->as_Vector()->length() == 2 ||
            n->as_Vector()->length() == 4);
  match(Set dst (RShiftVS src shift));
  ins_cost(INSN_COST);
  format %{ "sshr    $dst, $src, $shift\t# vector (4H)" %}
  ins_encode %{
    int sh = (int)$shift$$constant;
    // Clamp to 15 so every result bit is a copy of the sign bit.
    if (sh >= 16) sh = 15;
    __ sshr(as_FloatRegister($dst$$reg), __ T4H,
           as_FloatRegister($src$$reg), sh);
  %}
  ins_pipe(vshift64_imm);
%}

instruct vsra8S_imm(vecX dst, vecX src, immI shift) %{
  predicate(n->as_Vector()->length() == 8);
  match(Set dst (RShiftVS src shift));
  ins_cost(INSN_COST);
  format %{ "sshr    $dst, $src, $shift\t# vector (8H)" %}
  ins_encode %{
    int sh = (int)$shift$$constant;
    // Clamp to 15 so every result bit is a copy of the sign bit.
    if (sh >= 16) sh = 15;
    __ sshr(as_FloatRegister($dst$$reg), __ T8H,
           as_FloatRegister($src$$reg), sh);
  %}
  ins_pipe(vshift128_imm);
%}

instruct vsrl4S_imm(vecD dst, vecD src, immI shift) %{
  predicate(n->as_Vector()->length() == 2 ||
            n->as_Vector()->length() == 4);
  match(Set dst (URShiftVS src shift));
  ins_cost(INSN_COST);
  format %{ "ushr    $dst, $src, $shift\t# vector (4H)" %}
  ins_encode %{
    int sh = (int)$shift$$constant;
    if (sh >= 16) {
      // Shift count exceeds lane width: result is all zeroes.
      __ eor(as_FloatRegister($dst$$reg), __ T8B,
             as_FloatRegister($src$$reg),
             as_FloatRegister($src$$reg));
    } else {
      __ ushr(as_FloatRegister($dst$$reg), __ T4H,
             as_FloatRegister($src$$reg), sh);
    }
  %}
  ins_pipe(vshift64_imm);
%}

instruct vsrl8S_imm(vecX dst, vecX src, immI shift) %{
  predicate(n->as_Vector()->length() == 8);
  match(Set dst (URShiftVS src shift));
  ins_cost(INSN_COST);
  format %{ "ushr    $dst, $src, $shift\t# vector (8H)" %}
  ins_encode %{
    int sh = (int)$shift$$constant;
    if (sh >= 16) {
      // Shift count exceeds lane width: result is all zeroes.
      __ eor(as_FloatRegister($dst$$reg), __ T16B,
             as_FloatRegister($src$$reg),
             as_FloatRegister($src$$reg));
    } else {
      __ ushr(as_FloatRegister($dst$$reg), __ T8H,
             as_FloatRegister($src$$reg), sh);
    }
  %}
  ins_pipe(vshift128_imm);
%}
17822 
// Int (32-bit) element shifts by a register vector; same SSHL/USHL scheme.

instruct vsll2I(vecD dst, vecD src, vecX shift) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (LShiftVI src shift));
  match(Set dst (RShiftVI src shift));
  ins_cost(INSN_COST);
  format %{ "sshl  $dst,$src,$shift\t# vector (2S)" %}
  ins_encode %{
    __ sshl(as_FloatRegister($dst$$reg), __ T2S,
            as_FloatRegister($src$$reg),
            as_FloatRegister($shift$$reg));
  %}
  ins_pipe(vshift64);
%}

instruct vsll4I(vecX dst, vecX src, vecX shift) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (LShiftVI src shift));
  match(Set dst (RShiftVI src shift));
  ins_cost(INSN_COST);
  format %{ "sshl  $dst,$src,$shift\t# vector (4S)" %}
  ins_encode %{
    __ sshl(as_FloatRegister($dst$$reg), __ T4S,
            as_FloatRegister($src$$reg),
            as_FloatRegister($shift$$reg));
  %}
  ins_pipe(vshift128);
%}

instruct vsrl2I(vecD dst, vecD src, vecX shift) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (URShiftVI src shift));
  ins_cost(INSN_COST);
  format %{ "ushl  $dst,$src,$shift\t# vector (2S)" %}
  ins_encode %{
    __ ushl(as_FloatRegister($dst$$reg), __ T2S,
            as_FloatRegister($src$$reg),
            as_FloatRegister($shift$$reg));
  %}
  ins_pipe(vshift64);
%}

instruct vsrl4I(vecX dst, vecX src, vecX shift) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (URShiftVI src shift));
  ins_cost(INSN_COST);
  format %{ "ushl  $dst,$src,$shift\t# vector (4S)" %}
  ins_encode %{
    __ ushl(as_FloatRegister($dst$$reg), __ T4S,
            as_FloatRegister($src$$reg),
            as_FloatRegister($shift$$reg));
  %}
  ins_pipe(vshift128);
%}
17876 
// Int (32-bit) element shifts by an immediate. No out-of-range handling
// is present here, unlike the byte/short forms, which clamp or zero.
// NOTE(review): presumably counts are already masked to 0..31 for int
// shifts before reaching these rules — confirm against the matcher.

instruct vsll2I_imm(vecD dst, vecD src, immI shift) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (LShiftVI src shift));
  ins_cost(INSN_COST);
  format %{ "shl    $dst, $src, $shift\t# vector (2S)" %}
  ins_encode %{
    __ shl(as_FloatRegister($dst$$reg), __ T2S,
           as_FloatRegister($src$$reg),
           (int)$shift$$constant);
  %}
  ins_pipe(vshift64_imm);
%}

instruct vsll4I_imm(vecX dst, vecX src, immI shift) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (LShiftVI src shift));
  ins_cost(INSN_COST);
  format %{ "shl    $dst, $src, $shift\t# vector (4S)" %}
  ins_encode %{
    __ shl(as_FloatRegister($dst$$reg), __ T4S,
           as_FloatRegister($src$$reg),
           (int)$shift$$constant);
  %}
  ins_pipe(vshift128_imm);
%}

instruct vsra2I_imm(vecD dst, vecD src, immI shift) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (RShiftVI src shift));
  ins_cost(INSN_COST);
  format %{ "sshr    $dst, $src, $shift\t# vector (2S)" %}
  ins_encode %{
    __ sshr(as_FloatRegister($dst$$reg), __ T2S,
            as_FloatRegister($src$$reg),
            (int)$shift$$constant);
  %}
  ins_pipe(vshift64_imm);
%}

instruct vsra4I_imm(vecX dst, vecX src, immI shift) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (RShiftVI src shift));
  ins_cost(INSN_COST);
  format %{ "sshr    $dst, $src, $shift\t# vector (4S)" %}
  ins_encode %{
    __ sshr(as_FloatRegister($dst$$reg), __ T4S,
            as_FloatRegister($src$$reg),
            (int)$shift$$constant);
  %}
  ins_pipe(vshift128_imm);
%}

instruct vsrl2I_imm(vecD dst, vecD src, immI shift) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (URShiftVI src shift));
  ins_cost(INSN_COST);
  format %{ "ushr    $dst, $src, $shift\t# vector (2S)" %}
  ins_encode %{
    __ ushr(as_FloatRegister($dst$$reg), __ T2S,
            as_FloatRegister($src$$reg),
            (int)$shift$$constant);
  %}
  ins_pipe(vshift64_imm);
%}

instruct vsrl4I_imm(vecX dst, vecX src, immI shift) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (URShiftVI src shift));
  ins_cost(INSN_COST);
  format %{ "ushr    $dst, $src, $shift\t# vector (4S)" %}
  ins_encode %{
    __ ushr(as_FloatRegister($dst$$reg), __ T4S,
            as_FloatRegister($src$$reg),
            (int)$shift$$constant);
  %}
  ins_pipe(vshift128_imm);
%}
17954 
// Long (64-bit) element shifts by a register vector; same SSHL/USHL scheme.

instruct vsll2L(vecX dst, vecX src, vecX shift) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (LShiftVL src shift));
  match(Set dst (RShiftVL src shift));
  ins_cost(INSN_COST);
  format %{ "sshl  $dst,$src,$shift\t# vector (2D)" %}
  ins_encode %{
    __ sshl(as_FloatRegister($dst$$reg), __ T2D,
            as_FloatRegister($src$$reg),
            as_FloatRegister($shift$$reg));
  %}
  ins_pipe(vshift128);
%}

instruct vsrl2L(vecX dst, vecX src, vecX shift) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (URShiftVL src shift));
  ins_cost(INSN_COST);
  format %{ "ushl  $dst,$src,$shift\t# vector (2D)" %}
  ins_encode %{
    __ ushl(as_FloatRegister($dst$$reg), __ T2D,
            as_FloatRegister($src$$reg),
            as_FloatRegister($shift$$reg));
  %}
  ins_pipe(vshift128);
%}
17981 
// Long (64-bit) element shifts by an immediate; like the int forms,
// no out-of-range handling appears here.

instruct vsll2L_imm(vecX dst, vecX src, immI shift) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (LShiftVL src shift));
  ins_cost(INSN_COST);
  format %{ "shl    $dst, $src, $shift\t# vector (2D)" %}
  ins_encode %{
    __ shl(as_FloatRegister($dst$$reg), __ T2D,
           as_FloatRegister($src$$reg),
           (int)$shift$$constant);
  %}
  ins_pipe(vshift128_imm);
%}

instruct vsra2L_imm(vecX dst, vecX src, immI shift) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (RShiftVL src shift));
  ins_cost(INSN_COST);
  format %{ "sshr    $dst, $src, $shift\t# vector (2D)" %}
  ins_encode %{
    __ sshr(as_FloatRegister($dst$$reg), __ T2D,
            as_FloatRegister($src$$reg),
            (int)$shift$$constant);
  %}
  ins_pipe(vshift128_imm);
%}

instruct vsrl2L_imm(vecX dst, vecX src, immI shift) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (URShiftVL src shift));
  ins_cost(INSN_COST);
  format %{ "ushr    $dst, $src, $shift\t# vector (2D)" %}
  ins_encode %{
    __ ushr(as_FloatRegister($dst$$reg), __ T2D,
            as_FloatRegister($src$$reg),
            (int)$shift$$constant);
  %}
  ins_pipe(vshift128_imm);
%}
18020 
18021 //----------PEEPHOLE RULES-----------------------------------------------------
18022 // These must follow all instruction definitions as they use the names
18023 // defined in the instructions definitions.
18024 //
18025 // peepmatch ( root_instr_name [preceding_instruction]* );
18026 //
18027 // peepconstraint %{
18028 // (instruction_number.operand_name relational_op instruction_number.operand_name
18029 //  [, ...] );
18030 // // instruction numbers are zero-based using left to right order in peepmatch
18031 //
18032 // peepreplace ( instr_name  ( [instruction_number.operand_name]* ) );
18033 // // provide an instruction_number.operand_name for each operand that appears
18034 // // in the replacement instruction's match rule
18035 //
18036 // ---------VM FLAGS---------------------------------------------------------
18037 //
18038 // All peephole optimizations can be turned off using -XX:-OptoPeephole
18039 //
18040 // Each peephole rule is given an identifying number starting with zero and
18041 // increasing by one in the order seen by the parser.  An individual peephole
18042 // can be enabled, and all others disabled, by using -XX:OptoPeepholeAt=#
18043 // on the command-line.
18044 //
18045 // ---------CURRENT LIMITATIONS----------------------------------------------
18046 //
18047 // Only match adjacent instructions in same basic block
18048 // Only equality constraints
18049 // Only constraints between operands, not (0.dest_reg == RAX_enc)
18050 // Only one replacement instruction
18051 //
18052 // ---------EXAMPLE----------------------------------------------------------
18053 //
18054 // // pertinent parts of existing instructions in architecture description
18055 // instruct movI(iRegINoSp dst, iRegI src)
18056 // %{
18057 //   match(Set dst (CopyI src));
18058 // %}
18059 //
18060 // instruct incI_iReg(iRegINoSp dst, immI1 src, rFlagsReg cr)
18061 // %{
18062 //   match(Set dst (AddI dst src));
18063 //   effect(KILL cr);
18064 // %}
18065 //
18066 // // Change (inc mov) to lea
18067 // peephole %{
//   // increment preceded by register-register move
18069 //   peepmatch ( incI_iReg movI );
18070 //   // require that the destination register of the increment
18071 //   // match the destination register of the move
18072 //   peepconstraint ( 0.dst == 1.dst );
18073 //   // construct a replacement instruction that sets
18074 //   // the destination to ( move's source register + one )
18075 //   peepreplace ( leaI_iReg_immI( 0.dst 1.src 0.src ) );
18076 // %}
18077 //
18078 
18079 // Implementation no longer uses movX instructions since
18080 // machine-independent system no longer uses CopyX nodes.
18081 //
18082 // peephole
18083 // %{
18084 //   peepmatch (incI_iReg movI);
18085 //   peepconstraint (0.dst == 1.dst);
18086 //   peepreplace (leaI_iReg_immI(0.dst 1.src 0.src));
18087 // %}
18088 
18089 // peephole
18090 // %{
18091 //   peepmatch (decI_iReg movI);
18092 //   peepconstraint (0.dst == 1.dst);
18093 //   peepreplace (leaI_iReg_immI(0.dst 1.src 0.src));
18094 // %}
18095 
18096 // peephole
18097 // %{
18098 //   peepmatch (addI_iReg_imm movI);
18099 //   peepconstraint (0.dst == 1.dst);
18100 //   peepreplace (leaI_iReg_immI(0.dst 1.src 0.src));
18101 // %}
18102 
18103 // peephole
18104 // %{
18105 //   peepmatch (incL_iReg movL);
18106 //   peepconstraint (0.dst == 1.dst);
18107 //   peepreplace (leaL_iReg_immL(0.dst 1.src 0.src));
18108 // %}
18109 
18110 // peephole
18111 // %{
18112 //   peepmatch (decL_iReg movL);
18113 //   peepconstraint (0.dst == 1.dst);
18114 //   peepreplace (leaL_iReg_immL(0.dst 1.src 0.src));
18115 // %}
18116 
18117 // peephole
18118 // %{
18119 //   peepmatch (addL_iReg_imm movL);
18120 //   peepconstraint (0.dst == 1.dst);
18121 //   peepreplace (leaL_iReg_immL(0.dst 1.src 0.src));
18122 // %}
18123 
18124 // peephole
18125 // %{
18126 //   peepmatch (addP_iReg_imm movP);
18127 //   peepconstraint (0.dst == 1.dst);
18128 //   peepreplace (leaP_iReg_imm(0.dst 1.src 0.src));
18129 // %}
18130 
18131 // // Change load of spilled value to only a spill
18132 // instruct storeI(memory mem, iRegI src)
18133 // %{
18134 //   match(Set mem (StoreI mem src));
18135 // %}
18136 //
18137 // instruct loadI(iRegINoSp dst, memory mem)
18138 // %{
18139 //   match(Set dst (LoadI mem));
18140 // %}
18141 //
18142 
18143 //----------SMARTSPILL RULES---------------------------------------------------
18144 // These must follow all instruction definitions as they use the names
18145 // defined in the instructions definitions.
18146 
18147 // Local Variables:
18148 // mode: c++
18149 // End: