1 //
   2 // Copyright (c) 2003, 2019, Oracle and/or its affiliates. All rights reserved.
   3 // Copyright (c) 2014, 2019, Red Hat Inc.
   4 // All rights reserved.
   5 // DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   6 //
   7 // This code is free software; you can redistribute it and/or modify it
   8 // under the terms of the GNU General Public License version 2 only, as
   9 // published by the Free Software Foundation.
  10 //
  11 // This code is distributed in the hope that it will be useful, but WITHOUT
  12 // ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  13 // FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  14 // version 2 for more details (a copy is included in the LICENSE file that
  15 // accompanied this code).
  16 //
  17 // You should have received a copy of the GNU General Public License version
  18 // 2 along with this work; if not, write to the Free Software Foundation,
  19 // Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  20 //
  21 // Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  22 // or visit www.oracle.com if you need additional information or have any
  23 // questions.
  24 //
  25 //
  26 
  27 // AArch64 Architecture Description File
  28 
  29 //----------REGISTER DEFINITION BLOCK------------------------------------------
  30 // This information is used by the matcher and the register allocator to
  31 // describe individual registers and classes of registers within the target
// architecture.
  33 
  34 register %{
  35 //----------Architecture Description Register Definitions----------------------
  36 // General Registers
  37 // "reg_def"  name ( register save type, C convention save type,
  38 //                   ideal register type, encoding );
  39 // Register Save Types:
  40 //
  41 // NS  = No-Save:       The register allocator assumes that these registers
  42 //                      can be used without saving upon entry to the method, &
  43 //                      that they do not need to be saved at call sites.
  44 //
  45 // SOC = Save-On-Call:  The register allocator assumes that these registers
  46 //                      can be used without saving upon entry to the method,
  47 //                      but that they must be saved at call sites.
  48 //
  49 // SOE = Save-On-Entry: The register allocator assumes that these registers
  50 //                      must be saved before using them upon entry to the
  51 //                      method, but they do not need to be saved at call
  52 //                      sites.
  53 //
  54 // AS  = Always-Save:   The register allocator assumes that these registers
  55 //                      must be saved before using them upon entry to the
  56 //                      method, & that they must be saved at call sites.
  57 //
  58 // Ideal Register Type is used to determine how to save & restore a
  59 // register.  Op_RegI will get spilled with LoadI/StoreI, Op_RegP will get
  60 // spilled with LoadP/StoreP.  If the register supports both, use Op_RegI.
  61 //
  62 // The encoding number is the actual bit-pattern placed into the opcodes.
  63 
  64 // We must define the 64 bit int registers in two 32 bit halves, the
  65 // real lower register and a virtual upper half register. upper halves
  66 // are used by the register allocator but are not actually supplied as
  67 // operands to memory ops.
  68 //
  69 // follow the C1 compiler in making registers
  70 //
  71 //   r0-r7,r10-r26 volatile (caller save)
  72 //   r27-r32 system (no save, no allocate)
  73 //   r8-r9 invisible to the allocator (so we can use them as scratch regs)
  74 //
// As regards Java usage, we don't use any callee save registers
// because this makes it difficult to de-optimise a frame (see comment
// in x86 implementation of Deoptimization::unwind_callee_save_values).
  78 //
  79 
  80 // General Registers
  81 
// r0-r18: volatile under both the Java convention (first column) and
// the C ABI (second column).  Note that r8 and r9 are deliberately not
// defined here so they remain invisible to the allocator and usable as
// scratch registers (see the comment above).
reg_def R0      ( SOC, SOC, Op_RegI,  0, r0->as_VMReg()         );
reg_def R0_H    ( SOC, SOC, Op_RegI,  0, r0->as_VMReg()->next() );
reg_def R1      ( SOC, SOC, Op_RegI,  1, r1->as_VMReg()         );
reg_def R1_H    ( SOC, SOC, Op_RegI,  1, r1->as_VMReg()->next() );
reg_def R2      ( SOC, SOC, Op_RegI,  2, r2->as_VMReg()         );
reg_def R2_H    ( SOC, SOC, Op_RegI,  2, r2->as_VMReg()->next() );
reg_def R3      ( SOC, SOC, Op_RegI,  3, r3->as_VMReg()         );
reg_def R3_H    ( SOC, SOC, Op_RegI,  3, r3->as_VMReg()->next() );
reg_def R4      ( SOC, SOC, Op_RegI,  4, r4->as_VMReg()         );
reg_def R4_H    ( SOC, SOC, Op_RegI,  4, r4->as_VMReg()->next() );
reg_def R5      ( SOC, SOC, Op_RegI,  5, r5->as_VMReg()         );
reg_def R5_H    ( SOC, SOC, Op_RegI,  5, r5->as_VMReg()->next() );
reg_def R6      ( SOC, SOC, Op_RegI,  6, r6->as_VMReg()         );
reg_def R6_H    ( SOC, SOC, Op_RegI,  6, r6->as_VMReg()->next() );
reg_def R7      ( SOC, SOC, Op_RegI,  7, r7->as_VMReg()         );
reg_def R7_H    ( SOC, SOC, Op_RegI,  7, r7->as_VMReg()->next() );
reg_def R10     ( SOC, SOC, Op_RegI, 10, r10->as_VMReg()        );
reg_def R10_H   ( SOC, SOC, Op_RegI, 10, r10->as_VMReg()->next());
reg_def R11     ( SOC, SOC, Op_RegI, 11, r11->as_VMReg()        );
reg_def R11_H   ( SOC, SOC, Op_RegI, 11, r11->as_VMReg()->next());
reg_def R12     ( SOC, SOC, Op_RegI, 12, r12->as_VMReg()        );
reg_def R12_H   ( SOC, SOC, Op_RegI, 12, r12->as_VMReg()->next());
reg_def R13     ( SOC, SOC, Op_RegI, 13, r13->as_VMReg()        );
reg_def R13_H   ( SOC, SOC, Op_RegI, 13, r13->as_VMReg()->next());
reg_def R14     ( SOC, SOC, Op_RegI, 14, r14->as_VMReg()        );
reg_def R14_H   ( SOC, SOC, Op_RegI, 14, r14->as_VMReg()->next());
reg_def R15     ( SOC, SOC, Op_RegI, 15, r15->as_VMReg()        );
reg_def R15_H   ( SOC, SOC, Op_RegI, 15, r15->as_VMReg()->next());
reg_def R16     ( SOC, SOC, Op_RegI, 16, r16->as_VMReg()        );
reg_def R16_H   ( SOC, SOC, Op_RegI, 16, r16->as_VMReg()->next());
reg_def R17     ( SOC, SOC, Op_RegI, 17, r17->as_VMReg()        );
reg_def R17_H   ( SOC, SOC, Op_RegI, 17, r17->as_VMReg()->next());
reg_def R18     ( SOC, SOC, Op_RegI, 18, r18->as_VMReg()        );
reg_def R18_H   ( SOC, SOC, Op_RegI, 18, r18->as_VMReg()->next());
// r19-r26: treated as volatile (SOC) by Java code but callee-saved
// (SOE) under the C ABI, matching the r0-r7,r10-r26 volatile scheme
// described above.
reg_def R19     ( SOC, SOE, Op_RegI, 19, r19->as_VMReg()        );
reg_def R19_H   ( SOC, SOE, Op_RegI, 19, r19->as_VMReg()->next());
reg_def R20     ( SOC, SOE, Op_RegI, 20, r20->as_VMReg()        ); // caller esp
reg_def R20_H   ( SOC, SOE, Op_RegI, 20, r20->as_VMReg()->next());
reg_def R21     ( SOC, SOE, Op_RegI, 21, r21->as_VMReg()        );
reg_def R21_H   ( SOC, SOE, Op_RegI, 21, r21->as_VMReg()->next());
reg_def R22     ( SOC, SOE, Op_RegI, 22, r22->as_VMReg()        );
reg_def R22_H   ( SOC, SOE, Op_RegI, 22, r22->as_VMReg()->next());
reg_def R23     ( SOC, SOE, Op_RegI, 23, r23->as_VMReg()        );
reg_def R23_H   ( SOC, SOE, Op_RegI, 23, r23->as_VMReg()->next());
reg_def R24     ( SOC, SOE, Op_RegI, 24, r24->as_VMReg()        );
reg_def R24_H   ( SOC, SOE, Op_RegI, 24, r24->as_VMReg()->next());
reg_def R25     ( SOC, SOE, Op_RegI, 25, r25->as_VMReg()        );
reg_def R25_H   ( SOC, SOE, Op_RegI, 25, r25->as_VMReg()->next());
reg_def R26     ( SOC, SOE, Op_RegI, 26, r26->as_VMReg()        );
reg_def R26_H   ( SOC, SOE, Op_RegI, 26, r26->as_VMReg()->next());
// r27-r31: system registers -- no-save for Java, never allocated.
// Note R31 uses r31_sp: encoding 31 names sp here, not zr.
reg_def R27     (  NS, SOE, Op_RegI, 27, r27->as_VMReg()        ); // heapbase
reg_def R27_H   (  NS, SOE, Op_RegI, 27, r27->as_VMReg()->next());
reg_def R28     (  NS, SOE, Op_RegI, 28, r28->as_VMReg()        ); // thread
reg_def R28_H   (  NS, SOE, Op_RegI, 28, r28->as_VMReg()->next());
reg_def R29     (  NS,  NS, Op_RegI, 29, r29->as_VMReg()        ); // fp
reg_def R29_H   (  NS,  NS, Op_RegI, 29, r29->as_VMReg()->next());
reg_def R30     (  NS,  NS, Op_RegI, 30, r30->as_VMReg()        ); // lr
reg_def R30_H   (  NS,  NS, Op_RegI, 30, r30->as_VMReg()->next());
reg_def R31     (  NS,  NS, Op_RegI, 31, r31_sp->as_VMReg()     ); // sp
reg_def R31_H   (  NS,  NS, Op_RegI, 31, r31_sp->as_VMReg()->next());
 142 
 143 // ----------------------------
 144 // Float/Double Registers
 145 // ----------------------------
 146 
 147 // Double Registers
 148 
 149 // The rules of ADL require that double registers be defined in pairs.
 150 // Each pair must be two 32-bit values, but not necessarily a pair of
 151 // single float registers. In each pair, ADLC-assigned register numbers
 152 // must be adjacent, with the lower number even. Finally, when the
 153 // CPU stores such a register pair to memory, the word associated with
 154 // the lower ADLC-assigned number must be stored to the lower address.
 155 
 156 // AArch64 has 32 floating-point registers. Each can store a vector of
 157 // single or double precision floating-point values up to 8 * 32
 158 // floats, 4 * 64 bit floats or 2 * 128 bit floats.  We currently only
 159 // use the first float or double element of the vector.
 160 
// For Java use, float registers v0-v15 are always save-on-call (whereas
// the platform ABI treats v8-v15 as callee save). Float registers
// v16-v31 are SOC as per the platform spec.
 164 
  // Each 128-bit SIMD/FP register is exposed to the allocator as four
  // 32-bit slots: Vn (bits 0-31), Vn_H (next()), Vn_J (next(2)) and
  // Vn_K (next(3)).  Only Vn (float) or Vn/Vn_H (double) hold live
  // scalar values; the J/K slots exist for 128-bit vector use.
  reg_def V0   ( SOC, SOC, Op_RegF,  0, v0->as_VMReg()          );
  reg_def V0_H ( SOC, SOC, Op_RegF,  0, v0->as_VMReg()->next()  );
  reg_def V0_J ( SOC, SOC, Op_RegF,  0, v0->as_VMReg()->next(2) );
  reg_def V0_K ( SOC, SOC, Op_RegF,  0, v0->as_VMReg()->next(3) );

  reg_def V1   ( SOC, SOC, Op_RegF,  1, v1->as_VMReg()          );
  reg_def V1_H ( SOC, SOC, Op_RegF,  1, v1->as_VMReg()->next()  );
  reg_def V1_J ( SOC, SOC, Op_RegF,  1, v1->as_VMReg()->next(2) );
  reg_def V1_K ( SOC, SOC, Op_RegF,  1, v1->as_VMReg()->next(3) );

  reg_def V2   ( SOC, SOC, Op_RegF,  2, v2->as_VMReg()          );
  reg_def V2_H ( SOC, SOC, Op_RegF,  2, v2->as_VMReg()->next()  );
  reg_def V2_J ( SOC, SOC, Op_RegF,  2, v2->as_VMReg()->next(2) );
  reg_def V2_K ( SOC, SOC, Op_RegF,  2, v2->as_VMReg()->next(3) );

  reg_def V3   ( SOC, SOC, Op_RegF,  3, v3->as_VMReg()          );
  reg_def V3_H ( SOC, SOC, Op_RegF,  3, v3->as_VMReg()->next()  );
  reg_def V3_J ( SOC, SOC, Op_RegF,  3, v3->as_VMReg()->next(2) );
  reg_def V3_K ( SOC, SOC, Op_RegF,  3, v3->as_VMReg()->next(3) );

  reg_def V4   ( SOC, SOC, Op_RegF,  4, v4->as_VMReg()          );
  reg_def V4_H ( SOC, SOC, Op_RegF,  4, v4->as_VMReg()->next()  );
  reg_def V4_J ( SOC, SOC, Op_RegF,  4, v4->as_VMReg()->next(2) );
  reg_def V4_K ( SOC, SOC, Op_RegF,  4, v4->as_VMReg()->next(3) );

  reg_def V5   ( SOC, SOC, Op_RegF,  5, v5->as_VMReg()          );
  reg_def V5_H ( SOC, SOC, Op_RegF,  5, v5->as_VMReg()->next()  );
  reg_def V5_J ( SOC, SOC, Op_RegF,  5, v5->as_VMReg()->next(2) );
  reg_def V5_K ( SOC, SOC, Op_RegF,  5, v5->as_VMReg()->next(3) );

  reg_def V6   ( SOC, SOC, Op_RegF,  6, v6->as_VMReg()          );
  reg_def V6_H ( SOC, SOC, Op_RegF,  6, v6->as_VMReg()->next()  );
  reg_def V6_J ( SOC, SOC, Op_RegF,  6, v6->as_VMReg()->next(2) );
  reg_def V6_K ( SOC, SOC, Op_RegF,  6, v6->as_VMReg()->next(3) );

  reg_def V7   ( SOC, SOC, Op_RegF,  7, v7->as_VMReg()          );
  reg_def V7_H ( SOC, SOC, Op_RegF,  7, v7->as_VMReg()->next()  );
  reg_def V7_J ( SOC, SOC, Op_RegF,  7, v7->as_VMReg()->next(2) );
  reg_def V7_K ( SOC, SOC, Op_RegF,  7, v7->as_VMReg()->next(3) );

  // v8-v15 are SOC here even though the platform ABI makes them callee
  // save -- see the comment above this section.
  reg_def V8   ( SOC, SOC, Op_RegF,  8, v8->as_VMReg()          );
  reg_def V8_H ( SOC, SOC, Op_RegF,  8, v8->as_VMReg()->next()  );
  reg_def V8_J ( SOC, SOC, Op_RegF,  8, v8->as_VMReg()->next(2) );
  reg_def V8_K ( SOC, SOC, Op_RegF,  8, v8->as_VMReg()->next(3) );

  reg_def V9   ( SOC, SOC, Op_RegF,  9, v9->as_VMReg()          );
  reg_def V9_H ( SOC, SOC, Op_RegF,  9, v9->as_VMReg()->next()  );
  reg_def V9_J ( SOC, SOC, Op_RegF,  9, v9->as_VMReg()->next(2) );
  reg_def V9_K ( SOC, SOC, Op_RegF,  9, v9->as_VMReg()->next(3) );

  reg_def V10  ( SOC, SOC, Op_RegF, 10, v10->as_VMReg()         );
  reg_def V10_H( SOC, SOC, Op_RegF, 10, v10->as_VMReg()->next() );
  reg_def V10_J( SOC, SOC, Op_RegF, 10, v10->as_VMReg()->next(2));
  reg_def V10_K( SOC, SOC, Op_RegF, 10, v10->as_VMReg()->next(3));

  reg_def V11  ( SOC, SOC, Op_RegF, 11, v11->as_VMReg()         );
  reg_def V11_H( SOC, SOC, Op_RegF, 11, v11->as_VMReg()->next() );
  reg_def V11_J( SOC, SOC, Op_RegF, 11, v11->as_VMReg()->next(2));
  reg_def V11_K( SOC, SOC, Op_RegF, 11, v11->as_VMReg()->next(3));

  reg_def V12  ( SOC, SOC, Op_RegF, 12, v12->as_VMReg()         );
  reg_def V12_H( SOC, SOC, Op_RegF, 12, v12->as_VMReg()->next() );
  reg_def V12_J( SOC, SOC, Op_RegF, 12, v12->as_VMReg()->next(2));
  reg_def V12_K( SOC, SOC, Op_RegF, 12, v12->as_VMReg()->next(3));

  reg_def V13  ( SOC, SOC, Op_RegF, 13, v13->as_VMReg()         );
  reg_def V13_H( SOC, SOC, Op_RegF, 13, v13->as_VMReg()->next() );
  reg_def V13_J( SOC, SOC, Op_RegF, 13, v13->as_VMReg()->next(2));
  reg_def V13_K( SOC, SOC, Op_RegF, 13, v13->as_VMReg()->next(3));

  reg_def V14  ( SOC, SOC, Op_RegF, 14, v14->as_VMReg()         );
  reg_def V14_H( SOC, SOC, Op_RegF, 14, v14->as_VMReg()->next() );
  reg_def V14_J( SOC, SOC, Op_RegF, 14, v14->as_VMReg()->next(2));
  reg_def V14_K( SOC, SOC, Op_RegF, 14, v14->as_VMReg()->next(3));

  reg_def V15  ( SOC, SOC, Op_RegF, 15, v15->as_VMReg()         );
  reg_def V15_H( SOC, SOC, Op_RegF, 15, v15->as_VMReg()->next() );
  reg_def V15_J( SOC, SOC, Op_RegF, 15, v15->as_VMReg()->next(2));
  reg_def V15_K( SOC, SOC, Op_RegF, 15, v15->as_VMReg()->next(3));

  // v16-v31: caller-saved under the platform ABI as well.
  reg_def V16  ( SOC, SOC, Op_RegF, 16, v16->as_VMReg()         );
  reg_def V16_H( SOC, SOC, Op_RegF, 16, v16->as_VMReg()->next() );
  reg_def V16_J( SOC, SOC, Op_RegF, 16, v16->as_VMReg()->next(2));
  reg_def V16_K( SOC, SOC, Op_RegF, 16, v16->as_VMReg()->next(3));

  reg_def V17  ( SOC, SOC, Op_RegF, 17, v17->as_VMReg()         );
  reg_def V17_H( SOC, SOC, Op_RegF, 17, v17->as_VMReg()->next() );
  reg_def V17_J( SOC, SOC, Op_RegF, 17, v17->as_VMReg()->next(2));
  reg_def V17_K( SOC, SOC, Op_RegF, 17, v17->as_VMReg()->next(3));

  reg_def V18  ( SOC, SOC, Op_RegF, 18, v18->as_VMReg()         );
  reg_def V18_H( SOC, SOC, Op_RegF, 18, v18->as_VMReg()->next() );
  reg_def V18_J( SOC, SOC, Op_RegF, 18, v18->as_VMReg()->next(2));
  reg_def V18_K( SOC, SOC, Op_RegF, 18, v18->as_VMReg()->next(3));

  reg_def V19  ( SOC, SOC, Op_RegF, 19, v19->as_VMReg()         );
  reg_def V19_H( SOC, SOC, Op_RegF, 19, v19->as_VMReg()->next() );
  reg_def V19_J( SOC, SOC, Op_RegF, 19, v19->as_VMReg()->next(2));
  reg_def V19_K( SOC, SOC, Op_RegF, 19, v19->as_VMReg()->next(3));

  reg_def V20  ( SOC, SOC, Op_RegF, 20, v20->as_VMReg()         );
  reg_def V20_H( SOC, SOC, Op_RegF, 20, v20->as_VMReg()->next() );
  reg_def V20_J( SOC, SOC, Op_RegF, 20, v20->as_VMReg()->next(2));
  reg_def V20_K( SOC, SOC, Op_RegF, 20, v20->as_VMReg()->next(3));

  reg_def V21  ( SOC, SOC, Op_RegF, 21, v21->as_VMReg()         );
  reg_def V21_H( SOC, SOC, Op_RegF, 21, v21->as_VMReg()->next() );
  reg_def V21_J( SOC, SOC, Op_RegF, 21, v21->as_VMReg()->next(2));
  reg_def V21_K( SOC, SOC, Op_RegF, 21, v21->as_VMReg()->next(3));

  reg_def V22  ( SOC, SOC, Op_RegF, 22, v22->as_VMReg()         );
  reg_def V22_H( SOC, SOC, Op_RegF, 22, v22->as_VMReg()->next() );
  reg_def V22_J( SOC, SOC, Op_RegF, 22, v22->as_VMReg()->next(2));
  reg_def V22_K( SOC, SOC, Op_RegF, 22, v22->as_VMReg()->next(3));

  reg_def V23  ( SOC, SOC, Op_RegF, 23, v23->as_VMReg()         );
  reg_def V23_H( SOC, SOC, Op_RegF, 23, v23->as_VMReg()->next() );
  reg_def V23_J( SOC, SOC, Op_RegF, 23, v23->as_VMReg()->next(2));
  reg_def V23_K( SOC, SOC, Op_RegF, 23, v23->as_VMReg()->next(3));

  reg_def V24  ( SOC, SOC, Op_RegF, 24, v24->as_VMReg()         );
  reg_def V24_H( SOC, SOC, Op_RegF, 24, v24->as_VMReg()->next() );
  reg_def V24_J( SOC, SOC, Op_RegF, 24, v24->as_VMReg()->next(2));
  reg_def V24_K( SOC, SOC, Op_RegF, 24, v24->as_VMReg()->next(3));

  reg_def V25  ( SOC, SOC, Op_RegF, 25, v25->as_VMReg()         );
  reg_def V25_H( SOC, SOC, Op_RegF, 25, v25->as_VMReg()->next() );
  reg_def V25_J( SOC, SOC, Op_RegF, 25, v25->as_VMReg()->next(2));
  reg_def V25_K( SOC, SOC, Op_RegF, 25, v25->as_VMReg()->next(3));

  reg_def V26  ( SOC, SOC, Op_RegF, 26, v26->as_VMReg()         );
  reg_def V26_H( SOC, SOC, Op_RegF, 26, v26->as_VMReg()->next() );
  reg_def V26_J( SOC, SOC, Op_RegF, 26, v26->as_VMReg()->next(2));
  reg_def V26_K( SOC, SOC, Op_RegF, 26, v26->as_VMReg()->next(3));

  reg_def V27  ( SOC, SOC, Op_RegF, 27, v27->as_VMReg()         );
  reg_def V27_H( SOC, SOC, Op_RegF, 27, v27->as_VMReg()->next() );
  reg_def V27_J( SOC, SOC, Op_RegF, 27, v27->as_VMReg()->next(2));
  reg_def V27_K( SOC, SOC, Op_RegF, 27, v27->as_VMReg()->next(3));

  reg_def V28  ( SOC, SOC, Op_RegF, 28, v28->as_VMReg()         );
  reg_def V28_H( SOC, SOC, Op_RegF, 28, v28->as_VMReg()->next() );
  reg_def V28_J( SOC, SOC, Op_RegF, 28, v28->as_VMReg()->next(2));
  reg_def V28_K( SOC, SOC, Op_RegF, 28, v28->as_VMReg()->next(3));

  reg_def V29  ( SOC, SOC, Op_RegF, 29, v29->as_VMReg()         );
  reg_def V29_H( SOC, SOC, Op_RegF, 29, v29->as_VMReg()->next() );
  reg_def V29_J( SOC, SOC, Op_RegF, 29, v29->as_VMReg()->next(2));
  reg_def V29_K( SOC, SOC, Op_RegF, 29, v29->as_VMReg()->next(3));

  reg_def V30  ( SOC, SOC, Op_RegF, 30, v30->as_VMReg()         );
  reg_def V30_H( SOC, SOC, Op_RegF, 30, v30->as_VMReg()->next() );
  reg_def V30_J( SOC, SOC, Op_RegF, 30, v30->as_VMReg()->next(2));
  reg_def V30_K( SOC, SOC, Op_RegF, 30, v30->as_VMReg()->next(3));

  reg_def V31  ( SOC, SOC, Op_RegF, 31, v31->as_VMReg()         );
  reg_def V31_H( SOC, SOC, Op_RegF, 31, v31->as_VMReg()->next() );
  reg_def V31_J( SOC, SOC, Op_RegF, 31, v31->as_VMReg()->next(2));
  reg_def V31_K( SOC, SOC, Op_RegF, 31, v31->as_VMReg()->next(3));
 324 
 325 // ----------------------------
 326 // Special Registers
 327 // ----------------------------
 328 
// The AArch64 CPSR status flag register is not directly accessible as an
// instruction operand. The FPSR status flag register is a system
// register which can be written/read using MSR/MRS but again does not
// appear as an operand (a code identifying the FPSR occurs as an
// immediate value in the instruction).
 334 
 335 reg_def RFLAGS(SOC, SOC, 0, 32, VMRegImpl::Bad());
 336 
 337 
 338 // Specify priority of register selection within phases of register
 339 // allocation.  Highest priority is first.  A useful heuristic is to
 340 // give registers a low priority when they are required by machine
 341 // instructions, like EAX and EDX on I486, and choose no-save registers
 342 // before save-on-call, & save-on-call before save-on-entry.  Registers
 343 // which participate in fixed calling sequences should come last.
 344 // Registers which are used as pairs must fall on an even boundary.
 345 
// Integer register allocation order, following the priority rules
// above: plain volatiles first, then the argument registers (fixed
// calling sequence, so lower priority), then the callee-saved
// registers, with the non-allocatable system registers last.
alloc_class chunk0(
    // volatiles
    R10, R10_H,
    R11, R11_H,
    R12, R12_H,
    R13, R13_H,
    R14, R14_H,
    R15, R15_H,
    R16, R16_H,
    R17, R17_H,
    R18, R18_H,

    // arg registers
    R0, R0_H,
    R1, R1_H,
    R2, R2_H,
    R3, R3_H,
    R4, R4_H,
    R5, R5_H,
    R6, R6_H,
    R7, R7_H,

    // non-volatiles
    R19, R19_H,
    R20, R20_H,
    R21, R21_H,
    R22, R22_H,
    R23, R23_H,
    R24, R24_H,
    R25, R25_H,
    R26, R26_H,

    // non-allocatable registers

    R27, R27_H, // heapbase
    R28, R28_H, // thread
    R29, R29_H, // fp
    R30, R30_H, // lr
    R31, R31_H, // sp
);
 386 
// FP/SIMD register allocation order: no-save v16-v31 first, then the
// FP argument registers v0-v7, then v8-v15 (callee-saved under the
// platform ABI) last.
alloc_class chunk1(

    // no save
    V16, V16_H, V16_J, V16_K,
    V17, V17_H, V17_J, V17_K,
    V18, V18_H, V18_J, V18_K,
    V19, V19_H, V19_J, V19_K,
    V20, V20_H, V20_J, V20_K,
    V21, V21_H, V21_J, V21_K,
    V22, V22_H, V22_J, V22_K,
    V23, V23_H, V23_J, V23_K,
    V24, V24_H, V24_J, V24_K,
    V25, V25_H, V25_J, V25_K,
    V26, V26_H, V26_J, V26_K,
    V27, V27_H, V27_J, V27_K,
    V28, V28_H, V28_J, V28_K,
    V29, V29_H, V29_J, V29_K,
    V30, V30_H, V30_J, V30_K,
    V31, V31_H, V31_J, V31_K,

    // arg registers
    V0, V0_H, V0_J, V0_K,
    V1, V1_H, V1_J, V1_K,
    V2, V2_H, V2_J, V2_K,
    V3, V3_H, V3_J, V3_K,
    V4, V4_H, V4_J, V4_K,
    V5, V5_H, V5_J, V5_K,
    V6, V6_H, V6_J, V6_K,
    V7, V7_H, V7_J, V7_K,

    // non-volatiles
    V8, V8_H, V8_J, V8_K,
    V9, V9_H, V9_J, V9_K,
    V10, V10_H, V10_J, V10_K,
    V11, V11_H, V11_J, V11_K,
    V12, V12_H, V12_J, V12_K,
    V13, V13_H, V13_J, V13_K,
    V14, V14_H, V14_J, V14_K,
    V15, V15_H, V15_J, V15_K,
);
 427 
 428 alloc_class chunk2(RFLAGS);
 429 
 430 //----------Architecture Description Register Classes--------------------------
 431 // Several register classes are automatically defined based upon information in
 432 // this architecture description.
 433 // 1) reg_class inline_cache_reg           ( /* as def'd in frame section */ )
 434 // 2) reg_class compiler_method_oop_reg    ( /* as def'd in frame section */ )
 435 // 2) reg_class interpreter_method_oop_reg ( /* as def'd in frame section */ )
 436 // 3) reg_class stack_slots( /* one chunk of stack-based "registers" */ )
 437 //
 438 
// Class for all 32 bit integer registers -- excludes SP which will
// never be used as an integer register.
// Lists R0-R30 (low halves only), including the otherwise
// non-allocatable system registers r27-r30.
reg_class any_reg32(
    R0,
    R1,
    R2,
    R3,
    R4,
    R5,
    R6,
    R7,
    R10,
    R11,
    R12,
    R13,
    R14,
    R15,
    R16,
    R17,
    R18,
    R19,
    R20,
    R21,
    R22,
    R23,
    R24,
    R25,
    R26,
    R27,
    R28,
    R29,
    R30
);
 472 
// Singleton classes: each pins an operand to exactly one specific
// 32-bit register.

// Singleton class for R0 int register
reg_class int_r0_reg(R0);

// Singleton class for R2 int register
reg_class int_r2_reg(R2);

// Singleton class for R3 int register
reg_class int_r3_reg(R3);

// Singleton class for R4 int register
reg_class int_r4_reg(R4);
 484 
// Class for all long integer registers (including RSP)
// ("RSP" is the x86 name carried over; here it means R31/sp, which is
// included below.)
reg_class any_reg(
    R0, R0_H,
    R1, R1_H,
    R2, R2_H,
    R3, R3_H,
    R4, R4_H,
    R5, R5_H,
    R6, R6_H,
    R7, R7_H,
    R10, R10_H,
    R11, R11_H,
    R12, R12_H,
    R13, R13_H,
    R14, R14_H,
    R15, R15_H,
    R16, R16_H,
    R17, R17_H,
    R18, R18_H,
    R19, R19_H,
    R20, R20_H,
    R21, R21_H,
    R22, R22_H,
    R23, R23_H,
    R24, R24_H,
    R25, R25_H,
    R26, R26_H,
    R27, R27_H,
    R28, R28_H,
    R29, R29_H,
    R30, R30_H,
    R31, R31_H
);
 518 
// Class for all non-special integer registers.
// Excludes the special registers shown commented out at the end
// (heapbase, thread, fp, lr, sp); R12 (rmethod) is still included.
reg_class no_special_reg32(
    R0,
    R1,
    R2,
    R3,
    R4,
    R5,
    R6,
    R7,
    R10,
    R11,
    R12,                        // rmethod
    R13,
    R14,
    R15,
    R16,
    R17,
    R18,
    R19,
    R20,
    R21,
    R22,
    R23,
    R24,
    R25,
    R26
 /* R27, */                     // heapbase
 /* R28, */                     // thread
 /* R29, */                     // fp
 /* R30, */                     // lr
 /* R31 */                      // sp
);
 552 
// Class for all non-special long integer registers.
// 64-bit counterpart of no_special_reg32: same register set, listed
// with the virtual high halves.
reg_class no_special_reg(
    R0, R0_H,
    R1, R1_H,
    R2, R2_H,
    R3, R3_H,
    R4, R4_H,
    R5, R5_H,
    R6, R6_H,
    R7, R7_H,
    R10, R10_H,
    R11, R11_H,
    R12, R12_H,                 // rmethod
    R13, R13_H,
    R14, R14_H,
    R15, R15_H,
    R16, R16_H,
    R17, R17_H,
    R18, R18_H,
    R19, R19_H,
    R20, R20_H,
    R21, R21_H,
    R22, R22_H,
    R23, R23_H,
    R24, R24_H,
    R25, R25_H,
    R26, R26_H,
 /* R27, R27_H, */              // heapbase
 /* R28, R28_H, */              // thread
 /* R29, R29_H, */              // fp
 /* R30, R30_H, */              // lr
 /* R31, R31_H */               // sp
);
 586 
// Singleton 64-bit classes: each pins an operand to one specific
// register pair (low half plus virtual high half).

// Class for 64 bit register r0
reg_class r0_reg(
    R0, R0_H
);

// Class for 64 bit register r1
reg_class r1_reg(
    R1, R1_H
);

// Class for 64 bit register r2
reg_class r2_reg(
    R2, R2_H
);

// Class for 64 bit register r3
reg_class r3_reg(
    R3, R3_H
);

// Class for 64 bit register r4
reg_class r4_reg(
    R4, R4_H
);

// Class for 64 bit register r5
reg_class r5_reg(
    R5, R5_H
);

// Class for 64 bit register r10
reg_class r10_reg(
    R10, R10_H
);

// Class for 64 bit register r11
reg_class r11_reg(
    R11, R11_H
);

// Class for method register (r12 == rmethod, per no_special_reg above)
reg_class method_reg(
    R12, R12_H
);

// Class for heapbase register (r27)
reg_class heapbase_reg(
    R27, R27_H
);

// Class for thread register (r28)
reg_class thread_reg(
    R28, R28_H
);

// Class for frame pointer register (r29)
reg_class fp_reg(
    R29, R29_H
);

// Class for link register (r30)
reg_class lr_reg(
    R30, R30_H
);

// Class for long sp register (r31)
reg_class sp_reg(
  R31, R31_H
);
 656 
// Class for all pointer registers: every 64-bit pair R0-R31,
// including sp and the system registers (unlike no_special_ptr_reg
// below).
reg_class ptr_reg(
    R0, R0_H,
    R1, R1_H,
    R2, R2_H,
    R3, R3_H,
    R4, R4_H,
    R5, R5_H,
    R6, R6_H,
    R7, R7_H,
    R10, R10_H,
    R11, R11_H,
    R12, R12_H,
    R13, R13_H,
    R14, R14_H,
    R15, R15_H,
    R16, R16_H,
    R17, R17_H,
    R18, R18_H,
    R19, R19_H,
    R20, R20_H,
    R21, R21_H,
    R22, R22_H,
    R23, R23_H,
    R24, R24_H,
    R25, R25_H,
    R26, R26_H,
    R27, R27_H,
    R28, R28_H,
    R29, R29_H,
    R30, R30_H,
    R31, R31_H
);
 690 
// Class for all non_special pointer registers.
// Same set as no_special_reg: excludes heapbase, thread, fp, lr, sp.
reg_class no_special_ptr_reg(
    R0, R0_H,
    R1, R1_H,
    R2, R2_H,
    R3, R3_H,
    R4, R4_H,
    R5, R5_H,
    R6, R6_H,
    R7, R7_H,
    R10, R10_H,
    R11, R11_H,
    R12, R12_H,
    R13, R13_H,
    R14, R14_H,
    R15, R15_H,
    R16, R16_H,
    R17, R17_H,
    R18, R18_H,
    R19, R19_H,
    R20, R20_H,
    R21, R21_H,
    R22, R22_H,
    R23, R23_H,
    R24, R24_H,
    R25, R25_H,
    R26, R26_H,
 /* R27, R27_H, */              // heapbase
 /* R28, R28_H, */              // thread
 /* R29, R29_H, */              // fp
 /* R30, R30_H, */              // lr
 /* R31, R31_H */               // sp
);
 724 
// Class for all float registers: single-precision view, only the low
// 32-bit slot (Vn) of each FP/SIMD register.
reg_class float_reg(
    V0,
    V1,
    V2,
    V3,
    V4,
    V5,
    V6,
    V7,
    V8,
    V9,
    V10,
    V11,
    V12,
    V13,
    V14,
    V15,
    V16,
    V17,
    V18,
    V19,
    V20,
    V21,
    V22,
    V23,
    V24,
    V25,
    V26,
    V27,
    V28,
    V29,
    V30,
    V31
);
 760 
// Double precision float registers have virtual `high halves' that
// are needed by the allocator.
// Class for all double registers: each double uses Vn plus its
// virtual high half Vn_H.
reg_class double_reg(
    V0, V0_H,
    V1, V1_H,
    V2, V2_H,
    V3, V3_H,
    V4, V4_H,
    V5, V5_H,
    V6, V6_H,
    V7, V7_H,
    V8, V8_H,
    V9, V9_H,
    V10, V10_H,
    V11, V11_H,
    V12, V12_H,
    V13, V13_H,
    V14, V14_H,
    V15, V15_H,
    V16, V16_H,
    V17, V17_H,
    V18, V18_H,
    V19, V19_H,
    V20, V20_H,
    V21, V21_H,
    V22, V22_H,
    V23, V23_H,
    V24, V24_H,
    V25, V25_H,
    V26, V26_H,
    V27, V27_H,
    V28, V28_H,
    V29, V29_H,
    V30, V30_H,
    V31, V31_H
);
 798 
// Class for all 64bit vector registers: same Vn/Vn_H slot pairs as
// double_reg, used when a vector occupies the low 64 bits.
reg_class vectord_reg(
    V0, V0_H,
    V1, V1_H,
    V2, V2_H,
    V3, V3_H,
    V4, V4_H,
    V5, V5_H,
    V6, V6_H,
    V7, V7_H,
    V8, V8_H,
    V9, V9_H,
    V10, V10_H,
    V11, V11_H,
    V12, V12_H,
    V13, V13_H,
    V14, V14_H,
    V15, V15_H,
    V16, V16_H,
    V17, V17_H,
    V18, V18_H,
    V19, V19_H,
    V20, V20_H,
    V21, V21_H,
    V22, V22_H,
    V23, V23_H,
    V24, V24_H,
    V25, V25_H,
    V26, V26_H,
    V27, V27_H,
    V28, V28_H,
    V29, V29_H,
    V30, V30_H,
    V31, V31_H
);
 834 
// Class for all 128bit vector registers: all four 32-bit slots
// (Vn, Vn_H, Vn_J, Vn_K) of each FP/SIMD register.
reg_class vectorx_reg(
    V0, V0_H, V0_J, V0_K,
    V1, V1_H, V1_J, V1_K,
    V2, V2_H, V2_J, V2_K,
    V3, V3_H, V3_J, V3_K,
    V4, V4_H, V4_J, V4_K,
    V5, V5_H, V5_J, V5_K,
    V6, V6_H, V6_J, V6_K,
    V7, V7_H, V7_J, V7_K,
    V8, V8_H, V8_J, V8_K,
    V9, V9_H, V9_J, V9_K,
    V10, V10_H, V10_J, V10_K,
    V11, V11_H, V11_J, V11_K,
    V12, V12_H, V12_J, V12_K,
    V13, V13_H, V13_J, V13_K,
    V14, V14_H, V14_J, V14_K,
    V15, V15_H, V15_J, V15_K,
    V16, V16_H, V16_J, V16_K,
    V17, V17_H, V17_J, V17_K,
    V18, V18_H, V18_J, V18_K,
    V19, V19_H, V19_J, V19_K,
    V20, V20_H, V20_J, V20_K,
    V21, V21_H, V21_J, V21_K,
    V22, V22_H, V22_J, V22_K,
    V23, V23_H, V23_J, V23_K,
    V24, V24_H, V24_J, V24_K,
    V25, V25_H, V25_J, V25_K,
    V26, V26_H, V26_J, V26_K,
    V27, V27_H, V27_J, V27_K,
    V28, V28_H, V28_J, V28_K,
    V29, V29_H, V29_J, V29_K,
    V30, V30_H, V30_J, V30_K,
    V31, V31_H, V31_J, V31_K
);
 870 
// Singleton classes pinning an operand to one specific FP/SIMD
// register.
// NOTE(review): the comments say "128 bit register" but only the
// Vn/Vn_H slots (the low 64 bits as modelled by the allocator) are
// listed, unlike vectorx_reg which lists all four slots -- confirm
// this matches how these classes are used.

// Class for 128 bit register v0
reg_class v0_reg(
    V0, V0_H
);

// Class for 128 bit register v1
reg_class v1_reg(
    V1, V1_H
);

// Class for 128 bit register v2
reg_class v2_reg(
    V2, V2_H
);

// Class for 128 bit register v3
reg_class v3_reg(
    V3, V3_H
);
 890 
 891 // Singleton class for condition codes
 892 reg_class int_flags(RFLAGS);
 893 
 894 %}
 895 
 896 //----------DEFINITION BLOCK---------------------------------------------------
 897 // Define name --> value mappings to inform the ADLC of an integer valued name
 898 // Current support includes integer values in the range [0, 0x7FFFFFFF]
 899 // Format:
 900 //        int_def  <name>         ( <int_value>, <expression>);
 901 // Generated Code in ad_<arch>.hpp
 902 //        #define  <name>   (<expression>)
 903 //        // value == <int_value>
 904 // Generated code in ad_<arch>.cpp adlc_verification()
 905 //        assert( <name> == <int_value>, "Expect (<expression>) to equal <int_value>");
 906 //
 907 
 908 // we follow the ppc-aix port in using a simple cost model which ranks
 909 // register operations as cheap, memory ops as more expensive and
 910 // branches as most expensive. the first two have a low as well as a
 911 // normal cost. huge cost appears to be a way of saying don't do
 912 // something
 913 
definitions %{
  // The default cost (of a register move instruction).
  int_def INSN_COST            (    100,     100);
  // Branches and calls are twice as expensive as a register op.
  int_def BRANCH_COST          (    200,     2 * INSN_COST);
  int_def CALL_COST            (    200,     2 * INSN_COST);
  // Volatile references are ranked as the most expensive operations.
  int_def VOLATILE_REF_COST    (   1000,     10 * INSN_COST);
%}
 921 
 922 
 923 //----------SOURCE BLOCK-------------------------------------------------------
 924 // This is a block of C++ code which provides values, functions, and
 925 // definitions necessary in the rest of the architecture description
 926 
 927 source_hpp %{
 928 
 929 #include "opto/addnode.hpp"
 930 
class CallStubImpl {

  //--------------------------------------------------------------
  //---<  Used for optimization in Compile::shorten_branches  >---
  //--------------------------------------------------------------

 public:
  // Size of call trampoline stub.
  // AArch64 never emits call trampoline stubs, so this is zero.
  static uint size_call_trampoline() {
    return 0; // no call trampolines on this platform
  }

  // number of relocations needed by a call trampoline stub
  static uint reloc_call_trampoline() {
    return 0; // no call trampolines on this platform
  }
};
 948 
// Sizing and emission hooks for the exception and deopt handler
// stubs expected by the shared compiler code.
class HandlerImpl {

 public:

  static int emit_exception_handler(CodeBuffer &cbuf);
  static int emit_deopt_handler(CodeBuffer& cbuf);

  // the exception handler is a single far branch to the shared stub
  static uint size_exception_handler() {
    return MacroAssembler::far_branch_size();
  }

  static uint size_deopt_handler() {
    // count one adr and one far branch instruction
    // return 4 * NativeInstruction::instruction_size;
    return NativeInstruction::instruction_size + MacroAssembler::far_branch_size();
  }
};
 966 
  // returns true for ideal opcodes translated via an exclusive
  // load/store (ldxr/stlxr) macro sequence
  bool is_CAS(int opcode);

  // predicates controlling emit of ldr<x>/ldar<x> and associated dmb

  bool unnecessary_acquire(const Node *barrier);
  bool needs_acquiring_load(const Node *load);

  // predicates controlling emit of str<x>/stlr<x> and associated dmbs

  bool unnecessary_release(const Node *barrier);
  bool unnecessary_volatile(const Node *barrier);
  bool needs_releasing_store(const Node *store);

  // predicate controlling translation of CompareAndSwapX
  bool needs_acquiring_load_exclusive(const Node *load);

  // predicate controlling translation of StoreCM
  bool unnecessary_storestore(const Node *storecm);

  // predicate controlling addressing modes
  bool size_fits_all_mem_uses(AddPNode* addp, int shift);
 988 %}
 989 
 990 source %{
 991 
  // Optimization of volatile gets and puts
 993   // -------------------------------------
 994   //
 995   // AArch64 has ldar<x> and stlr<x> instructions which we can safely
 996   // use to implement volatile reads and writes. For a volatile read
 997   // we simply need
 998   //
 999   //   ldar<x>
1000   //
1001   // and for a volatile write we need
1002   //
1003   //   stlr<x>
1004   // 
1005   // Alternatively, we can implement them by pairing a normal
1006   // load/store with a memory barrier. For a volatile read we need
1007   // 
1008   //   ldr<x>
1009   //   dmb ishld
1010   //
1011   // for a volatile write
1012   //
1013   //   dmb ish
1014   //   str<x>
1015   //   dmb ish
1016   //
1017   // We can also use ldaxr and stlxr to implement compare and swap CAS
1018   // sequences. These are normally translated to an instruction
1019   // sequence like the following
1020   //
1021   //   dmb      ish
1022   // retry:
1023   //   ldxr<x>   rval raddr
1024   //   cmp       rval rold
1025   //   b.ne done
1026   //   stlxr<x>  rval, rnew, rold
1027   //   cbnz      rval retry
1028   // done:
1029   //   cset      r0, eq
1030   //   dmb ishld
1031   //
1032   // Note that the exclusive store is already using an stlxr
1033   // instruction. That is required to ensure visibility to other
1034   // threads of the exclusive write (assuming it succeeds) before that
1035   // of any subsequent writes.
1036   //
1037   // The following instruction sequence is an improvement on the above
1038   //
1039   // retry:
1040   //   ldaxr<x>  rval raddr
1041   //   cmp       rval rold
1042   //   b.ne done
1043   //   stlxr<x>  rval, rnew, rold
1044   //   cbnz      rval retry
1045   // done:
1046   //   cset      r0, eq
1047   //
1048   // We don't need the leading dmb ish since the stlxr guarantees
1049   // visibility of prior writes in the case that the swap is
1050   // successful. Crucially we don't have to worry about the case where
1051   // the swap is not successful since no valid program should be
1052   // relying on visibility of prior changes by the attempting thread
1053   // in the case where the CAS fails.
1054   //
1055   // Similarly, we don't need the trailing dmb ishld if we substitute
1056   // an ldaxr instruction since that will provide all the guarantees we
1057   // require regarding observation of changes made by other threads
1058   // before any change to the CAS address observed by the load.
1059   //
1060   // In order to generate the desired instruction sequence we need to
1061   // be able to identify specific 'signature' ideal graph node
  // sequences which i) occur as a translation of volatile reads or
1063   // writes or CAS operations and ii) do not occur through any other
1064   // translation or graph transformation. We can then provide
  // alternative adlc matching rules which translate these node
1066   // sequences to the desired machine code sequences. Selection of the
1067   // alternative rules can be implemented by predicates which identify
1068   // the relevant node sequences.
1069   //
1070   // The ideal graph generator translates a volatile read to the node
1071   // sequence
1072   //
1073   //   LoadX[mo_acquire]
1074   //   MemBarAcquire
1075   //
1076   // As a special case when using the compressed oops optimization we
1077   // may also see this variant
1078   //
1079   //   LoadN[mo_acquire]
1080   //   DecodeN
1081   //   MemBarAcquire
1082   //
1083   // A volatile write is translated to the node sequence
1084   //
1085   //   MemBarRelease
1086   //   StoreX[mo_release] {CardMark}-optional
1087   //   MemBarVolatile
1088   //
1089   // n.b. the above node patterns are generated with a strict
1090   // 'signature' configuration of input and output dependencies (see
1091   // the predicates below for exact details). The card mark may be as
1092   // simple as a few extra nodes or, in a few GC configurations, may
1093   // include more complex control flow between the leading and
1094   // trailing memory barriers. However, whatever the card mark
1095   // configuration these signatures are unique to translated volatile
1096   // reads/stores -- they will not appear as a result of any other
1097   // bytecode translation or inlining nor as a consequence of
1098   // optimizing transforms.
1099   //
1100   // We also want to catch inlined unsafe volatile gets and puts and
1101   // be able to implement them using either ldar<x>/stlr<x> or some
1102   // combination of ldr<x>/stlr<x> and dmb instructions.
1103   //
1104   // Inlined unsafe volatiles puts manifest as a minor variant of the
1105   // normal volatile put node sequence containing an extra cpuorder
1106   // membar
1107   //
1108   //   MemBarRelease
1109   //   MemBarCPUOrder
1110   //   StoreX[mo_release] {CardMark}-optional
1111   //   MemBarVolatile
1112   //
1113   // n.b. as an aside, the cpuorder membar is not itself subject to
1114   // matching and translation by adlc rules.  However, the rule
1115   // predicates need to detect its presence in order to correctly
1116   // select the desired adlc rules.
1117   //
1118   // Inlined unsafe volatile gets manifest as a somewhat different
1119   // node sequence to a normal volatile get
1120   //
1121   //   MemBarCPUOrder
1122   //        ||       \\
1123   //   MemBarAcquire LoadX[mo_acquire]
1124   //        ||
1125   //   MemBarCPUOrder
1126   //
1127   // In this case the acquire membar does not directly depend on the
1128   // load. However, we can be sure that the load is generated from an
1129   // inlined unsafe volatile get if we see it dependent on this unique
1130   // sequence of membar nodes. Similarly, given an acquire membar we
1131   // can know that it was added because of an inlined unsafe volatile
1132   // get if it is fed and feeds a cpuorder membar and if its feed
1133   // membar also feeds an acquiring load.
1134   //
1135   // Finally an inlined (Unsafe) CAS operation is translated to the
1136   // following ideal graph
1137   //
1138   //   MemBarRelease
1139   //   MemBarCPUOrder
1140   //   CompareAndSwapX {CardMark}-optional
1141   //   MemBarCPUOrder
1142   //   MemBarAcquire
1143   //
1144   // So, where we can identify these volatile read and write
1145   // signatures we can choose to plant either of the above two code
1146   // sequences. For a volatile read we can simply plant a normal
1147   // ldr<x> and translate the MemBarAcquire to a dmb. However, we can
1148   // also choose to inhibit translation of the MemBarAcquire and
1149   // inhibit planting of the ldr<x>, instead planting an ldar<x>.
1150   //
1151   // When we recognise a volatile store signature we can choose to
1152   // plant at a dmb ish as a translation for the MemBarRelease, a
1153   // normal str<x> and then a dmb ish for the MemBarVolatile.
1154   // Alternatively, we can inhibit translation of the MemBarRelease
1155   // and MemBarVolatile and instead plant a simple stlr<x>
1156   // instruction.
1157   //
1158   // when we recognise a CAS signature we can choose to plant a dmb
1159   // ish as a translation for the MemBarRelease, the conventional
1160   // macro-instruction sequence for the CompareAndSwap node (which
1161   // uses ldxr<x>) and then a dmb ishld for the MemBarAcquire.
1162   // Alternatively, we can elide generation of the dmb instructions
1163   // and plant the alternative CompareAndSwap macro-instruction
1164   // sequence (which uses ldaxr<x>).
1165   // 
1166   // Of course, the above only applies when we see these signature
1167   // configurations. We still want to plant dmb instructions in any
1168   // other cases where we may see a MemBarAcquire, MemBarRelease or
1169   // MemBarVolatile. For example, at the end of a constructor which
1170   // writes final/volatile fields we will see a MemBarRelease
1171   // instruction and this needs a 'dmb ish' lest we risk the
1172   // constructed object being visible without making the
1173   // final/volatile field writes visible.
1174   //
1175   // n.b. the translation rules below which rely on detection of the
1176   // volatile signatures and insert ldar<x> or stlr<x> are failsafe.
1177   // If we see anything other than the signature configurations we
1178   // always just translate the loads and stores to ldr<x> and str<x>
1179   // and translate acquire, release and volatile membars to the
1180   // relevant dmb instructions.
1181   //
1182 
1183   // is_CAS(int opcode)
1184   //
1185   // return true if opcode is one of the possible CompareAndSwapX
1186   // values otherwise false.
1187 
1188   bool is_CAS(int opcode)
1189   {
1190     switch(opcode) {
1191     // We handle these
1192     case Op_CompareAndSwapI:
1193     case Op_CompareAndSwapL:
1194     case Op_CompareAndSwapP:
1195     case Op_CompareAndSwapN:
1196     case Op_GetAndSetI:
1197     case Op_GetAndSetL:
1198     case Op_GetAndSetP:
1199     case Op_GetAndSetN:
1200     case Op_GetAndAddI:
1201     case Op_GetAndAddL:
1202       return true;
1203     default:
1204       return false;
1205     }
1206   }
1207 
// predicates controlling emit of ldr<x>/ldar<x> and associated dmb

// Returns true when the trailing acquire membar can be elided
// because the associated load (or CAS) will itself be translated
// using an acquiring instruction (ldar<x> or ldaxr<x>).
bool unnecessary_acquire(const Node *barrier)
{
  assert(barrier->is_MemBar(), "expecting a membar");

  if (UseBarriersForVolatile) {
    // we need to plant a dmb
    return false;
  }

  MemBarNode* mb = barrier->as_MemBar();

  // trailing membar of a volatile read signature: the load will be
  // planted as an ldar<x>, so the dmb is redundant
  if (mb->trailing_load()) {
    return true;
  }

  // trailing membar of a CAS signature: elide the dmb only for
  // opcodes we translate with an ldaxr-based macro sequence
  if (mb->trailing_load_store()) {
    Node* load_store = mb->in(MemBarNode::Precedent);
    assert(load_store->is_LoadStore(), "unexpected graph shape");
    return is_CAS(load_store->Opcode());
  }

  return false;
}
1233 
1234 bool needs_acquiring_load(const Node *n)
1235 {
1236   assert(n->is_Load(), "expecting a load");
1237   if (UseBarriersForVolatile) {
1238     // we use a normal load and a dmb
1239     return false;
1240   }
1241 
1242   LoadNode *ld = n->as_Load();
1243 
1244   return ld->is_acquire();
1245 }
1246 
1247 bool unnecessary_release(const Node *n)
1248 {
1249   assert((n->is_MemBar() &&
1250           n->Opcode() == Op_MemBarRelease),
1251          "expecting a release membar");
1252 
1253   if (UseBarriersForVolatile) {
1254     // we need to plant a dmb
1255     return false;
1256   }
1257 
1258   MemBarNode *barrier = n->as_MemBar();
1259 
1260   if (!barrier->leading()) {
1261     return false;
1262   } else {
1263     Node* trailing = barrier->trailing_membar();
1264     MemBarNode* trailing_mb = trailing->as_MemBar();
1265     assert(trailing_mb->trailing(), "Not a trailing membar?");
1266     assert(trailing_mb->leading_membar() == n, "inconsistent leading/trailing membars");
1267 
1268     Node* mem = trailing_mb->in(MemBarNode::Precedent);
1269     if (mem->is_Store()) {
1270       assert(mem->as_Store()->is_release(), "");
1271       assert(trailing_mb->Opcode() == Op_MemBarVolatile, "");
1272       return true;
1273     } else {
1274       assert(mem->is_LoadStore(), "");
1275       assert(trailing_mb->Opcode() == Op_MemBarAcquire, "");
1276       return is_CAS(mem->Opcode());
1277     }
1278   }
1279 
1280   return false;
1281 }
1282 
// Returns true when the trailing MemBarVolatile of a volatile store
// signature can be elided because the store itself will be planted
// as an stlr<x>.
bool unnecessary_volatile(const Node *n)
{
  // assert n->is_MemBar();
  if (UseBarriersForVolatile) {
    // we need to plant a dmb
    return false;
  }

  MemBarNode *mbvol = n->as_MemBar();

  // the membar is elidable exactly when it trails a releasing store
  bool release = mbvol->trailing_store();
  assert(!release || (mbvol->in(MemBarNode::Precedent)->is_Store() && mbvol->in(MemBarNode::Precedent)->as_Store()->is_release()), "");
#ifdef ASSERT
  // cross-check the leading/trailing membar pairing is consistent
  if (release) {
    Node* leading = mbvol->leading_membar();
    assert(leading->Opcode() == Op_MemBarRelease, "");
    assert(leading->as_MemBar()->leading_store(), "");
    assert(leading->as_MemBar()->trailing_membar() == mbvol, "");
   }
#endif

  return release;
}
1306 
1307 // predicates controlling emit of str<x>/stlr<x> and associated dmbs
1308 
1309 bool needs_releasing_store(const Node *n)
1310 {
1311   // assert n->is_Store();
1312   if (UseBarriersForVolatile) {
1313     // we use a normal store and dmb combination
1314     return false;
1315   }
1316 
1317   StoreNode *st = n->as_Store();
1318 
1319   return st->trailing_membar() != NULL;
1320 }
1321 
// predicate controlling translation of CAS
//
// returns true if CAS needs to use an acquiring load otherwise false

bool needs_acquiring_load_exclusive(const Node *n)
{
  assert(is_CAS(n->Opcode()), "expecting a compare and swap");
  if (UseBarriersForVolatile) {
    // the conventional dmb-bracketed sequence uses a plain ldxr<x>
    return false;
  }

  // every CAS reaching here is part of the recognised signature
  // configuration, so it always carries a trailing membar
  LoadStoreNode* ldst = n->as_LoadStore();
  assert(ldst->trailing_membar() != NULL, "expected trailing membar");

  // so we can just return true here
  return true;
}
1339 
1340 // predicate controlling translation of StoreCM
1341 //
1342 // returns true if a StoreStore must precede the card write otherwise
1343 // false
1344 
1345 bool unnecessary_storestore(const Node *storecm)
1346 {
1347   assert(storecm->Opcode()  == Op_StoreCM, "expecting a StoreCM");
1348 
1349   // we need to generate a dmb ishst between an object put and the
1350   // associated card mark when we are using CMS without conditional
1351   // card marking
1352 
1353   if (UseConcMarkSweepGC && !UseCondCardMark) {
1354     return false;
1355   }
1356 
1357   // a storestore is unnecesary in all other cases
1358 
1359   return true;
1360 }
1361 
1362 
1363 #define __ _masm.
1364 
// advance declarations for helper functions to convert register
1366 // indices to register objects
1367 
1368 // the ad file has to provide implementations of certain methods
1369 // expected by the generic code
1370 //
1371 // REQUIRED FUNCTIONALITY
1372 
1373 //=============================================================================
1374 
1375 // !!!!! Special hack to get all types of calls to specify the byte offset
1376 //       from the start of the call to the point where the return address
1377 //       will point.
1378 
1379 int MachCallStaticJavaNode::ret_addr_offset()
1380 {
1381   // call should be a simple bl
1382   // unless this is a method handle invoke in which case it is
1383   // mov(rfp, sp), bl, mov(sp, rfp)
1384   int off = 4;
1385   if (_method_handle_invoke) {
1386     off += 4;
1387   }
1388   return off;
1389 }
1390 
int MachCallDynamicJavaNode::ret_addr_offset()
{
  // four 4-byte instructions precede the return address
  return 16; // movz, movk, movk, bl
}
1395 
int MachCallRuntimeNode::ret_addr_offset() {
  // for generated stubs the call will be
  //   bl(addr)
  // for real runtime callouts it will be six instructions
  // see aarch64_enc_java_to_runtime
  //   adr(rscratch2, retaddr)
  //   lea(rscratch1, RuntimeAddress(addr)
  //   stp(zr, rscratch2, Address(__ pre(sp, -2 * wordSize)))
  //   blr(rscratch1)
  // a code-cache hit means the target is a generated stub, a miss
  // means it is a real runtime callout
  CodeBlob *cb = CodeCache::find_blob(_entry_point);
  if (cb) {
    return MacroAssembler::far_branch_size();
  } else {
    return 6 * NativeInstruction::instruction_size;
  }
}
1412 
1413 // Indicate if the safepoint node needs the polling page as an input
1414 
1415 // the shared code plants the oop data at the start of the generated
// code for the safepoint node and that needs to be at the load
1417 // instruction itself. so we cannot plant a mov of the safepoint poll
1418 // address followed by a load. setting this to true means the mov is
1419 // scheduled as a prior instruction. that's better for scheduling
1420 // anyway.
1421 
bool SafePointNode::needs_polling_address_input()
{
  // the poll address mov must be schedulable as a separate prior
  // instruction (see the block comment above)
  return true;
}
1426 
1427 //=============================================================================
1428 
#ifndef PRODUCT
void MachBreakpointNode::format(PhaseRegAlloc *ra_, outputStream *st) const {
  st->print("BREAKPOINT");
}
#endif

// plant a brk instruction, which traps to the debugger
void MachBreakpointNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
  MacroAssembler _masm(&cbuf);
  __ brk(0);
}

uint MachBreakpointNode::size(PhaseRegAlloc *ra_) const {
  // let the generic code measure the emitted length
  return MachNode::size(ra_);
}
1443 
1444 //=============================================================================
1445 
#ifndef PRODUCT
  void MachNopNode::format(PhaseRegAlloc*, outputStream* st) const {
    st->print("nop \t# %d bytes pad for loops and calls", _count);
  }
#endif

  // emit _count nops as padding
  void MachNopNode::emit(CodeBuffer &cbuf, PhaseRegAlloc*) const {
    MacroAssembler _masm(&cbuf);
    for (int i = 0; i < _count; i++) {
      __ nop();
    }
  }

  uint MachNopNode::size(PhaseRegAlloc*) const {
    // each nop is one fixed-width instruction
    return _count * NativeInstruction::instruction_size;
  }
1462 
1463 //=============================================================================
// the constant base node needs no output register
const RegMask& MachConstantBaseNode::_out_RegMask = RegMask::Empty;

int Compile::ConstantTable::calculate_table_base_offset() const {
  return 0;  // absolute addressing, no offset
}

bool MachConstantBaseNode::requires_postalloc_expand() const { return false; }
void MachConstantBaseNode::postalloc_expand(GrowableArray <Node *> *nodes, PhaseRegAlloc *ra_) {
  // never reached: requires_postalloc_expand() returns false
  ShouldNotReachHere();
}

void MachConstantBaseNode::emit(CodeBuffer& cbuf, PhaseRegAlloc* ra_) const {
  // Empty encoding
}

uint MachConstantBaseNode::size(PhaseRegAlloc* ra_) const {
  // the empty encoding occupies no bytes
  return 0;
}

#ifndef PRODUCT
void MachConstantBaseNode::format(PhaseRegAlloc* ra_, outputStream* st) const {
  st->print("-- \t// MachConstantBaseNode (empty encoding)");
}
#endif
1488 
#ifndef PRODUCT
// print a textual rendering of the prolog; the three cases mirror
// the frame-build strategies chosen by emit()/build_frame below
void MachPrologNode::format(PhaseRegAlloc *ra_, outputStream *st) const {
  Compile* C = ra_->C;

  int framesize = C->frame_slots() << LogBytesPerInt;

  if (C->need_stack_bang(framesize))
    st->print("# stack bang size=%d\n\t", framesize);

  if (framesize == 0) {
    // Is this even possible?
    st->print("stp  lr, rfp, [sp, #%d]!", -(2 * wordSize));
  } else if (framesize < ((1 << 9) + 2 * wordSize)) {
    // frame fits in the immediate range of sub/stp
    st->print("sub  sp, sp, #%d\n\t", framesize);
    st->print("stp  rfp, lr, [sp, #%d]", framesize - 2 * wordSize);
  } else {
    // large frame: adjust sp via a scratch register
    st->print("stp  lr, rfp, [sp, #%d]!\n\t", -(2 * wordSize));
    st->print("mov  rscratch1, #%d\n\t", framesize - 2 * wordSize);
    st->print("sub  sp, sp, rscratch1");
  }
}
#endif
1511 
// emit the method prolog: nop patch point, optional stack bang,
// frame build and constant table base setup
void MachPrologNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
  Compile* C = ra_->C;
  MacroAssembler _masm(&cbuf);

  // n.b. frame size includes space for return pc and rfp
  long framesize = ((long)C->frame_slots()) << LogBytesPerInt;
  assert(framesize%(2*wordSize) == 0, "must preserve 2*wordSize alignment");

  // insert a nop at the start of the prolog so we can patch in a
  // branch if we need to invalidate the method later
  __ nop();

  if (C->need_stack_bang(framesize))
    __ generate_stack_overflow_check(framesize);

  __ build_frame(framesize);

  if (VerifyStackAtCalls) {
    Unimplemented();
  }

  C->set_frame_complete(cbuf.insts_size());

  if (C->has_mach_constant_base_node()) {
    // NOTE: We set the table base offset here because users might be
    // emitted before MachConstantBaseNode.
    Compile::ConstantTable& constant_table = C->constant_table();
    constant_table.set_table_base_offset(constant_table.calculate_table_base_offset());
  }
}
1542 
uint MachPrologNode::size(PhaseRegAlloc* ra_) const
{
  return MachNode::size(ra_); // too many variables; just compute it
                              // the hard way
}

int MachPrologNode::reloc() const
{
  // the prolog contains no relocatable values
  return 0;
}
1553 
1554 //=============================================================================
1555 
#ifndef PRODUCT
// print a textual rendering of the epilog; the three cases mirror
// the frame-pop strategies used by emit()/remove_frame below
void MachEpilogNode::format(PhaseRegAlloc *ra_, outputStream *st) const {
  Compile* C = ra_->C;
  int framesize = C->frame_slots() << LogBytesPerInt;

  st->print("# pop frame %d\n\t",framesize);

  if (framesize == 0) {
    st->print("ldp  lr, rfp, [sp],#%d\n\t", (2 * wordSize));
  } else if (framesize < ((1 << 9) + 2 * wordSize)) {
    // frame fits in the immediate range of ldp/add
    st->print("ldp  lr, rfp, [sp,#%d]\n\t", framesize - 2 * wordSize);
    st->print("add  sp, sp, #%d\n\t", framesize);
  } else {
    // large frame: adjust sp via a scratch register
    st->print("mov  rscratch1, #%d\n\t", framesize - 2 * wordSize);
    st->print("add  sp, sp, rscratch1\n\t");
    st->print("ldp  lr, rfp, [sp],#%d\n\t", (2 * wordSize));
  }

  if (do_polling() && C->is_method_compilation()) {
    st->print("# touch polling page\n\t");
    st->print("mov  rscratch1, #" INTPTR_FORMAT "\n\t", p2i(os::get_polling_page()));
    st->print("ldr zr, [rscratch1]");
  }
}
#endif
1581 
// emit the method epilog: pop the frame and, for method
// compilations, touch the safepoint polling page on return
void MachEpilogNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
  Compile* C = ra_->C;
  MacroAssembler _masm(&cbuf);
  int framesize = C->frame_slots() << LogBytesPerInt;

  __ remove_frame(framesize);

  if (do_polling() && C->is_method_compilation()) {
    __ read_polling_page(rscratch1, os::get_polling_page(), relocInfo::poll_return_type);
  }
}
1593 
uint MachEpilogNode::size(PhaseRegAlloc *ra_) const {
  // Variable size. Determine dynamically.
  return MachNode::size(ra_);
}

int MachEpilogNode::reloc() const {
  // Return number of relocatable values contained in this instruction.
  return 1; // 1 for polling page.
}

const Pipeline * MachEpilogNode::pipeline() const {
  // no special scheduling for the epilog
  return MachNode::pipeline_class();
}

// This method seems to be obsolete. It is declared in machnode.hpp
// and defined in all *.ad files, but it is never called. Should we
// get rid of it?
int MachEpilogNode::safepoint_offset() const {
  assert(do_polling(), "no return for this epilog node");
  return 4;
}
1615 
1616 //=============================================================================
1617 
1618 // Figure out which register class each belongs in: rc_int, rc_float or
1619 // rc_stack.
1620 enum RC { rc_bad, rc_int, rc_float, rc_stack };
1621 
1622 static enum RC rc_class(OptoReg::Name reg) {
1623 
1624   if (reg == OptoReg::Bad) {
1625     return rc_bad;
1626   }
1627 
1628   // we have 30 int registers * 2 halves
1629   // (rscratch1 and rscratch2 are omitted)
1630 
1631   if (reg < 60) {
1632     return rc_int;
1633   }
1634 
1635   // we have 32 float register * 2 halves
1636   if (reg < 60 + 128) {
1637     return rc_float;
1638   }
1639 
1640   // Between float regs & stack is the flags regs.
1641   assert(OptoReg::is_stack(reg), "blow up if spilling flags");
1642 
1643   return rc_stack;
1644 }
1645 
1646 uint MachSpillCopyNode::implementation(CodeBuffer *cbuf, PhaseRegAlloc *ra_, bool do_size, outputStream *st) const {
1647   Compile* C = ra_->C;
1648 
1649   // Get registers to move.
1650   OptoReg::Name src_hi = ra_->get_reg_second(in(1));
1651   OptoReg::Name src_lo = ra_->get_reg_first(in(1));
1652   OptoReg::Name dst_hi = ra_->get_reg_second(this);
1653   OptoReg::Name dst_lo = ra_->get_reg_first(this);
1654 
1655   enum RC src_hi_rc = rc_class(src_hi);
1656   enum RC src_lo_rc = rc_class(src_lo);
1657   enum RC dst_hi_rc = rc_class(dst_hi);
1658   enum RC dst_lo_rc = rc_class(dst_lo);
1659 
1660   assert(src_lo != OptoReg::Bad && dst_lo != OptoReg::Bad, "must move at least 1 register");
1661 
1662   if (src_hi != OptoReg::Bad) {
1663     assert((src_lo&1)==0 && src_lo+1==src_hi &&
1664            (dst_lo&1)==0 && dst_lo+1==dst_hi,
1665            "expected aligned-adjacent pairs");
1666   }
1667 
1668   if (src_lo == dst_lo && src_hi == dst_hi) {
1669     return 0;            // Self copy, no move.
1670   }
1671 
1672   bool is64 = (src_lo & 1) == 0 && src_lo + 1 == src_hi &&
1673               (dst_lo & 1) == 0 && dst_lo + 1 == dst_hi;
1674   int src_offset = ra_->reg2offset(src_lo);
1675   int dst_offset = ra_->reg2offset(dst_lo);
1676 
1677   if (bottom_type()->isa_vect() != NULL) {
1678     uint ireg = ideal_reg();
1679     assert(ireg == Op_VecD || ireg == Op_VecX, "must be 64 bit or 128 bit vector");
1680     if (cbuf) {
1681       MacroAssembler _masm(cbuf);
1682       assert((src_lo_rc != rc_int && dst_lo_rc != rc_int), "sanity");
1683       if (src_lo_rc == rc_stack && dst_lo_rc == rc_stack) {
1684         // stack->stack
1685         assert((src_offset & 7) == 0 && (dst_offset & 7) == 0, "unaligned stack offset");
1686         if (ireg == Op_VecD) {
1687           __ unspill(rscratch1, true, src_offset);
1688           __ spill(rscratch1, true, dst_offset);
1689         } else {
1690           __ spill_copy128(src_offset, dst_offset);
1691         }
1692       } else if (src_lo_rc == rc_float && dst_lo_rc == rc_float) {
1693         __ mov(as_FloatRegister(Matcher::_regEncode[dst_lo]),
1694                ireg == Op_VecD ? __ T8B : __ T16B,
1695                as_FloatRegister(Matcher::_regEncode[src_lo]));
1696       } else if (src_lo_rc == rc_float && dst_lo_rc == rc_stack) {
1697         __ spill(as_FloatRegister(Matcher::_regEncode[src_lo]),
1698                        ireg == Op_VecD ? __ D : __ Q,
1699                        ra_->reg2offset(dst_lo));
1700       } else if (src_lo_rc == rc_stack && dst_lo_rc == rc_float) {
1701         __ unspill(as_FloatRegister(Matcher::_regEncode[dst_lo]),
1702                        ireg == Op_VecD ? __ D : __ Q,
1703                        ra_->reg2offset(src_lo));
1704       } else {
1705         ShouldNotReachHere();
1706       }
1707     }
1708   } else if (cbuf) {
1709     MacroAssembler _masm(cbuf);
1710     switch (src_lo_rc) {
1711     case rc_int:
1712       if (dst_lo_rc == rc_int) {  // gpr --> gpr copy
1713         if (is64) {
1714             __ mov(as_Register(Matcher::_regEncode[dst_lo]),
1715                    as_Register(Matcher::_regEncode[src_lo]));
1716         } else {
1717             MacroAssembler _masm(cbuf);
1718             __ movw(as_Register(Matcher::_regEncode[dst_lo]),
1719                     as_Register(Matcher::_regEncode[src_lo]));
1720         }
1721       } else if (dst_lo_rc == rc_float) { // gpr --> fpr copy
1722         if (is64) {
1723             __ fmovd(as_FloatRegister(Matcher::_regEncode[dst_lo]),
1724                      as_Register(Matcher::_regEncode[src_lo]));
1725         } else {
1726             __ fmovs(as_FloatRegister(Matcher::_regEncode[dst_lo]),
1727                      as_Register(Matcher::_regEncode[src_lo]));
1728         }
1729       } else {                    // gpr --> stack spill
1730         assert(dst_lo_rc == rc_stack, "spill to bad register class");
1731         __ spill(as_Register(Matcher::_regEncode[src_lo]), is64, dst_offset);
1732       }
1733       break;
1734     case rc_float:
1735       if (dst_lo_rc == rc_int) {  // fpr --> gpr copy
1736         if (is64) {
1737             __ fmovd(as_Register(Matcher::_regEncode[dst_lo]),
1738                      as_FloatRegister(Matcher::_regEncode[src_lo]));
1739         } else {
1740             __ fmovs(as_Register(Matcher::_regEncode[dst_lo]),
1741                      as_FloatRegister(Matcher::_regEncode[src_lo]));
1742         }
1743       } else if (dst_lo_rc == rc_float) { // fpr --> fpr copy
1744           if (cbuf) {
1745             __ fmovd(as_FloatRegister(Matcher::_regEncode[dst_lo]),
1746                      as_FloatRegister(Matcher::_regEncode[src_lo]));
1747         } else {
1748             __ fmovs(as_FloatRegister(Matcher::_regEncode[dst_lo]),
1749                      as_FloatRegister(Matcher::_regEncode[src_lo]));
1750         }
1751       } else {                    // fpr --> stack spill
1752         assert(dst_lo_rc == rc_stack, "spill to bad register class");
1753         __ spill(as_FloatRegister(Matcher::_regEncode[src_lo]),
1754                  is64 ? __ D : __ S, dst_offset);
1755       }
1756       break;
1757     case rc_stack:
1758       if (dst_lo_rc == rc_int) {  // stack --> gpr load
1759         __ unspill(as_Register(Matcher::_regEncode[dst_lo]), is64, src_offset);
1760       } else if (dst_lo_rc == rc_float) { // stack --> fpr load
1761         __ unspill(as_FloatRegister(Matcher::_regEncode[dst_lo]),
1762                    is64 ? __ D : __ S, src_offset);
1763       } else {                    // stack --> stack copy
1764         assert(dst_lo_rc == rc_stack, "spill to bad register class");
1765         __ unspill(rscratch1, is64, src_offset);
1766         __ spill(rscratch1, is64, dst_offset);
1767       }
1768       break;
1769     default:
1770       assert(false, "bad rc_class for spill");
1771       ShouldNotReachHere();
1772     }
1773   }
1774 
1775   if (st) {
1776     st->print("spill ");
1777     if (src_lo_rc == rc_stack) {
1778       st->print("[sp, #%d] -> ", ra_->reg2offset(src_lo));
1779     } else {
1780       st->print("%s -> ", Matcher::regName[src_lo]);
1781     }
1782     if (dst_lo_rc == rc_stack) {
1783       st->print("[sp, #%d]", ra_->reg2offset(dst_lo));
1784     } else {
1785       st->print("%s", Matcher::regName[dst_lo]);
1786     }
1787     if (bottom_type()->isa_vect() != NULL) {
1788       st->print("\t# vector spill size = %d", ideal_reg()==Op_VecD ? 64:128);
1789     } else {
1790       st->print("\t# spill size = %d", is64 ? 64:32);
1791     }
1792   }
1793 
1794   return 0;
1795 
1796 }
1797 
1798 #ifndef PRODUCT
1799 void MachSpillCopyNode::format(PhaseRegAlloc *ra_, outputStream *st) const {
1800   if (!ra_)
1801     st->print("N%d = SpillCopy(N%d)", _idx, in(1)->_idx);
1802   else
1803     implementation(NULL, ra_, false, st);
1804 }
1805 #endif
1806 
// Emit the spill/fill/copy code for this node into the code buffer.
// All the real work is done by implementation() (defined above), here
// invoked in emit mode (code buffer supplied, no output stream).
void MachSpillCopyNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
  implementation(&cbuf, ra_, false, NULL);
}

// Size in bytes of the code emitted by emit(); defer to the generic
// MachNode size computation.
uint MachSpillCopyNode::size(PhaseRegAlloc *ra_) const {
  return MachNode::size(ra_);
}
1814 
1815 //=============================================================================
1816 
1817 #ifndef PRODUCT
1818 void BoxLockNode::format(PhaseRegAlloc *ra_, outputStream *st) const {
1819   int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
1820   int reg = ra_->get_reg_first(this);
1821   st->print("add %s, rsp, #%d]\t# box lock",
1822             Matcher::regName[reg], offset);
1823 }
1824 #endif
1825 
// Materialize the address of this lock's stack slot into the allocated
// register: reg = sp + frame offset of the box.
void BoxLockNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
  MacroAssembler _masm(&cbuf);

  int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
  int reg    = ra_->get_encode(this);

  // This add will handle any 24-bit signed offset. 24 bits allows an
  // 8 megabyte stack frame.
  // NOTE(review): size() below accounts for a 2-instruction expansion
  // when the offset is not a valid add/sub immediate -- presumably the
  // MacroAssembler add synthesizes the constant in that case; confirm.
  __ add(as_Register(reg), sp, offset);
}
1836 
1837 uint BoxLockNode::size(PhaseRegAlloc *ra_) const {
1838   // BoxLockNode is not a MachNode, so we can't just call MachNode::size(ra_).
1839   int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
1840 
1841   if (Assembler::operand_valid_for_add_sub_immediate(offset)) {
1842     return NativeInstruction::instruction_size;
1843   } else {
1844     return 2 * NativeInstruction::instruction_size;
1845   }
1846 }
1847 
1848 //=============================================================================
1849 
1850 #ifndef PRODUCT
1851 void MachUEPNode::format(PhaseRegAlloc* ra_, outputStream* st) const
1852 {
1853   st->print_cr("# MachUEPNode");
1854   if (UseCompressedClassPointers) {
1855     st->print_cr("\tldrw rscratch1, j_rarg0 + oopDesc::klass_offset_in_bytes()]\t# compressed klass");
1856     if (Universe::narrow_klass_shift() != 0) {
1857       st->print_cr("\tdecode_klass_not_null rscratch1, rscratch1");
1858     }
1859   } else {
1860    st->print_cr("\tldr rscratch1, j_rarg0 + oopDesc::klass_offset_in_bytes()]\t# compressed klass");
1861   }
1862   st->print_cr("\tcmp r0, rscratch1\t # Inline cache check");
1863   st->print_cr("\tbne, SharedRuntime::_ic_miss_stub");
1864 }
1865 #endif
1866 
// Emit the unverified entry point: verify the receiver's klass matches
// the inline cache's expectation, otherwise jump to the IC miss stub.
void MachUEPNode::emit(CodeBuffer& cbuf, PhaseRegAlloc* ra_) const
{
  // This is the unverified entry point.
  MacroAssembler _masm(&cbuf);

  // Compare the klass of the receiver (j_rarg0) against the expected
  // klass; rscratch2/rscratch1 are used as temporaries by cmp_klass.
  __ cmp_klass(j_rarg0, rscratch2, rscratch1);
  Label skip;
  // TODO
  // can we avoid this skip and still use a reloc?
  __ br(Assembler::EQ, skip);
  // Klass mismatch: go resolve/patch the inline cache.
  __ far_jump(RuntimeAddress(SharedRuntime::get_ic_miss_stub()));
  __ bind(skip);
}

// Size in bytes of the UEP sequence; the generic computation suffices.
uint MachUEPNode::size(PhaseRegAlloc* ra_) const
{
  return MachNode::size(ra_);
}
1885 
1886 // REQUIRED EMIT CODE
1887 
1888 //=============================================================================
1889 
1890 // Emit exception handler code.
// Emit exception handler code: a far jump into the shared exception
// blob.  Returns the offset of the handler within the stub section,
// or 0 on failure (code cache full).
int HandlerImpl::emit_exception_handler(CodeBuffer& cbuf)
{
  // mov rscratch1 #exception_blob_entry_point
  // br rscratch1
  // Note that the code buffer's insts_mark is always relative to insts.
  // That's why we must use the macroassembler to generate a handler.
  MacroAssembler _masm(&cbuf);
  address base = __ start_a_stub(size_exception_handler());
  if (base == NULL) {
    ciEnv::current()->record_failure("CodeCache is full");
    return 0;  // CodeBuffer::expand failed
  }
  int offset = __ offset();
  __ far_jump(RuntimeAddress(OptoRuntime::exception_blob()->entry_point()));
  // The handler must fit in the space size_exception_handler() reserved.
  assert(__ offset() - offset <= (int) size_exception_handler(), "overflow");
  __ end_a_stub();
  return offset;
}
1909 
1910 // Emit deopt handler code.
// Emit deopt handler code: record the current pc in lr, then far-jump
// to the deoptimization blob's unpack entry.  Returns the offset of the
// handler within the stub section, or 0 on failure (code cache full).
int HandlerImpl::emit_deopt_handler(CodeBuffer& cbuf)
{
  // Note that the code buffer's insts_mark is always relative to insts.
  // That's why we must use the macroassembler to generate a handler.
  MacroAssembler _masm(&cbuf);
  address base = __ start_a_stub(size_deopt_handler());
  if (base == NULL) {
    ciEnv::current()->record_failure("CodeCache is full");
    return 0;  // CodeBuffer::expand failed
  }
  int offset = __ offset();

  // Materialize the handler's own pc in lr so the unpack blob can
  // identify the deopt site.
  __ adr(lr, __ pc());
  __ far_jump(RuntimeAddress(SharedRuntime::deopt_blob()->unpack()));

  assert(__ offset() - offset <= (int) size_deopt_handler(), "overflow");
  __ end_a_stub();
  return offset;
}
1930 
1931 // REQUIRED MATCHER CODE
1932 
1933 //=============================================================================
1934 
1935 const bool Matcher::match_rule_supported(int opcode) {
1936 
1937   // TODO 
1938   // identify extra cases that we might want to provide match rules for
1939   // e.g. Op_StrEquals and other intrinsics
1940   if (!has_match_rule(opcode)) {
1941     return false;
1942   }
1943 
1944   return true;  // Per default match rules are supported.
1945 }
1946 
// Not used on AArch64 (no x87-style FPU stack); guarded by
// Unimplemented() in case it is ever reached.
int Matcher::regnum_to_fpu_offset(int regnum)
{
  Unimplemented();
  return 0;
}
1952 
1953 // Is this branch offset short enough that a short branch can be used?
1954 //
1955 // NOTE: If the platform does not provide any short branch variants, then
1956 //       this method should return false for offset 0.
1957 bool Matcher::is_short_branch_offset(int rule, int br_size, int offset) {
1958   // The passed offset is relative to address of the branch.
1959 
1960   return (-32768 <= offset && offset < 32768);
1961 }
1962 
// Is a 64-bit constant cheap enough to store directly?
const bool Matcher::isSimpleConstant64(jlong value) {
  // Will one (StoreL ConL) be cheaper than two (StoreI ConI)?.
  // Probably always true, even if a temp register is required.
  return true;
}

// true just means we have fast l2f conversion
const bool Matcher::convL2FSupported(void) {
  return true;
}
1973 
1974 // Vector width in bytes.
1975 const int Matcher::vector_width_in_bytes(BasicType bt) {
1976   int size = MIN2(16,(int)MaxVectorSize);
1977   // Minimum 2 values in vector
1978   if (size < 2*type2aelembytes(bt)) size = 0;
1979   // But never < 4
1980   if (size < 4) size = 0;
1981   return size;
1982 }
1983 
// Limits on vector size (number of elements) loaded into vector.
// Maximum element count = usable byte width / element size.
const int Matcher::max_vector_size(const BasicType bt) {
  return vector_width_in_bytes(bt)/type2aelembytes(bt);
}
1988 const int Matcher::min_vector_size(const BasicType bt) {
1989 //  For the moment limit the vector size to 8 bytes
1990     int size = 8 / type2aelembytes(bt);
1991     if (size < 2) size = 2;
1992     return size;
1993 }
1994 
1995 // Vector ideal reg.
1996 const uint Matcher::vector_ideal_reg(int len) {
1997   switch(len) {
1998     case  8: return Op_VecD;
1999     case 16: return Op_VecX;
2000   }
2001   ShouldNotReachHere();
2002   return 0;
2003 }
2004 
2005 const uint Matcher::vector_shift_count_ideal_reg(int size) {
2006   switch(size) {
2007     case  8: return Op_VecD;
2008     case 16: return Op_VecX;
2009   }
2010   ShouldNotReachHere();
2011   return 0;
2012 }
2013 
// AES support not yet implemented
const bool Matcher::pass_original_key_for_aes() {
  return false;
}

// AArch64 supports misaligned vector store/load, so allow them unless
// the AlignVector flag forces alignment.
// (Previous comment said "x86" -- copied from the x86 AD file.)
const bool Matcher::misaligned_vectors_ok() {
  return !AlignVector; // can be changed by flag
}
2023 
// Is the ClearArray count in bytes?
// false => size gets scaled to BytesPerLong, ok.
const bool Matcher::init_array_count_is_in_bytes = false;

// Threshold size for cleararray: below this, clears are expanded
// inline rather than via a loop/stub.
const int Matcher::init_array_short_size = 4 * BytesPerLong;
2029 
// Use conditional move (CMOVL).  Extra cost of a long cmove over an
// int cmove; zero on AArch64.
const int Matcher::long_cmove_cost() {
  // long cmoves are no more expensive than int cmoves
  return 0;
}

// Extra cost of a float cmove over an int cmove; zero on AArch64.
const int Matcher::float_cmove_cost() {
  // float cmoves are no more expensive than int cmoves
  return 0;
}
2040 
// Does the CPU require late expand (see block.cpp for description of late expand)?
const bool Matcher::require_postalloc_expand = false;

// Should the Matcher clone shifts on addressing modes, expecting them
// to be subsumed into complex addressing expressions or compute them
// into registers?  True for Intel but false for most RISCs.
const bool Matcher::clone_shift_expressions = false;

// Do we need to mask the count passed to shift instructions or does
// the cpu only look at the lower 5/6 bits anyway?
// AArch64 shift instructions use only the low bits of the count.
const bool Matcher::need_masked_shift_count = false;
2052 
2053 // This affects two different things:
2054 //  - how Decode nodes are matched
2055 //  - how ImplicitNullCheck opportunities are recognized
2056 // If true, the matcher will try to remove all Decodes and match them
2057 // (as operands) into nodes. NullChecks are not prepared to deal with 
2058 // Decodes by final_graph_reshaping().
2059 // If false, final_graph_reshaping() forces the decode behind the Cmp
2060 // for a NullCheck. The matcher matches the Decode node into a register.
2061 // Implicit_null_check optimization moves the Decode along with the 
2062 // memory operation back up before the NullCheck.
// A decode can be folded into an addressing mode only when no shift
// (scaling) is required, i.e. when the compressed-oop shift is zero.
bool Matcher::narrow_oop_use_complex_address() {
  return Universe::narrow_oop_shift() == 0;
}

bool Matcher::narrow_klass_use_complex_address() {
// TODO
// decide whether we need to set this to true
  return false;
}
2072 
// Is it better to copy float constants, or load them directly from
// memory?  Intel can load a float constant from a direct address,
// requiring no extra registers.  Most RISCs will have to materialize
// an address into a register first, so they would do better to copy
// the constant from stack.  AArch64 follows the RISC choice: false.
const bool Matcher::rematerialize_float_constants = false;

// If CPU can load and store mis-aligned doubles directly then no
// fixup is needed.  Else we split the double into 2 integer pieces
// and move it piece-by-piece.  Only happens when passing doubles into
// C code as the Java calling convention forces doubles to be aligned.
const bool Matcher::misaligned_doubles_ok = true;
2085 
// NOTE(review): the previous comment said "No-op on amd64", but this
// implementation calls Unimplemented() -- implicit null check fixup is
// expected never to be needed/reached on this port.
void Matcher::pd_implicit_null_fixup(MachNode *node, uint idx) {
  Unimplemented();
}
2090 
// Advertise here if the CPU requires explicit rounding operations to
// implement the UseStrictFP mode.  Not needed on AArch64.
const bool Matcher::strict_fp_requires_explicit_rounding = false;

// Are floats converted to double when stored to stack during
// deoptimization?
bool Matcher::float_in_double() { return true; }

// Do ints take an entire long register or just half?
// The relevant question is how the int is callee-saved:
// the whole long is written but de-opt'ing will have to extract
// the relevant 32 bits.
const bool Matcher::int_in_long = true;
2104 
2105 // Return whether or not this register is ever used as an argument.
2106 // This function is used on startup to build the trampoline stubs in
2107 // generateOptoStub.  Registers not mentioned will be killed by the VM
2108 // call in the trampoline, and arguments in those registers not be
2109 // available to the callee.
2110 bool Matcher::can_be_java_arg(int reg)
2111 {
2112   return
2113     reg ==  R0_num || reg == R0_H_num ||
2114     reg ==  R1_num || reg == R1_H_num ||
2115     reg ==  R2_num || reg == R2_H_num ||
2116     reg ==  R3_num || reg == R3_H_num ||
2117     reg ==  R4_num || reg == R4_H_num ||
2118     reg ==  R5_num || reg == R5_H_num ||
2119     reg ==  R6_num || reg == R6_H_num ||
2120     reg ==  R7_num || reg == R7_H_num ||
2121     reg ==  V0_num || reg == V0_H_num ||
2122     reg ==  V1_num || reg == V1_H_num ||
2123     reg ==  V2_num || reg == V2_H_num ||
2124     reg ==  V3_num || reg == V3_H_num ||
2125     reg ==  V4_num || reg == V4_H_num ||
2126     reg ==  V5_num || reg == V5_H_num ||
2127     reg ==  V6_num || reg == V6_H_num ||
2128     reg ==  V7_num || reg == V7_H_num;
2129 }
2130 
// Any register that can carry a Java argument can also be spilled.
bool Matcher::is_spillable_arg(int reg)
{
  return can_be_java_arg(reg);
}

// Should long division by this constant be done with hand-written
// assembly rather than the generic expansion?  Never on AArch64.
bool Matcher::use_asm_for_ldiv_by_con(jlong divisor) {
  return false;
}
2139 
// Register masks for the projections of fused divmod nodes.  These are
// never called on AArch64 (guarded by ShouldNotReachHere()).

// Register for DIVI projection of divmodI.
RegMask Matcher::divI_proj_mask() {
  ShouldNotReachHere();
  return RegMask();
}

// Register for MODI projection of divmodI.
RegMask Matcher::modI_proj_mask() {
  ShouldNotReachHere();
  return RegMask();
}

// Register for DIVL projection of divmodL.
RegMask Matcher::divL_proj_mask() {
  ShouldNotReachHere();
  return RegMask();
}

// Register for MODL projection of divmodL.
RegMask Matcher::modL_proj_mask() {
  ShouldNotReachHere();
  return RegMask();
}
2162 
// Mask of the register used to preserve SP across a method handle
// invoke: the frame pointer register.
const RegMask Matcher::method_handle_invoke_SP_save_mask() {
  return FP_REG_mask();
}
2166 
2167 bool size_fits_all_mem_uses(AddPNode* addp, int shift) {
2168   for (DUIterator_Fast imax, i = addp->fast_outs(imax); i < imax; i++) {
2169     Node* u = addp->fast_out(i);
2170     if (u->is_Mem()) {
2171       int opsize = u->as_Mem()->memory_size();
2172       assert(opsize > 0, "unexpected memory operand size");
2173       if (u->as_Mem()->memory_size() != (1<<shift)) {
2174         return false;
2175       }
2176     }
2177   }
2178   return true;
2179 }
2180 
// Emit a volatile memory access.  Volatile forms accept only a bare
// [base] address, which the guarantees below enforce.
// (Comments must stay outside the macro: '//' inside a continuation
// line would swallow the trailing backslash.)
#define MOV_VOLATILE(REG, BASE, INDEX, SCALE, DISP, SCRATCH, INSN)      \
  MacroAssembler _masm(&cbuf);                                              \
  {                                                                     \
    guarantee(INDEX == -1, "mode not permitted for volatile");          \
    guarantee(DISP == 0, "mode not permitted for volatile");            \
    guarantee(SCALE == 0, "mode not permitted for volatile");           \
    __ INSN(REG, as_Register(BASE));                                    \
  }

// Member-function-pointer types used by the loadStore() helpers below
// to dispatch to the appropriate MacroAssembler emitter.
typedef void (MacroAssembler::* mem_insn)(Register Rt, const Address &adr);
typedef void (MacroAssembler::* mem_float_insn)(FloatRegister Rt, const Address &adr);
typedef void (MacroAssembler::* mem_vector_insn)(FloatRegister Rt,
                                  MacroAssembler::SIMD_RegVariant T, const Address &adr);
2194 
  // Used for all non-volatile memory accesses.  The use of
  // $mem->opcode() to discover whether this pattern uses sign-extended
  // offsets is something of a kludge.
  static void loadStore(MacroAssembler masm, mem_insn insn,
                         Register reg, int opcode,
                         Register base, int index, int size, int disp)
  {
    Address::extend scale;

    // Hooboy, this is fugly.  We need a way to communicate to the
    // encoder that the index needs to be sign extended, so we have to
    // enumerate all the cases.
    switch (opcode) {
    // These operand opcodes carry a 32-bit (I2L) index: sign-extend it.
    case INDINDEXSCALEDOFFSETI2L:
    case INDINDEXSCALEDI2L:
    case INDINDEXSCALEDOFFSETI2LN:
    case INDINDEXSCALEDI2LN:
    case INDINDEXOFFSETI2L:
    case INDINDEXOFFSETI2LN:
      scale = Address::sxtw(size);
      break;
    default:
      // 64-bit index: plain logical shift left by the scale.
      scale = Address::lsl(size);
    }

    if (index == -1) {
      // No index register: simple base + displacement.
      (masm.*insn)(reg, Address(base, disp));
    } else {
      if (disp == 0) {
        (masm.*insn)(reg, Address(base, as_Register(index), scale));
      } else {
        // Base + index + displacement is not encodable directly;
        // fold base+disp into rscratch1 first.
        masm.lea(rscratch1, Address(base, disp));
        (masm.*insn)(reg, Address(rscratch1, as_Register(index), scale));
      }
    }
  }
2231 
  // Floating-point variant of loadStore() above: same addressing-mode
  // selection, dispatching to an FP load/store emitter.
  static void loadStore(MacroAssembler masm, mem_float_insn insn,
                         FloatRegister reg, int opcode,
                         Register base, int index, int size, int disp)
  {
    Address::extend scale;

    switch (opcode) {
    // Operand opcodes with a 32-bit (I2L) index: sign-extend it.
    case INDINDEXSCALEDOFFSETI2L:
    case INDINDEXSCALEDI2L:
    case INDINDEXSCALEDOFFSETI2LN:
    case INDINDEXSCALEDI2LN:
      scale = Address::sxtw(size);
      break;
    default:
      scale = Address::lsl(size);
    }

     if (index == -1) {
      (masm.*insn)(reg, Address(base, disp));
    } else {
      if (disp == 0) {
        (masm.*insn)(reg, Address(base, as_Register(index), scale));
      } else {
        // Fold base+disp into rscratch1 when all three components appear.
        masm.lea(rscratch1, Address(base, disp));
        (masm.*insn)(reg, Address(rscratch1, as_Register(index), scale));
      }
    }
  }
2260 
2261   static void loadStore(MacroAssembler masm, mem_vector_insn insn,
2262                          FloatRegister reg, MacroAssembler::SIMD_RegVariant T,
2263                          int opcode, Register base, int index, int size, int disp)
2264   {
2265     if (index == -1) {
2266       (masm.*insn)(reg, T, Address(base, disp));
2267     } else {
2268       assert(disp == 0, "unsupported address mode");
2269       (masm.*insn)(reg, T, Address(base, as_Register(index), Address::lsl(size)));
2270     }
2271   }
2272 
2273 %}
2274 
2275 
2276 
2277 //----------ENCODING BLOCK-----------------------------------------------------
2278 // This block specifies the encoding classes used by the compiler to
2279 // output byte streams.  Encoding classes are parameterized macros
2280 // used by Machine Instruction Nodes in order to generate the bit
2281 // encoding of the instruction.  Operands specify their base encoding
2282 // interface with the interface keyword.  There are currently
2283 // supported four interfaces, REG_INTER, CONST_INTER, MEMORY_INTER, &
2284 // COND_INTER.  REG_INTER causes an operand to generate a function
2285 // which returns its register number when queried.  CONST_INTER causes
2286 // an operand to generate a function which returns the value of the
2287 // constant when queried.  MEMORY_INTER causes an operand to generate
2288 // four functions which return the Base Register, the Index Register,
2289 // the Scale Value, and the Offset Value of the operand when queried.
2290 // COND_INTER causes an operand to generate six functions which return
2291 // the encoding code (ie - encoding bits for the instruction)
2292 // associated with each basic boolean condition for a conditional
2293 // instruction.
2294 //
2295 // Instructions specify two basic values for encoding.  Again, a
2296 // function is available to check if the constant displacement is an
2297 // oop. They use the ins_encode keyword to specify their encoding
2298 // classes (which must be a sequence of enc_class names, and their
2299 // parameters, specified in the encoding block), and they use the
2300 // opcode keyword to specify, in order, their primary, secondary, and
2301 // tertiary opcode.  Only the opcode sections which a particular
2302 // instruction needs for encoding need to be specified.
2303 encode %{
2304   // Build emit functions for each basic byte or larger field in the
2305   // intel encoding scheme (opcode, rm, sib, immediate), and call them
2306   // from C++ code in the enc_class source block.  Emit functions will
2307   // live in the main source block for now.  In future, we can
2308   // generalize this by adding a syntax that specifies the sizes of
2309   // fields in an order, so that the adlc can build the emit functions
2310   // automagically
2311 
2312   // catch all for unimplemented encodings
  // catch all for unimplemented encodings
  // Emits a runtime "unimplemented" trap so unfinished rules fail loudly.
  enc_class enc_unimplemented %{
    MacroAssembler _masm(&cbuf);
    __ unimplemented("C2 catch all");    
  %}
2317 
  // BEGIN Non-volatile memory access
  //
  // Each encoding expands to a loadStore() call (defined in the source
  // block above), which selects the addressing mode from the memory
  // operand's base/index/scale/disp components.

  // Load byte, sign-extended to 32 bits (ldrsbw).
  enc_class aarch64_enc_ldrsbw(iRegI dst, memory mem) %{
    Register dst_reg = as_Register($dst$$reg);
    loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldrsbw, dst_reg, $mem->opcode(),
               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}

  // Load byte, sign-extended to 64 bits (ldrsb).
  enc_class aarch64_enc_ldrsb(iRegI dst, memory mem) %{
    Register dst_reg = as_Register($dst$$reg);
    loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldrsb, dst_reg, $mem->opcode(),
               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}

  // Load byte, zero-extended (ldrb), 32-bit destination.
  enc_class aarch64_enc_ldrb(iRegI dst, memory mem) %{
    Register dst_reg = as_Register($dst$$reg);
    loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldrb, dst_reg, $mem->opcode(),
               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}

  // Load byte, zero-extended (ldrb), 64-bit destination.
  enc_class aarch64_enc_ldrb(iRegL dst, memory mem) %{
    Register dst_reg = as_Register($dst$$reg);
    loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldrb, dst_reg, $mem->opcode(),
               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}

  // Load halfword, sign-extended to 32 bits (ldrshw).
  enc_class aarch64_enc_ldrshw(iRegI dst, memory mem) %{
    Register dst_reg = as_Register($dst$$reg);
    loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldrshw, dst_reg, $mem->opcode(),
               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}

  // Load halfword, sign-extended to 64 bits (ldrsh).
  enc_class aarch64_enc_ldrsh(iRegI dst, memory mem) %{
    Register dst_reg = as_Register($dst$$reg);
    loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldrsh, dst_reg, $mem->opcode(),
               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}

  // Load halfword, zero-extended (ldrh), 32-bit destination.
  enc_class aarch64_enc_ldrh(iRegI dst, memory mem) %{
    Register dst_reg = as_Register($dst$$reg);
    loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldrh, dst_reg, $mem->opcode(),
               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}

  // Load halfword, zero-extended (ldrh), 64-bit destination.
  enc_class aarch64_enc_ldrh(iRegL dst, memory mem) %{
    Register dst_reg = as_Register($dst$$reg);
    loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldrh, dst_reg, $mem->opcode(),
               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}

  // Load 32-bit word (ldrw), 32-bit destination.
  enc_class aarch64_enc_ldrw(iRegI dst, memory mem) %{
    Register dst_reg = as_Register($dst$$reg);
    loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldrw, dst_reg, $mem->opcode(),
               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}

  // Load 32-bit word (ldrw) into a long register.
  enc_class aarch64_enc_ldrw(iRegL dst, memory mem) %{
    Register dst_reg = as_Register($dst$$reg);
    loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldrw, dst_reg, $mem->opcode(),
               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}

  // Load 32-bit word, sign-extended to 64 bits (ldrsw).
  enc_class aarch64_enc_ldrsw(iRegL dst, memory mem) %{
    Register dst_reg = as_Register($dst$$reg);
    loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldrsw, dst_reg, $mem->opcode(),
               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}

  // Load 64-bit doubleword (ldr).
  enc_class aarch64_enc_ldr(iRegL dst, memory mem) %{
    Register dst_reg = as_Register($dst$$reg);
    loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldr, dst_reg, $mem->opcode(),
               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}

  // Load single-precision float into an S register (ldrs).
  enc_class aarch64_enc_ldrs(vRegF dst, memory mem) %{
    FloatRegister dst_reg = as_FloatRegister($dst$$reg);
    loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldrs, dst_reg, $mem->opcode(),
               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}

  // Load double-precision float into a D register (ldrd).
  enc_class aarch64_enc_ldrd(vRegD dst, memory mem) %{
    FloatRegister dst_reg = as_FloatRegister($dst$$reg);
    loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldrd, dst_reg, $mem->opcode(),
               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}

  // Vector load, S variant (32-bit).
  enc_class aarch64_enc_ldrvS(vecD dst, memory mem) %{
    FloatRegister dst_reg = as_FloatRegister($dst$$reg);
    loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldr, dst_reg, MacroAssembler::S,
       $mem->opcode(), as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}

  // Vector load, D variant (64-bit).
  enc_class aarch64_enc_ldrvD(vecD dst, memory mem) %{
    FloatRegister dst_reg = as_FloatRegister($dst$$reg);
    loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldr, dst_reg, MacroAssembler::D,
       $mem->opcode(), as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}

  // Vector load, Q variant (128-bit).
  enc_class aarch64_enc_ldrvQ(vecX dst, memory mem) %{
    FloatRegister dst_reg = as_FloatRegister($dst$$reg);
    loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldr, dst_reg, MacroAssembler::Q,
       $mem->opcode(), as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}
2421 
  // Store byte (strb).
  enc_class aarch64_enc_strb(iRegI src, memory mem) %{
    Register src_reg = as_Register($src$$reg);
    loadStore(MacroAssembler(&cbuf), &MacroAssembler::strb, src_reg, $mem->opcode(),
               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}

  // Store zero byte: uses the zero register, no source operand needed.
  enc_class aarch64_enc_strb0(memory mem) %{
    MacroAssembler _masm(&cbuf);
    loadStore(_masm, &MacroAssembler::strb, zr, $mem->opcode(),
               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}

  // Store zero byte preceded by a StoreStore barrier.
  enc_class aarch64_enc_strb0_ordered(memory mem) %{
    MacroAssembler _masm(&cbuf);
    __ membar(Assembler::StoreStore);
    loadStore(_masm, &MacroAssembler::strb, zr, $mem->opcode(),
               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}

  // Store halfword (strh).
  enc_class aarch64_enc_strh(iRegI src, memory mem) %{
    Register src_reg = as_Register($src$$reg);
    loadStore(MacroAssembler(&cbuf), &MacroAssembler::strh, src_reg, $mem->opcode(),
               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}

  // Store zero halfword via the zero register.
  enc_class aarch64_enc_strh0(memory mem) %{
    MacroAssembler _masm(&cbuf);
    loadStore(_masm, &MacroAssembler::strh, zr, $mem->opcode(),
               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}

  // Store 32-bit word (strw).
  enc_class aarch64_enc_strw(iRegI src, memory mem) %{
    Register src_reg = as_Register($src$$reg);
    loadStore(MacroAssembler(&cbuf), &MacroAssembler::strw, src_reg, $mem->opcode(),
               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}

  // Store zero word via the zero register.
  enc_class aarch64_enc_strw0(memory mem) %{
    MacroAssembler _masm(&cbuf);
    loadStore(_masm, &MacroAssembler::strw, zr, $mem->opcode(),
               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}

  // Store 64-bit doubleword (str).
  enc_class aarch64_enc_str(iRegL src, memory mem) %{
    Register src_reg = as_Register($src$$reg);
    // we sometimes get asked to store the stack pointer into the
    // current thread -- we cannot do that directly on AArch64
    if (src_reg == r31_sp) {
      MacroAssembler _masm(&cbuf);
      assert(as_Register($mem$$base) == rthread, "unexpected store for sp");
      // Copy sp into a scratch register first; str cannot encode sp as Rt.
      __ mov(rscratch2, sp);
      src_reg = rscratch2;
    }
    loadStore(MacroAssembler(&cbuf), &MacroAssembler::str, src_reg, $mem->opcode(),
               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}

  // Store zero doubleword via the zero register.
  enc_class aarch64_enc_str0(memory mem) %{
    MacroAssembler _masm(&cbuf);
    loadStore(_masm, &MacroAssembler::str, zr, $mem->opcode(),
               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}

  // Store single-precision float from an S register (strs).
  enc_class aarch64_enc_strs(vRegF src, memory mem) %{
    FloatRegister src_reg = as_FloatRegister($src$$reg);
    loadStore(MacroAssembler(&cbuf), &MacroAssembler::strs, src_reg, $mem->opcode(),
               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}

  // Store double-precision float from a D register (strd).
  enc_class aarch64_enc_strd(vRegD src, memory mem) %{
    FloatRegister src_reg = as_FloatRegister($src$$reg);
    loadStore(MacroAssembler(&cbuf), &MacroAssembler::strd, src_reg, $mem->opcode(),
               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}

  // Vector store, S variant (32-bit).
  enc_class aarch64_enc_strvS(vecD src, memory mem) %{
    FloatRegister src_reg = as_FloatRegister($src$$reg);
    loadStore(MacroAssembler(&cbuf), &MacroAssembler::str, src_reg, MacroAssembler::S,
       $mem->opcode(), as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}

  // Vector store, D variant (64-bit).
  enc_class aarch64_enc_strvD(vecD src, memory mem) %{
    FloatRegister src_reg = as_FloatRegister($src$$reg);
    loadStore(MacroAssembler(&cbuf), &MacroAssembler::str, src_reg, MacroAssembler::D,
       $mem->opcode(), as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}

  // Vector store, Q variant (128-bit).
  enc_class aarch64_enc_strvQ(vecX src, memory mem) %{
    FloatRegister src_reg = as_FloatRegister($src$$reg);
    loadStore(MacroAssembler(&cbuf), &MacroAssembler::str, src_reg, MacroAssembler::Q,
       $mem->opcode(), as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}
2514 
2515   // END Non-volatile memory access
2516 
2517   // this encoding writes the address of the first instruction in the
2518   // call sequence for the runtime call into the anchor pc slot. this
2519   // address allows the runtime to i) locate the code buffer for the
2520   // caller (any address in the buffer would do) and ii) find the oop
2521   // map associated with the call (has to address the instruction
2522   // following the call). note that we have to store the address which
2523   // follows the actual call.
2524   // 
2525   // the offset from the current pc can be computed by considering
2526   // what gets generated between this point up to and including the
2527   // call. it looks like this
2528   //
2529   //   movz xscratch1 0xnnnn        <-- current pc is here
2530   //   movk xscratch1 0xnnnn
2531   //   movk xscratch1 0xnnnn
2532   //   str xscratch1, [xthread,#anchor_pc_off]
2533   //   mov xscratch2, sp
  //   str xscratch2, [xthread,#anchor_sp_off]
2535   //   mov x0, x1
2536   //   . . .
2537   //   mov xn-1, xn
2538   //   mov xn, thread            <-- always passed
2539   //   mov xn+1, rfp             <-- optional iff primary == 1
2540   //   movz xscratch1 0xnnnn
2541   //   movk xscratch1 0xnnnn
2542   //   movk xscratch1 0xnnnn
2543   //   blr  xscratch1
2544   //   . . .
2545   //
2546   // where the called routine has n args (including the thread and,
2547   // possibly the stub's caller return address currently in rfp).  we
2548   // can compute n by looking at the number of args passed into the
  // stub. we assert that nargs is <= 8.
2550   //
2551   // so the offset we need to add to the pc (in 32-bit words) is
2552   //   3 +        <-- load 48-bit constant return pc
2553   //   1 +        <-- write anchor pc
2554   //   1 +        <-- copy sp
2555   //   1 +        <-- write anchor sp
2556   //   nargs +    <-- java stub arg count
2557   //   1 +        <-- extra thread arg
2558   // [ 1 + ]      <-- optional ret address of stub caller
2559   //   3 +        <-- load 64 bit call target address
2560   //   1          <-- blr instruction
2561   //
2562   // i.e we need to add (nargs + 11) * 4 bytes or (nargs + 12) * 4 bytes
2563   //
2564 
  enc_class aarch64_enc_save_pc() %{
    // Write the post-call return pc into the thread's frame-anchor pc slot.
    // The offset computation must match the exact instruction sequence
    // described in the long comment above; if that sequence changes, the
    // "(nargs + 11) * 4" below must change with it.
    Compile* C = ra_->C;
    int nargs = C->tf()->domain()->cnt() - TypeFunc::Parms;
    // primary == 1 means the stub caller's return address (rfp) is also
    // passed, adding one more arg-shuffling mov to the sequence.
    if ($primary) { nargs++; }
    assert(nargs <= 8, "opto runtime stub has more than 8 args!");
    MacroAssembler _masm(&cbuf);
    address pc = __ pc();
    int call_offset = (nargs + 11) * 4;
    int field_offset = in_bytes(JavaThread::frame_anchor_offset()) +
                       in_bytes(JavaFrameAnchor::last_Java_pc_offset());
    __ lea(rscratch1, InternalAddress(pc + call_offset));
    __ str(rscratch1, Address(rthread, field_offset));
  %}
2578 
2579   // volatile loads and stores
2580 
  // Release store of a byte (stlrb) via the MOV_VOLATILE helper macro.
  enc_class aarch64_enc_stlrb(iRegI src, memory mem) %{
    MOV_VOLATILE(as_Register($src$$reg), $mem$$base, $mem$$index, $mem$$scale, $mem$$disp,
                 rscratch1, stlrb);
    // CPUs flagged CPU_DMB_ATOMICS get a trailing full barrier after the
    // release store — presumably a per-CPU ordering workaround; confirm.
    if (VM_Version::cpu_cpuFeatures() & VM_Version::CPU_DMB_ATOMICS)
      __ dmb(__ ISH);
  %}
2587 
  // Release store of a halfword (stlrh).
  enc_class aarch64_enc_stlrh(iRegI src, memory mem) %{
    MOV_VOLATILE(as_Register($src$$reg), $mem$$base, $mem$$index, $mem$$scale, $mem$$disp,
                 rscratch1, stlrh);
    // Trailing barrier for CPU_DMB_ATOMICS CPUs (see stlrb above).
    if (VM_Version::cpu_cpuFeatures() & VM_Version::CPU_DMB_ATOMICS)
      __ dmb(__ ISH);
  %}
2594 
  // Release store of a 32-bit word (stlrw).
  enc_class aarch64_enc_stlrw(iRegI src, memory mem) %{
    MOV_VOLATILE(as_Register($src$$reg), $mem$$base, $mem$$index, $mem$$scale, $mem$$disp,
                 rscratch1, stlrw);
    // Trailing barrier for CPU_DMB_ATOMICS CPUs (see stlrb above).
    if (VM_Version::cpu_cpuFeatures() & VM_Version::CPU_DMB_ATOMICS)
      __ dmb(__ ISH);
  %}
2601 
2602 
  // Acquiring load of a byte, sign-extended to 32 bits.
  enc_class aarch64_enc_ldarsbw(iRegI dst, memory mem) %{
    Register dst_reg = as_Register($dst$$reg);
    MOV_VOLATILE(dst_reg, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp,
             rscratch1, ldarb);
    __ sxtbw(dst_reg, dst_reg);
  %}
2609 
  // Acquiring load of a byte, sign-extended to 64 bits.
  enc_class aarch64_enc_ldarsb(iRegL dst, memory mem) %{
    Register dst_reg = as_Register($dst$$reg);
    MOV_VOLATILE(dst_reg, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp,
             rscratch1, ldarb);
    __ sxtb(dst_reg, dst_reg);
  %}
2616 
  // Acquiring load of a byte, zero-extended (int destination).
  enc_class aarch64_enc_ldarbw(iRegI dst, memory mem) %{
    MOV_VOLATILE(as_Register($dst$$reg), $mem$$base, $mem$$index, $mem$$scale, $mem$$disp,
             rscratch1, ldarb);
  %}
2621 
  // Acquiring load of a byte, zero-extended (long destination).
  enc_class aarch64_enc_ldarb(iRegL dst, memory mem) %{
    MOV_VOLATILE(as_Register($dst$$reg), $mem$$base, $mem$$index, $mem$$scale, $mem$$disp,
             rscratch1, ldarb);
  %}
2626 
  // Acquiring load of a halfword, sign-extended to 32 bits.
  enc_class aarch64_enc_ldarshw(iRegI dst, memory mem) %{
    Register dst_reg = as_Register($dst$$reg);
    MOV_VOLATILE(dst_reg, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp,
             rscratch1, ldarh);
    __ sxthw(dst_reg, dst_reg);
  %}
2633 
  // Acquiring load of a halfword, sign-extended to 64 bits.
  enc_class aarch64_enc_ldarsh(iRegL dst, memory mem) %{
    Register dst_reg = as_Register($dst$$reg);
    MOV_VOLATILE(dst_reg, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp,
             rscratch1, ldarh);
    __ sxth(dst_reg, dst_reg);
  %}
2640 
  // Acquiring load of a halfword, zero-extended (int destination).
  enc_class aarch64_enc_ldarhw(iRegI dst, memory mem) %{
    MOV_VOLATILE(as_Register($dst$$reg), $mem$$base, $mem$$index, $mem$$scale, $mem$$disp,
             rscratch1, ldarh);
  %}
2645 
  // Acquiring load of a halfword, zero-extended (long destination).
  enc_class aarch64_enc_ldarh(iRegL dst, memory mem) %{
    MOV_VOLATILE(as_Register($dst$$reg), $mem$$base, $mem$$index, $mem$$scale, $mem$$disp,
             rscratch1, ldarh);
  %}
2650 
  // Acquiring load of a 32-bit word (int destination).
  enc_class aarch64_enc_ldarw(iRegI dst, memory mem) %{
    MOV_VOLATILE(as_Register($dst$$reg), $mem$$base, $mem$$index, $mem$$scale, $mem$$disp,
             rscratch1, ldarw);
  %}
2655 
  // Acquiring load of a 32-bit word, zero-extended (long destination).
  // NOTE(review): same enc_class name as the iRegI variant above —
  // presumably deliberate (signatures differ); verify ADLC accepts this.
  enc_class aarch64_enc_ldarw(iRegL dst, memory mem) %{
    MOV_VOLATILE(as_Register($dst$$reg), $mem$$base, $mem$$index, $mem$$scale, $mem$$disp,
             rscratch1, ldarw);
  %}
2660 
  // Acquiring load of a 64-bit doubleword.
  enc_class aarch64_enc_ldar(iRegL dst, memory mem) %{
    MOV_VOLATILE(as_Register($dst$$reg), $mem$$base, $mem$$index, $mem$$scale, $mem$$disp,
             rscratch1, ldar);
  %}
2665 
  // Acquiring load of a 32-bit word into rscratch1, then moved into the
  // float register (there is no FP load-acquire instruction).
  enc_class aarch64_enc_fldars(vRegF dst, memory mem) %{
    MOV_VOLATILE(rscratch1, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp,
             rscratch1, ldarw);
    __ fmovs(as_FloatRegister($dst$$reg), rscratch1);
  %}
2671 
  // Acquiring load of a 64-bit doubleword into rscratch1, then moved into
  // the double register.
  enc_class aarch64_enc_fldard(vRegD dst, memory mem) %{
    MOV_VOLATILE(rscratch1, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp,
             rscratch1, ldar);
    __ fmovd(as_FloatRegister($dst$$reg), rscratch1);
  %}
2677 
  // Release store of a 64-bit doubleword (stlr).
  enc_class aarch64_enc_stlr(iRegL src, memory mem) %{
    Register src_reg = as_Register($src$$reg);
    // we sometimes get asked to store the stack pointer into the
    // current thread -- we cannot do that directly on AArch64
    if (src_reg == r31_sp) {
        MacroAssembler _masm(&cbuf);
      assert(as_Register($mem$$base) == rthread, "unexpected store for sp");
      // Copy sp through rscratch2 since stlr cannot encode sp directly.
      __ mov(rscratch2, sp);
      src_reg = rscratch2;
    }
    MOV_VOLATILE(src_reg, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp,
                 rscratch1, stlr);
    // Trailing barrier for CPU_DMB_ATOMICS CPUs (see stlrb above).
    if (VM_Version::cpu_cpuFeatures() & VM_Version::CPU_DMB_ATOMICS)
      __ dmb(__ ISH);
  %}
2693 
  // Release store of a float: move to rscratch2 first, then stlrw
  // (there is no FP store-release instruction).
  enc_class aarch64_enc_fstlrs(vRegF src, memory mem) %{
    {
      // Inner scope: MOV_VOLATILE below declares its own assembler.
      MacroAssembler _masm(&cbuf);
      FloatRegister src_reg = as_FloatRegister($src$$reg);
      __ fmovs(rscratch2, src_reg);
    }
    MOV_VOLATILE(rscratch2, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp,
                 rscratch1, stlrw);
    // Trailing barrier for CPU_DMB_ATOMICS CPUs (see stlrb above).
    if (VM_Version::cpu_cpuFeatures() & VM_Version::CPU_DMB_ATOMICS)
      __ dmb(__ ISH);
  %}
2705 
  // Release store of a double: move to rscratch2 first, then stlr.
  enc_class aarch64_enc_fstlrd(vRegD src, memory mem) %{
    {
      // Inner scope: MOV_VOLATILE below declares its own assembler.
      MacroAssembler _masm(&cbuf);
      FloatRegister src_reg = as_FloatRegister($src$$reg);
      __ fmovd(rscratch2, src_reg);
    }
    MOV_VOLATILE(rscratch2, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp,
                 rscratch1, stlr);
    // Trailing barrier for CPU_DMB_ATOMICS CPUs (see stlrb above).
    if (VM_Version::cpu_cpuFeatures() & VM_Version::CPU_DMB_ATOMICS)
      __ dmb(__ ISH);
  %}
2717 
2718   // synchronized read/update encodings
2719 
  // 64-bit load-acquire-exclusive. ldaxr only takes a bare base register,
  // so any index/displacement is first folded into rscratch1 with lea.
  enc_class aarch64_enc_ldaxr(iRegL dst, memory mem) %{
    MacroAssembler _masm(&cbuf);
    Register dst_reg = as_Register($dst$$reg);
    Register base = as_Register($mem$$base);
    int index = $mem$$index;
    int scale = $mem$$scale;
    int disp = $mem$$disp;
    if (index == -1) {
       if (disp != 0) {      
        __ lea(rscratch1, Address(base, disp));
        __ ldaxr(dst_reg, rscratch1);
      } else {
        // TODO
        // should we ever get anything other than this case?
        __ ldaxr(dst_reg, base);
      }
    } else {
      Register index_reg = as_Register(index);
      if (disp == 0) {
        __ lea(rscratch1, Address(base, index_reg, Address::lsl(scale)));
        __ ldaxr(dst_reg, rscratch1);
      } else {
        // disp and scaled index cannot be combined in one Address; two leas.
        __ lea(rscratch1, Address(base, disp));
        __ lea(rscratch1, Address(rscratch1, index_reg, Address::lsl(scale)));
        __ ldaxr(dst_reg, rscratch1);
      }
    }
  %}
2748 
  // 64-bit store-release-exclusive. The address is folded into rscratch2
  // as needed; the exclusive-store status lands in rscratch1.
  enc_class aarch64_enc_stlxr(iRegLNoSp src, memory mem) %{
    MacroAssembler _masm(&cbuf);
    Register src_reg = as_Register($src$$reg);
    Register base = as_Register($mem$$base);
    int index = $mem$$index;
    int scale = $mem$$scale;
    int disp = $mem$$disp;
    if (index == -1) {
       if (disp != 0) {      
        __ lea(rscratch2, Address(base, disp));
        __ stlxr(rscratch1, src_reg, rscratch2);
      } else {
        // TODO
        // should we ever get anything other than this case?
        __ stlxr(rscratch1, src_reg, base);
      }
    } else {
      Register index_reg = as_Register(index);
      if (disp == 0) {
        __ lea(rscratch2, Address(base, index_reg, Address::lsl(scale)));
        __ stlxr(rscratch1, src_reg, rscratch2);
      } else {
        // disp and scaled index cannot be combined in one Address; two leas.
        __ lea(rscratch2, Address(base, disp));
        __ lea(rscratch2, Address(rscratch2, index_reg, Address::lsl(scale)));
        __ stlxr(rscratch1, src_reg, rscratch2);
      }
    }
    // stlxr writes 0 to rscratch1 on success; compare so EQ means "stored".
    __ cmpw(rscratch1, zr);
  %}
2778 
  // 64-bit compare-and-swap with release-only ordering; the _acq variant
  // below adds acquire semantics for lock entry.
  enc_class aarch64_enc_cmpxchg(memory mem, iRegLNoSp oldval, iRegLNoSp newval) %{
    MacroAssembler _masm(&cbuf);
    // Only a bare [base] address is legal for CAS here.
    guarantee($mem$$index == -1 && $mem$$disp == 0, "impossible encoding");
    __ cmpxchg($mem$$base$$Register, $oldval$$Register, $newval$$Register,
               Assembler::xword, /*acquire*/ false, /*release*/ true);
  %}
2785 
  // 32-bit compare-and-swap with release-only ordering.
  enc_class aarch64_enc_cmpxchgw(memory mem, iRegINoSp oldval, iRegINoSp newval) %{
    MacroAssembler _masm(&cbuf);
    // Only a bare [base] address is legal for CAS here.
    guarantee($mem$$index == -1 && $mem$$disp == 0, "impossible encoding");
    __ cmpxchg($mem$$base$$Register, $oldval$$Register, $newval$$Register,
               Assembler::word, /*acquire*/ false, /*release*/ true);
  %}
2792 
2793 
2794   // The only difference between aarch64_enc_cmpxchg and
2795   // aarch64_enc_cmpxchg_acq is that we use load-acquire in the
2796   // CompareAndSwap sequence to serve as a barrier on acquiring a
2797   // lock.
  // 64-bit CAS with both acquire and release ordering (see comment above).
  enc_class aarch64_enc_cmpxchg_acq(memory mem, iRegLNoSp oldval, iRegLNoSp newval) %{
    MacroAssembler _masm(&cbuf);
    // Only a bare [base] address is legal for CAS here.
    guarantee($mem$$index == -1 && $mem$$disp == 0, "impossible encoding");
    __ cmpxchg($mem$$base$$Register, $oldval$$Register, $newval$$Register,
               Assembler::xword, /*acquire*/ true, /*release*/ true);
  %}
2804 
  // 32-bit CAS with both acquire and release ordering.
  enc_class aarch64_enc_cmpxchgw_acq(memory mem, iRegINoSp oldval, iRegINoSp newval) %{
    MacroAssembler _masm(&cbuf);
    // Only a bare [base] address is legal for CAS here.
    guarantee($mem$$index == -1 && $mem$$disp == 0, "impossible encoding");
    __ cmpxchg($mem$$base$$Register, $oldval$$Register, $newval$$Register,
               Assembler::word, /*acquire*/ true, /*release*/ true);
  %}
2811 
2812   // auxiliary used for CompareAndSwapX to set result register
2813   enc_class aarch64_enc_cset_eq(iRegINoSp res) %{
2814     MacroAssembler _masm(&cbuf);
2815     Register res_reg = as_Register($res$$reg);
2816     __ cset(res_reg, Assembler::EQ);
2817   %}
2818 
2819   // prefetch encodings
2820 
  // Prefetch for read, L1 keep (PLDL1KEEP). Folds disp+index addressing
  // through rscratch1 since prfm cannot encode all combinations.
  enc_class aarch64_enc_prefetchr(memory mem) %{
    MacroAssembler _masm(&cbuf);
    Register base = as_Register($mem$$base);
    int index = $mem$$index;
    int scale = $mem$$scale;
    int disp = $mem$$disp;
    if (index == -1) {
      __ prfm(Address(base, disp), PLDL1KEEP);
    } else {
      Register index_reg = as_Register(index);
      if (disp == 0) {
        __ prfm(Address(base, index_reg, Address::lsl(scale)), PLDL1KEEP);
      } else {
        __ lea(rscratch1, Address(base, disp));
        __ prfm(Address(rscratch1, index_reg, Address::lsl(scale)), PLDL1KEEP);
      }
    }
  %}
2839 
  // Prefetch for write, L1 keep (PSTL1KEEP).
  enc_class aarch64_enc_prefetchw(memory mem) %{
    MacroAssembler _masm(&cbuf);
    Register base = as_Register($mem$$base);
    int index = $mem$$index;
    int scale = $mem$$scale;
    int disp = $mem$$disp;
    if (index == -1) {
      __ prfm(Address(base, disp), PSTL1KEEP);
    } else {
      Register index_reg = as_Register(index);
      if (disp == 0) {
        __ prfm(Address(base, index_reg, Address::lsl(scale)), PSTL1KEEP);
      } else {
        __ lea(rscratch1, Address(base, disp));
        __ prfm(Address(rscratch1, index_reg, Address::lsl(scale)), PSTL1KEEP);
      }
    }
  %}
2858 
  // Non-temporal prefetch for write, streaming (PSTL1STRM).
  enc_class aarch64_enc_prefetchnta(memory mem) %{
    MacroAssembler _masm(&cbuf);
    Register base = as_Register($mem$$base);
    int index = $mem$$index;
    int scale = $mem$$scale;
    int disp = $mem$$disp;
    if (index == -1) {
      __ prfm(Address(base, disp), PSTL1STRM);
    } else {
      Register index_reg = as_Register(index);
      if (disp == 0) {
        __ prfm(Address(base, index_reg, Address::lsl(scale)), PSTL1STRM);
        // NOTE(review): nop appears only in this branch — purpose unclear
        // (possibly instruction-size padding); confirm before removing.
        __ nop();
      } else {
        __ lea(rscratch1, Address(base, disp));
        __ prfm(Address(rscratch1, index_reg, Address::lsl(scale)), PSTL1STRM);
      }
    }
  %}
2878 
  // mov encodings
2880 
  // Materialize a 32-bit immediate; zero is handled by copying zr.
  enc_class aarch64_enc_movw_imm(iRegI dst, immI src) %{
    MacroAssembler _masm(&cbuf);
    u_int32_t con = (u_int32_t)$src$$constant;
    Register dst_reg = as_Register($dst$$reg);
    if (con == 0) {
      __ movw(dst_reg, zr);
    } else {
      __ movw(dst_reg, con);
    }
  %}
2891 
  // Materialize a 64-bit immediate; zero is handled by copying zr.
  enc_class aarch64_enc_mov_imm(iRegL dst, immL src) %{
    MacroAssembler _masm(&cbuf);
    Register dst_reg = as_Register($dst$$reg);
    u_int64_t con = (u_int64_t)$src$$constant;
    if (con == 0) {
      __ mov(dst_reg, zr);
    } else {
      __ mov(dst_reg, con);
    }
  %}
2902 
  // Materialize a pointer constant: relocation-aware paths for oops and
  // metadata, adrp+add (or plain mov for tiny addresses) otherwise.
  enc_class aarch64_enc_mov_p(iRegP dst, immP src) %{
    MacroAssembler _masm(&cbuf);
    Register dst_reg = as_Register($dst$$reg);
    address con = (address)$src$$constant;
    if (con == NULL || con == (address)1) {
      // NULL and the constant 1 are handled by dedicated enc_classes below.
      ShouldNotReachHere();
    } else {
      relocInfo::relocType rtype = $src->constant_reloc();
      if (rtype == relocInfo::oop_type) {
        __ movoop(dst_reg, (jobject)con, /*immediate*/true);
      } else if (rtype == relocInfo::metadata_type) {
        __ mov_metadata(dst_reg, (Metadata*)con);
      } else {
        assert(rtype == relocInfo::none, "unexpected reloc type");
        if (con < (address)(uintptr_t)os::vm_page_size()) {
          // Addresses within the first page fit a plain mov.
          __ mov(dst_reg, con);
        } else {
          unsigned long offset;
          __ adrp(dst_reg, con, offset);
          __ add(dst_reg, dst_reg, offset);
        }
      }
    }
  %}
2927 
2928   enc_class aarch64_enc_mov_p0(iRegP dst, immP0 src) %{
2929     MacroAssembler _masm(&cbuf);
2930     Register dst_reg = as_Register($dst$$reg);
2931     __ mov(dst_reg, zr);
2932   %}
2933 
2934   enc_class aarch64_enc_mov_p1(iRegP dst, immP_1 src) %{
2935     MacroAssembler _masm(&cbuf);
2936     Register dst_reg = as_Register($dst$$reg);
2937     __ mov(dst_reg, (u_int64_t)1);
2938   %}
2939 
  // Load the safepoint polling page address with a poll_type relocation.
  enc_class aarch64_enc_mov_poll_page(iRegP dst, immPollPage src) %{
    MacroAssembler _masm(&cbuf);
    address page = (address)$src$$constant;
    Register dst_reg = as_Register($dst$$reg);
    unsigned long off;
    __ adrp(dst_reg, Address(page, relocInfo::poll_type), off);
    // The poll page is page-aligned, so adrp alone must reach it exactly.
    assert(off == 0, "assumed offset == 0");
  %}
2948 
  // Load the card-table byte map base via the MacroAssembler helper.
  enc_class aarch64_enc_mov_byte_map_base(iRegP dst, immByteMapBase src) %{
    MacroAssembler _masm(&cbuf);
    __ load_byte_map_base($dst$$Register);
  %}
2953 
  // Materialize a narrow (compressed) oop constant.
  enc_class aarch64_enc_mov_n(iRegN dst, immN src) %{
    MacroAssembler _masm(&cbuf);
    Register dst_reg = as_Register($dst$$reg);
    address con = (address)$src$$constant;
    if (con == NULL) {
      // The null case is handled by aarch64_enc_mov_n0 below.
      ShouldNotReachHere();
    } else {
      relocInfo::relocType rtype = $src->constant_reloc();
      assert(rtype == relocInfo::oop_type, "unexpected reloc type");
      __ set_narrow_oop(dst_reg, (jobject)con);
    }
  %}
2966 
2967   enc_class aarch64_enc_mov_n0(iRegN dst, immN0 src) %{
2968     MacroAssembler _masm(&cbuf);
2969     Register dst_reg = as_Register($dst$$reg);
2970     __ mov(dst_reg, zr);
2971   %}
2972 
  // Materialize a narrow (compressed) klass constant.
  enc_class aarch64_enc_mov_nk(iRegN dst, immNKlass src) %{
    MacroAssembler _masm(&cbuf);
    Register dst_reg = as_Register($dst$$reg);
    address con = (address)$src$$constant;
    if (con == NULL) {
      ShouldNotReachHere();
    } else {
      relocInfo::relocType rtype = $src->constant_reloc();
      assert(rtype == relocInfo::metadata_type, "unexpected reloc type");
      __ set_narrow_klass(dst_reg, (Klass *)con);
    }
  %}
2985 
2986   // arithmetic encodings
2987 
  // 32-bit add/subtract of an immediate. Subtract is expressed as adding
  // the negated constant, then a negative result constant flips back to
  // the opposite opcode so the encoded immediate is always non-negative.
  enc_class aarch64_enc_addsubw_imm(iRegI dst, iRegI src1, immIAddSub src2) %{
    MacroAssembler _masm(&cbuf);
    Register dst_reg = as_Register($dst$$reg);
    Register src_reg = as_Register($src1$$reg);
    int32_t con = (int32_t)$src2$$constant;
    // add has primary == 0, subtract has primary == 1
    if ($primary) { con = -con; }
    if (con < 0) {
      __ subw(dst_reg, src_reg, -con);
    } else {
      __ addw(dst_reg, src_reg, con);
    }
  %}
3001 
  // 64-bit add/subtract of an immediate; same negation scheme as the
  // 32-bit variant above. The constant is narrowed to int32_t —
  // presumably immLAddSub guarantees it fits; confirm the operand range.
  enc_class aarch64_enc_addsub_imm(iRegL dst, iRegL src1, immLAddSub src2) %{
    MacroAssembler _masm(&cbuf);
    Register dst_reg = as_Register($dst$$reg);
    Register src_reg = as_Register($src1$$reg);
    int32_t con = (int32_t)$src2$$constant;
    // add has primary == 0, subtract has primary == 1
    if ($primary) { con = -con; }
    if (con < 0) {
      __ sub(dst_reg, src_reg, -con);
    } else {
      __ add(dst_reg, src_reg, con);
    }
  %}
3015 
3016   enc_class aarch64_enc_divw(iRegI dst, iRegI src1, iRegI src2) %{
3017     MacroAssembler _masm(&cbuf);
3018    Register dst_reg = as_Register($dst$$reg);
3019    Register src1_reg = as_Register($src1$$reg);
3020    Register src2_reg = as_Register($src2$$reg);
3021     __ corrected_idivl(dst_reg, src1_reg, src2_reg, false, rscratch1);
3022   %}
3023 
3024   enc_class aarch64_enc_div(iRegI dst, iRegI src1, iRegI src2) %{
3025     MacroAssembler _masm(&cbuf);
3026    Register dst_reg = as_Register($dst$$reg);
3027    Register src1_reg = as_Register($src1$$reg);
3028    Register src2_reg = as_Register($src2$$reg);
3029     __ corrected_idivq(dst_reg, src1_reg, src2_reg, false, rscratch1);
3030   %}
3031 
3032   enc_class aarch64_enc_modw(iRegI dst, iRegI src1, iRegI src2) %{
3033     MacroAssembler _masm(&cbuf);
3034    Register dst_reg = as_Register($dst$$reg);
3035    Register src1_reg = as_Register($src1$$reg);
3036    Register src2_reg = as_Register($src2$$reg);
3037     __ corrected_idivl(dst_reg, src1_reg, src2_reg, true, rscratch1);
3038   %}
3039 
3040   enc_class aarch64_enc_mod(iRegI dst, iRegI src1, iRegI src2) %{
3041     MacroAssembler _masm(&cbuf);
3042    Register dst_reg = as_Register($dst$$reg);
3043    Register src1_reg = as_Register($src1$$reg);
3044    Register src2_reg = as_Register($src2$$reg);
3045     __ corrected_idivq(dst_reg, src1_reg, src2_reg, true, rscratch1);
3046   %}
3047 
3048   // compare instruction encodings
3049 
3050   enc_class aarch64_enc_cmpw(iRegI src1, iRegI src2) %{
3051     MacroAssembler _masm(&cbuf);
3052     Register reg1 = as_Register($src1$$reg);
3053     Register reg2 = as_Register($src2$$reg);
3054     __ cmpw(reg1, reg2);
3055   %}
3056 
  // 32-bit compare against an add/sub-range immediate, emitted as a
  // flag-setting subtract (or add of the negated value) into zr.
  enc_class aarch64_enc_cmpw_imm_addsub(iRegI src1, immIAddSub src2) %{
    MacroAssembler _masm(&cbuf);
    Register reg = as_Register($src1$$reg);
    int32_t val = $src2$$constant;
    if (val >= 0) {
      __ subsw(zr, reg, val);
    } else {
      __ addsw(zr, reg, -val);
    }
  %}
3067 
  // 32-bit compare against an arbitrary immediate: materialize it into
  // rscratch1 first, then do a register compare.
  enc_class aarch64_enc_cmpw_imm(iRegI src1, immI src2) %{
    MacroAssembler _masm(&cbuf);
    Register reg1 = as_Register($src1$$reg);
    u_int32_t val = (u_int32_t)$src2$$constant;
    __ movw(rscratch1, val);
    __ cmpw(reg1, rscratch1);
  %}
3075 
3076   enc_class aarch64_enc_cmp(iRegL src1, iRegL src2) %{
3077     MacroAssembler _masm(&cbuf);
3078     Register reg1 = as_Register($src1$$reg);
3079     Register reg2 = as_Register($src2$$reg);
3080     __ cmp(reg1, reg2);
3081   %}
3082 
  // 64-bit compare against a 12-bit-range immediate, as a flag-setting
  // subtract/add into zr. Long.MIN_VALUE cannot be negated, so it is
  // materialized into rscratch1 instead.
  enc_class aarch64_enc_cmp_imm_addsub(iRegL src1, immL12 src2) %{
    MacroAssembler _masm(&cbuf);
    Register reg = as_Register($src1$$reg);
    int64_t val = $src2$$constant;
    if (val >= 0) {
      __ subs(zr, reg, val);
    } else if (val != -val) {
      __ adds(zr, reg, -val);
    } else {
    // aargh, Long.MIN_VALUE is a special case
      __ orr(rscratch1, zr, (u_int64_t)val);
      __ subs(zr, reg, rscratch1);
    }
  %}
3097 
  // 64-bit compare against an arbitrary immediate: materialize it into
  // rscratch1 first, then do a register compare.
  enc_class aarch64_enc_cmp_imm(iRegL src1, immL src2) %{
    MacroAssembler _masm(&cbuf);
    Register reg1 = as_Register($src1$$reg);
    u_int64_t val = (u_int64_t)$src2$$constant;
    __ mov(rscratch1, val);
    __ cmp(reg1, rscratch1);
  %}
3105 
3106   enc_class aarch64_enc_cmpp(iRegP src1, iRegP src2) %{
3107     MacroAssembler _masm(&cbuf);
3108     Register reg1 = as_Register($src1$$reg);
3109     Register reg2 = as_Register($src2$$reg);
3110     __ cmp(reg1, reg2);
3111   %}
3112 
3113   enc_class aarch64_enc_cmpn(iRegN src1, iRegN src2) %{
3114     MacroAssembler _masm(&cbuf);
3115     Register reg1 = as_Register($src1$$reg);
3116     Register reg2 = as_Register($src2$$reg);
3117     __ cmpw(reg1, reg2);
3118   %}
3119 
3120   enc_class aarch64_enc_testp(iRegP src) %{
3121     MacroAssembler _masm(&cbuf);
3122     Register reg = as_Register($src$$reg);
3123     __ cmp(reg, zr);
3124   %}
3125 
3126   enc_class aarch64_enc_testn(iRegN src) %{
3127     MacroAssembler _masm(&cbuf);
3128     Register reg = as_Register($src$$reg);
3129     __ cmpw(reg, zr);
3130   %}
3131 
3132   enc_class aarch64_enc_b(label lbl) %{
3133     MacroAssembler _masm(&cbuf);
3134     Label *L = $lbl$$label;
3135     __ b(*L);
3136   %}
3137 
3138   enc_class aarch64_enc_br_con(cmpOp cmp, label lbl) %{
3139     MacroAssembler _masm(&cbuf);
3140     Label *L = $lbl$$label;
3141     __ br ((Assembler::Condition)$cmp$$cmpcode, *L);
3142   %}
3143 
3144   enc_class aarch64_enc_br_conU(cmpOpU cmp, label lbl) %{
3145     MacroAssembler _masm(&cbuf);
3146     Label *L = $lbl$$label;
3147     __ br ((Assembler::Condition)$cmp$$cmpcode, *L);
3148   %}
3149 
  // Slow-path subtype check: falls through on a hit, lands on 'miss'
  // otherwise. With primary == 1 the result register is zeroed on the
  // hit path before the miss label is bound.
  enc_class aarch64_enc_partial_subtype_check(iRegP sub, iRegP super, iRegP temp, iRegP result)
  %{
     Register sub_reg = as_Register($sub$$reg);
     Register super_reg = as_Register($super$$reg);
     Register temp_reg = as_Register($temp$$reg);
     Register result_reg = as_Register($result$$reg);

     Label miss;
     MacroAssembler _masm(&cbuf);
     __ check_klass_subtype_slow_path(sub_reg, super_reg, temp_reg, result_reg,
                                     NULL, &miss,
                                     /*set_cond_codes:*/ true);
     if ($primary) {
       __ mov(result_reg, zr);
     }
     __ bind(miss);
  %}
3167 
  // Static Java call. Uses a trampoline call so the target can be anywhere
  // in the address space; for real Java methods an interpreter-entry stub
  // is also emitted. A NULL return from either emission means the code
  // cache is full, which is recorded as a compile failure.
  enc_class aarch64_enc_java_static_call(method meth) %{
    MacroAssembler _masm(&cbuf);

    address mark = __ pc();
    address addr = (address)$meth$$method;
    address call;
    if (!_method) {
      // A call to a runtime wrapper, e.g. new, new_typeArray_Java, uncommon_trap.
      call = __ trampoline_call(Address(addr, relocInfo::runtime_call_type), &cbuf);
    } else if (_optimized_virtual) {
      call = __ trampoline_call(Address(addr, relocInfo::opt_virtual_call_type), &cbuf);
    } else {
      call = __ trampoline_call(Address(addr, relocInfo::static_call_type), &cbuf);
    }
    if (call == NULL) {
      ciEnv::current()->record_failure("CodeCache is full"); 
      return;
    }

    if (_method) {
      // Emit stub for static call
      address stub = CompiledStaticCall::emit_to_interp_stub(cbuf, mark);
      if (stub == NULL) {
        ciEnv::current()->record_failure("CodeCache is full"); 
        return;
      }
    }
  %}
3196 
3197   enc_class aarch64_enc_java_handle_call(method meth) %{
3198     MacroAssembler _masm(&cbuf);
3199     relocInfo::relocType reloc;
3200 
3201     // RFP is preserved across all calls, even compiled calls.
3202     // Use it to preserve SP.
3203     __ mov(rfp, sp);
3204 
3205     address mark = __ pc();
3206     address addr = (address)$meth$$method;
3207     address call;
3208     if (!_method) {
3209       // A call to a runtime wrapper, e.g. new, new_typeArray_Java, uncommon_trap.
3210       call = __ trampoline_call(Address(addr, relocInfo::runtime_call_type), &cbuf);
3211     } else if (_optimized_virtual) {
3212       call = __ trampoline_call(Address(addr, relocInfo::opt_virtual_call_type), &cbuf);
3213     } else {
3214       call = __ trampoline_call(Address(addr, relocInfo::static_call_type), &cbuf);
3215     }
3216     if (call == NULL) {
3217       ciEnv::current()->record_failure("CodeCache is full"); 
3218       return;
3219     }
3220 
3221     if (_method) {
3222       // Emit stub for static call
3223       address stub = CompiledStaticCall::emit_to_interp_stub(cbuf, mark);
3224       if (stub == NULL) {
3225         ciEnv::current()->record_failure("CodeCache is full"); 
3226         return;
3227       }
3228     }
3229 
3230     // now restore sp
3231     __ mov(sp, rfp);
3232   %}
3233 
  // Virtual (inline-cache) Java call. NULL from ic_call means the code
  // cache is full, recorded as a compile failure.
  enc_class aarch64_enc_java_dynamic_call(method meth) %{
    MacroAssembler _masm(&cbuf);
    address call = __ ic_call((address)$meth$$method);
    if (call == NULL) {
      ciEnv::current()->record_failure("CodeCache is full"); 
      return;
    }
  %}
3242 
  // Post-call epilogue. Stack-depth verification is not implemented on
  // AArch64 — call_Unimplemented() traps if VerifyStackAtCalls is on.
  enc_class aarch64_enc_call_epilog() %{
    MacroAssembler _masm(&cbuf);
    if (VerifyStackAtCalls) {
      // Check that stack depth is unchanged: find majik cookie on stack
      __ call_Unimplemented();
    }
  %}
3250 
  enc_class aarch64_enc_java_to_runtime(method meth) %{
    MacroAssembler _masm(&cbuf);

    // some calls to generated routines (arraycopy code) are scheduled
    // by C2 as runtime calls. if so we can call them using a br (they
    // will be in a reachable segment) otherwise we have to use a blr
    // which loads the absolute address into a register.
    address entry = (address)$meth$$method;
    CodeBlob *cb = CodeCache::find_blob(entry);
    if (cb) {
      // In-code-cache target: trampoline call reaches it.
      address call = __ trampoline_call(Address(entry, relocInfo::runtime_call_type));
      if (call == NULL) {
        ciEnv::current()->record_failure("CodeCache is full"); 
        return;
      }
    } else {
      // Out-of-cache target: absolute blr, with the return address pushed
      // on the stack (paired with zr) so the runtime can find last_Java_pc.
      Label retaddr;
      __ adr(rscratch2, retaddr);
      __ lea(rscratch1, RuntimeAddress(entry));
      // Leave a breadcrumb for JavaFrameAnchor::capture_last_Java_pc()
      __ stp(zr, rscratch2, Address(__ pre(sp, -2 * wordSize)));
      __ blr(rscratch1);
      __ bind(retaddr);
      // Pop the breadcrumb pair.
      __ add(sp, sp, 2 * wordSize);
    }
  %}
3277 
  // Tail-jump to the rethrow stub (far_jump reaches any address).
  enc_class aarch64_enc_rethrow() %{
    MacroAssembler _masm(&cbuf);
    __ far_jump(RuntimeAddress(OptoRuntime::rethrow_stub()));
  %}
3282 
  // Return to the caller address held in lr.
  enc_class aarch64_enc_ret() %{
    MacroAssembler _masm(&cbuf);
    __ ret(lr);
  %}
3287 
3288   enc_class aarch64_enc_tail_call(iRegP jump_target) %{
3289     MacroAssembler _masm(&cbuf);
3290     Register target_reg = as_Register($jump_target$$reg);
3291     __ br(target_reg);
3292   %}
3293 
3294   enc_class aarch64_enc_tail_jmp(iRegP jump_target) %{
3295     MacroAssembler _masm(&cbuf);
3296     Register target_reg = as_Register($jump_target$$reg);
3297     // exception oop should be in r0
3298     // ret addr has been popped into lr
3299     // callee expects it in r3
3300     __ mov(r3, lr);
3301     __ br(target_reg);
3302   %}
3303 
  // Fast-path monitor enter (C2 FastLock node).
  //   object    - oop to be locked
  //   box       - on-stack BasicLock slot
  //   tmp/tmp2  - scratch registers (renamed disp_hdr/tmp below)
  // Communicates the outcome through the condition flags:
  //   flag == EQ indicates success, flag == NE routes to the slow path
  // (see the comments at the final bind(cont) below).
  enc_class aarch64_enc_fast_lock(iRegP object, iRegP box, iRegP tmp, iRegP tmp2) %{
    MacroAssembler _masm(&cbuf);
    Register oop = as_Register($object$$reg);
    Register box = as_Register($box$$reg);
    Register disp_hdr = as_Register($tmp$$reg);
    Register tmp = as_Register($tmp2$$reg);
    Label cont;
    Label object_has_monitor;
    Label cas_failed;

    assert_different_registers(oop, box, tmp, disp_hdr);

    // Load markOop from object into displaced_header.
    __ ldr(disp_hdr, Address(oop, oopDesc::mark_offset_in_bytes()));

    // Always do locking in runtime.
    if (EmitSync & 0x01) {
      // oop is known non-null here, so this comparison sets NE and the
      // caller always takes the slow path.
      __ cmp(oop, zr);
      return;
    }

    if (UseBiasedLocking && !UseOptoBiasInlining) {
      __ biased_locking_enter(box, oop, disp_hdr, tmp, true, cont);
    }

    // Handle existing monitor
    if ((EmitSync & 0x02) == 0) {
      // Inflated lock: the mark word has the monitor bit set.
      __ tbnz(disp_hdr, exact_log2(markOopDesc::monitor_value), object_has_monitor);
    }

    // Set tmp to be (markOop of object | UNLOCK_VALUE).
    __ orr(tmp, disp_hdr, markOopDesc::unlocked_value);

    // Load Compare Value application register.

    // Initialize the box. (Must happen before we update the object mark!)
    __ str(tmp, Address(box, BasicLock::displaced_header_offset_in_bytes()));

    // Compare object markOop with an unlocked value (tmp) and if
    // equal exchange the stack address of our box with object markOop.
    // On failure disp_hdr contains the possibly locked markOop.
    if (UseLSE) {
      // Single CASAL instruction on LSE-capable hardware.
      __ mov(disp_hdr, tmp);
      __ casal(Assembler::xword, disp_hdr, box, oop);  // Updates disp_hdr
      __ cmp(tmp, disp_hdr);
      __ br(Assembler::EQ, cont);
    } else {
      // LL/SC loop: ldaxr/stlxr retry until the store-exclusive succeeds
      // or the loaded mark no longer looks unlocked.
      Label retry_load;
      if ((VM_Version::cpu_cpuFeatures() & VM_Version::CPU_STXR_PREFETCH))
        __ prfm(Address(oop), PSTL1STRM);
      __ bind(retry_load);
      __ ldaxr(disp_hdr, oop);
      __ cmp(tmp, disp_hdr);
      __ br(Assembler::NE, cas_failed);
      // use stlxr to ensure update is immediately visible
      __ stlxr(disp_hdr, box, oop);
      __ cbzw(disp_hdr, cont);
      __ b(retry_load);
    }

    assert(oopDesc::mark_offset_in_bytes() == 0, "offset of _mark is not 0");

    // If the compare-and-exchange succeeded, then we found an unlocked
    // object, will have now locked it will continue at label cont

    __ bind(cas_failed);
    // We did not see an unlocked object so try the fast recursive case.

    // Check if the owner is self by comparing the value in the
    // markOop of object (disp_hdr) with the stack pointer.
    __ mov(rscratch1, sp);
    __ sub(disp_hdr, disp_hdr, rscratch1);
    __ mov(tmp, (address) (~(os::vm_page_size()-1) | (uintptr_t)markOopDesc::lock_mask_in_place));
    // If condition is true we are cont and hence we can store 0 as the
    // displaced header in the box, which indicates that it is a recursive lock.
    __ ands(tmp/*==0?*/, disp_hdr, tmp);
    __ str(tmp/*==0, perhaps*/, Address(box, BasicLock::displaced_header_offset_in_bytes()));

    // Handle existing monitor.
    if ((EmitSync & 0x02) == 0) {
      __ b(cont);

      __ bind(object_has_monitor);
      // The object's monitor m is unlocked iff m->owner == NULL,
      // otherwise m->owner may contain a thread or a stack address.
      //
      // Try to CAS m->owner from NULL to current thread.
      __ add(tmp, disp_hdr, (ObjectMonitor::owner_offset_in_bytes()-markOopDesc::monitor_value));
      __ mov(disp_hdr, zr);

      if (UseLSE) {
        __ mov(rscratch1, disp_hdr);
        __ casal(Assembler::xword, rscratch1, rthread, tmp);
        __ cmp(rscratch1, disp_hdr);
      } else {
        Label retry_load, fail;
        if ((VM_Version::cpu_cpuFeatures() & VM_Version::CPU_STXR_PREFETCH))
          __ prfm(Address(tmp), PSTL1STRM);
        __ bind(retry_load);
        __ ldaxr(rscratch1, tmp);
        __ cmp(disp_hdr, rscratch1);
        __ br(Assembler::NE, fail);
        // use stlxr to ensure update is immediately visible
        __ stlxr(rscratch1, rthread, tmp);
        __ cbnzw(rscratch1, retry_load);
        __ bind(fail);
      }

      // Store a non-null value into the box to avoid looking like a re-entrant
      // lock. The fast-path monitor unlock code checks for
      // markOopDesc::monitor_value so use markOopDesc::unused_mark which has the
      // relevant bit set, and also matches ObjectSynchronizer::slow_enter.
      __ mov(tmp, (address)markOopDesc::unused_mark());
      __ str(tmp, Address(box, BasicLock::displaced_header_offset_in_bytes()));
    }

    __ bind(cont);
    // flag == EQ indicates success
    // flag == NE indicates failure
  %}
3424 
  // Fast-path monitor exit (C2 FastUnlock node), mirroring
  // aarch64_enc_fast_lock above.  Handles, in order: biased locking exit,
  // recursive unlock (displaced header == 0), light-weight unlock via CAS,
  // and inflated-monitor exit.  Outcome is reported through the condition
  // flags (EQ = success, NE = take the slow path).
  enc_class aarch64_enc_fast_unlock(iRegP object, iRegP box, iRegP tmp, iRegP tmp2) %{
    MacroAssembler _masm(&cbuf);
    Register oop = as_Register($object$$reg);
    Register box = as_Register($box$$reg);
    Register disp_hdr = as_Register($tmp$$reg);
    Register tmp = as_Register($tmp2$$reg);
    Label cont;
    Label object_has_monitor;

    assert_different_registers(oop, box, tmp, disp_hdr);

    // Always do locking in runtime.
    if (EmitSync & 0x01) {
      __ cmp(oop, zr); // Oop can't be 0 here => always false.
      return;
    }

    if (UseBiasedLocking && !UseOptoBiasInlining) {
      __ biased_locking_exit(oop, tmp, cont);
    }

    // Find the lock address and load the displaced header from the stack.
    __ ldr(disp_hdr, Address(box, BasicLock::displaced_header_offset_in_bytes()));

    // If the displaced header is 0, we have a recursive unlock.
    __ cmp(disp_hdr, zr);
    __ br(Assembler::EQ, cont);

    // Handle existing monitor.
    if ((EmitSync & 0x02) == 0) {
      __ ldr(tmp, Address(oop, oopDesc::mark_offset_in_bytes()));
      __ tbnz(disp_hdr, exact_log2(markOopDesc::monitor_value), object_has_monitor);
    }

    // Check if it is still a light weight lock, this is true if we
    // see the stack address of the basicLock in the markOop of the
    // object.

    if (UseLSE) {
      __ mov(tmp, box);
      __ casl(Assembler::xword, tmp, disp_hdr, oop);
      __ cmp(tmp, box);
      __ b(cont);
    } else {
      // LL/SC loop restoring the displaced header into the mark word.
      Label retry_load;
      if ((VM_Version::cpu_cpuFeatures() & VM_Version::CPU_STXR_PREFETCH))
        __ prfm(Address(oop), PSTL1STRM);
      __ bind(retry_load);
      __ ldxr(tmp, oop);
      __ cmp(box, tmp);
      __ br(Assembler::NE, cont);
      // use stlxr to ensure update is immediately visible
      __ stlxr(tmp, disp_hdr, oop);
      __ cbzw(tmp, cont);
      __ b(retry_load);
    }

    assert(oopDesc::mark_offset_in_bytes() == 0, "offset of _mark is not 0");

    // Handle existing monitor.
    if ((EmitSync & 0x02) == 0) {
      __ bind(object_has_monitor);
      __ add(tmp, tmp, -markOopDesc::monitor_value); // monitor
      __ ldr(rscratch1, Address(tmp, ObjectMonitor::owner_offset_in_bytes()));
      __ ldr(disp_hdr, Address(tmp, ObjectMonitor::recursions_offset_in_bytes()));
      __ eor(rscratch1, rscratch1, rthread); // Will be 0 if we are the owner.
      __ orr(rscratch1, rscratch1, disp_hdr); // Will be 0 if there are 0 recursions
      __ cmp(rscratch1, zr);
      __ br(Assembler::NE, cont);

      // Only release the monitor if nobody is queued on it.
      __ ldr(rscratch1, Address(tmp, ObjectMonitor::EntryList_offset_in_bytes()));
      __ ldr(disp_hdr, Address(tmp, ObjectMonitor::cxq_offset_in_bytes()));
      __ orr(rscratch1, rscratch1, disp_hdr); // Will be 0 if both are 0.
      __ cmp(rscratch1, zr);
      __ br(Assembler::NE, cont);
      // need a release store here
      __ lea(tmp, Address(tmp, ObjectMonitor::owner_offset_in_bytes()));
      __ stlr(zr, tmp); // set unowned
    }

    __ bind(cont);
    // flag == EQ indicates success
    // flag == NE indicates failure
  %}
3509 
3510 %}
3511 
3512 //----------FRAME--------------------------------------------------------------
3513 // Definition of frame structure and management information.
3514 //
3515 //  S T A C K   L A Y O U T    Allocators stack-slot number
3516 //                             |   (to get allocators register number
3517 //  G  Owned by    |        |  v    add OptoReg::stack0())
3518 //  r   CALLER     |        |
3519 //  o     |        +--------+      pad to even-align allocators stack-slot
3520 //  w     V        |  pad0  |        numbers; owned by CALLER
3521 //  t   -----------+--------+----> Matcher::_in_arg_limit, unaligned
3522 //  h     ^        |   in   |  5
3523 //        |        |  args  |  4   Holes in incoming args owned by SELF
3524 //  |     |        |        |  3
3525 //  |     |        +--------+
3526 //  V     |        | old out|      Empty on Intel, window on Sparc
3527 //        |    old |preserve|      Must be even aligned.
3528 //        |     SP-+--------+----> Matcher::_old_SP, even aligned
3529 //        |        |   in   |  3   area for Intel ret address
3530 //     Owned by    |preserve|      Empty on Sparc.
3531 //       SELF      +--------+
3532 //        |        |  pad2  |  2   pad to align old SP
3533 //        |        +--------+  1
3534 //        |        | locks  |  0
3535 //        |        +--------+----> OptoReg::stack0(), even aligned
3536 //        |        |  pad1  | 11   pad to align new SP
3537 //        |        +--------+
3538 //        |        |        | 10
3539 //        |        | spills |  9   spills
3540 //        V        |        |  8   (pad0 slot for callee)
3541 //      -----------+--------+----> Matcher::_out_arg_limit, unaligned
3542 //        ^        |  out   |  7
3543 //        |        |  args  |  6   Holes in outgoing args owned by CALLEE
3544 //     Owned by    +--------+
3545 //      CALLEE     | new out|  6   Empty on Intel, window on Sparc
3546 //        |    new |preserve|      Must be even-aligned.
3547 //        |     SP-+--------+----> Matcher::_new_SP, even aligned
3548 //        |        |        |
3549 //
3550 // Note 1: Only region 8-11 is determined by the allocator.  Region 0-5 is
3551 //         known from SELF's arguments and the Java calling convention.
3552 //         Region 6-7 is determined per call site.
3553 // Note 2: If the calling convention leaves holes in the incoming argument
3554 //         area, those holes are owned by SELF.  Holes in the outgoing area
//         are owned by the CALLEE.  Holes should not be necessary in the
3556 //         incoming area, as the Java calling convention is completely under
3557 //         the control of the AD file.  Doubles can be sorted and packed to
//         avoid holes.  Holes in the outgoing arguments may be necessary for
3559 //         varargs C calling conventions.
3560 // Note 3: Region 0-3 is even aligned, with pad2 as needed.  Region 3-5 is
3561 //         even aligned with pad0 as needed.
3562 //         Region 6 is even aligned.  Region 6-7 is NOT even aligned;
3563 //           (the latter is true on Intel but is it false on AArch64?)
3564 //         region 6-11 is even aligned; it may be padded out more so that
3565 //         the region from SP to FP meets the minimum stack alignment.
3566 // Note 4: For I2C adapters, the incoming FP may not meet the minimum stack
3567 //         alignment.  Region 11, pad1, may be dynamically extended so that
3568 //         SP meets the minimum alignment.
3569 
// Frame description consumed by the matcher/register allocator: stack
// direction, calling-convention registers, preserve areas, return-address
// location, and return-value register mapping (see layout diagram above).
frame %{
  // What direction does stack grow in (assumed to be same for C & Java)
  stack_direction(TOWARDS_LOW);

  // These three registers define part of the calling convention
  // between compiled code and the interpreter.

  // Inline Cache Register or methodOop for I2C.
  inline_cache_reg(R12);

  // Method Oop Register when calling interpreter.
  interpreter_method_oop_reg(R12);

  // Number of stack slots consumed by locking an object
  sync_stack_slots(2);

  // Compiled code's Frame Pointer
  frame_pointer(R31);

  // Interpreter stores its frame pointer in a register which is
  // stored to the stack by I2CAdaptors.
  // I2CAdaptors convert from interpreted java to compiled java.
  interpreter_frame_pointer(R29);

  // Stack alignment requirement
  stack_alignment(StackAlignmentInBytes); // Alignment size in bytes (128-bit -> 16 bytes)

  // Number of stack slots between incoming argument block and the start of
  // a new frame.  The PROLOG must add this many slots to the stack.  The
  // EPILOG must remove this many slots. aarch64 needs two slots for
  // return address and fp.
  // TODO think this is correct but check
  in_preserve_stack_slots(4);

  // Number of outgoing stack slots killed above the out_preserve_stack_slots
  // for calls to C.  Supports the var-args backing area for register parms.
  varargs_C_out_slots_killed(frame::arg_reg_save_area_bytes/BytesPerInt);

  // The after-PROLOG location of the return address.  Location of
  // return address specifies a type (REG or STACK) and a number
  // representing the register number (i.e. - use a register name) or
  // stack slot.
  // Ret Addr is on stack in slot 0 if no locks or verification or alignment.
  // Otherwise, it is above the locks and verification slot and alignment word
  // TODO this may well be correct but need to check why that - 2 is there
  // ppc port uses 0 but we definitely need to allow for fixed_slots
  // which folds in the space used for monitors
  return_addr(STACK - 2 +
              round_to((Compile::current()->in_preserve_stack_slots() +
                        Compile::current()->fixed_slots()),
                       stack_alignment_in_slots()));

  // Body of function which returns an integer array locating
  // arguments either in registers or in stack slots.  Passed an array
  // of ideal registers called "sig" and a "length" count.  Stack-slot
  // offsets are based on outgoing arguments, i.e. a CALLER setting up
  // arguments for a CALLEE.  Incoming stack arguments are
  // automatically biased by the preserve_stack_slots field above.

  calling_convention
  %{
    // No difference between ingoing/outgoing just pass false
    SharedRuntime::java_calling_convention(sig_bt, regs, length, false);
  %}

  c_calling_convention
  %{
    // This is obviously always outgoing
    (void) SharedRuntime::c_calling_convention(sig_bt, regs, NULL, length);
  %}

  // Location of compiled Java return values.  Same as C for now.
  return_value
  %{
    // TODO do we allow ideal_reg == Op_RegN???
    assert(ideal_reg >= Op_RegI && ideal_reg <= Op_RegL,
           "only return normal values");

    // Tables are indexed by ideal-register opcode; lo/hi give the two
    // halves of the return register pair (hi == OptoReg::Bad for values
    // that occupy a single 32-bit slot).
    static const int lo[Op_RegL + 1] = { // enum name
      0,                                 // Op_Node
      0,                                 // Op_Set
      R0_num,                            // Op_RegN
      R0_num,                            // Op_RegI
      R0_num,                            // Op_RegP
      V0_num,                            // Op_RegF
      V0_num,                            // Op_RegD
      R0_num                             // Op_RegL
    };

    static const int hi[Op_RegL + 1] = { // enum name
      0,                                 // Op_Node
      0,                                 // Op_Set
      OptoReg::Bad,                       // Op_RegN
      OptoReg::Bad,                      // Op_RegI
      R0_H_num,                          // Op_RegP
      OptoReg::Bad,                      // Op_RegF
      V0_H_num,                          // Op_RegD
      R0_H_num                           // Op_RegL
    };

    return OptoRegPair(hi[ideal_reg], lo[ideal_reg]);
  %}
%}
3673 
3674 //----------ATTRIBUTES---------------------------------------------------------
3675 //----------Operand Attributes-------------------------------------------------
3676 op_attrib op_cost(1);        // Required cost attribute
3677 
3678 //----------Instruction Attributes---------------------------------------------
3679 ins_attrib ins_cost(INSN_COST); // Required cost attribute
3680 ins_attrib ins_size(32);        // Required size attribute (in bits)
3681 ins_attrib ins_short_branch(0); // Required flag: is this instruction
3682                                 // a non-matching short branch variant
3683                                 // of some long branch?
3684 ins_attrib ins_alignment(4);    // Required alignment attribute (must
3685                                 // be a power of 2) specifies the
3686                                 // alignment that some part of the
3687                                 // instruction (not necessarily the
3688                                 // start) requires.  If > 1, a
3689                                 // compute_padding() function must be
3690                                 // provided for the instruction
3691 
3692 //----------OPERANDS-----------------------------------------------------------
3693 // Operand definitions must precede instruction definitions for correct parsing
3694 // in the ADLC because operands constitute user defined types which are used in
3695 // instruction definitions.
3696 
3697 //----------Simple Operands----------------------------------------------------
3698 
// Integer operands 32 bit
// 32 bit immediate
operand immI()
%{
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// 32 bit zero
operand immI0()
%{
  predicate(n->get_int() == 0);
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// 32 bit unit increment
operand immI_1()
%{
  predicate(n->get_int() == 1);
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// 32 bit unit decrement
operand immI_M1()
%{
  predicate(n->get_int() == -1);
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// 32 bit value no greater than 4
// (note: the predicate has no lower bound, so negative values match too)
operand immI_le_4()
%{
  predicate(n->get_int() <= 4);
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// 32 bit constant 31
operand immI_31()
%{
  predicate(n->get_int() == 31);
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// 32 bit constant 8
operand immI_8()
%{
  predicate(n->get_int() == 8);
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// 32 bit constant 16
operand immI_16()
%{
  predicate(n->get_int() == 16);
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// 32 bit constant 24
operand immI_24()
%{
  predicate(n->get_int() == 24);
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// 32 bit constant 32
operand immI_32()
%{
  predicate(n->get_int() == 32);
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// 32 bit constant 48
operand immI_48()
%{
  predicate(n->get_int() == 48);
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// 32 bit constant 56
operand immI_56()
%{
  predicate(n->get_int() == 56);
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// 32 bit constant 64
operand immI_64()
%{
  predicate(n->get_int() == 64);
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// 32 bit constant 255 (low-byte mask)
operand immI_255()
%{
  predicate(n->get_int() == 255);
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// 32 bit constant 65535 (low-halfword mask)
operand immI_65535()
%{
  predicate(n->get_int() == 65535);
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}
3852 
// Constant 63
// NOTE(review): despite the immL_ prefix this operand (and immL_255 below)
// matches a 32-bit ConI via get_int() -- presumably for rules whose long
// shift-count/mask input is an int constant; confirm against the users.
operand immL_63()
%{
  predicate(n->get_int() == 63);
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Constant 255 (see the NOTE on immL_63 above: matches ConI, not ConL)
operand immL_255()
%{
  predicate(n->get_int() == 255);
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// 64 bit constant 65535 (low-halfword mask)
operand immL_65535()
%{
  predicate(n->get_long() == 65535L);
  match(ConL);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// 64 bit constant 0xFFFFFFFF (unsigned-int mask)
operand immL_4294967295()
%{
  predicate(n->get_long() == 4294967295L);
  match(ConL);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// 64 bit mask of contiguous low-order one bits (2^k - 1, non-zero)
// with the top two bits clear
operand immL_bitmask()
%{
  predicate((n->get_long() != 0)
            && ((n->get_long() & 0xc000000000000000l) == 0)
            && is_power_of_2(n->get_long() + 1));
  match(ConL);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// 32 bit mask of contiguous low-order one bits (2^k - 1, non-zero)
// with the top two bits clear
operand immI_bitmask()
%{
  predicate((n->get_int() != 0)
            && ((n->get_int() & 0xc0000000) == 0)
            && is_power_of_2(n->get_int() + 1));
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}
3916 
// Scale values for scaled offset addressing modes (up to long but not quad)
operand immIScale()
%{
  predicate(0 <= n->get_int() && (n->get_int() <= 3));
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// 26 bit signed offset -- for pc-relative branches
operand immI26()
%{
  predicate(((-(1 << 25)) <= n->get_int()) && (n->get_int() < (1 << 25)));
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// 19 bit signed offset -- for pc-relative loads
operand immI19()
%{
  predicate(((-(1 << 18)) <= n->get_int()) && (n->get_int() < (1 << 18)));
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// 12 bit unsigned offset -- for base plus immediate loads
operand immIU12()
%{
  predicate((0 <= n->get_int()) && (n->get_int() < (1 << 12)));
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// 12 bit unsigned offset (long variant of immIU12)
operand immLU12()
%{
  predicate((0 <= n->get_long()) && (n->get_long() < (1 << 12)));
  match(ConL);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Offset for scaled or unscaled immediate loads and stores
operand immIOffset()
%{
  predicate(Address::offset_ok_for_immed(n->get_int()));
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Offset valid for a 4-byte (shift == 2) scaled/unscaled access
operand immIOffset4()
%{
  predicate(Address::offset_ok_for_immed(n->get_int(), 2));
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Offset valid for an 8-byte (shift == 3) scaled/unscaled access
operand immIOffset8()
%{
  predicate(Address::offset_ok_for_immed(n->get_int(), 3));
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Offset valid for a 16-byte (shift == 4) scaled/unscaled access
operand immIOffset16()
%{
  predicate(Address::offset_ok_for_immed(n->get_int(), 4));
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Long offset for scaled or unscaled immediate loads and stores
operand immLoffset()
%{
  predicate(Address::offset_ok_for_immed(n->get_long()));
  match(ConL);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Long offset valid for a 4-byte (shift == 2) scaled/unscaled access
operand immLoffset4()
%{
  predicate(Address::offset_ok_for_immed(n->get_long(), 2));
  match(ConL);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Long offset valid for an 8-byte (shift == 3) scaled/unscaled access
operand immLoffset8()
%{
  predicate(Address::offset_ok_for_immed(n->get_long(), 3));
  match(ConL);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Long offset valid for a 16-byte (shift == 4) scaled/unscaled access
operand immLoffset16()
%{
  predicate(Address::offset_ok_for_immed(n->get_long(), 4));
  match(ConL);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}
4051 
// 32 bit integer valid for add sub immediate
operand immIAddSub()
%{
  predicate(Assembler::operand_valid_for_add_sub_immediate((long)n->get_int()));
  match(ConI);
  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// 32 bit unsigned integer valid for logical immediate
// TODO -- check this is right when e.g the mask is 0x80000000
operand immILog()
%{
  predicate(Assembler::operand_valid_for_logical_immediate(/*is32*/true, (unsigned long)n->get_int()));
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Integer operands 64 bit
// 64 bit immediate
operand immL()
%{
  match(ConL);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// 64 bit zero
operand immL0()
%{
  predicate(n->get_long() == 0);
  match(ConL);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// 64 bit unit increment
operand immL_1()
%{
  predicate(n->get_long() == 1);
  match(ConL);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// 64 bit unit decrement
operand immL_M1()
%{
  predicate(n->get_long() == -1);
  match(ConL);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// 32 bit offset of pc in thread anchor
// (byte offset of JavaFrameAnchor::_last_Java_pc within the thread)
operand immL_pc_off()
%{
  predicate(n->get_long() == in_bytes(JavaThread::frame_anchor_offset()) +
                             in_bytes(JavaFrameAnchor::last_Java_pc_offset()));
  match(ConL);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// 64 bit integer valid for add sub immediate
operand immLAddSub()
%{
  predicate(Assembler::operand_valid_for_add_sub_immediate(n->get_long()));
  match(ConL);
  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// 64 bit integer valid for logical immediate
operand immLLog()
%{
  predicate(Assembler::operand_valid_for_logical_immediate(/*is32*/false, (unsigned long)n->get_long()));
  match(ConL);
  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Long Immediate: low 32-bit mask
operand immL_32bits()
%{
  predicate(n->get_long() == 0xFFFFFFFFL);
  match(ConL);
  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}
4160 
// Pointer operands
// Pointer Immediate
operand immP()
%{
  match(ConP);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// NULL Pointer Immediate
operand immP0()
%{
  predicate(n->get_ptr() == 0);
  match(ConP);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Pointer Immediate One
// this is used in object initialization (initial object header)
operand immP_1()
%{
  predicate(n->get_ptr() == 1);
  match(ConP);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Polling Page Pointer Immediate
operand immPollPage()
%{
  predicate((address)n->get_ptr() == os::get_polling_page());
  match(ConP);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Card Table Byte Map Base
operand immByteMapBase()
%{
  // Get base of card map
  predicate((jbyte*)n->get_ptr() ==
        ((CardTableModRefBS*)(Universe::heap()->barrier_set()))->byte_map_base);
  match(ConP);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Pointer Immediate Minus One
// this is used when we want to write the current PC to the thread anchor
operand immP_M1()
%{
  predicate(n->get_ptr() == -1);
  match(ConP);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Pointer Immediate Minus Two
// this is used when we want to write the current PC to the thread anchor
// NOTE(review): description duplicated from immP_M1 above -- presumably -2
// serves as a distinct sentinel value; confirm against the matching rules.
operand immP_M2()
%{
  predicate(n->get_ptr() == -2);
  match(ConP);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}
4242 
// Float and Double operands
// Double Immediate
operand immD()
%{
  match(ConD);
  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// constant 'double +0.0'.
operand immD0()
%{
  predicate((n->getd() == 0) &&
            (fpclassify(n->getd()) == FP_ZERO) && (signbit(n->getd()) == 0));
  match(ConD);
  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Double constant encodable as an FP move-immediate
// (see Assembler::operand_valid_for_float_immediate)
operand immDPacked()
%{
  predicate(Assembler::operand_valid_for_float_immediate(n->getd()));
  match(ConD);
  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Float Immediate
operand immF()
%{
  match(ConF);
  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// constant 'float +0.0'.
operand immF0()
%{
  predicate((n->getf() == 0) &&
            (fpclassify(n->getf()) == FP_ZERO) && (signbit(n->getf()) == 0));
  match(ConF);
  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Float constant encodable as an FP move-immediate
// (see Assembler::operand_valid_for_float_immediate)
operand immFPacked()
%{
  predicate(Assembler::operand_valid_for_float_immediate((double)n->getf()));
  match(ConF);
  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}
4303 
// Narrow pointer operands
// Narrow Pointer Immediate
operand immN()
%{
  match(ConN);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Narrow NULL Pointer Immediate
operand immN0()
%{
  predicate(n->get_narrowcon() == 0);
  match(ConN);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Narrow Klass Pointer Immediate
operand immNKlass()
%{
  match(ConNKlass);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}
4334 
// Integer 32 bit Register Operands
// Integer 32 bitRegister (excludes SP)
operand iRegI()
%{
  constraint(ALLOC_IN_RC(any_reg32));
  match(RegI);
  match(iRegINoSp);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}

// Integer 32 bit Register not Special
operand iRegINoSp()
%{
  constraint(ALLOC_IN_RC(no_special_reg32));
  match(RegI);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}

// Integer 64 bit Register Operands
// Integer 64 bit Register (includes SP)
operand iRegL()
%{
  constraint(ALLOC_IN_RC(any_reg));
  match(RegL);
  match(iRegLNoSp);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}

// Integer 64 bit Register not Special
// NOTE(review): unlike the sibling *NoSp operands this one omits
// op_cost(0) -- presumably relying on the ADLC default; confirm intended.
operand iRegLNoSp()
%{
  constraint(ALLOC_IN_RC(no_special_reg));
  match(RegL);
  format %{ %}
  interface(REG_INTER);
%}
4377 
// Pointer Register Operands
// Pointer Register
operand iRegP()
%{
  constraint(ALLOC_IN_RC(ptr_reg));
  match(RegP);
  match(iRegPNoSp);
  match(iRegP_R0);
  //match(iRegP_R2);
  //match(iRegP_R4);
  //match(iRegP_R5);
  match(thread_RegP);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}

// Pointer 64 bit Register not Special
operand iRegPNoSp()
%{
  constraint(ALLOC_IN_RC(no_special_ptr_reg));
  match(RegP);
  // match(iRegP);
  // match(iRegP_R0);
  // match(iRegP_R2);
  // match(iRegP_R4);
  // match(iRegP_R5);
  // match(thread_RegP);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}

// Pointer 64 bit Register R0 only
operand iRegP_R0()
%{
  constraint(ALLOC_IN_RC(r0_reg));
  match(RegP);
  // match(iRegP);
  match(iRegPNoSp);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}

// Pointer 64 bit Register R1 only
operand iRegP_R1()
%{
  constraint(ALLOC_IN_RC(r1_reg));
  match(RegP);
  // match(iRegP);
  match(iRegPNoSp);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}

// Pointer 64 bit Register R2 only
operand iRegP_R2()
%{
  constraint(ALLOC_IN_RC(r2_reg));
  match(RegP);
  // match(iRegP);
  match(iRegPNoSp);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}

// Pointer 64 bit Register R3 only
operand iRegP_R3()
%{
  constraint(ALLOC_IN_RC(r3_reg));
  match(RegP);
  // match(iRegP);
  match(iRegPNoSp);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}

// Pointer 64 bit Register R4 only
operand iRegP_R4()
%{
  constraint(ALLOC_IN_RC(r4_reg));
  match(RegP);
  // match(iRegP);
  match(iRegPNoSp);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}

// Pointer 64 bit Register R5 only
operand iRegP_R5()
%{
  constraint(ALLOC_IN_RC(r5_reg));
  match(RegP);
  // match(iRegP);
  match(iRegPNoSp);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}
4482 
4483 // Pointer 64 bit Register R10 only
4484 operand iRegP_R10()
4485 %{
4486   constraint(ALLOC_IN_RC(r10_reg));
4487   match(RegP);
4488   // match(iRegP);
4489   match(iRegPNoSp);
4490   op_cost(0);
4491   format %{ %}
4492   interface(REG_INTER);
4493 %}
4494 
4495 // Long 64 bit Register R11 only
4496 operand iRegL_R11()
4497 %{
4498   constraint(ALLOC_IN_RC(r11_reg));
4499   match(RegL);
4500   match(iRegLNoSp);
4501   op_cost(0);
4502   format %{ %}
4503   interface(REG_INTER);
4504 %}
4505 
4506 // Pointer 64 bit Register FP only
4507 operand iRegP_FP()
4508 %{
4509   constraint(ALLOC_IN_RC(fp_reg));
4510   match(RegP);
4511   // match(iRegP);
4512   op_cost(0);
4513   format %{ %}
4514   interface(REG_INTER);
4515 %}
4516 
4517 // Register R0 only
4518 operand iRegI_R0()
4519 %{
4520   constraint(ALLOC_IN_RC(int_r0_reg));
4521   match(RegI);
4522   match(iRegINoSp);
4523   op_cost(0);
4524   format %{ %}
4525   interface(REG_INTER);
4526 %}
4527 
4528 // Register R2 only
4529 operand iRegI_R2()
4530 %{
4531   constraint(ALLOC_IN_RC(int_r2_reg));
4532   match(RegI);
4533   match(iRegINoSp);
4534   op_cost(0);
4535   format %{ %}
4536   interface(REG_INTER);
4537 %}
4538 
4539 // Register R3 only
4540 operand iRegI_R3()
4541 %{
4542   constraint(ALLOC_IN_RC(int_r3_reg));
4543   match(RegI);
4544   match(iRegINoSp);
4545   op_cost(0);
4546   format %{ %}
4547   interface(REG_INTER);
4548 %}
4549 
4550 
// Register R4 only
4552 operand iRegI_R4()
4553 %{
4554   constraint(ALLOC_IN_RC(int_r4_reg));
4555   match(RegI);
4556   match(iRegINoSp);
4557   op_cost(0);
4558   format %{ %}
4559   interface(REG_INTER);
4560 %}
4561 
4562 
4563 // Pointer Register Operands
4564 // Narrow Pointer Register
4565 operand iRegN()
4566 %{
4567   constraint(ALLOC_IN_RC(any_reg32));
4568   match(RegN);
4569   match(iRegNNoSp);
4570   op_cost(0);
4571   format %{ %}
4572   interface(REG_INTER);
4573 %}
4574 
// Narrow Pointer Register not Special
// (32-bit compressed-oop register, excluding SP and other reserved
// registers; header previously mislabelled as "Integer 64 bit".)
operand iRegNNoSp()
%{
  constraint(ALLOC_IN_RC(no_special_reg32));
  match(RegN);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}
4584 
4585 // heap base register -- used for encoding immN0
4586 
4587 operand iRegIHeapbase()
4588 %{
4589   constraint(ALLOC_IN_RC(heapbase_reg));
4590   match(RegI);
4591   op_cost(0);
4592   format %{ %}
4593   interface(REG_INTER);
4594 %}
4595 
4596 // Float Register
4597 // Float register operands
4598 operand vRegF()
4599 %{
4600   constraint(ALLOC_IN_RC(float_reg));
4601   match(RegF);
4602 
4603   op_cost(0);
4604   format %{ %}
4605   interface(REG_INTER);
4606 %}
4607 
4608 // Double Register
4609 // Double register operands
4610 operand vRegD()
4611 %{
4612   constraint(ALLOC_IN_RC(double_reg));
4613   match(RegD);
4614 
4615   op_cost(0);
4616   format %{ %}
4617   interface(REG_INTER);
4618 %}
4619 
4620 operand vecD()
4621 %{
4622   constraint(ALLOC_IN_RC(vectord_reg));
4623   match(VecD);
4624 
4625   op_cost(0);
4626   format %{ %}
4627   interface(REG_INTER);
4628 %}
4629 
4630 operand vecX()
4631 %{
4632   constraint(ALLOC_IN_RC(vectorx_reg));
4633   match(VecX);
4634 
4635   op_cost(0);
4636   format %{ %}
4637   interface(REG_INTER);
4638 %}
4639 
4640 operand vRegD_V0()
4641 %{
4642   constraint(ALLOC_IN_RC(v0_reg));
4643   match(RegD);
4644   op_cost(0);
4645   format %{ %}
4646   interface(REG_INTER);
4647 %}
4648 
4649 operand vRegD_V1()
4650 %{
4651   constraint(ALLOC_IN_RC(v1_reg));
4652   match(RegD);
4653   op_cost(0);
4654   format %{ %}
4655   interface(REG_INTER);
4656 %}
4657 
4658 operand vRegD_V2()
4659 %{
4660   constraint(ALLOC_IN_RC(v2_reg));
4661   match(RegD);
4662   op_cost(0);
4663   format %{ %}
4664   interface(REG_INTER);
4665 %}
4666 
4667 operand vRegD_V3()
4668 %{
4669   constraint(ALLOC_IN_RC(v3_reg));
4670   match(RegD);
4671   op_cost(0);
4672   format %{ %}
4673   interface(REG_INTER);
4674 %}
4675 
4676 // Flags register, used as output of signed compare instructions
4677 
// note that on AArch64 we also use this register as the output
// for floating point compare instructions (CmpF CmpD). this ensures
4680 // that ordered inequality tests use GT, GE, LT or LE none of which
4681 // pass through cases where the result is unordered i.e. one or both
4682 // inputs to the compare is a NaN. this means that the ideal code can
4683 // replace e.g. a GT with an LE and not end up capturing the NaN case
4684 // (where the comparison should always fail). EQ and NE tests are
4685 // always generated in ideal code so that unordered folds into the NE
4686 // case, matching the behaviour of AArch64 NE.
4687 //
4688 // This differs from x86 where the outputs of FP compares use a
4689 // special FP flags registers and where compares based on this
4690 // register are distinguished into ordered inequalities (cmpOpUCF) and
4691 // EQ/NEQ tests (cmpOpUCF2). x86 has to special case the latter tests
4692 // to explicitly handle the unordered case in branches. x86 also has
4693 // to include extra CMoveX rules to accept a cmpOpUCF input.
4694 
4695 operand rFlagsReg()
4696 %{
4697   constraint(ALLOC_IN_RC(int_flags));
4698   match(RegFlags);
4699 
4700   op_cost(0);
4701   format %{ "RFLAGS" %}
4702   interface(REG_INTER);
4703 %}
4704 
4705 // Flags register, used as output of unsigned compare instructions
4706 operand rFlagsRegU()
4707 %{
4708   constraint(ALLOC_IN_RC(int_flags));
4709   match(RegFlags);
4710 
4711   op_cost(0);
4712   format %{ "RFLAGSU" %}
4713   interface(REG_INTER);
4714 %}
4715 
4716 // Special Registers
4717 
4718 // Method Register
4719 operand inline_cache_RegP(iRegP reg)
4720 %{
4721   constraint(ALLOC_IN_RC(method_reg)); // inline_cache_reg
4722   match(reg);
4723   match(iRegPNoSp);
4724   op_cost(0);
4725   format %{ %}
4726   interface(REG_INTER);
4727 %}
4728 
4729 operand interpreter_method_oop_RegP(iRegP reg)
4730 %{
4731   constraint(ALLOC_IN_RC(method_reg)); // interpreter_method_oop_reg
4732   match(reg);
4733   match(iRegPNoSp);
4734   op_cost(0);
4735   format %{ %}
4736   interface(REG_INTER);
4737 %}
4738 
// Thread Register
// Pointer operand pinned to the dedicated JavaThread register.
operand thread_RegP(iRegP reg)
%{
  constraint(ALLOC_IN_RC(thread_reg)); // thread_reg (was mislabelled "link_reg")
  match(reg);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}
4748 
4749 operand lr_RegP(iRegP reg)
4750 %{
4751   constraint(ALLOC_IN_RC(lr_reg)); // link_reg
4752   match(reg);
4753   op_cost(0);
4754   format %{ %}
4755   interface(REG_INTER);
4756 %}
4757 
4758 //----------Memory Operands----------------------------------------------------
4759 
4760 operand indirect(iRegP reg)
4761 %{
4762   constraint(ALLOC_IN_RC(ptr_reg));
4763   match(reg);
4764   op_cost(0);
4765   format %{ "[$reg]" %}
4766   interface(MEMORY_INTER) %{
4767     base($reg);
4768     index(0xffffffff);
4769     scale(0x0);
4770     disp(0x0);
4771   %}
4772 %}
4773 
4774 operand indIndexScaledOffsetI(iRegP reg, iRegL lreg, immIScale scale, immIU12 off)
4775 %{
4776   predicate(size_fits_all_mem_uses(n->as_AddP(),
4777                                    n->in(AddPNode::Address)->in(AddPNode::Offset)->in(2)->get_int()));
4778   constraint(ALLOC_IN_RC(ptr_reg));
4779   match(AddP (AddP reg (LShiftL lreg scale)) off);
4780   op_cost(INSN_COST);
4781   format %{ "$reg, $lreg lsl($scale), $off" %}
4782   interface(MEMORY_INTER) %{
4783     base($reg);
4784     index($lreg);
4785     scale($scale);
4786     disp($off);
4787   %}
4788 %}
4789 
4790 operand indIndexScaledOffsetL(iRegP reg, iRegL lreg, immIScale scale, immLU12 off)
4791 %{
4792   predicate(size_fits_all_mem_uses(n->as_AddP(),
4793                                    n->in(AddPNode::Address)->in(AddPNode::Offset)->in(2)->get_int()));
4794   constraint(ALLOC_IN_RC(ptr_reg));
4795   match(AddP (AddP reg (LShiftL lreg scale)) off);
4796   op_cost(INSN_COST);
4797   format %{ "$reg, $lreg lsl($scale), $off" %}
4798   interface(MEMORY_INTER) %{
4799     base($reg);
4800     index($lreg);
4801     scale($scale);
4802     disp($off);
4803   %}
4804 %}
4805 
4806 operand indIndexOffsetI2L(iRegP reg, iRegI ireg, immLU12 off)
4807 %{
4808   constraint(ALLOC_IN_RC(ptr_reg));
4809   match(AddP (AddP reg (ConvI2L ireg)) off);
4810   op_cost(INSN_COST);
4811   format %{ "$reg, $ireg, $off I2L" %}
4812   interface(MEMORY_INTER) %{
4813     base($reg);
4814     index($ireg);
4815     scale(0x0);
4816     disp($off);
4817   %}
4818 %}
4819 
4820 operand indIndexScaledOffsetI2L(iRegP reg, iRegI ireg, immIScale scale, immLU12 off)
4821 %{
4822   predicate(size_fits_all_mem_uses(n->as_AddP(),
4823                                    n->in(AddPNode::Address)->in(AddPNode::Offset)->in(2)->get_int()));
4824   constraint(ALLOC_IN_RC(ptr_reg));
4825   match(AddP (AddP reg (LShiftL (ConvI2L ireg) scale)) off);
4826   op_cost(INSN_COST);
4827   format %{ "$reg, $ireg sxtw($scale), $off I2L" %}
4828   interface(MEMORY_INTER) %{
4829     base($reg);
4830     index($ireg);
4831     scale($scale);
4832     disp($off);
4833   %}
4834 %}
4835 
4836 operand indIndexScaledI2L(iRegP reg, iRegI ireg, immIScale scale)
4837 %{
4838   predicate(size_fits_all_mem_uses(n->as_AddP(),
4839                                    n->in(AddPNode::Offset)->in(2)->get_int()));
4840   constraint(ALLOC_IN_RC(ptr_reg));
4841   match(AddP reg (LShiftL (ConvI2L ireg) scale));
4842   op_cost(0);
4843   format %{ "$reg, $ireg sxtw($scale), 0, I2L" %}
4844   interface(MEMORY_INTER) %{
4845     base($reg);
4846     index($ireg);
4847     scale($scale);
4848     disp(0x0);
4849   %}
4850 %}
4851 
4852 operand indIndexScaled(iRegP reg, iRegL lreg, immIScale scale)
4853 %{
4854   predicate(size_fits_all_mem_uses(n->as_AddP(),
4855                                    n->in(AddPNode::Offset)->in(2)->get_int()));
4856   constraint(ALLOC_IN_RC(ptr_reg));
4857   match(AddP reg (LShiftL lreg scale));
4858   op_cost(0);
4859   format %{ "$reg, $lreg lsl($scale)" %}
4860   interface(MEMORY_INTER) %{
4861     base($reg);
4862     index($lreg);
4863     scale($scale);
4864     disp(0x0);
4865   %}
4866 %}
4867 
4868 operand indIndex(iRegP reg, iRegL lreg)
4869 %{
4870   constraint(ALLOC_IN_RC(ptr_reg));
4871   match(AddP reg lreg);
4872   op_cost(0);
4873   format %{ "$reg, $lreg" %}
4874   interface(MEMORY_INTER) %{
4875     base($reg);
4876     index($lreg);
4877     scale(0x0);
4878     disp(0x0);
4879   %}
4880 %}
4881 
4882 operand indOffI(iRegP reg, immIOffset off)
4883 %{
4884   constraint(ALLOC_IN_RC(ptr_reg));
4885   match(AddP reg off);
4886   op_cost(0);
4887   format %{ "[$reg, $off]" %}
4888   interface(MEMORY_INTER) %{
4889     base($reg);
4890     index(0xffffffff);
4891     scale(0x0);
4892     disp($off);
4893   %}
4894 %}
4895 
4896 operand indOffI4(iRegP reg, immIOffset4 off)
4897 %{
4898   constraint(ALLOC_IN_RC(ptr_reg));
4899   match(AddP reg off);
4900   op_cost(0);
4901   format %{ "[$reg, $off]" %}
4902   interface(MEMORY_INTER) %{
4903     base($reg);
4904     index(0xffffffff);
4905     scale(0x0);
4906     disp($off);
4907   %}
4908 %}
4909 
4910 operand indOffI8(iRegP reg, immIOffset8 off)
4911 %{
4912   constraint(ALLOC_IN_RC(ptr_reg));
4913   match(AddP reg off);
4914   op_cost(0);
4915   format %{ "[$reg, $off]" %}
4916   interface(MEMORY_INTER) %{
4917     base($reg);
4918     index(0xffffffff);
4919     scale(0x0);
4920     disp($off);
4921   %}
4922 %}
4923 
4924 operand indOffI16(iRegP reg, immIOffset16 off)
4925 %{
4926   constraint(ALLOC_IN_RC(ptr_reg));
4927   match(AddP reg off);
4928   op_cost(0);
4929   format %{ "[$reg, $off]" %}
4930   interface(MEMORY_INTER) %{
4931     base($reg);
4932     index(0xffffffff);
4933     scale(0x0);
4934     disp($off);
4935   %}
4936 %}
4937 
4938 operand indOffL(iRegP reg, immLoffset off)
4939 %{
4940   constraint(ALLOC_IN_RC(ptr_reg));
4941   match(AddP reg off);
4942   op_cost(0);
4943   format %{ "[$reg, $off]" %}
4944   interface(MEMORY_INTER) %{
4945     base($reg);
4946     index(0xffffffff);
4947     scale(0x0);
4948     disp($off);
4949   %}
4950 %}
4951 
4952 operand indOffL4(iRegP reg, immLoffset4 off)
4953 %{
4954   constraint(ALLOC_IN_RC(ptr_reg));
4955   match(AddP reg off);
4956   op_cost(0);
4957   format %{ "[$reg, $off]" %}
4958   interface(MEMORY_INTER) %{
4959     base($reg);
4960     index(0xffffffff);
4961     scale(0x0);
4962     disp($off);
4963   %}
4964 %}
4965 
4966 operand indOffL8(iRegP reg, immLoffset8 off)
4967 %{
4968   constraint(ALLOC_IN_RC(ptr_reg));
4969   match(AddP reg off);
4970   op_cost(0);
4971   format %{ "[$reg, $off]" %}
4972   interface(MEMORY_INTER) %{
4973     base($reg);
4974     index(0xffffffff);
4975     scale(0x0);
4976     disp($off);
4977   %}
4978 %}
4979 
4980 operand indOffL16(iRegP reg, immLoffset16 off)
4981 %{
4982   constraint(ALLOC_IN_RC(ptr_reg));
4983   match(AddP reg off);
4984   op_cost(0);
4985   format %{ "[$reg, $off]" %}
4986   interface(MEMORY_INTER) %{
4987     base($reg);
4988     index(0xffffffff);
4989     scale(0x0);
4990     disp($off);
4991   %}
4992 %}
4993 
4994 operand indirectN(iRegN reg)
4995 %{
4996   predicate(Universe::narrow_oop_shift() == 0);
4997   constraint(ALLOC_IN_RC(ptr_reg));
4998   match(DecodeN reg);
4999   op_cost(0);
5000   format %{ "[$reg]\t# narrow" %}
5001   interface(MEMORY_INTER) %{
5002     base($reg);
5003     index(0xffffffff);
5004     scale(0x0);
5005     disp(0x0);
5006   %}
5007 %}
5008 
5009 operand indIndexScaledOffsetIN(iRegN reg, iRegL lreg, immIScale scale, immIU12 off)
5010 %{
5011   predicate(Universe::narrow_oop_shift() == 0 &&
5012             size_fits_all_mem_uses(n->as_AddP(),
5013                                    n->in(AddPNode::Address)->in(AddPNode::Offset)->in(2)->get_int()));
5014   constraint(ALLOC_IN_RC(ptr_reg));
5015   match(AddP (AddP (DecodeN reg) (LShiftL lreg scale)) off);
5016   op_cost(0);
5017   format %{ "$reg, $lreg lsl($scale), $off\t# narrow" %}
5018   interface(MEMORY_INTER) %{
5019     base($reg);
5020     index($lreg);
5021     scale($scale);
5022     disp($off);
5023   %}
5024 %}
5025 
5026 operand indIndexScaledOffsetLN(iRegN reg, iRegL lreg, immIScale scale, immLU12 off)
5027 %{
5028   predicate(Universe::narrow_oop_shift() == 0 &&
5029             size_fits_all_mem_uses(n->as_AddP(),
5030                                    n->in(AddPNode::Address)->in(AddPNode::Offset)->in(2)->get_int()));
5031   constraint(ALLOC_IN_RC(ptr_reg));
5032   match(AddP (AddP (DecodeN reg) (LShiftL lreg scale)) off);
5033   op_cost(INSN_COST);
5034   format %{ "$reg, $lreg lsl($scale), $off\t# narrow" %}
5035   interface(MEMORY_INTER) %{
5036     base($reg);
5037     index($lreg);
5038     scale($scale);
5039     disp($off);
5040   %}
5041 %}
5042 
5043 operand indIndexOffsetI2LN(iRegN reg, iRegI ireg, immLU12 off)
5044 %{
5045   predicate(Universe::narrow_oop_shift() == 0);
5046   constraint(ALLOC_IN_RC(ptr_reg));
5047   match(AddP (AddP (DecodeN reg) (ConvI2L ireg)) off);
5048   op_cost(INSN_COST);
5049   format %{ "$reg, $ireg, $off I2L\t# narrow" %}
5050   interface(MEMORY_INTER) %{
5051     base($reg);
5052     index($ireg);
5053     scale(0x0);
5054     disp($off);
5055   %}
5056 %}
5057 
5058 operand indIndexScaledOffsetI2LN(iRegN reg, iRegI ireg, immIScale scale, immLU12 off)
5059 %{
5060   predicate(Universe::narrow_oop_shift() == 0 &&
5061             size_fits_all_mem_uses(n->as_AddP(),
5062                                    n->in(AddPNode::Address)->in(AddPNode::Offset)->in(2)->get_int()));
5063   constraint(ALLOC_IN_RC(ptr_reg));
5064   match(AddP (AddP (DecodeN reg) (LShiftL (ConvI2L ireg) scale)) off);
5065   op_cost(INSN_COST);
5066   format %{ "$reg, $ireg sxtw($scale), $off I2L\t# narrow" %}
5067   interface(MEMORY_INTER) %{
5068     base($reg);
5069     index($ireg);
5070     scale($scale);
5071     disp($off);
5072   %}
5073 %}
5074 
5075 operand indIndexScaledI2LN(iRegN reg, iRegI ireg, immIScale scale)
5076 %{
5077   predicate(Universe::narrow_oop_shift() == 0 &&
5078             size_fits_all_mem_uses(n->as_AddP(),
5079                                    n->in(AddPNode::Offset)->in(2)->get_int()));
5080   constraint(ALLOC_IN_RC(ptr_reg));
5081   match(AddP (DecodeN reg) (LShiftL (ConvI2L ireg) scale));
5082   op_cost(0);
5083   format %{ "$reg, $ireg sxtw($scale), 0, I2L\t# narrow" %}
5084   interface(MEMORY_INTER) %{
5085     base($reg);
5086     index($ireg);
5087     scale($scale);
5088     disp(0x0);
5089   %}
5090 %}
5091 
5092 operand indIndexScaledN(iRegN reg, iRegL lreg, immIScale scale)
5093 %{
5094   predicate(Universe::narrow_oop_shift() == 0 &&
5095             size_fits_all_mem_uses(n->as_AddP(),
5096                                    n->in(AddPNode::Offset)->in(2)->get_int()));
5097   constraint(ALLOC_IN_RC(ptr_reg));
5098   match(AddP (DecodeN reg) (LShiftL lreg scale));
5099   op_cost(0);
5100   format %{ "$reg, $lreg lsl($scale)\t# narrow" %}
5101   interface(MEMORY_INTER) %{
5102     base($reg);
5103     index($lreg);
5104     scale($scale);
5105     disp(0x0);
5106   %}
5107 %}
5108 
5109 operand indIndexN(iRegN reg, iRegL lreg)
5110 %{
5111   predicate(Universe::narrow_oop_shift() == 0);
5112   constraint(ALLOC_IN_RC(ptr_reg));
5113   match(AddP (DecodeN reg) lreg);
5114   op_cost(0);
5115   format %{ "$reg, $lreg\t# narrow" %}
5116   interface(MEMORY_INTER) %{
5117     base($reg);
5118     index($lreg);
5119     scale(0x0);
5120     disp(0x0);
5121   %}
5122 %}
5123 
5124 operand indOffIN(iRegN reg, immIOffset off)
5125 %{
5126   predicate(Universe::narrow_oop_shift() == 0);
5127   constraint(ALLOC_IN_RC(ptr_reg));
5128   match(AddP (DecodeN reg) off);
5129   op_cost(0);
5130   format %{ "[$reg, $off]\t# narrow" %}
5131   interface(MEMORY_INTER) %{
5132     base($reg);
5133     index(0xffffffff);
5134     scale(0x0);
5135     disp($off);
5136   %}
5137 %}
5138 
5139 operand indOffLN(iRegN reg, immLoffset off)
5140 %{
5141   predicate(Universe::narrow_oop_shift() == 0);
5142   constraint(ALLOC_IN_RC(ptr_reg));
5143   match(AddP (DecodeN reg) off);
5144   op_cost(0);
5145   format %{ "[$reg, $off]\t# narrow" %}
5146   interface(MEMORY_INTER) %{
5147     base($reg);
5148     index(0xffffffff);
5149     scale(0x0);
5150     disp($off);
5151   %}
5152 %}
5153 
5154 
5155 
5156 // AArch64 opto stubs need to write to the pc slot in the thread anchor
5157 operand thread_anchor_pc(thread_RegP reg, immL_pc_off off)
5158 %{
5159   constraint(ALLOC_IN_RC(ptr_reg));
5160   match(AddP reg off);
5161   op_cost(0);
5162   format %{ "[$reg, $off]" %}
5163   interface(MEMORY_INTER) %{
5164     base($reg);
5165     index(0xffffffff);
5166     scale(0x0);
5167     disp($off);
5168   %}
5169 %}
5170 
5171 //----------Special Memory Operands--------------------------------------------
5172 // Stack Slot Operand - This operand is used for loading and storing temporary
5173 //                      values on the stack where a match requires a value to
5174 //                      flow through memory.
operand stackSlotP(sRegP reg)
%{
  constraint(ALLOC_IN_RC(stack_slots));
  op_cost(100);
  // No match rule because this operand is only generated in matching
  // match(RegP);
  format %{ "[$reg]" %}
  interface(MEMORY_INTER) %{
    base(0x1e);  // stack pointer (encoding 0x1e); "RSP" comment was x86 legacy
    index(0x0);  // No Index
    scale(0x0);  // No Scale
    disp($reg);  // Stack Offset
  %}
%}
5189 
5190 operand stackSlotI(sRegI reg)
5191 %{
5192   constraint(ALLOC_IN_RC(stack_slots));
5193   // No match rule because this operand is only generated in matching
5194   // match(RegI);
5195   format %{ "[$reg]" %}
5196   interface(MEMORY_INTER) %{
5197     base(0x1e);  // RSP
5198     index(0x0);  // No Index
5199     scale(0x0);  // No Scale
5200     disp($reg);  // Stack Offset
5201   %}
5202 %}
5203 
5204 operand stackSlotF(sRegF reg)
5205 %{
5206   constraint(ALLOC_IN_RC(stack_slots));
5207   // No match rule because this operand is only generated in matching
5208   // match(RegF);
5209   format %{ "[$reg]" %}
5210   interface(MEMORY_INTER) %{
5211     base(0x1e);  // RSP
5212     index(0x0);  // No Index
5213     scale(0x0);  // No Scale
5214     disp($reg);  // Stack Offset
5215   %}
5216 %}
5217 
5218 operand stackSlotD(sRegD reg)
5219 %{
5220   constraint(ALLOC_IN_RC(stack_slots));
5221   // No match rule because this operand is only generated in matching
5222   // match(RegD);
5223   format %{ "[$reg]" %}
5224   interface(MEMORY_INTER) %{
5225     base(0x1e);  // RSP
5226     index(0x0);  // No Index
5227     scale(0x0);  // No Scale
5228     disp($reg);  // Stack Offset
5229   %}
5230 %}
5231 
5232 operand stackSlotL(sRegL reg)
5233 %{
5234   constraint(ALLOC_IN_RC(stack_slots));
5235   // No match rule because this operand is only generated in matching
5236   // match(RegL);
5237   format %{ "[$reg]" %}
5238   interface(MEMORY_INTER) %{
5239     base(0x1e);  // RSP
5240     index(0x0);  // No Index
5241     scale(0x0);  // No Scale
5242     disp($reg);  // Stack Offset
5243   %}
5244 %}
5245 
5246 // Operands for expressing Control Flow
5247 // NOTE: Label is a predefined operand which should not be redefined in
5248 //       the AD file. It is generically handled within the ADLC.
5249 
5250 //----------Conditional Branch Operands----------------------------------------
5251 // Comparison Op  - This is the operation of the comparison, and is limited to
5252 //                  the following set of codes:
5253 //                  L (<), LE (<=), G (>), GE (>=), E (==), NE (!=)
5254 //
5255 // Other attributes of the comparison, such as unsignedness, are specified
5256 // by the comparison instruction that sets a condition code flags register.
5257 // That result is represented by a flags operand whose subtype is appropriate
5258 // to the unsignedness (etc.) of the comparison.
5259 //
5260 // Later, the instruction which matches both the Comparison Op (a Bool) and
5261 // the flags (produced by the Cmp) specifies the coding of the comparison op
5262 // by matching a specific subtype of Bool operand below, such as cmpOpU.
5263 
5264 // used for signed integral comparisons and fp comparisons
5265 
5266 operand cmpOp()
5267 %{
5268   match(Bool);
5269 
5270   format %{ "" %}
5271   interface(COND_INTER) %{
5272     equal(0x0, "eq");
5273     not_equal(0x1, "ne");
5274     less(0xb, "lt");
5275     greater_equal(0xa, "ge");
5276     less_equal(0xd, "le");
5277     greater(0xc, "gt");
5278     overflow(0x6, "vs");
5279     no_overflow(0x7, "vc");
5280   %}
5281 %}
5282 
5283 // used for unsigned integral comparisons
5284 
5285 operand cmpOpU()
5286 %{
5287   match(Bool);
5288 
5289   format %{ "" %}
5290   interface(COND_INTER) %{
5291     equal(0x0, "eq");
5292     not_equal(0x1, "ne");
5293     less(0x3, "lo");
5294     greater_equal(0x2, "hs");
5295     less_equal(0x9, "ls");
5296     greater(0x8, "hi");
5297     overflow(0x6, "vs");
5298     no_overflow(0x7, "vc");
5299   %}
5300 %}
5301 
5302 // Special operand allowing long args to int ops to be truncated for free
5303 
5304 operand iRegL2I(iRegL reg) %{
5305 
5306   op_cost(0);
5307 
5308   match(ConvL2I reg);
5309 
5310   format %{ "l2i($reg)" %}
5311 
5312   interface(REG_INTER)
5313 %}
5314 
5315 opclass vmem4(indirect, indIndex, indOffI4, indOffL4);
5316 opclass vmem8(indirect, indIndex, indOffI8, indOffL8);
5317 opclass vmem16(indirect, indIndex, indOffI16, indOffL16);
5318 
5319 //----------OPERAND CLASSES----------------------------------------------------
// Operand Classes are groups of operands that are used to simplify
5321 // instruction definitions by not requiring the AD writer to specify
5322 // separate instructions for every form of operand when the
5323 // instruction accepts multiple operand types with the same basic
5324 // encoding and format. The classic case of this is memory operands.
5325 
5326 // memory is used to define read/write location for load/store
5327 // instruction defs. we can turn a memory op into an Address
5328 
5329 opclass memory(indirect, indIndexScaledOffsetI, indIndexScaledOffsetL, indIndexOffsetI2L, indIndexScaledOffsetI2L, indIndexScaled, indIndexScaledI2L, indIndex, indOffI, indOffL,
5330                indirectN, indIndexScaledOffsetIN, indIndexScaledOffsetLN, indIndexOffsetI2LN, indIndexScaledOffsetI2LN, indIndexScaledN, indIndexScaledI2LN, indIndexN, indOffIN, indOffLN);
5335 // iRegIorL2I is used for src inputs in rules for 32 bit int (I)
5336 // operations. it allows the src to be either an iRegI or a (ConvL2I
5337 // iRegL). in the latter case the l2i normally planted for a ConvL2I
5338 // can be elided because the 32-bit instruction will just employ the
5339 // lower 32 bits anyway.
5340 //
5341 // n.b. this does not elide all L2I conversions. if the truncated
5342 // value is consumed by more than one operation then the ConvL2I
5343 // cannot be bundled into the consuming nodes so an l2i gets planted
5344 // (actually a movw $dst $src) and the downstream instructions consume
5345 // the result of the l2i as an iRegI input. That's a shame since the
5346 // movw is actually redundant but its not too costly.
5347 
5348 opclass iRegIorL2I(iRegI, iRegL2I);
5349 
5350 //----------PIPELINE-----------------------------------------------------------
5351 // Rules which define the behavior of the target architectures pipeline.
5352 
5353 // For specific pipelines, eg A53, define the stages of that pipeline
5354 //pipe_desc(ISS, EX1, EX2, WR);
5355 #define ISS S0
5356 #define EX1 S1
5357 #define EX2 S2
5358 #define WR  S3
5359 
5360 // Integer ALU reg operation
5361 pipeline %{
5362 
5363 attributes %{
5364   // ARM instructions are of fixed length
  fixed_size_instructions;        // Fixed size instructions
5366   max_instructions_per_bundle = 2;   // A53 = 2, A57 = 4
5367   // ARM instructions come in 32-bit word units
5368   instruction_unit_size = 4;         // An instruction is 4 bytes long
5369   instruction_fetch_unit_size = 64;  // The processor fetches one line
5370   instruction_fetch_units = 1;       // of 64 bytes
5371 
5372   // List of nop instructions
5373   nops( MachNop );
5374 %}
5375 
5376 // We don't use an actual pipeline model so don't care about resources
5377 // or description. we do use pipeline classes to introduce fixed
5378 // latencies
5379 
5380 //----------RESOURCES----------------------------------------------------------
5381 // Resources are the functional units available to the machine
5382 
5383 resources( INS0, INS1, INS01 = INS0 | INS1,
5384            ALU0, ALU1, ALU = ALU0 | ALU1,
5385            MAC,
5386            DIV,
5387            BRANCH,
5388            LDST,
5389            NEON_FP);
5390 
5391 //----------PIPELINE DESCRIPTION-----------------------------------------------
5392 // Pipeline Description specifies the stages in the machine's pipeline
5393 
5394 // Define the pipeline as a generic 6 stage pipeline
5395 pipe_desc(S0, S1, S2, S3, S4, S5);
5396 
5397 //----------PIPELINE CLASSES---------------------------------------------------
5398 // Pipeline Classes describe the stages in which input and output are
5399 // referenced by the hardware pipeline.
5400 
5401 pipe_class fp_dop_reg_reg_s(vRegF dst, vRegF src1, vRegF src2)
5402 %{
5403   single_instruction;
5404   src1   : S1(read);
5405   src2   : S2(read);
5406   dst    : S5(write);
5407   INS01  : ISS;
5408   NEON_FP : S5;
5409 %}
5410 
5411 pipe_class fp_dop_reg_reg_d(vRegD dst, vRegD src1, vRegD src2)
5412 %{
5413   single_instruction;
5414   src1   : S1(read);
5415   src2   : S2(read);
5416   dst    : S5(write);
5417   INS01  : ISS;
5418   NEON_FP : S5;
5419 %}
5420 
5421 pipe_class fp_uop_s(vRegF dst, vRegF src)
5422 %{
5423   single_instruction;
5424   src    : S1(read);
5425   dst    : S5(write);
5426   INS01  : ISS;
5427   NEON_FP : S5;
5428 %}
5429 
5430 pipe_class fp_uop_d(vRegD dst, vRegD src)
5431 %{
5432   single_instruction;
5433   src    : S1(read);
5434   dst    : S5(write);
5435   INS01  : ISS;
5436   NEON_FP : S5;
5437 %}
5438 
5439 pipe_class fp_d2f(vRegF dst, vRegD src)
5440 %{
5441   single_instruction;
5442   src    : S1(read);
5443   dst    : S5(write);
5444   INS01  : ISS;
5445   NEON_FP : S5;
5446 %}
5447 
5448 pipe_class fp_f2d(vRegD dst, vRegF src)
5449 %{
5450   single_instruction;
5451   src    : S1(read);
5452   dst    : S5(write);
5453   INS01  : ISS;
5454   NEON_FP : S5;
5455 %}
5456 
5457 pipe_class fp_f2i(iRegINoSp dst, vRegF src)
5458 %{
5459   single_instruction;
5460   src    : S1(read);
5461   dst    : S5(write);
5462   INS01  : ISS;
5463   NEON_FP : S5;
5464 %}
5465 
5466 pipe_class fp_f2l(iRegLNoSp dst, vRegF src)
5467 %{
5468   single_instruction;
5469   src    : S1(read);
5470   dst    : S5(write);
5471   INS01  : ISS;
5472   NEON_FP : S5;
5473 %}
5474 
5475 pipe_class fp_i2f(vRegF dst, iRegIorL2I src)
5476 %{
5477   single_instruction;
5478   src    : S1(read);
5479   dst    : S5(write);
5480   INS01  : ISS;
5481   NEON_FP : S5;
5482 %}
5483 
5484 pipe_class fp_l2f(vRegF dst, iRegL src)
5485 %{
5486   single_instruction;
5487   src    : S1(read);
5488   dst    : S5(write);
5489   INS01  : ISS;
5490   NEON_FP : S5;
5491 %}
5492 
5493 pipe_class fp_d2i(iRegINoSp dst, vRegD src)
5494 %{
5495   single_instruction;
5496   src    : S1(read);
5497   dst    : S5(write);
5498   INS01  : ISS;
5499   NEON_FP : S5;
5500 %}
5501 
5502 pipe_class fp_d2l(iRegLNoSp dst, vRegD src)
5503 %{
5504   single_instruction;
5505   src    : S1(read);
5506   dst    : S5(write);
5507   INS01  : ISS;
5508   NEON_FP : S5;
5509 %}
5510 
5511 pipe_class fp_i2d(vRegD dst, iRegIorL2I src)
5512 %{
5513   single_instruction;
5514   src    : S1(read);
5515   dst    : S5(write);
5516   INS01  : ISS;
5517   NEON_FP : S5;
5518 %}
5519 
5520 pipe_class fp_l2d(vRegD dst, iRegIorL2I src)
5521 %{
5522   single_instruction;
5523   src    : S1(read);
5524   dst    : S5(write);
5525   INS01  : ISS;
5526   NEON_FP : S5;
5527 %}
5528 
// FP divide: sources read in S1/S2, result in S5; can only dual
// issue in slot 0 (INS0).
pipe_class fp_div_s(vRegF dst, vRegF src1, vRegF src2)
%{
  single_instruction;
  src1   : S1(read);
  src2   : S2(read);
  dst    : S5(write);
  INS0   : ISS;
  NEON_FP : S5;
%}

pipe_class fp_div_d(vRegD dst, vRegD src1, vRegD src2)
%{
  single_instruction;
  src1   : S1(read);
  src2   : S2(read);
  dst    : S5(write);
  INS0   : ISS;
  NEON_FP : S5;
%}

// FP conditional select: flags and both sources read in S1, result
// available in S3.
pipe_class fp_cond_reg_reg_s(vRegF dst, vRegF src1, vRegF src2, rFlagsReg cr)
%{
  single_instruction;
  cr     : S1(read);
  src1   : S1(read);
  src2   : S1(read);
  dst    : S3(write);
  INS01  : ISS;
  NEON_FP : S3;
%}

pipe_class fp_cond_reg_reg_d(vRegD dst, vRegD src1, vRegD src2, rFlagsReg cr)
%{
  single_instruction;
  cr     : S1(read);
  src1   : S1(read);
  src2   : S1(read);
  dst    : S3(write);
  INS01  : ISS;
  NEON_FP : S3;
%}

// FP move immediate: no source operands, result in S3.
pipe_class fp_imm_s(vRegF dst)
%{
  single_instruction;
  dst    : S3(write);
  INS01  : ISS;
  NEON_FP : S3;
%}

pipe_class fp_imm_d(vRegD dst)
%{
  single_instruction;
  dst    : S3(write);
  INS01  : ISS;
  NEON_FP : S3;
%}

// FP load of a constant (eg. from the constant table): result in S4.
pipe_class fp_load_constant_s(vRegF dst)
%{
  single_instruction;
  dst    : S4(write);
  INS01  : ISS;
  NEON_FP : S4;
%}

pipe_class fp_load_constant_d(vRegD dst)
%{
  single_instruction;
  dst    : S4(write);
  INS01  : ISS;
  NEON_FP : S4;
%}
5602 
// Vector integer multiply. Throughout the vector classes the 64-bit
// (vecD) variants dual issue in either slot (INS01) while the 128-bit
// (vecX) variants can only issue in slot 0 (INS0).
pipe_class vmul64(vecD dst, vecD src1, vecD src2)
%{
  single_instruction;
  dst    : S5(write);
  src1   : S1(read);
  src2   : S1(read);
  INS01  : ISS;
  NEON_FP : S5;
%}

pipe_class vmul128(vecX dst, vecX src1, vecX src2)
%{
  single_instruction;
  dst    : S5(write);
  src1   : S1(read);
  src2   : S1(read);
  INS0   : ISS;
  NEON_FP : S5;
%}

// Vector multiply-accumulate: dst is also read in S1 as the
// accumulator input.
pipe_class vmla64(vecD dst, vecD src1, vecD src2)
%{
  single_instruction;
  dst    : S5(write);
  src1   : S1(read);
  src2   : S1(read);
  dst    : S1(read);
  INS01  : ISS;
  NEON_FP : S5;
%}

pipe_class vmla128(vecX dst, vecX src1, vecX src2)
%{
  single_instruction;
  dst    : S5(write);
  src1   : S1(read);
  src2   : S1(read);
  dst    : S1(read);
  INS0   : ISS;
  NEON_FP : S5;
%}

// Vector integer dual op: sources read in S2, result in S4.
pipe_class vdop64(vecD dst, vecD src1, vecD src2)
%{
  single_instruction;
  dst    : S4(write);
  src1   : S2(read);
  src2   : S2(read);
  INS01  : ISS;
  NEON_FP : S4;
%}

pipe_class vdop128(vecX dst, vecX src1, vecX src2)
%{
  single_instruction;
  dst    : S4(write);
  src1   : S2(read);
  src2   : S2(read);
  INS0   : ISS;
  NEON_FP : S4;
%}
5664 
// Vector logical ops: sources read in S2, result in S3.
pipe_class vlogical64(vecD dst, vecD src1, vecD src2)
%{
  single_instruction;
  dst    : S3(write);
  src1   : S2(read);
  src2   : S2(read);
  INS01  : ISS;
  NEON_FP : S3;
%}

pipe_class vlogical128(vecX dst, vecX src1, vecX src2)
%{
  single_instruction;
  dst    : S3(write);
  src1   : S2(read);
  src2   : S2(read);
  INS0   : ISS;
  NEON_FP : S3;
%}

// Vector shift by a register-held amount.
pipe_class vshift64(vecD dst, vecD src, vecX shift)
%{
  single_instruction;
  dst    : S3(write);
  src    : S1(read);
  shift  : S1(read);
  INS01  : ISS;
  NEON_FP : S3;
%}

pipe_class vshift128(vecX dst, vecX src, vecX shift)
%{
  single_instruction;
  dst    : S3(write);
  src    : S1(read);
  shift  : S1(read);
  INS0   : ISS;
  NEON_FP : S3;
%}

// Vector shift by immediate: the shift amount is an immediate, so
// there is no register shift operand to stage.
pipe_class vshift64_imm(vecD dst, vecD src, immI shift)
%{
  single_instruction;
  dst    : S3(write);
  src    : S1(read);
  INS01  : ISS;
  NEON_FP : S3;
%}

pipe_class vshift128_imm(vecX dst, vecX src, immI shift)
%{
  single_instruction;
  dst    : S3(write);
  src    : S1(read);
  INS0   : ISS;
  NEON_FP : S3;
%}
5722 
// Vector FP dual op: sources read in S1, result in S5.
pipe_class vdop_fp64(vecD dst, vecD src1, vecD src2)
%{
  single_instruction;
  dst    : S5(write);
  src1   : S1(read);
  src2   : S1(read);
  INS01  : ISS;
  NEON_FP : S5;
%}

pipe_class vdop_fp128(vecX dst, vecX src1, vecX src2)
%{
  single_instruction;
  dst    : S5(write);
  src1   : S1(read);
  src2   : S1(read);
  INS0   : ISS;
  NEON_FP : S5;
%}

// Vector FP multiply/divide. Note both widths issue only in slot 0
// (INS0), unlike the integer vmul64 class.
pipe_class vmuldiv_fp64(vecD dst, vecD src1, vecD src2)
%{
  single_instruction;
  dst    : S5(write);
  src1   : S1(read);
  src2   : S1(read);
  INS0   : ISS;
  NEON_FP : S5;
%}

pipe_class vmuldiv_fp128(vecX dst, vecX src1, vecX src2)
%{
  single_instruction;
  dst    : S5(write);
  src1   : S1(read);
  src2   : S1(read);
  INS0   : ISS;
  NEON_FP : S5;
%}

// Vector FP square root (128-bit form only).
pipe_class vsqrt_fp128(vecX dst, vecX src)
%{
  single_instruction;
  dst    : S5(write);
  src    : S1(read);
  INS0   : ISS;
  NEON_FP : S5;
%}

// Vector FP unary op.
pipe_class vunop_fp64(vecD dst, vecD src)
%{
  single_instruction;
  dst    : S5(write);
  src    : S1(read);
  INS01  : ISS;
  NEON_FP : S5;
%}

pipe_class vunop_fp128(vecX dst, vecX src)
%{
  single_instruction;
  dst    : S5(write);
  src    : S1(read);
  INS0   : ISS;
  NEON_FP : S5;
%}
5789 
// Duplicate a general register into all vector lanes.
pipe_class vdup_reg_reg64(vecD dst, iRegI src)
%{
  single_instruction;
  dst    : S3(write);
  src    : S1(read);
  INS01  : ISS;
  NEON_FP : S3;
%}

pipe_class vdup_reg_reg128(vecX dst, iRegI src)
%{
  single_instruction;
  dst    : S3(write);
  src    : S1(read);
  INS01  : ISS;
  NEON_FP : S3;
%}

// Duplicate a float register into all vector lanes.
pipe_class vdup_reg_freg64(vecD dst, vRegF src)
%{
  single_instruction;
  dst    : S3(write);
  src    : S1(read);
  INS01  : ISS;
  NEON_FP : S3;
%}

pipe_class vdup_reg_freg128(vecX dst, vRegF src)
%{
  single_instruction;
  dst    : S3(write);
  src    : S1(read);
  INS01  : ISS;
  NEON_FP : S3;
%}

// Duplicate a double register into both 128-bit vector lanes.
pipe_class vdup_reg_dreg128(vecX dst, vRegD src)
%{
  single_instruction;
  dst    : S3(write);
  src    : S1(read);
  INS01  : ISS;
  NEON_FP : S3;
%}

// Vector move immediate: no source operands, result in S3.
pipe_class vmovi_reg_imm64(vecD dst)
%{
  single_instruction;
  dst    : S3(write);
  INS01  : ISS;
  NEON_FP : S3;
%}

pipe_class vmovi_reg_imm128(vecX dst)
%{
  single_instruction;
  dst    : S3(write);
  INS0   : ISS;
  NEON_FP : S3;
%}
5850 
// Vector load: address operands consumed at issue (ISS), loaded
// value available in S5.
// NOTE(review): dst is staged at S5 while NEON_FP is held only to
// S3 - confirm this asymmetry is intended.
pipe_class vload_reg_mem64(vecD dst, vmem8 mem)
%{
  single_instruction;
  dst    : S5(write);
  mem    : ISS(read);
  INS01  : ISS;
  NEON_FP : S3;
%}

pipe_class vload_reg_mem128(vecX dst, vmem16 mem)
%{
  single_instruction;
  dst    : S5(write);
  mem    : ISS(read);
  INS01  : ISS;
  NEON_FP : S3;
%}

// Vector store: address consumed at issue, data register read in S2.
pipe_class vstore_reg_mem64(vecD src, vmem8 mem)
%{
  single_instruction;
  mem    : ISS(read);
  src    : S2(read);
  INS01  : ISS;
  NEON_FP : S3;
%}
5877 
// Vector store, 128-bit: address consumed at issue, data read in S2.
// The source is a vecX (128-bit) register, consistent with
// vload_reg_mem128 above; it was previously declared vecD.
pipe_class vstore_reg_mem128(vecX src, vmem16 mem)
%{
  single_instruction;
  mem    : ISS(read);
  src    : S2(read);
  INS01  : ISS;
  NEON_FP : S3;
%}
5886 
//------- Integer ALU operations --------------------------

// Integer ALU reg-reg operation
// Operands needed in EX1, result generated in EX2
// Eg.  ADD     x0, x1, x2
pipe_class ialu_reg_reg(iRegI dst, iRegI src1, iRegI src2)
%{
  single_instruction;
  dst    : EX2(write);
  src1   : EX1(read);
  src2   : EX1(read);
  INS01  : ISS; // Dual issue as instruction 0 or 1
  ALU    : EX2;
%}

// Integer ALU reg-reg operation with constant shift
// Shifted register must be available in LATE_ISS instead of EX1
// Eg.  ADD     x0, x1, x2, LSL #2
pipe_class ialu_reg_reg_shift(iRegI dst, iRegI src1, iRegI src2, immI shift)
%{
  single_instruction;
  dst    : EX2(write);
  src1   : EX1(read);
  src2   : ISS(read);
  INS01  : ISS;
  ALU    : EX2;
%}

// Integer ALU reg operation with constant shift
// Eg.  LSL     x0, x1, #shift
pipe_class ialu_reg_shift(iRegI dst, iRegI src1)
%{
  single_instruction;
  dst    : EX2(write);
  src1   : ISS(read);
  INS01  : ISS;
  ALU    : EX2;
%}

// Integer ALU reg-reg operation with variable shift
// Both operands must be available in LATE_ISS instead of EX1
// Result is available in EX1 instead of EX2
// Eg.  LSLV    x0, x1, x2
pipe_class ialu_reg_reg_vshift(iRegI dst, iRegI src1, iRegI src2)
%{
  single_instruction;
  dst    : EX1(write);
  src1   : ISS(read);
  src2   : ISS(read);
  INS01  : ISS;
  ALU    : EX1;
%}

// Integer ALU reg-reg operation with extract
// As for _vshift above, but result generated in EX2
// Eg.  EXTR    x0, x1, x2, #N
pipe_class ialu_reg_reg_extr(iRegI dst, iRegI src1, iRegI src2)
%{
  single_instruction;
  dst    : EX2(write);
  src1   : ISS(read);
  src2   : ISS(read);
  INS1   : ISS; // Can only dual issue as Instruction 1
  ALU    : EX1; // NOTE(review): ALU held only to EX1 although dst is
                // written at EX2 ("result generated in EX2" above) -
                // confirm the stage is intended.
%}

// Integer ALU reg operation
// Eg.  NEG     x0, x1
pipe_class ialu_reg(iRegI dst, iRegI src)
%{
  single_instruction;
  dst    : EX2(write);
  src    : EX1(read);
  INS01  : ISS;
  ALU    : EX2;
%}

// Integer ALU reg immediate operation
// Eg.  ADD     x0, x1, #N
pipe_class ialu_reg_imm(iRegI dst, iRegI src1)
%{
  single_instruction;
  dst    : EX2(write);
  src1   : EX1(read);
  INS01  : ISS;
  ALU    : EX2;
%}

// Integer ALU immediate operation (no source operands)
// Eg.  MOV     x0, #N
pipe_class ialu_imm(iRegI dst)
%{
  single_instruction;
  dst    : EX1(write);
  INS01  : ISS;
  ALU    : EX1;
%}
5984 
//------- Compare operation -------------------------------

// Compare reg-reg
// Eg.  CMP     x0, x1
pipe_class icmp_reg_reg(rFlagsReg cr, iRegI op1, iRegI op2)
%{
  single_instruction;
//  fixed_latency(16);
  cr     : EX2(write);
  op1    : EX1(read);
  op2    : EX1(read);
  INS01  : ISS;
  ALU    : EX2;
%}

// Compare reg-imm
// Eg.  CMP     x0, #N
pipe_class icmp_reg_imm(rFlagsReg cr, iRegI op1)
%{
  single_instruction;
//  fixed_latency(16);
  cr     : EX2(write);
  op1    : EX1(read);
  INS01  : ISS;
  ALU    : EX2;
%}

//------- Conditional instructions ------------------------

// Conditional no operands
// Eg.  CSINC   x0, zr, zr, <cond>
pipe_class icond_none(iRegI dst, rFlagsReg cr)
%{
  single_instruction;
  cr     : EX1(read);
  dst    : EX2(write);
  INS01  : ISS;
  ALU    : EX2;
%}

// Conditional 2 operand
// EG.  CSEL    X0, X1, X2, <cond>
pipe_class icond_reg_reg(iRegI dst, iRegI src1, iRegI src2, rFlagsReg cr)
%{
  single_instruction;
  cr     : EX1(read);
  src1   : EX1(read);
  src2   : EX1(read);
  dst    : EX2(write);
  INS01  : ISS;
  ALU    : EX2;
%}

// Conditional 1 operand (single source register)
// EG.  CSEL    X0, X1, zr, <cond>
pipe_class icond_reg(iRegI dst, iRegI src, rFlagsReg cr)
%{
  single_instruction;
  cr     : EX1(read);
  src    : EX1(read);
  dst    : EX2(write);
  INS01  : ISS;
  ALU    : EX2;
%}
6049 
//------- Multiply pipeline operations --------------------

// Multiply reg-reg (32 bit)
// Eg.  MUL     w0, w1, w2
pipe_class imul_reg_reg(iRegI dst, iRegI src1, iRegI src2)
%{
  single_instruction;
  dst    : WR(write);
  src1   : ISS(read);
  src2   : ISS(read);
  INS01  : ISS;
  MAC    : WR;
%}

// Multiply accumulate (32 bit)
// Eg.  MADD    w0, w1, w2, w3
pipe_class imac_reg_reg(iRegI dst, iRegI src1, iRegI src2, iRegI src3)
%{
  single_instruction;
  dst    : WR(write);
  src1   : ISS(read);
  src2   : ISS(read);
  src3   : ISS(read);
  INS01  : ISS;
  MAC    : WR;
%}

// Multiply reg-reg (64 bit)
// Eg.  MUL     x0, x1, x2
pipe_class lmul_reg_reg(iRegI dst, iRegI src1, iRegI src2)
%{
  single_instruction;
  fixed_latency(3); // Maximum latency for 64 bit mul
  dst    : WR(write);
  src1   : ISS(read);
  src2   : ISS(read);
  INS01  : ISS;
  MAC    : WR;
%}

// Multiply accumulate (64 bit)
// Eg.  MADD    x0, x1, x2, x3
pipe_class lmac_reg_reg(iRegI dst, iRegI src1, iRegI src2, iRegI src3)
%{
  single_instruction;
  fixed_latency(3); // Maximum latency for 64 bit mul
  dst    : WR(write);
  src1   : ISS(read);
  src2   : ISS(read);
  src3   : ISS(read);
  INS01  : ISS;
  MAC    : WR;
%}

//------- Divide pipeline operations --------------------

// 32 bit divide
// Eg.  SDIV    w0, w1, w2
pipe_class idiv_reg_reg(iRegI dst, iRegI src1, iRegI src2)
%{
  single_instruction;
  fixed_latency(8); // Maximum latency for 32 bit divide
  dst    : WR(write);
  src1   : ISS(read);
  src2   : ISS(read);
  INS0   : ISS; // Can only dual issue as instruction 0
  DIV    : WR;
%}

// 64 bit divide
// Eg.  SDIV    x0, x1, x2
pipe_class ldiv_reg_reg(iRegI dst, iRegI src1, iRegI src2)
%{
  single_instruction;
  fixed_latency(16); // Maximum latency for 64 bit divide
  dst    : WR(write);
  src1   : ISS(read);
  src2   : ISS(read);
  INS0   : ISS; // Can only dual issue as instruction 0
  DIV    : WR;
%}
6128 
//------- Load pipeline operations ------------------------

// Load - prefetch
// Eg.  PFRM    <mem>
pipe_class iload_prefetch(memory mem)
%{
  single_instruction;
  mem    : ISS(read);
  INS01  : ISS;
  LDST   : WR;
%}

// Load - reg, mem
// Eg.  LDR     x0, <mem>
pipe_class iload_reg_mem(iRegI dst, memory mem)
%{
  single_instruction;
  dst    : WR(write);
  mem    : ISS(read);
  INS01  : ISS;
  LDST   : WR;
%}

// Load - reg, reg
// Eg.  LDR     x0, [sp, x1]
pipe_class iload_reg_reg(iRegI dst, iRegI src)
%{
  single_instruction;
  dst    : WR(write);
  src    : ISS(read);
  INS01  : ISS;
  LDST   : WR;
%}

//------- Store pipeline operations -----------------------

// Store - zr, mem
// Eg.  STR     zr, <mem>
pipe_class istore_mem(memory mem)
%{
  single_instruction;
  mem    : ISS(read);
  INS01  : ISS;
  LDST   : WR;
%}

// Store - reg, mem
// Eg.  STR     x0, <mem>
pipe_class istore_reg_mem(iRegI src, memory mem)
%{
  single_instruction;
  mem    : ISS(read);
  src    : EX2(read);
  INS01  : ISS;
  LDST   : WR;
%}

// Store - reg, reg
// Eg. STR      x0, [sp, x1]
// Note: dst here is the address register, read at issue.
pipe_class istore_reg_reg(iRegI dst, iRegI src)
%{
  single_instruction;
  dst    : ISS(read);
  src    : EX2(read);
  INS01  : ISS;
  LDST   : WR;
%}
6196 
//------- Branch pipeline operations ----------------------

// Branch
pipe_class pipe_branch()
%{
  single_instruction;
  INS01  : ISS;
  BRANCH : EX1;
%}

// Conditional branch
pipe_class pipe_branch_cond(rFlagsReg cr)
%{
  single_instruction;
  cr     : EX1(read);
  INS01  : ISS;
  BRANCH : EX1;
%}

// Compare & Branch
// EG.  CBZ/CBNZ
pipe_class pipe_cmp_branch(iRegI op1)
%{
  single_instruction;
  op1    : EX1(read);
  INS01  : ISS;
  BRANCH : EX1;
%}

//------- Synchronisation operations ----------------------

// Any operation requiring serialization.
// EG.  DMB/Atomic Ops/Load Acquire/Str Release
pipe_class pipe_serial()
%{
  single_instruction;
  force_serialization;
  fixed_latency(16);
  INS01  : ISS(2); // Cannot dual issue with any other instruction
  LDST   : WR;
%}

// Generic big/slow expanded idiom - also serialized
pipe_class pipe_slow()
%{
  instruction_count(10);
  multiple_bundles;
  force_serialization;
  fixed_latency(16);
  INS01  : ISS(2); // Cannot dual issue with any other instruction
  LDST   : WR;
%}

// Empty pipeline class
pipe_class pipe_class_empty()
%{
  single_instruction;
  fixed_latency(0);
%}

// Default pipeline class.
pipe_class pipe_class_default()
%{
  single_instruction;
  fixed_latency(2);
%}

// Pipeline class for compares.
pipe_class pipe_class_compare()
%{
  single_instruction;
  fixed_latency(16);
%}

// Pipeline class for memory operations.
pipe_class pipe_class_memory()
%{
  single_instruction;
  fixed_latency(16);
%}

// Pipeline class for call.
pipe_class pipe_class_call()
%{
  single_instruction;
  fixed_latency(100);
%}

// Define the class for the Nop node.
define %{
   MachNop = pipe_class_empty;
%}
6289 
6290 %}
6291 //----------INSTRUCTIONS-------------------------------------------------------
6292 //
6293 // match      -- States which machine-independent subtree may be replaced
6294 //               by this instruction.
6295 // ins_cost   -- The estimated cost of this instruction is used by instruction
6296 //               selection to identify a minimum cost tree of machine
6297 //               instructions that matches a tree of machine-independent
6298 //               instructions.
6299 // format     -- A string providing the disassembly for this instruction.
6300 //               The value of an instruction's operand may be inserted
6301 //               by referring to it with a '$' prefix.
6302 // opcode     -- Three instruction opcodes may be provided.  These are referred
6303 //               to within an encode class as $primary, $secondary, and $tertiary
//               respectively.  The primary opcode is commonly used to
6305 //               indicate the type of machine instruction, while secondary
6306 //               and tertiary are often used for prefix options or addressing
6307 //               modes.
6308 // ins_encode -- A list of encode classes with parameters. The encode class
6309 //               name must have been defined in an 'enc_class' specification
6310 //               in the encode section of the architecture description.
6311 
6312 // ============================================================================
6313 // Memory (Load/Store) Instructions
6314 
6315 // Load Instructions
6316 
6317 // Load Byte (8 bit signed)
6318 instruct loadB(iRegINoSp dst, memory mem)
6319 %{
6320   match(Set dst (LoadB mem));
6321   predicate(!needs_acquiring_load(n));
6322 
6323   ins_cost(4 * INSN_COST);
6324   format %{ "ldrsbw  $dst, $mem\t# byte" %}
6325 
6326   ins_encode(aarch64_enc_ldrsbw(dst, mem));
6327 
6328   ins_pipe(iload_reg_mem);
6329 %}
6330 
6331 // Load Byte (8 bit signed) into long
6332 instruct loadB2L(iRegLNoSp dst, memory mem)
6333 %{
6334   match(Set dst (ConvI2L (LoadB mem)));
6335   predicate(!needs_acquiring_load(n->in(1)));
6336 
6337   ins_cost(4 * INSN_COST);
6338   format %{ "ldrsb  $dst, $mem\t# byte" %}
6339 
6340   ins_encode(aarch64_enc_ldrsb(dst, mem));
6341 
6342   ins_pipe(iload_reg_mem);
6343 %}
6344 
6345 // Load Byte (8 bit unsigned)
6346 instruct loadUB(iRegINoSp dst, memory mem)
6347 %{
6348   match(Set dst (LoadUB mem));
6349   predicate(!needs_acquiring_load(n));
6350 
6351   ins_cost(4 * INSN_COST);
6352   format %{ "ldrbw  $dst, $mem\t# byte" %}
6353 
6354   ins_encode(aarch64_enc_ldrb(dst, mem));
6355 
6356   ins_pipe(iload_reg_mem);
6357 %}
6358 
6359 // Load Byte (8 bit unsigned) into long
6360 instruct loadUB2L(iRegLNoSp dst, memory mem)
6361 %{
6362   match(Set dst (ConvI2L (LoadUB mem)));
6363   predicate(!needs_acquiring_load(n->in(1)));
6364 
6365   ins_cost(4 * INSN_COST);
6366   format %{ "ldrb  $dst, $mem\t# byte" %}
6367 
6368   ins_encode(aarch64_enc_ldrb(dst, mem));
6369 
6370   ins_pipe(iload_reg_mem);
6371 %}
6372 
6373 // Load Short (16 bit signed)
6374 instruct loadS(iRegINoSp dst, memory mem)
6375 %{
6376   match(Set dst (LoadS mem));
6377   predicate(!needs_acquiring_load(n));
6378 
6379   ins_cost(4 * INSN_COST);
6380   format %{ "ldrshw  $dst, $mem\t# short" %}
6381 
6382   ins_encode(aarch64_enc_ldrshw(dst, mem));
6383 
6384   ins_pipe(iload_reg_mem);
6385 %}
6386 
6387 // Load Short (16 bit signed) into long
6388 instruct loadS2L(iRegLNoSp dst, memory mem)
6389 %{
6390   match(Set dst (ConvI2L (LoadS mem)));
6391   predicate(!needs_acquiring_load(n->in(1)));
6392 
6393   ins_cost(4 * INSN_COST);
6394   format %{ "ldrsh  $dst, $mem\t# short" %}
6395 
6396   ins_encode(aarch64_enc_ldrsh(dst, mem));
6397 
6398   ins_pipe(iload_reg_mem);
6399 %}
6400 
6401 // Load Char (16 bit unsigned)
6402 instruct loadUS(iRegINoSp dst, memory mem)
6403 %{
6404   match(Set dst (LoadUS mem));
6405   predicate(!needs_acquiring_load(n));
6406 
6407   ins_cost(4 * INSN_COST);
6408   format %{ "ldrh  $dst, $mem\t# short" %}
6409 
6410   ins_encode(aarch64_enc_ldrh(dst, mem));
6411 
6412   ins_pipe(iload_reg_mem);
6413 %}
6414 
6415 // Load Short/Char (16 bit unsigned) into long
6416 instruct loadUS2L(iRegLNoSp dst, memory mem)
6417 %{
6418   match(Set dst (ConvI2L (LoadUS mem)));
6419   predicate(!needs_acquiring_load(n->in(1)));
6420 
6421   ins_cost(4 * INSN_COST);
6422   format %{ "ldrh  $dst, $mem\t# short" %}
6423 
6424   ins_encode(aarch64_enc_ldrh(dst, mem));
6425 
6426   ins_pipe(iload_reg_mem);
6427 %}
6428 
6429 // Load Integer (32 bit signed)
6430 instruct loadI(iRegINoSp dst, memory mem)
6431 %{
6432   match(Set dst (LoadI mem));
6433   predicate(!needs_acquiring_load(n));
6434 
6435   ins_cost(4 * INSN_COST);
6436   format %{ "ldrw  $dst, $mem\t# int" %}
6437 
6438   ins_encode(aarch64_enc_ldrw(dst, mem));
6439 
6440   ins_pipe(iload_reg_mem);
6441 %}
6442 
6443 // Load Integer (32 bit signed) into long
6444 instruct loadI2L(iRegLNoSp dst, memory mem)
6445 %{
6446   match(Set dst (ConvI2L (LoadI mem)));
6447   predicate(!needs_acquiring_load(n->in(1)));
6448 
6449   ins_cost(4 * INSN_COST);
6450   format %{ "ldrsw  $dst, $mem\t# int" %}
6451 
6452   ins_encode(aarch64_enc_ldrsw(dst, mem));
6453 
6454   ins_pipe(iload_reg_mem);
6455 %}
6456 
6457 // Load Integer (32 bit unsigned) into long
6458 instruct loadUI2L(iRegLNoSp dst, memory mem, immL_32bits mask)
6459 %{
6460   match(Set dst (AndL (ConvI2L (LoadI mem)) mask));
6461   predicate(!needs_acquiring_load(n->in(1)->in(1)->as_Load()));
6462 
6463   ins_cost(4 * INSN_COST);
6464   format %{ "ldrw  $dst, $mem\t# int" %}
6465 
6466   ins_encode(aarch64_enc_ldrw(dst, mem));
6467 
6468   ins_pipe(iload_reg_mem);
6469 %}
6470 
// Load Long (64 bit signed)
instruct loadL(iRegLNoSp dst, memory mem)
%{
  match(Set dst (LoadL mem));
  predicate(!needs_acquiring_load(n));

  ins_cost(4 * INSN_COST);
  // Disassembly comment corrected: this is a 64-bit long load, not an int.
  format %{ "ldr  $dst, $mem\t# long" %}

  ins_encode(aarch64_enc_ldr(dst, mem));

  ins_pipe(iload_reg_mem);
%}
6484 
// Load Range (array length word)
instruct loadRange(iRegINoSp dst, memory mem)
%{
  match(Set dst (LoadRange mem));

  ins_cost(4 * INSN_COST);
  format %{ "ldrw  $dst, $mem\t# range" %}

  ins_encode(aarch64_enc_ldrw(dst, mem));

  ins_pipe(iload_reg_mem);
%}

// Load Pointer
instruct loadP(iRegPNoSp dst, memory mem)
%{
  match(Set dst (LoadP mem));
  predicate(!needs_acquiring_load(n));

  ins_cost(4 * INSN_COST);
  format %{ "ldr  $dst, $mem\t# ptr" %}

  ins_encode(aarch64_enc_ldr(dst, mem));

  ins_pipe(iload_reg_mem);
%}

// Load Compressed Pointer
instruct loadN(iRegNNoSp dst, memory mem)
%{
  match(Set dst (LoadN mem));
  predicate(!needs_acquiring_load(n));

  ins_cost(4 * INSN_COST);
  format %{ "ldrw  $dst, $mem\t# compressed ptr" %}

  ins_encode(aarch64_enc_ldrw(dst, mem));

  ins_pipe(iload_reg_mem);
%}

// Load Klass Pointer
instruct loadKlass(iRegPNoSp dst, memory mem)
%{
  match(Set dst (LoadKlass mem));
  predicate(!needs_acquiring_load(n));

  ins_cost(4 * INSN_COST);
  format %{ "ldr  $dst, $mem\t# class" %}

  ins_encode(aarch64_enc_ldr(dst, mem));

  ins_pipe(iload_reg_mem);
%}

// Load Narrow Klass Pointer
instruct loadNKlass(iRegNNoSp dst, memory mem)
%{
  match(Set dst (LoadNKlass mem));
  predicate(!needs_acquiring_load(n));

  ins_cost(4 * INSN_COST);
  format %{ "ldrw  $dst, $mem\t# compressed class ptr" %}

  ins_encode(aarch64_enc_ldrw(dst, mem));

  ins_pipe(iload_reg_mem);
%}

// Load Float
// Note: FP loads use pipe_class_memory rather than iload_reg_mem.
instruct loadF(vRegF dst, memory mem)
%{
  match(Set dst (LoadF mem));
  predicate(!needs_acquiring_load(n));

  ins_cost(4 * INSN_COST);
  format %{ "ldrs  $dst, $mem\t# float" %}

  ins_encode( aarch64_enc_ldrs(dst, mem) );

  ins_pipe(pipe_class_memory);
%}

// Load Double
instruct loadD(vRegD dst, memory mem)
%{
  match(Set dst (LoadD mem));
  predicate(!needs_acquiring_load(n));

  ins_cost(4 * INSN_COST);
  format %{ "ldrd  $dst, $mem\t# double" %}

  ins_encode( aarch64_enc_ldrd(dst, mem) );

  ins_pipe(pipe_class_memory);
%}
6581 
6582 
// Load Int Constant
instruct loadConI(iRegINoSp dst, immI src)
%{
  match(Set dst src);

  ins_cost(INSN_COST);
  format %{ "mov $dst, $src\t# int" %}

  ins_encode( aarch64_enc_movw_imm(dst, src) );

  ins_pipe(ialu_imm);
%}

// Load Long Constant
instruct loadConL(iRegLNoSp dst, immL src)
%{
  match(Set dst src);

  ins_cost(INSN_COST);
  format %{ "mov $dst, $src\t# long" %}

  ins_encode( aarch64_enc_mov_imm(dst, src) );

  ins_pipe(ialu_imm);
%}

// Load Pointer Constant
// Costed at 4 instructions: the encoding may expand to a multi-insn
// move of a full 64-bit pointer.

instruct loadConP(iRegPNoSp dst, immP con)
%{
  match(Set dst con);

  ins_cost(INSN_COST * 4);
  format %{
    "mov  $dst, $con\t# ptr\n\t"
  %}

  ins_encode(aarch64_enc_mov_p(dst, con));

  ins_pipe(ialu_imm);
%}

// Load Null Pointer Constant

instruct loadConP0(iRegPNoSp dst, immP0 con)
%{
  match(Set dst con);

  ins_cost(INSN_COST);
  format %{ "mov  $dst, $con\t# NULL ptr" %}

  ins_encode(aarch64_enc_mov_p0(dst, con));

  ins_pipe(ialu_imm);
%}
6638 
// Load Pointer Constant One

instruct loadConP1(iRegPNoSp dst, immP_1 con)
%{
  match(Set dst con);

  ins_cost(INSN_COST);
  // Disassembly comment corrected: this loads pointer constant one,
  // not NULL (copy-paste from loadConP0 above).
  format %{ "mov  $dst, $con\t# ptr 1" %}

  ins_encode(aarch64_enc_mov_p1(dst, con));

  ins_pipe(ialu_imm);
%}
6652 
// Load Poll Page Constant (address formed with adr)

instruct loadConPollPage(iRegPNoSp dst, immPollPage con)
%{
  match(Set dst con);

  ins_cost(INSN_COST);
  format %{ "adr  $dst, $con\t# Poll Page Ptr" %}

  ins_encode(aarch64_enc_mov_poll_page(dst, con));

  ins_pipe(ialu_imm);
%}

// Load Byte Map Base Constant (card table base, formed with adr)

instruct loadByteMapBase(iRegPNoSp dst, immByteMapBase con)
%{
  match(Set dst con);

  ins_cost(INSN_COST);
  format %{ "adr  $dst, $con\t# Byte Map Base" %}

  ins_encode(aarch64_enc_mov_byte_map_base(dst, con));

  ins_pipe(ialu_imm);
%}

// Load Narrow Pointer Constant

instruct loadConN(iRegNNoSp dst, immN con)
%{
  match(Set dst con);

  ins_cost(INSN_COST * 4);
  format %{ "mov  $dst, $con\t# compressed ptr" %}

  ins_encode(aarch64_enc_mov_n(dst, con));

  ins_pipe(ialu_imm);
%}

// Load Narrow Null Pointer Constant

instruct loadConN0(iRegNNoSp dst, immN0 con)
%{
  match(Set dst con);

  ins_cost(INSN_COST);
  format %{ "mov  $dst, $con\t# compressed NULL ptr" %}

  ins_encode(aarch64_enc_mov_n0(dst, con));

  ins_pipe(ialu_imm);
%}

// Load Narrow Klass Constant

instruct loadConNKlass(iRegNNoSp dst, immNKlass con)
%{
  match(Set dst con);

  ins_cost(INSN_COST);
  format %{ "mov  $dst, $con\t# compressed klass ptr" %}

  ins_encode(aarch64_enc_mov_nk(dst, con));

  ins_pipe(ialu_imm);
%}
6722 
// Load Packed Float Constant
// "Packed" means the value is encodable as an FMOV immediate.

instruct loadConF_packed(vRegF dst, immFPacked con) %{
  match(Set dst con);
  ins_cost(INSN_COST * 4);
  format %{ "fmovs  $dst, $con"%}
  ins_encode %{
    __ fmovs(as_FloatRegister($dst$$reg), (double)$con$$constant);
  %}

  ins_pipe(fp_imm_s);
%}

// Load Float Constant (general case: fetched from the constant table)

instruct loadConF(vRegF dst, immF con) %{
  match(Set dst con);

  ins_cost(INSN_COST * 4);

  format %{
    "ldrs $dst, [$constantaddress]\t# load from constant table: float=$con\n\t"
  %}

  ins_encode %{
    __ ldrs(as_FloatRegister($dst$$reg), $constantaddress($con));
  %}

  ins_pipe(fp_load_constant_s);
%}

// Load Packed Double Constant (FMOV-immediate encodable)

instruct loadConD_packed(vRegD dst, immDPacked con) %{
  match(Set dst con);
  ins_cost(INSN_COST);
  format %{ "fmovd  $dst, $con"%}
  ins_encode %{
    __ fmovd(as_FloatRegister($dst$$reg), $con$$constant);
  %}

  ins_pipe(fp_imm_d);
%}
6766 
// Load Double Constant (general case: fetched from the constant table)

instruct loadConD(vRegD dst, immD con) %{
  match(Set dst con);

  ins_cost(INSN_COST * 5);
  // Disassembly comment corrected: this is a double constant, not a
  // float (copy-paste from loadConF above).
  format %{
    "ldrd $dst, [$constantaddress]\t# load from constant table: double=$con\n\t"
  %}

  ins_encode %{
    __ ldrd(as_FloatRegister($dst$$reg), $constantaddress($con));
  %}

  ins_pipe(fp_load_constant_d);
%}
6783 
// Store Instructions

// The !needs_releasing_store(n) predicates restrict these rules to
// plain stores; releasing (eg. volatile) stores are matched elsewhere.

// Store CMS card-mark Immediate
instruct storeimmCM0(immI0 zero, memory mem)
%{
  match(Set mem (StoreCM mem zero));
  predicate(unnecessary_storestore(n));

  ins_cost(INSN_COST);
  format %{ "strb zr, $mem\t# byte" %}

  ins_encode(aarch64_enc_strb0(mem));

  ins_pipe(istore_mem);
%}

// Store CMS card-mark Immediate with intervening StoreStore
// needed when using CMS with no conditional card marking
instruct storeimmCM0_ordered(immI0 zero, memory mem)
%{
  match(Set mem (StoreCM mem zero));

  ins_cost(INSN_COST * 2);
  format %{ "dmb ishst"
      "\n\tstrb zr, $mem\t# byte" %}

  ins_encode(aarch64_enc_strb0_ordered(mem));

  ins_pipe(istore_mem);
%}

// Store Byte
instruct storeB(iRegIorL2I src, memory mem)
%{
  match(Set mem (StoreB mem src));
  predicate(!needs_releasing_store(n));

  ins_cost(INSN_COST);
  format %{ "strb  $src, $mem\t# byte" %}

  ins_encode(aarch64_enc_strb(src, mem));

  ins_pipe(istore_reg_mem);
%}

// Store Byte zero (uses the zero register, no source operand)
instruct storeimmB0(immI0 zero, memory mem)
%{
  match(Set mem (StoreB mem zero));
  predicate(!needs_releasing_store(n));

  ins_cost(INSN_COST);
  format %{ "strb zr, $mem\t# byte" %}

  ins_encode(aarch64_enc_strb0(mem));

  ins_pipe(istore_mem);
%}

// Store Char/Short
instruct storeC(iRegIorL2I src, memory mem)
%{
  match(Set mem (StoreC mem src));
  predicate(!needs_releasing_store(n));

  ins_cost(INSN_COST);
  format %{ "strh  $src, $mem\t# short" %}

  ins_encode(aarch64_enc_strh(src, mem));

  ins_pipe(istore_reg_mem);
%}

// Store Char/Short zero
instruct storeimmC0(immI0 zero, memory mem)
%{
  match(Set mem (StoreC mem zero));
  predicate(!needs_releasing_store(n));

  ins_cost(INSN_COST);
  format %{ "strh  zr, $mem\t# short" %}

  ins_encode(aarch64_enc_strh0(mem));

  ins_pipe(istore_mem);
%}
6869 
6870 // Store Integer
6871 
6872 instruct storeI(iRegIorL2I src, memory mem)
6873 %{
6874   match(Set mem(StoreI mem src));
6875   predicate(!needs_releasing_store(n));
6876 
6877   ins_cost(INSN_COST);
6878   format %{ "strw  $src, $mem\t# int" %}
6879 
6880   ins_encode(aarch64_enc_strw(src, mem));
6881 
6882   ins_pipe(istore_reg_mem);
6883 %}
6884 
6885 instruct storeimmI0(immI0 zero, memory mem)
6886 %{
6887   match(Set mem(StoreI mem zero));
6888   predicate(!needs_releasing_store(n));
6889 
6890   ins_cost(INSN_COST);
6891   format %{ "strw  zr, $mem\t# int" %}
6892 
6893   ins_encode(aarch64_enc_strw0(mem));
6894 
6895   ins_pipe(istore_mem);
6896 %}
6897 
// Store Long (64 bit signed)
instruct storeL(iRegL src, memory mem)
%{
  match(Set mem (StoreL mem src));
  // plain store only; releasing stores are matched by storeL_volatile
  predicate(!needs_releasing_store(n));

  ins_cost(INSN_COST);
  // fixed: this is a 64-bit long store, not an int store
  format %{ "str  $src, $mem\t# long" %}

  ins_encode(aarch64_enc_str(src, mem));

  ins_pipe(istore_reg_mem);
%}
6911 
// Store zero Long (64 bit signed) — uses the architectural zero register.
instruct storeimmL0(immL0 zero, memory mem)
%{
  match(Set mem (StoreL mem zero));
  predicate(!needs_releasing_store(n));

  ins_cost(INSN_COST);
  // fixed: this is a 64-bit long store, not an int store
  format %{ "str  zr, $mem\t# long" %}

  ins_encode(aarch64_enc_str0(mem));

  ins_pipe(istore_mem);
%}
6925 
// Store Pointer
instruct storeP(iRegP src, memory mem)
%{
  match(Set mem (StoreP mem src));
  predicate(!needs_releasing_store(n));

  ins_cost(INSN_COST);
  format %{ "str  $src, $mem\t# ptr" %}

  ins_encode(aarch64_enc_str(src, mem));

  ins_pipe(istore_reg_mem);
%}

// Store Pointer
instruct storeimmP0(immP0 zero, memory mem)
%{
  match(Set mem (StoreP mem zero));
  predicate(!needs_releasing_store(n));

  ins_cost(INSN_COST);
  format %{ "str zr, $mem\t# ptr" %}

  ins_encode(aarch64_enc_str0(mem));

  ins_pipe(istore_reg_mem);
%}

// Store Compressed Pointer
instruct storeN(iRegN src, memory mem)
%{
  match(Set mem (StoreN mem src));
  predicate(!needs_releasing_store(n));

  ins_cost(INSN_COST);
  format %{ "strw  $src, $mem\t# compressed ptr" %}

  ins_encode(aarch64_enc_strw(src, mem));

  ins_pipe(istore_reg_mem);
%}

// Store compressed null: when both narrow-oop and narrow-klass bases are
// NULL, rheapbase holds zero, so the heapbase register can be stored
// directly instead of materializing a zero.
instruct storeImmN0(iRegIHeapbase heapbase, immN0 zero, memory mem)
%{
  match(Set mem (StoreN mem zero));
  predicate(Universe::narrow_oop_base() == NULL &&
            Universe::narrow_klass_base() == NULL  &&
            (!needs_releasing_store(n)));

  ins_cost(INSN_COST);
  format %{ "strw  rheapbase, $mem\t# compressed ptr (rheapbase==0)" %}

  ins_encode(aarch64_enc_strw(heapbase, mem));

  ins_pipe(istore_reg_mem);
%}

// Store Float
instruct storeF(vRegF src, memory mem)
%{
  match(Set mem (StoreF mem src));
  predicate(!needs_releasing_store(n));

  ins_cost(INSN_COST);
  format %{ "strs  $src, $mem\t# float" %}

  ins_encode( aarch64_enc_strs(src, mem) );

  ins_pipe(pipe_class_memory);
%}

// TODO
// implement storeImmF0 and storeFImmPacked

// Store Double
instruct storeD(vRegD src, memory mem)
%{
  match(Set mem (StoreD mem src));
  predicate(!needs_releasing_store(n));

  ins_cost(INSN_COST);
  format %{ "strd  $src, $mem\t# double" %}

  ins_encode( aarch64_enc_strd(src, mem) );

  ins_pipe(pipe_class_memory);
%}

// Store Compressed Klass Pointer
instruct storeNKlass(iRegN src, memory mem)
%{
  predicate(!needs_releasing_store(n));
  match(Set mem (StoreNKlass mem src));

  ins_cost(INSN_COST);
  format %{ "strw  $src, $mem\t# compressed klass ptr" %}

  ins_encode(aarch64_enc_strw(src, mem));

  ins_pipe(istore_reg_mem);
%}
7027 
// TODO
// implement storeImmD0 and storeDImmPacked

// prefetch instructions
// Must be safe to execute with invalid address (cannot fault).

instruct prefetchr( memory mem ) %{
  match(PrefetchRead mem);

  ins_cost(INSN_COST);
  format %{ "prfm $mem, PLDL1KEEP\t# Prefetch into level 1 cache read keep" %}

  ins_encode( aarch64_enc_prefetchr(mem) );

  ins_pipe(iload_prefetch);
%}

instruct prefetchw( memory mem ) %{
  match(PrefetchAllocation mem);

  ins_cost(INSN_COST);
  format %{ "prfm $mem, PSTL1KEEP\t# Prefetch into level 1 cache write keep" %}

  ins_encode( aarch64_enc_prefetchw(mem) );

  ins_pipe(iload_prefetch);
%}

// NOTE(review): the "nta" name is x86 legacy; on AArch64 PrefetchWrite is
// mapped to a streaming (non-temporal-like) store prefetch hint.
instruct prefetchnta( memory mem ) %{
  match(PrefetchWrite mem);

  ins_cost(INSN_COST);
  format %{ "prfm $mem, PSTL1STRM\t# Prefetch into level 1 cache write streaming" %}

  ins_encode( aarch64_enc_prefetchnta(mem) );

  ins_pipe(iload_prefetch);
%}
7066 
//  ---------------- volatile loads and stores ----------------
// Volatile accesses use the acquire (ldar*) / release (stlr*) instruction
// forms. These only take a plain base register, hence the indirect
// operand rather than the general memory operand.

// Load Byte (8 bit signed)
instruct loadB_volatile(iRegINoSp dst, /* sync_memory*/indirect mem)
%{
  match(Set dst (LoadB mem));

  ins_cost(VOLATILE_REF_COST);
  format %{ "ldarsb  $dst, $mem\t# byte" %}

  ins_encode(aarch64_enc_ldarsb(dst, mem));

  ins_pipe(pipe_serial);
%}

// Load Byte (8 bit signed) into long
instruct loadB2L_volatile(iRegLNoSp dst, /* sync_memory*/indirect mem)
%{
  match(Set dst (ConvI2L (LoadB mem)));

  ins_cost(VOLATILE_REF_COST);
  format %{ "ldarsb  $dst, $mem\t# byte" %}

  ins_encode(aarch64_enc_ldarsb(dst, mem));

  ins_pipe(pipe_serial);
%}

// Load Byte (8 bit unsigned)
instruct loadUB_volatile(iRegINoSp dst, /* sync_memory*/indirect mem)
%{
  match(Set dst (LoadUB mem));

  ins_cost(VOLATILE_REF_COST);
  format %{ "ldarb  $dst, $mem\t# byte" %}

  ins_encode(aarch64_enc_ldarb(dst, mem));

  ins_pipe(pipe_serial);
%}

// Load Byte (8 bit unsigned) into long
instruct loadUB2L_volatile(iRegLNoSp dst, /* sync_memory*/indirect mem)
%{
  match(Set dst (ConvI2L (LoadUB mem)));

  ins_cost(VOLATILE_REF_COST);
  format %{ "ldarb  $dst, $mem\t# byte" %}

  ins_encode(aarch64_enc_ldarb(dst, mem));

  ins_pipe(pipe_serial);
%}

// Load Short (16 bit signed)
instruct loadS_volatile(iRegINoSp dst, /* sync_memory*/indirect mem)
%{
  match(Set dst (LoadS mem));

  ins_cost(VOLATILE_REF_COST);
  format %{ "ldarshw  $dst, $mem\t# short" %}

  ins_encode(aarch64_enc_ldarshw(dst, mem));

  ins_pipe(pipe_serial);
%}

// Load Char (16 bit unsigned)
instruct loadUS_volatile(iRegINoSp dst, /* sync_memory*/indirect mem)
%{
  match(Set dst (LoadUS mem));

  ins_cost(VOLATILE_REF_COST);
  format %{ "ldarhw  $dst, $mem\t# short" %}

  ins_encode(aarch64_enc_ldarhw(dst, mem));

  ins_pipe(pipe_serial);
%}

// Load Short/Char (16 bit unsigned) into long
// ldarh zero-extends into the full 64-bit register, so no extra mask.
instruct loadUS2L_volatile(iRegLNoSp dst, /* sync_memory*/indirect mem)
%{
  match(Set dst (ConvI2L (LoadUS mem)));

  ins_cost(VOLATILE_REF_COST);
  format %{ "ldarh  $dst, $mem\t# short" %}

  ins_encode(aarch64_enc_ldarh(dst, mem));

  ins_pipe(pipe_serial);
%}
7158 
// Load Short/Char (16 bit signed) into long
instruct loadS2L_volatile(iRegLNoSp dst, /* sync_memory*/indirect mem)
%{
  match(Set dst (ConvI2L (LoadS mem)));

  ins_cost(VOLATILE_REF_COST);
  // fixed: format now matches the emitted instruction — the encoding uses
  // ldarsh (sign-extending acquire load), not plain ldarh
  format %{ "ldarsh  $dst, $mem\t# short" %}

  ins_encode(aarch64_enc_ldarsh(dst, mem));

  ins_pipe(pipe_serial);
%}
7171 
// Load Integer (32 bit signed)
instruct loadI_volatile(iRegINoSp dst, /* sync_memory*/indirect mem)
%{
  match(Set dst (LoadI mem));

  ins_cost(VOLATILE_REF_COST);
  format %{ "ldarw  $dst, $mem\t# int" %}

  ins_encode(aarch64_enc_ldarw(dst, mem));

  ins_pipe(pipe_serial);
%}

// Load Integer (32 bit unsigned) into long
// The AndL with the 32-bit mask is subsumed: ldarw already zero-extends
// into the 64-bit destination register.
instruct loadUI2L_volatile(iRegLNoSp dst, /* sync_memory*/indirect mem, immL_32bits mask)
%{
  match(Set dst (AndL (ConvI2L (LoadI mem)) mask));

  ins_cost(VOLATILE_REF_COST);
  format %{ "ldarw  $dst, $mem\t# int" %}

  ins_encode(aarch64_enc_ldarw(dst, mem));

  ins_pipe(pipe_serial);
%}
7197 
// Load Long (64 bit signed)
instruct loadL_volatile(iRegLNoSp dst, /* sync_memory*/indirect mem)
%{
  match(Set dst (LoadL mem));

  ins_cost(VOLATILE_REF_COST);
  // fixed: this is a 64-bit long load, not an int load
  format %{ "ldar  $dst, $mem\t# long" %}

  ins_encode(aarch64_enc_ldar(dst, mem));

  ins_pipe(pipe_serial);
%}
7210 
// Load Pointer
instruct loadP_volatile(iRegPNoSp dst, /* sync_memory*/indirect mem)
%{
  match(Set dst (LoadP mem));

  ins_cost(VOLATILE_REF_COST);
  format %{ "ldar  $dst, $mem\t# ptr" %}

  ins_encode(aarch64_enc_ldar(dst, mem));

  ins_pipe(pipe_serial);
%}

// Load Compressed Pointer
instruct loadN_volatile(iRegNNoSp dst, /* sync_memory*/indirect mem)
%{
  match(Set dst (LoadN mem));

  ins_cost(VOLATILE_REF_COST);
  format %{ "ldarw  $dst, $mem\t# compressed ptr" %}

  ins_encode(aarch64_enc_ldarw(dst, mem));

  ins_pipe(pipe_serial);
%}

// Load Float
instruct loadF_volatile(vRegF dst, /* sync_memory*/indirect mem)
%{
  match(Set dst (LoadF mem));

  ins_cost(VOLATILE_REF_COST);
  format %{ "ldars  $dst, $mem\t# float" %}

  ins_encode( aarch64_enc_fldars(dst, mem) );

  ins_pipe(pipe_serial);
%}

// Load Double
instruct loadD_volatile(vRegD dst, /* sync_memory*/indirect mem)
%{
  match(Set dst (LoadD mem));

  ins_cost(VOLATILE_REF_COST);
  format %{ "ldard  $dst, $mem\t# double" %}

  ins_encode( aarch64_enc_fldard(dst, mem) );

  ins_pipe(pipe_serial);
%}

// Store Byte
instruct storeB_volatile(iRegIorL2I src, /* sync_memory*/indirect mem)
%{
  match(Set mem (StoreB mem src));

  ins_cost(VOLATILE_REF_COST);
  format %{ "stlrb  $src, $mem\t# byte" %}

  ins_encode(aarch64_enc_stlrb(src, mem));

  ins_pipe(pipe_class_memory);
%}

// Store Char/Short
instruct storeC_volatile(iRegIorL2I src, /* sync_memory*/indirect mem)
%{
  match(Set mem (StoreC mem src));

  ins_cost(VOLATILE_REF_COST);
  format %{ "stlrh  $src, $mem\t# short" %}

  ins_encode(aarch64_enc_stlrh(src, mem));

  ins_pipe(pipe_class_memory);
%}

// Store Integer

instruct storeI_volatile(iRegIorL2I src, /* sync_memory*/indirect mem)
%{
  match(Set mem(StoreI mem src));

  ins_cost(VOLATILE_REF_COST);
  format %{ "stlrw  $src, $mem\t# int" %}

  ins_encode(aarch64_enc_stlrw(src, mem));

  ins_pipe(pipe_class_memory);
%}
7302 
// Store Long (64 bit signed)
instruct storeL_volatile(iRegL src, /* sync_memory*/indirect mem)
%{
  match(Set mem (StoreL mem src));

  ins_cost(VOLATILE_REF_COST);
  // fixed: this is a 64-bit long store, not an int store
  format %{ "stlr  $src, $mem\t# long" %}

  ins_encode(aarch64_enc_stlr(src, mem));

  ins_pipe(pipe_class_memory);
%}
7315 
// Store Pointer
instruct storeP_volatile(iRegP src, /* sync_memory*/indirect mem)
%{
  match(Set mem (StoreP mem src));

  ins_cost(VOLATILE_REF_COST);
  format %{ "stlr  $src, $mem\t# ptr" %}

  ins_encode(aarch64_enc_stlr(src, mem));

  ins_pipe(pipe_class_memory);
%}

// Store Compressed Pointer
instruct storeN_volatile(iRegN src, /* sync_memory*/indirect mem)
%{
  match(Set mem (StoreN mem src));

  ins_cost(VOLATILE_REF_COST);
  format %{ "stlrw  $src, $mem\t# compressed ptr" %}

  ins_encode(aarch64_enc_stlrw(src, mem));

  ins_pipe(pipe_class_memory);
%}

// Store Float
instruct storeF_volatile(vRegF src, /* sync_memory*/indirect mem)
%{
  match(Set mem (StoreF mem src));

  ins_cost(VOLATILE_REF_COST);
  format %{ "stlrs  $src, $mem\t# float" %}

  ins_encode( aarch64_enc_fstlrs(src, mem) );

  ins_pipe(pipe_class_memory);
%}

// TODO
// implement storeImmF0 and storeFImmPacked

// Store Double
instruct storeD_volatile(vRegD src, /* sync_memory*/indirect mem)
%{
  match(Set mem (StoreD mem src));

  ins_cost(VOLATILE_REF_COST);
  format %{ "stlrd  $src, $mem\t# double" %}

  ins_encode( aarch64_enc_fstlrd(src, mem) );

  ins_pipe(pipe_class_memory);
%}

//  ---------------- end of volatile loads and stores ----------------
7372 
7373 // ============================================================================
7374 // BSWAP Instructions
7375 
7376 instruct bytes_reverse_int(iRegINoSp dst, iRegIorL2I src) %{
7377   match(Set dst (ReverseBytesI src));
7378 
7379   ins_cost(INSN_COST);
7380   format %{ "revw  $dst, $src" %}
7381 
7382   ins_encode %{
7383     __ revw(as_Register($dst$$reg), as_Register($src$$reg));
7384   %}
7385 
7386   ins_pipe(ialu_reg);
7387 %}
7388 
7389 instruct bytes_reverse_long(iRegLNoSp dst, iRegL src) %{
7390   match(Set dst (ReverseBytesL src));
7391 
7392   ins_cost(INSN_COST);
7393   format %{ "rev  $dst, $src" %}
7394 
7395   ins_encode %{
7396     __ rev(as_Register($dst$$reg), as_Register($src$$reg));
7397   %}
7398 
7399   ins_pipe(ialu_reg);
7400 %}
7401 
7402 instruct bytes_reverse_unsigned_short(iRegINoSp dst, iRegIorL2I src) %{
7403   match(Set dst (ReverseBytesUS src));
7404 
7405   ins_cost(INSN_COST);
7406   format %{ "rev16w  $dst, $src" %}
7407 
7408   ins_encode %{
7409     __ rev16w(as_Register($dst$$reg), as_Register($src$$reg));
7410   %}
7411 
7412   ins_pipe(ialu_reg);
7413 %}
7414 
7415 instruct bytes_reverse_short(iRegINoSp dst, iRegIorL2I src) %{
7416   match(Set dst (ReverseBytesS src));
7417 
7418   ins_cost(INSN_COST);
7419   format %{ "rev16w  $dst, $src\n\t"
7420             "sbfmw $dst, $dst, #0, #15" %}
7421 
7422   ins_encode %{
7423     __ rev16w(as_Register($dst$$reg), as_Register($src$$reg));
7424     __ sbfmw(as_Register($dst$$reg), as_Register($dst$$reg), 0U, 15U);
7425   %}
7426 
7427   ins_pipe(ialu_reg);
7428 %}
7429 
7430 // ============================================================================
7431 // Zero Count Instructions
7432 
7433 instruct countLeadingZerosI(iRegINoSp dst, iRegIorL2I src) %{
7434   match(Set dst (CountLeadingZerosI src));
7435 
7436   ins_cost(INSN_COST);
7437   format %{ "clzw  $dst, $src" %}
7438   ins_encode %{
7439     __ clzw(as_Register($dst$$reg), as_Register($src$$reg));
7440   %}
7441 
7442   ins_pipe(ialu_reg);
7443 %}
7444 
7445 instruct countLeadingZerosL(iRegINoSp dst, iRegL src) %{
7446   match(Set dst (CountLeadingZerosL src));
7447 
7448   ins_cost(INSN_COST);
7449   format %{ "clz   $dst, $src" %}
7450   ins_encode %{
7451     __ clz(as_Register($dst$$reg), as_Register($src$$reg));
7452   %}
7453 
7454   ins_pipe(ialu_reg);
7455 %}
7456 
7457 instruct countTrailingZerosI(iRegINoSp dst, iRegIorL2I src) %{
7458   match(Set dst (CountTrailingZerosI src));
7459 
7460   ins_cost(INSN_COST * 2);
7461   format %{ "rbitw  $dst, $src\n\t"
7462             "clzw   $dst, $dst" %}
7463   ins_encode %{
7464     __ rbitw(as_Register($dst$$reg), as_Register($src$$reg));
7465     __ clzw(as_Register($dst$$reg), as_Register($dst$$reg));
7466   %}
7467 
7468   ins_pipe(ialu_reg);
7469 %}
7470 
7471 instruct countTrailingZerosL(iRegINoSp dst, iRegL src) %{
7472   match(Set dst (CountTrailingZerosL src));
7473 
7474   ins_cost(INSN_COST * 2);
7475   format %{ "rbit   $dst, $src\n\t"
7476             "clz    $dst, $dst" %}
7477   ins_encode %{
7478     __ rbit(as_Register($dst$$reg), as_Register($src$$reg));
7479     __ clz(as_Register($dst$$reg), as_Register($dst$$reg));
7480   %}
7481 
7482   ins_pipe(ialu_reg);
7483 %}
7484 
//---------- Population Count Instructions -------------------------------------
// No scalar popcount on AArch64: move the value to a SIMD register, use
// the vector cnt (per-byte popcount) and addv (horizontal add), then move
// the result back.

instruct popCountI(iRegINoSp dst, iRegIorL2I src, vRegF tmp) %{
  predicate(UsePopCountInstruction);
  match(Set dst (PopCountI src));
  effect(TEMP tmp);
  ins_cost(INSN_COST * 13);

  format %{ "movw   $src, $src\n\t"
            "mov    $tmp, $src\t# vector (1D)\n\t"
            "cnt    $tmp, $tmp\t# vector (8B)\n\t"
            "addv   $tmp, $tmp\t# vector (8B)\n\t"
            "mov    $dst, $tmp\t# vector (1D)" %}
  ins_encode %{
    // NOTE(review): writes $src in place (only zero-extends it) without a
    // declared effect on src — verify this is intended/safe for iRegIorL2I
    __ movw($src$$Register, $src$$Register); // ensure top 32 bits 0
    __ mov($tmp$$FloatRegister, __ T1D, 0, $src$$Register);
    __ cnt($tmp$$FloatRegister, __ T8B, $tmp$$FloatRegister);
    __ addv($tmp$$FloatRegister, __ T8B, $tmp$$FloatRegister);
    __ mov($dst$$Register, $tmp$$FloatRegister, __ T1D, 0);
  %}

  ins_pipe(pipe_class_default);
%}

// Popcount of an int loaded from memory: load straight into the SIMD
// register, skipping the GPR->SIMD move.
instruct popCountI_mem(iRegINoSp dst, memory mem, vRegF tmp) %{
  predicate(UsePopCountInstruction);
  match(Set dst (PopCountI (LoadI mem)));
  effect(TEMP tmp);
  ins_cost(INSN_COST * 13);

  format %{ "ldrs   $tmp, $mem\n\t"
            "cnt    $tmp, $tmp\t# vector (8B)\n\t"
            "addv   $tmp, $tmp\t# vector (8B)\n\t"
            "mov    $dst, $tmp\t# vector (1D)" %}
  ins_encode %{
    FloatRegister tmp_reg = as_FloatRegister($tmp$$reg);
    loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldrs, tmp_reg, $mem->opcode(),
               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
    __ cnt($tmp$$FloatRegister, __ T8B, $tmp$$FloatRegister);
    __ addv($tmp$$FloatRegister, __ T8B, $tmp$$FloatRegister);
    __ mov($dst$$Register, $tmp$$FloatRegister, __ T1D, 0);
  %}

  ins_pipe(pipe_class_default);
%}

// Note: Long.bitCount(long) returns an int.
instruct popCountL(iRegINoSp dst, iRegL src, vRegD tmp) %{
  predicate(UsePopCountInstruction);
  match(Set dst (PopCountL src));
  effect(TEMP tmp);
  ins_cost(INSN_COST * 13);

  format %{ "mov    $tmp, $src\t# vector (1D)\n\t"
            "cnt    $tmp, $tmp\t# vector (8B)\n\t"
            "addv   $tmp, $tmp\t# vector (8B)\n\t"
            "mov    $dst, $tmp\t# vector (1D)" %}
  ins_encode %{
    __ mov($tmp$$FloatRegister, __ T1D, 0, $src$$Register);
    __ cnt($tmp$$FloatRegister, __ T8B, $tmp$$FloatRegister);
    __ addv($tmp$$FloatRegister, __ T8B, $tmp$$FloatRegister);
    __ mov($dst$$Register, $tmp$$FloatRegister, __ T1D, 0);
  %}

  ins_pipe(pipe_class_default);
%}

// Popcount of a long loaded from memory.
instruct popCountL_mem(iRegINoSp dst, memory mem, vRegD tmp) %{
  predicate(UsePopCountInstruction);
  match(Set dst (PopCountL (LoadL mem)));
  effect(TEMP tmp);
  ins_cost(INSN_COST * 13);

  format %{ "ldrd   $tmp, $mem\n\t"
            "cnt    $tmp, $tmp\t# vector (8B)\n\t"
            "addv   $tmp, $tmp\t# vector (8B)\n\t"
            "mov    $dst, $tmp\t# vector (1D)" %}
  ins_encode %{
    FloatRegister tmp_reg = as_FloatRegister($tmp$$reg);
    loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldrd, tmp_reg, $mem->opcode(),
               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
    __ cnt($tmp$$FloatRegister, __ T8B, $tmp$$FloatRegister);
    __ addv($tmp$$FloatRegister, __ T8B, $tmp$$FloatRegister);
    __ mov($dst$$Register, $tmp$$FloatRegister, __ T1D, 0);
  %}

  ins_pipe(pipe_class_default);
%}
7574 
7575 // ============================================================================
7576 // MemBar Instruction
7577 
7578 instruct load_fence() %{
7579   match(LoadFence);
7580   ins_cost(VOLATILE_REF_COST);
7581 
7582   format %{ "load_fence" %}
7583 
7584   ins_encode %{
7585     __ membar(Assembler::LoadLoad|Assembler::LoadStore);
7586   %}
7587   ins_pipe(pipe_serial);
7588 %}
7589 
7590 instruct unnecessary_membar_acquire() %{
7591   predicate(unnecessary_acquire(n));
7592   match(MemBarAcquire);
7593   ins_cost(0);
7594 
7595   format %{ "membar_acquire (elided)" %}
7596 
7597   ins_encode %{
7598     __ block_comment("membar_acquire (elided)");
7599   %}
7600 
7601   ins_pipe(pipe_class_empty);
7602 %}
7603 
7604 instruct membar_acquire() %{
7605   match(MemBarAcquire);
7606   ins_cost(VOLATILE_REF_COST);
7607 
7608   format %{ "membar_acquire" %}
7609 
7610   ins_encode %{
7611     __ block_comment("membar_acquire");
7612     __ membar(Assembler::LoadLoad|Assembler::LoadStore);
7613   %}
7614 
7615   ins_pipe(pipe_serial);
7616 %}
7617 
7618 
7619 instruct membar_acquire_lock() %{
7620   match(MemBarAcquireLock);
7621   ins_cost(VOLATILE_REF_COST);
7622 
7623   format %{ "membar_acquire_lock (elided)" %}
7624 
7625   ins_encode %{
7626     __ block_comment("membar_acquire_lock (elided)");
7627   %}
7628 
7629   ins_pipe(pipe_serial);
7630 %}
7631 
7632 instruct store_fence() %{
7633   match(StoreFence);
7634   ins_cost(VOLATILE_REF_COST);
7635 
7636   format %{ "store_fence" %}
7637 
7638   ins_encode %{
7639     __ membar(Assembler::LoadStore|Assembler::StoreStore);
7640   %}
7641   ins_pipe(pipe_serial);
7642 %}
7643 
7644 instruct unnecessary_membar_release() %{
7645   predicate(unnecessary_release(n));
7646   match(MemBarRelease);
7647   ins_cost(0);
7648 
7649   format %{ "membar_release (elided)" %}
7650 
7651   ins_encode %{
7652     __ block_comment("membar_release (elided)");
7653   %}
7654   ins_pipe(pipe_serial);
7655 %}
7656 
7657 instruct membar_release() %{
7658   match(MemBarRelease);
7659   ins_cost(VOLATILE_REF_COST);
7660 
7661   format %{ "membar_release" %}
7662 
7663   ins_encode %{
7664     __ block_comment("membar_release");
7665     __ membar(Assembler::LoadStore|Assembler::StoreStore);
7666   %}
7667   ins_pipe(pipe_serial);
7668 %}
7669 
7670 instruct membar_storestore() %{
7671   match(MemBarStoreStore);
7672   ins_cost(VOLATILE_REF_COST);
7673 
7674   format %{ "MEMBAR-store-store" %}
7675 
7676   ins_encode %{
7677     __ membar(Assembler::StoreStore);
7678   %}
7679   ins_pipe(pipe_serial);
7680 %}
7681 
7682 instruct membar_release_lock() %{
7683   match(MemBarReleaseLock);
7684   ins_cost(VOLATILE_REF_COST);
7685 
7686   format %{ "membar_release_lock (elided)" %}
7687 
7688   ins_encode %{
7689     __ block_comment("membar_release_lock (elided)");
7690   %}
7691 
7692   ins_pipe(pipe_serial);
7693 %}
7694 
7695 instruct unnecessary_membar_volatile() %{
7696   predicate(unnecessary_volatile(n));
7697   match(MemBarVolatile);
7698   ins_cost(0);
7699 
7700   format %{ "membar_volatile (elided)" %}
7701 
7702   ins_encode %{
7703     __ block_comment("membar_volatile (elided)");
7704   %}
7705 
7706   ins_pipe(pipe_serial);
7707 %}
7708 
7709 instruct membar_volatile() %{
7710   match(MemBarVolatile);
7711   ins_cost(VOLATILE_REF_COST*100);
7712 
7713   format %{ "membar_volatile" %}
7714 
7715   ins_encode %{
7716     __ block_comment("membar_volatile");
7717     __ membar(Assembler::StoreLoad);
7718     %}
7719 
7720   ins_pipe(pipe_serial);
7721 %}
7722 
7723 // ============================================================================
7724 // Cast/Convert Instructions
7725 
7726 instruct castX2P(iRegPNoSp dst, iRegL src) %{
7727   match(Set dst (CastX2P src));
7728 
7729   ins_cost(INSN_COST);
7730   format %{ "mov $dst, $src\t# long -> ptr" %}
7731 
7732   ins_encode %{
7733     if ($dst$$reg != $src$$reg) {
7734       __ mov(as_Register($dst$$reg), as_Register($src$$reg));
7735     }
7736   %}
7737 
7738   ins_pipe(ialu_reg);
7739 %}
7740 
7741 instruct castP2X(iRegLNoSp dst, iRegP src) %{
7742   match(Set dst (CastP2X src));
7743 
7744   ins_cost(INSN_COST);
7745   format %{ "mov $dst, $src\t# ptr -> long" %}
7746 
7747   ins_encode %{
7748     if ($dst$$reg != $src$$reg) {
7749       __ mov(as_Register($dst$$reg), as_Register($src$$reg));
7750     }
7751   %}
7752 
7753   ins_pipe(ialu_reg);
7754 %}
7755 
7756 // Convert oop into int for vectors alignment masking
7757 instruct convP2I(iRegINoSp dst, iRegP src) %{
7758   match(Set dst (ConvL2I (CastP2X src)));
7759 
7760   ins_cost(INSN_COST);
7761   format %{ "movw $dst, $src\t# ptr -> int" %}
7762   ins_encode %{
7763     __ movw($dst$$Register, $src$$Register);
7764   %}
7765 
7766   ins_pipe(ialu_reg);
7767 %}
7768 
// Convert compressed oop into int for vectors alignment masking
// in case of 32bit oops (heap < 4Gb).
// With a zero shift the narrow oop value is usable directly.
instruct convN2I(iRegINoSp dst, iRegN src)
%{
  predicate(Universe::narrow_oop_shift() == 0);
  match(Set dst (ConvL2I (CastP2X (DecodeN src))));

  ins_cost(INSN_COST);
  // fixed: use the $dst operand and the movw mnemonic actually emitted
  format %{ "movw $dst, $src\t# compressed ptr -> int" %}
  ins_encode %{
    __ movw($dst$$Register, $src$$Register);
  %}

  ins_pipe(ialu_reg);
%}
7784 
7785 
// Convert oop pointer into compressed form
// General (maybe-null) encode must test for null, which kills flags.
instruct encodeHeapOop(iRegNNoSp dst, iRegP src, rFlagsReg cr) %{
  predicate(n->bottom_type()->make_ptr()->ptr() != TypePtr::NotNull);
  match(Set dst (EncodeP src));
  effect(KILL cr);
  ins_cost(INSN_COST * 3);
  format %{ "encode_heap_oop $dst, $src" %}
  ins_encode %{
    Register s = $src$$Register;
    Register d = $dst$$Register;
    __ encode_heap_oop(d, s);
  %}
  ins_pipe(ialu_reg);
%}

// Not-null encode: no null check needed.
// NOTE(review): declares rFlagsReg cr but no effect(KILL cr) — verify
// encode_heap_oop_not_null leaves flags untouched.
instruct encodeHeapOop_not_null(iRegNNoSp dst, iRegP src, rFlagsReg cr) %{
  predicate(n->bottom_type()->make_ptr()->ptr() == TypePtr::NotNull);
  match(Set dst (EncodeP src));
  ins_cost(INSN_COST * 3);
  format %{ "encode_heap_oop_not_null $dst, $src" %}
  ins_encode %{
    __ encode_heap_oop_not_null($dst$$Register, $src$$Register);
  %}
  ins_pipe(ialu_reg);
%}

instruct decodeHeapOop(iRegPNoSp dst, iRegN src, rFlagsReg cr) %{
  predicate(n->bottom_type()->is_ptr()->ptr() != TypePtr::NotNull &&
            n->bottom_type()->is_ptr()->ptr() != TypePtr::Constant);
  match(Set dst (DecodeN src));
  ins_cost(INSN_COST * 3);
  format %{ "decode_heap_oop $dst, $src" %}
  ins_encode %{
    Register s = $src$$Register;
    Register d = $dst$$Register;
    __ decode_heap_oop(d, s);
  %}
  ins_pipe(ialu_reg);
%}

instruct decodeHeapOop_not_null(iRegPNoSp dst, iRegN src, rFlagsReg cr) %{
  predicate(n->bottom_type()->is_ptr()->ptr() == TypePtr::NotNull ||
            n->bottom_type()->is_ptr()->ptr() == TypePtr::Constant);
  match(Set dst (DecodeN src));
  ins_cost(INSN_COST * 3);
  format %{ "decode_heap_oop_not_null $dst, $src" %}
  ins_encode %{
    Register s = $src$$Register;
    Register d = $dst$$Register;
    __ decode_heap_oop_not_null(d, s);
  %}
  ins_pipe(ialu_reg);
%}

// n.b. AArch64 implementations of encode_klass_not_null and
// decode_klass_not_null do not modify the flags register so, unlike
// Intel, we don't kill CR as a side effect here

instruct encodeKlass_not_null(iRegNNoSp dst, iRegP src) %{
  match(Set dst (EncodePKlass src));

  ins_cost(INSN_COST * 3);
  format %{ "encode_klass_not_null $dst,$src" %}

  ins_encode %{
    Register src_reg = as_Register($src$$reg);
    Register dst_reg = as_Register($dst$$reg);
    __ encode_klass_not_null(dst_reg, src_reg);
  %}

   ins_pipe(ialu_reg);
%}

instruct decodeKlass_not_null(iRegPNoSp dst, iRegN src) %{
  match(Set dst (DecodeNKlass src));

  ins_cost(INSN_COST * 3);
  format %{ "decode_klass_not_null $dst,$src" %}

  ins_encode %{
    Register src_reg = as_Register($src$$reg);
    Register dst_reg = as_Register($dst$$reg);
    // in-place decode uses a different (single-register) code path
    if (dst_reg != src_reg) {
      __ decode_klass_not_null(dst_reg, src_reg);
    } else {
      __ decode_klass_not_null(dst_reg);
    }
  %}

   ins_pipe(ialu_reg);
%}

// The following casts are compile-time type adjustments only and emit no
// code (size 0).

instruct checkCastPP(iRegPNoSp dst)
%{
  match(Set dst (CheckCastPP dst));

  size(0);
  format %{ "# checkcastPP of $dst" %}
  ins_encode(/* empty encoding */);
  ins_pipe(pipe_class_empty);
%}

instruct castPP(iRegPNoSp dst)
%{
  match(Set dst (CastPP dst));

  size(0);
  format %{ "# castPP of $dst" %}
  ins_encode(/* empty encoding */);
  ins_pipe(pipe_class_empty);
%}

instruct castII(iRegI dst)
%{
  match(Set dst (CastII dst));

  size(0);
  format %{ "# castII of $dst" %}
  ins_encode(/* empty encoding */);
  ins_cost(0);
  ins_pipe(pipe_class_empty);
%}
7908 
7909 // ============================================================================
7910 // Atomic operation instructions
7911 //
7912 // Intel and SPARC both implement Ideal Node LoadPLocked and
7913 // Store{PIL}Conditional instructions using a normal load for the
7914 // LoadPLocked and a CAS for the Store{PIL}Conditional.
7915 //
7916 // The ideal code appears only to use LoadPLocked/StorePLocked as a
7917 // pair to lock object allocations from Eden space when not using
7918 // TLABs.
7919 //
7920 // There does not appear to be a Load{IL}Locked Ideal Node and the
7921 // Ideal code appears to use Store{IL}Conditional as an alias for CAS
7922 // and to use StoreIConditional only for 32-bit and StoreLConditional
7923 // only for 64-bit.
7924 //
7925 // We implement LoadPLocked and StorePLocked instructions using,
7926 // respectively the AArch64 hw load-exclusive and store-conditional
7927 // instructions. Whereas we must implement each of
7928 // Store{IL}Conditional using a CAS which employs a pair of
7929 // instructions comprising a load-exclusive followed by a
7930 // store-conditional.
7931 
7932 
7933 // Locked-load (linked load) of the current heap-top
7934 // used when updating the eden heap top
7935 // implemented using ldaxr on AArch64
7936 
// Linked (exclusive) load of a pointer via ldaxr; pairs with
// storePConditional below to implement the heap-top update protocol.
instruct loadPLocked(iRegPNoSp dst, indirect mem)
%{
  match(Set dst (LoadPLocked mem));

  ins_cost(VOLATILE_REF_COST);

  format %{ "ldaxr $dst, $mem\t# ptr linked acquire" %}

  ins_encode(aarch64_enc_ldaxr(dst, mem));

  ins_pipe(pipe_serial);
%}
7949 
7950 // Conditional-store of the updated heap-top.
7951 // Used during allocation of the shared heap.
7952 // Sets flag (EQ) on success.
7953 // implemented using stlxr on AArch64.
7954 
// Conditional (exclusive) store of newval to heap_top_ptr via stlxr.
// The stlxr status word is compared against zr so that cr carries EQ on
// a successful store; oldval is implied by the preceding loadPLocked.
instruct storePConditional(memory heap_top_ptr, iRegP oldval, iRegP newval, rFlagsReg cr)
%{
  match(Set cr (StorePConditional heap_top_ptr (Binary oldval newval)));

  ins_cost(VOLATILE_REF_COST);

 // TODO
 // do we need to do a store-conditional release or can we just use a
 // plain store-conditional?

  format %{
    "stlxr rscratch1, $newval, $heap_top_ptr\t# ptr cond release"
    "cmpw rscratch1, zr\t# EQ on successful write"
  %}

  ins_encode(aarch64_enc_stlxr(newval, heap_top_ptr));

  ins_pipe(pipe_serial);
%}
7974 
7975 
7976 // storeLConditional is used by PhaseMacroExpand::expand_lock_node
7977 // when attempting to rebias a lock towards the current thread.  We
7978 // must use the acquire form of cmpxchg in order to guarantee acquire
7979 // semantics in this case.
// Store{L,I}Conditional: implemented as a full CAS (acquire form, see
// the comments above/below) with cr set to EQ on success.
instruct storeLConditional(indirect mem, iRegLNoSp oldval, iRegLNoSp newval, rFlagsReg cr)
%{
  match(Set cr (StoreLConditional mem (Binary oldval newval)));

  ins_cost(VOLATILE_REF_COST);

  format %{
    "cmpxchg rscratch1, $mem, $oldval, $newval, $mem\t# if $mem == $oldval then $mem <-- $newval"
    "cmpw rscratch1, zr\t# EQ on successful write"
  %}

  ins_encode(aarch64_enc_cmpxchg_acq(mem, oldval, newval));

  ins_pipe(pipe_slow);
%}

// storeIConditional also has acquire semantics, for no better reason
// than matching storeLConditional.  At the time of writing this
// comment storeIConditional was not used anywhere by AArch64.
instruct storeIConditional(indirect mem, iRegINoSp oldval, iRegINoSp newval, rFlagsReg cr)
%{
  match(Set cr (StoreIConditional mem (Binary oldval newval)));

  ins_cost(VOLATILE_REF_COST);

  format %{
    "cmpxchgw rscratch1, $mem, $oldval, $newval, $mem\t# if $mem == $oldval then $mem <-- $newval"
    "cmpw rscratch1, zr\t# EQ on successful write"
  %}

  ins_encode(aarch64_enc_cmpxchgw_acq(mem, oldval, newval));

  ins_pipe(pipe_slow);
%}
8014 
8015 // XXX No flag versions for CompareAndSwap{I,L,P,N} because matcher
8016 // can't match them
8017 
8018 // standard CompareAndSwapX when we are using barriers
8019 // these have higher priority than the rules selected by a predicate
8020 
// CompareAndSwap{I,L,P,N}: CAS [mem] from oldval to newval; res is set
// to 1 on success, 0 on failure (cset of the EQ flag left by the
// cmpxchg sequence).  cr is clobbered.  Word (w) forms are used for the
// 32-bit int and narrow-oop variants.

instruct compareAndSwapI(iRegINoSp res, indirect mem, iRegINoSp oldval, iRegINoSp newval, rFlagsReg cr) %{

  match(Set res (CompareAndSwapI mem (Binary oldval newval)));
  ins_cost(2 * VOLATILE_REF_COST);

  effect(KILL cr);

 format %{
    "cmpxchgw $mem, $oldval, $newval\t# (int) if $mem == $oldval then $mem <-- $newval"
    "cset $res, EQ\t# $res <-- (EQ ? 1 : 0)"
 %}

 ins_encode(aarch64_enc_cmpxchgw(mem, oldval, newval),
            aarch64_enc_cset_eq(res));

  ins_pipe(pipe_slow);
%}

instruct compareAndSwapL(iRegINoSp res, indirect mem, iRegLNoSp oldval, iRegLNoSp newval, rFlagsReg cr) %{

  match(Set res (CompareAndSwapL mem (Binary oldval newval)));
  ins_cost(2 * VOLATILE_REF_COST);

  effect(KILL cr);

 format %{
    "cmpxchg $mem, $oldval, $newval\t# (long) if $mem == $oldval then $mem <-- $newval"
    "cset $res, EQ\t# $res <-- (EQ ? 1 : 0)"
 %}

 ins_encode(aarch64_enc_cmpxchg(mem, oldval, newval),
            aarch64_enc_cset_eq(res));

  ins_pipe(pipe_slow);
%}

instruct compareAndSwapP(iRegINoSp res, indirect mem, iRegP oldval, iRegP newval, rFlagsReg cr) %{

  match(Set res (CompareAndSwapP mem (Binary oldval newval)));
  ins_cost(2 * VOLATILE_REF_COST);

  effect(KILL cr);

 format %{
    "cmpxchg $mem, $oldval, $newval\t# (ptr) if $mem == $oldval then $mem <-- $newval"
    "cset $res, EQ\t# $res <-- (EQ ? 1 : 0)"
 %}

 ins_encode(aarch64_enc_cmpxchg(mem, oldval, newval),
            aarch64_enc_cset_eq(res));

  ins_pipe(pipe_slow);
%}

instruct compareAndSwapN(iRegINoSp res, indirect mem, iRegNNoSp oldval, iRegNNoSp newval, rFlagsReg cr) %{

  match(Set res (CompareAndSwapN mem (Binary oldval newval)));
  ins_cost(2 * VOLATILE_REF_COST);

  effect(KILL cr);

 format %{
    "cmpxchgw $mem, $oldval, $newval\t# (narrow oop) if $mem == $oldval then $mem <-- $newval"
    "cset $res, EQ\t# $res <-- (EQ ? 1 : 0)"
 %}

 ins_encode(aarch64_enc_cmpxchgw(mem, oldval, newval),
            aarch64_enc_cset_eq(res));

  ins_pipe(pipe_slow);
%}
8092 
8093 
8094 // alternative CompareAndSwapX when we are eliding barriers
8095 
// Acquiring CAS variants: selected (via needs_acquiring_load_exclusive)
// when a following acquire barrier has been elided, so the CAS itself
// must provide acquire semantics; these use the _acq cmpxchg encodings
// and carry a lower cost so they win over the plain rules.

instruct compareAndSwapIAcq(iRegINoSp res, indirect mem, iRegINoSp oldval, iRegINoSp newval, rFlagsReg cr) %{

  predicate(needs_acquiring_load_exclusive(n));
  match(Set res (CompareAndSwapI mem (Binary oldval newval)));
  ins_cost(VOLATILE_REF_COST);

  effect(KILL cr);

 format %{
    "cmpxchgw_acq $mem, $oldval, $newval\t# (int) if $mem == $oldval then $mem <-- $newval"
    "cset $res, EQ\t# $res <-- (EQ ? 1 : 0)"
 %}

 ins_encode(aarch64_enc_cmpxchgw_acq(mem, oldval, newval),
            aarch64_enc_cset_eq(res));

  ins_pipe(pipe_slow);
%}

instruct compareAndSwapLAcq(iRegINoSp res, indirect mem, iRegLNoSp oldval, iRegLNoSp newval, rFlagsReg cr) %{

  predicate(needs_acquiring_load_exclusive(n));
  match(Set res (CompareAndSwapL mem (Binary oldval newval)));
  ins_cost(VOLATILE_REF_COST);

  effect(KILL cr);

 format %{
    "cmpxchg_acq $mem, $oldval, $newval\t# (long) if $mem == $oldval then $mem <-- $newval"
    "cset $res, EQ\t# $res <-- (EQ ? 1 : 0)"
 %}

 ins_encode(aarch64_enc_cmpxchg_acq(mem, oldval, newval),
            aarch64_enc_cset_eq(res));

  ins_pipe(pipe_slow);
%}

instruct compareAndSwapPAcq(iRegINoSp res, indirect mem, iRegP oldval, iRegP newval, rFlagsReg cr) %{

  predicate(needs_acquiring_load_exclusive(n));
  match(Set res (CompareAndSwapP mem (Binary oldval newval)));
  ins_cost(VOLATILE_REF_COST);

  effect(KILL cr);

 format %{
    "cmpxchg_acq $mem, $oldval, $newval\t# (ptr) if $mem == $oldval then $mem <-- $newval"
    "cset $res, EQ\t# $res <-- (EQ ? 1 : 0)"
 %}

 ins_encode(aarch64_enc_cmpxchg_acq(mem, oldval, newval),
            aarch64_enc_cset_eq(res));

  ins_pipe(pipe_slow);
%}

instruct compareAndSwapNAcq(iRegINoSp res, indirect mem, iRegNNoSp oldval, iRegNNoSp newval, rFlagsReg cr) %{

  predicate(needs_acquiring_load_exclusive(n));
  match(Set res (CompareAndSwapN mem (Binary oldval newval)));
  ins_cost(VOLATILE_REF_COST);

  effect(KILL cr);

 format %{
    "cmpxchgw_acq $mem, $oldval, $newval\t# (narrow oop) if $mem == $oldval then $mem <-- $newval"
    "cset $res, EQ\t# $res <-- (EQ ? 1 : 0)"
 %}

 ins_encode(aarch64_enc_cmpxchgw_acq(mem, oldval, newval),
            aarch64_enc_cset_eq(res));

  ins_pipe(pipe_slow);
%}
8171 
8172 
// GetAndSet{I,L,N,P}: atomically exchange [mem] with newv, returning the
// previous contents in prev.  The ...Acq variants (predicated on
// needs_acquiring_load_exclusive) use the acquiring atomic_xchgal(w)
// forms and a lower cost so they are preferred when they apply.

instruct get_and_setI(indirect mem, iRegI newv, iRegINoSp prev) %{
  match(Set prev (GetAndSetI mem newv));
  ins_cost(2 * VOLATILE_REF_COST);
  format %{ "atomic_xchgw  $prev, $newv, [$mem]" %}
  ins_encode %{
    __ atomic_xchgw($prev$$Register, $newv$$Register, as_Register($mem$$base));
  %}
  ins_pipe(pipe_serial);
%}

instruct get_and_setL(indirect mem, iRegL newv, iRegLNoSp prev) %{
  match(Set prev (GetAndSetL mem newv));
  ins_cost(2 * VOLATILE_REF_COST);
  format %{ "atomic_xchg  $prev, $newv, [$mem]" %}
  ins_encode %{
    __ atomic_xchg($prev$$Register, $newv$$Register, as_Register($mem$$base));
  %}
  ins_pipe(pipe_serial);
%}

instruct get_and_setN(indirect mem, iRegN newv, iRegINoSp prev) %{
  match(Set prev (GetAndSetN mem newv));
  ins_cost(2 * VOLATILE_REF_COST);
  format %{ "atomic_xchgw $prev, $newv, [$mem]" %}
  ins_encode %{
    __ atomic_xchgw($prev$$Register, $newv$$Register, as_Register($mem$$base));
  %}
  ins_pipe(pipe_serial);
%}

instruct get_and_setP(indirect mem, iRegP newv, iRegPNoSp prev) %{
  match(Set prev (GetAndSetP mem newv));
  ins_cost(2 * VOLATILE_REF_COST);
  format %{ "atomic_xchg  $prev, $newv, [$mem]" %}
  ins_encode %{
    __ atomic_xchg($prev$$Register, $newv$$Register, as_Register($mem$$base));
  %}
  ins_pipe(pipe_serial);
%}

instruct get_and_setIAcq(indirect mem, iRegI newv, iRegINoSp prev) %{
  predicate(needs_acquiring_load_exclusive(n));
  match(Set prev (GetAndSetI mem newv));
  ins_cost(VOLATILE_REF_COST);
  format %{ "atomic_xchgw_acq  $prev, $newv, [$mem]" %}
  ins_encode %{
    __ atomic_xchgalw($prev$$Register, $newv$$Register, as_Register($mem$$base));
  %}
  ins_pipe(pipe_serial);
%}

instruct get_and_setLAcq(indirect mem, iRegL newv, iRegLNoSp prev) %{
  predicate(needs_acquiring_load_exclusive(n));
  match(Set prev (GetAndSetL mem newv));
  ins_cost(VOLATILE_REF_COST);
  format %{ "atomic_xchg_acq  $prev, $newv, [$mem]" %}
  ins_encode %{
    __ atomic_xchgal($prev$$Register, $newv$$Register, as_Register($mem$$base));
  %}
  ins_pipe(pipe_serial);
%}

instruct get_and_setNAcq(indirect mem, iRegN newv, iRegINoSp prev) %{
  predicate(needs_acquiring_load_exclusive(n));
  match(Set prev (GetAndSetN mem newv));
  ins_cost(VOLATILE_REF_COST);
  format %{ "atomic_xchgw_acq $prev, $newv, [$mem]" %}
  ins_encode %{
    __ atomic_xchgalw($prev$$Register, $newv$$Register, as_Register($mem$$base));
  %}
  ins_pipe(pipe_serial);
%}

instruct get_and_setPAcq(indirect mem, iRegP newv, iRegPNoSp prev) %{
  predicate(needs_acquiring_load_exclusive(n));
  match(Set prev (GetAndSetP mem newv));
  ins_cost(VOLATILE_REF_COST);
  format %{ "atomic_xchg_acq  $prev, $newv, [$mem]" %}
  ins_encode %{
    __ atomic_xchgal($prev$$Register, $newv$$Register, as_Register($mem$$base));
  %}
  ins_pipe(pipe_serial);
%}
8256 
8257 
// GetAndAdd{L,I}: atomically add incr to [mem], returning the old value
// in newval.  The _no_res variants match (via result_not_used) when the
// fetched value is dead and pass noreg so no result is materialized;
// the ...i variants take an add/sub-encodable immediate increment.

instruct get_and_addL(indirect mem, iRegLNoSp newval, iRegL incr) %{
  match(Set newval (GetAndAddL mem incr));
  ins_cost(2 * VOLATILE_REF_COST + 1);
  format %{ "get_and_addL $newval, [$mem], $incr" %}
  ins_encode %{
    __ atomic_add($newval$$Register, $incr$$Register, as_Register($mem$$base));
  %}
  ins_pipe(pipe_serial);
%}

instruct get_and_addL_no_res(indirect mem, Universe dummy, iRegL incr) %{
  predicate(n->as_LoadStore()->result_not_used());
  match(Set dummy (GetAndAddL mem incr));
  ins_cost(2 * VOLATILE_REF_COST);
  format %{ "get_and_addL [$mem], $incr" %}
  ins_encode %{
    __ atomic_add(noreg, $incr$$Register, as_Register($mem$$base));
  %}
  ins_pipe(pipe_serial);
%}

instruct get_and_addLi(indirect mem, iRegLNoSp newval, immLAddSub incr) %{
  match(Set newval (GetAndAddL mem incr));
  ins_cost(2 * VOLATILE_REF_COST + 1);
  format %{ "get_and_addL $newval, [$mem], $incr" %}
  ins_encode %{
    __ atomic_add($newval$$Register, $incr$$constant, as_Register($mem$$base));
  %}
  ins_pipe(pipe_serial);
%}

instruct get_and_addLi_no_res(indirect mem, Universe dummy, immLAddSub incr) %{
  predicate(n->as_LoadStore()->result_not_used());
  match(Set dummy (GetAndAddL mem incr));
  ins_cost(2 * VOLATILE_REF_COST);
  format %{ "get_and_addL [$mem], $incr" %}
  ins_encode %{
    __ atomic_add(noreg, $incr$$constant, as_Register($mem$$base));
  %}
  ins_pipe(pipe_serial);
%}

instruct get_and_addI(indirect mem, iRegINoSp newval, iRegIorL2I incr) %{
  match(Set newval (GetAndAddI mem incr));
  ins_cost(2 * VOLATILE_REF_COST + 1);
  format %{ "get_and_addI $newval, [$mem], $incr" %}
  ins_encode %{
    __ atomic_addw($newval$$Register, $incr$$Register, as_Register($mem$$base));
  %}
  ins_pipe(pipe_serial);
%}

instruct get_and_addI_no_res(indirect mem, Universe dummy, iRegIorL2I incr) %{
  predicate(n->as_LoadStore()->result_not_used());
  match(Set dummy (GetAndAddI mem incr));
  ins_cost(2 * VOLATILE_REF_COST);
  format %{ "get_and_addI [$mem], $incr" %}
  ins_encode %{
    __ atomic_addw(noreg, $incr$$Register, as_Register($mem$$base));
  %}
  ins_pipe(pipe_serial);
%}

instruct get_and_addIi(indirect mem, iRegINoSp newval, immIAddSub incr) %{
  match(Set newval (GetAndAddI mem incr));
  ins_cost(2 * VOLATILE_REF_COST + 1);
  format %{ "get_and_addI $newval, [$mem], $incr" %}
  ins_encode %{
    __ atomic_addw($newval$$Register, $incr$$constant, as_Register($mem$$base));
  %}
  ins_pipe(pipe_serial);
%}

instruct get_and_addIi_no_res(indirect mem, Universe dummy, immIAddSub incr) %{
  predicate(n->as_LoadStore()->result_not_used());
  match(Set dummy (GetAndAddI mem incr));
  ins_cost(2 * VOLATILE_REF_COST);
  format %{ "get_and_addI [$mem], $incr" %}
  ins_encode %{
    __ atomic_addw(noreg, $incr$$constant, as_Register($mem$$base));
  %}
  ins_pipe(pipe_serial);
%}
8341 
// Acquiring GetAndAdd variants: same shapes as above but predicated on
// needs_acquiring_load_exclusive and using the atomic_addal(w)
// encodings; lower cost makes them win when the predicate holds.

instruct get_and_addLAcq(indirect mem, iRegLNoSp newval, iRegL incr) %{
  predicate(needs_acquiring_load_exclusive(n));
  match(Set newval (GetAndAddL mem incr));
  ins_cost(VOLATILE_REF_COST + 1);
  format %{ "get_and_addL_acq $newval, [$mem], $incr" %}
  ins_encode %{
    __ atomic_addal($newval$$Register, $incr$$Register, as_Register($mem$$base));
  %}
  ins_pipe(pipe_serial);
%}

instruct get_and_addL_no_resAcq(indirect mem, Universe dummy, iRegL incr) %{
  predicate(n->as_LoadStore()->result_not_used() && needs_acquiring_load_exclusive(n));
  match(Set dummy (GetAndAddL mem incr));
  ins_cost(VOLATILE_REF_COST);
  format %{ "get_and_addL_acq [$mem], $incr" %}
  ins_encode %{
    __ atomic_addal(noreg, $incr$$Register, as_Register($mem$$base));
  %}
  ins_pipe(pipe_serial);
%}

instruct get_and_addLiAcq(indirect mem, iRegLNoSp newval, immLAddSub incr) %{
  predicate(needs_acquiring_load_exclusive(n));
  match(Set newval (GetAndAddL mem incr));
  ins_cost(VOLATILE_REF_COST + 1);
  format %{ "get_and_addL_acq $newval, [$mem], $incr" %}
  ins_encode %{
    __ atomic_addal($newval$$Register, $incr$$constant, as_Register($mem$$base));
  %}
  ins_pipe(pipe_serial);
%}

instruct get_and_addLi_no_resAcq(indirect mem, Universe dummy, immLAddSub incr) %{
  predicate(n->as_LoadStore()->result_not_used() && needs_acquiring_load_exclusive(n));
  match(Set dummy (GetAndAddL mem incr));
  ins_cost(VOLATILE_REF_COST);
  format %{ "get_and_addL_acq [$mem], $incr" %}
  ins_encode %{
    __ atomic_addal(noreg, $incr$$constant, as_Register($mem$$base));
  %}
  ins_pipe(pipe_serial);
%}

instruct get_and_addIAcq(indirect mem, iRegINoSp newval, iRegIorL2I incr) %{
  predicate(needs_acquiring_load_exclusive(n));
  match(Set newval (GetAndAddI mem incr));
  ins_cost(VOLATILE_REF_COST + 1);
  format %{ "get_and_addI_acq $newval, [$mem], $incr" %}
  ins_encode %{
    __ atomic_addalw($newval$$Register, $incr$$Register, as_Register($mem$$base));
  %}
  ins_pipe(pipe_serial);
%}

instruct get_and_addI_no_resAcq(indirect mem, Universe dummy, iRegIorL2I incr) %{
  predicate(n->as_LoadStore()->result_not_used() && needs_acquiring_load_exclusive(n));
  match(Set dummy (GetAndAddI mem incr));
  ins_cost(VOLATILE_REF_COST);
  format %{ "get_and_addI_acq [$mem], $incr" %}
  ins_encode %{
    __ atomic_addalw(noreg, $incr$$Register, as_Register($mem$$base));
  %}
  ins_pipe(pipe_serial);
%}

instruct get_and_addIiAcq(indirect mem, iRegINoSp newval, immIAddSub incr) %{
  predicate(needs_acquiring_load_exclusive(n));
  match(Set newval (GetAndAddI mem incr));
  ins_cost(VOLATILE_REF_COST + 1);
  format %{ "get_and_addI_acq $newval, [$mem], $incr" %}
  ins_encode %{
    __ atomic_addalw($newval$$Register, $incr$$constant, as_Register($mem$$base));
  %}
  ins_pipe(pipe_serial);
%}

instruct get_and_addIi_no_resAcq(indirect mem, Universe dummy, immIAddSub incr) %{
  predicate(n->as_LoadStore()->result_not_used() && needs_acquiring_load_exclusive(n));
  match(Set dummy (GetAndAddI mem incr));
  ins_cost(VOLATILE_REF_COST);
  format %{ "get_and_addI_acq [$mem], $incr" %}
  ins_encode %{
    __ atomic_addalw(noreg, $incr$$constant, as_Register($mem$$base));
  %}
  ins_pipe(pipe_serial);
%}
8429 
8430 // ============================================================================
8431 // Conditional Move Instructions
8432 
8433 // n.b. we have identical rules for both a signed compare op (cmpOp)
8434 // and an unsigned compare op (cmpOpU). it would be nice if we could
8435 // define an op class which merged both inputs and use it to type the
// argument to a single rule. unfortunately this fails because the
// opclass does not live up to the COND_INTER interface of its
// component operands. When the generic code tries to negate the
// operand it ends up running the generic MachOper::negate method
// which throws a ShouldNotHappen. So, we have to provide two flavours
8441 // of each rule, one for a cmpOp and a second for a cmpOpU (sigh).
8442 
// CMoveI via csel: note the operand order — csel selects its second
// argument (src2) when the condition holds, src1 otherwise.

instruct cmovI_reg_reg(cmpOp cmp, rFlagsReg cr, iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2) %{
  match(Set dst (CMoveI (Binary cmp cr) (Binary src1 src2)));

  ins_cost(INSN_COST * 2);
  format %{ "cselw $dst, $src2, $src1 $cmp\t# signed, int"  %}

  ins_encode %{
    __ cselw(as_Register($dst$$reg),
             as_Register($src2$$reg),
             as_Register($src1$$reg),
             (Assembler::Condition)$cmp$$cmpcode);
  %}

  ins_pipe(icond_reg_reg);
%}

instruct cmovUI_reg_reg(cmpOpU cmp, rFlagsRegU cr, iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2) %{
  match(Set dst (CMoveI (Binary cmp cr) (Binary src1 src2)));

  ins_cost(INSN_COST * 2);
  format %{ "cselw $dst, $src2, $src1 $cmp\t# unsigned, int"  %}

  ins_encode %{
    __ cselw(as_Register($dst$$reg),
             as_Register($src2$$reg),
             as_Register($src1$$reg),
             (Assembler::Condition)$cmp$$cmpcode);
  %}

  ins_pipe(icond_reg_reg);
%}
8474 
8475 // special cases where one arg is zero
8476 
8477 // n.b. this is selected in preference to the rule above because it
8478 // avoids loading constant 0 into a source register
8479 
8480 // TODO
8481 // we ought only to be able to cull one of these variants as the ideal
8482 // transforms ought always to order the zero consistently (to left/right?)
8483 
// CMoveI with a zero operand: substitute zr for the immI0 operand so no
// register needs to hold the constant 0.

instruct cmovI_zero_reg(cmpOp cmp, rFlagsReg cr, iRegINoSp dst, immI0 zero, iRegIorL2I src) %{
  match(Set dst (CMoveI (Binary cmp cr) (Binary zero src)));

  ins_cost(INSN_COST * 2);
  format %{ "cselw $dst, $src, zr $cmp\t# signed, int"  %}

  ins_encode %{
    __ cselw(as_Register($dst$$reg),
             as_Register($src$$reg),
             zr,
             (Assembler::Condition)$cmp$$cmpcode);
  %}

  ins_pipe(icond_reg);
%}

instruct cmovUI_zero_reg(cmpOpU cmp, rFlagsRegU cr, iRegINoSp dst, immI0 zero, iRegIorL2I src) %{
  match(Set dst (CMoveI (Binary cmp cr) (Binary zero src)));

  ins_cost(INSN_COST * 2);
  format %{ "cselw $dst, $src, zr $cmp\t# unsigned, int"  %}

  ins_encode %{
    __ cselw(as_Register($dst$$reg),
             as_Register($src$$reg),
             zr,
             (Assembler::Condition)$cmp$$cmpcode);
  %}

  ins_pipe(icond_reg);
%}

instruct cmovI_reg_zero(cmpOp cmp, rFlagsReg cr, iRegINoSp dst, iRegIorL2I src, immI0 zero) %{
  match(Set dst (CMoveI (Binary cmp cr) (Binary src zero)));

  ins_cost(INSN_COST * 2);
  format %{ "cselw $dst, zr, $src $cmp\t# signed, int"  %}

  ins_encode %{
    __ cselw(as_Register($dst$$reg),
             zr,
             as_Register($src$$reg),
             (Assembler::Condition)$cmp$$cmpcode);
  %}

  ins_pipe(icond_reg);
%}

instruct cmovUI_reg_zero(cmpOpU cmp, rFlagsRegU cr, iRegINoSp dst, iRegIorL2I src, immI0 zero) %{
  match(Set dst (CMoveI (Binary cmp cr) (Binary src zero)));

  ins_cost(INSN_COST * 2);
  format %{ "cselw $dst, zr, $src $cmp\t# unsigned, int"  %}

  ins_encode %{
    __ cselw(as_Register($dst$$reg),
             zr,
             as_Register($src$$reg),
             (Assembler::Condition)$cmp$$cmpcode);
  %}

  ins_pipe(icond_reg);
%}
8547 
8548 // special case for creating a boolean 0 or 1
8549 
8550 // n.b. this is selected in preference to the rule above because it
8551 // avoids loading constants 0 and 1 into a source register
8552 
// Boolean materialization: csincw dst, zr, zr, cond yields 0 when cond
// holds (selects zr) and zr+1 == 1 otherwise, matching (Binary one zero)
// without loading either constant into a register.

instruct cmovI_reg_zero_one(cmpOp cmp, rFlagsReg cr, iRegINoSp dst, immI0 zero, immI_1 one) %{
  match(Set dst (CMoveI (Binary cmp cr) (Binary one zero)));

  ins_cost(INSN_COST * 2);
  format %{ "csincw $dst, zr, zr $cmp\t# signed, int"  %}

  ins_encode %{
    // equivalently
    // cset(as_Register($dst$$reg),
    //      negate_condition((Assembler::Condition)$cmp$$cmpcode));
    __ csincw(as_Register($dst$$reg),
             zr,
             zr,
             (Assembler::Condition)$cmp$$cmpcode);
  %}

  ins_pipe(icond_none);
%}

instruct cmovUI_reg_zero_one(cmpOpU cmp, rFlagsRegU cr, iRegINoSp dst, immI0 zero, immI_1 one) %{
  match(Set dst (CMoveI (Binary cmp cr) (Binary one zero)));

  ins_cost(INSN_COST * 2);
  format %{ "csincw $dst, zr, zr $cmp\t# unsigned, int"  %}

  ins_encode %{
    // equivalently
    // cset(as_Register($dst$$reg),
    //      negate_condition((Assembler::Condition)$cmp$$cmpcode));
    __ csincw(as_Register($dst$$reg),
             zr,
             zr,
             (Assembler::Condition)$cmp$$cmpcode);
  %}

  ins_pipe(icond_none);
%}
8590 
// CMoveL: 64-bit csel forms, same operand-order convention as the int
// rules above (src2 selected when the condition holds).

instruct cmovL_reg_reg(cmpOp cmp, rFlagsReg cr, iRegLNoSp dst, iRegL src1, iRegL src2) %{
  match(Set dst (CMoveL (Binary cmp cr) (Binary src1 src2)));

  ins_cost(INSN_COST * 2);
  format %{ "csel $dst, $src2, $src1 $cmp\t# signed, long"  %}

  ins_encode %{
    __ csel(as_Register($dst$$reg),
            as_Register($src2$$reg),
            as_Register($src1$$reg),
            (Assembler::Condition)$cmp$$cmpcode);
  %}

  ins_pipe(icond_reg_reg);
%}

instruct cmovUL_reg_reg(cmpOpU cmp, rFlagsRegU cr, iRegLNoSp dst, iRegL src1, iRegL src2) %{
  match(Set dst (CMoveL (Binary cmp cr) (Binary src1 src2)));

  ins_cost(INSN_COST * 2);
  format %{ "csel $dst, $src2, $src1 $cmp\t# unsigned, long"  %}

  ins_encode %{
    __ csel(as_Register($dst$$reg),
            as_Register($src2$$reg),
            as_Register($src1$$reg),
            (Assembler::Condition)$cmp$$cmpcode);
  %}

  ins_pipe(icond_reg_reg);
%}

// special cases where one arg is zero

instruct cmovL_reg_zero(cmpOp cmp, rFlagsReg cr, iRegLNoSp dst, iRegL src, immL0 zero) %{
  match(Set dst (CMoveL (Binary cmp cr) (Binary src zero)));

  ins_cost(INSN_COST * 2);
  format %{ "csel $dst, zr, $src $cmp\t# signed, long"  %}

  ins_encode %{
    __ csel(as_Register($dst$$reg),
            zr,
            as_Register($src$$reg),
            (Assembler::Condition)$cmp$$cmpcode);
  %}

  ins_pipe(icond_reg);
%}

instruct cmovUL_reg_zero(cmpOpU cmp, rFlagsRegU cr, iRegLNoSp dst, iRegL src, immL0 zero) %{
  match(Set dst (CMoveL (Binary cmp cr) (Binary src zero)));

  ins_cost(INSN_COST * 2);
  format %{ "csel $dst, zr, $src $cmp\t# unsigned, long"  %}

  ins_encode %{
    __ csel(as_Register($dst$$reg),
            zr,
            as_Register($src$$reg),
            (Assembler::Condition)$cmp$$cmpcode);
  %}

  ins_pipe(icond_reg);
%}

instruct cmovL_zero_reg(cmpOp cmp, rFlagsReg cr, iRegLNoSp dst, immL0 zero, iRegL src) %{
  match(Set dst (CMoveL (Binary cmp cr) (Binary zero src)));

  ins_cost(INSN_COST * 2);
  format %{ "csel $dst, $src, zr $cmp\t# signed, long"  %}

  ins_encode %{
    __ csel(as_Register($dst$$reg),
            as_Register($src$$reg),
            zr,
            (Assembler::Condition)$cmp$$cmpcode);
  %}

  ins_pipe(icond_reg);
%}

instruct cmovUL_zero_reg(cmpOpU cmp, rFlagsRegU cr, iRegLNoSp dst, immL0 zero, iRegL src) %{
  match(Set dst (CMoveL (Binary cmp cr) (Binary zero src)));

  ins_cost(INSN_COST * 2);
  format %{ "csel $dst, $src, zr $cmp\t# unsigned, long"  %}

  ins_encode %{
    __ csel(as_Register($dst$$reg),
            as_Register($src$$reg),
            zr,
            (Assembler::Condition)$cmp$$cmpcode);
  %}

  ins_pipe(icond_reg);
%}
8688 
// CMoveP: pointer-width csel forms, parallel to the long rules above.

instruct cmovP_reg_reg(cmpOp cmp, rFlagsReg cr, iRegPNoSp dst, iRegP src1, iRegP src2) %{
  match(Set dst (CMoveP (Binary cmp cr) (Binary src1 src2)));

  ins_cost(INSN_COST * 2);
  format %{ "csel $dst, $src2, $src1 $cmp\t# signed, ptr"  %}

  ins_encode %{
    __ csel(as_Register($dst$$reg),
            as_Register($src2$$reg),
            as_Register($src1$$reg),
            (Assembler::Condition)$cmp$$cmpcode);
  %}

  ins_pipe(icond_reg_reg);
%}

instruct cmovUP_reg_reg(cmpOpU cmp, rFlagsRegU cr, iRegPNoSp dst, iRegP src1, iRegP src2) %{
  match(Set dst (CMoveP (Binary cmp cr) (Binary src1 src2)));

  ins_cost(INSN_COST * 2);
  format %{ "csel $dst, $src2, $src1 $cmp\t# unsigned, ptr"  %}

  ins_encode %{
    __ csel(as_Register($dst$$reg),
            as_Register($src2$$reg),
            as_Register($src1$$reg),
            (Assembler::Condition)$cmp$$cmpcode);
  %}

  ins_pipe(icond_reg_reg);
%}

// special cases where one arg is zero

instruct cmovP_reg_zero(cmpOp cmp, rFlagsReg cr, iRegPNoSp dst, iRegP src, immP0 zero) %{
  match(Set dst (CMoveP (Binary cmp cr) (Binary src zero)));

  ins_cost(INSN_COST * 2);
  format %{ "csel $dst, zr, $src $cmp\t# signed, ptr"  %}

  ins_encode %{
    __ csel(as_Register($dst$$reg),
            zr,
            as_Register($src$$reg),
            (Assembler::Condition)$cmp$$cmpcode);
  %}

  ins_pipe(icond_reg);
%}

instruct cmovUP_reg_zero(cmpOpU cmp, rFlagsRegU cr, iRegPNoSp dst, iRegP src, immP0 zero) %{
  match(Set dst (CMoveP (Binary cmp cr) (Binary src zero)));

  ins_cost(INSN_COST * 2);
  format %{ "csel $dst, zr, $src $cmp\t# unsigned, ptr"  %}

  ins_encode %{
    __ csel(as_Register($dst$$reg),
            zr,
            as_Register($src$$reg),
            (Assembler::Condition)$cmp$$cmpcode);
  %}

  ins_pipe(icond_reg);
%}

instruct cmovP_zero_reg(cmpOp cmp, rFlagsReg cr, iRegPNoSp dst, immP0 zero, iRegP src) %{
  match(Set dst (CMoveP (Binary cmp cr) (Binary zero src)));

  ins_cost(INSN_COST * 2);
  format %{ "csel $dst, $src, zr $cmp\t# signed, ptr"  %}

  ins_encode %{
    __ csel(as_Register($dst$$reg),
            as_Register($src$$reg),
            zr,
            (Assembler::Condition)$cmp$$cmpcode);
  %}

  ins_pipe(icond_reg);
%}

instruct cmovUP_zero_reg(cmpOpU cmp, rFlagsRegU cr, iRegPNoSp dst, immP0 zero, iRegP src) %{
  match(Set dst (CMoveP (Binary cmp cr) (Binary zero src)));

  ins_cost(INSN_COST * 2);
  format %{ "csel $dst, $src, zr $cmp\t# unsigned, ptr"  %}

  ins_encode %{
    __ csel(as_Register($dst$$reg),
            as_Register($src$$reg),
            zr,
            (Assembler::Condition)$cmp$$cmpcode);
  %}

  ins_pipe(icond_reg);
%}
8786 
// CMoveN (narrow oop): 32-bit cselw forms, signed-compare flavour.
instruct cmovN_reg_reg(cmpOp cmp, rFlagsReg cr, iRegNNoSp dst, iRegN src1, iRegN src2) %{
  match(Set dst (CMoveN (Binary cmp cr) (Binary src1 src2)));

  ins_cost(INSN_COST * 2);
  format %{ "cselw $dst, $src2, $src1 $cmp\t# signed, compressed ptr"  %}

  ins_encode %{
    __ cselw(as_Register($dst$$reg),
             as_Register($src2$$reg),
             as_Register($src1$$reg),
             (Assembler::Condition)$cmp$$cmpcode);
  %}

  ins_pipe(icond_reg_reg);
%}
8802 
// CMoveN (narrow oop): 32-bit cselw forms, unsigned-compare flavour.
// Fix: the disassembly comment previously said "signed" even though this
// rule takes a cmpOpU/rFlagsRegU (unsigned compare); now consistent with
// the cmovUI/cmovUL/cmovUP rules.
instruct cmovUN_reg_reg(cmpOpU cmp, rFlagsRegU cr, iRegNNoSp dst, iRegN src1, iRegN src2) %{
  match(Set dst (CMoveN (Binary cmp cr) (Binary src1 src2)));

  ins_cost(INSN_COST * 2);
  format %{ "cselw $dst, $src2, $src1 $cmp\t# unsigned, compressed ptr"  %}

  ins_encode %{
    __ cselw(as_Register($dst$$reg),
             as_Register($src2$$reg),
             as_Register($src1$$reg),
             (Assembler::Condition)$cmp$$cmpcode);
  %}

  ins_pipe(icond_reg_reg);
%}
8818 
// special cases where one arg is zero

// dst = (cmp holds) ? 0 : src — the zero input comes second in the CMoveN,
// so it is the selected-when-true value; cselw reads zr for it.
instruct cmovN_reg_zero(cmpOp cmp, rFlagsReg cr, iRegNNoSp dst, iRegN src, immN0 zero) %{
  match(Set dst (CMoveN (Binary cmp cr) (Binary src zero)));

  ins_cost(INSN_COST * 2);
  format %{ "cselw $dst, zr, $src $cmp\t# signed, compressed ptr"  %}

  ins_encode %{
    __ cselw(as_Register($dst$$reg),
             zr,
             as_Register($src$$reg),
             (Assembler::Condition)$cmp$$cmpcode);
  %}

  ins_pipe(icond_reg);
%}

// As cmovN_reg_zero, for unsigned compares.
instruct cmovUN_reg_zero(cmpOpU cmp, rFlagsRegU cr, iRegNNoSp dst, iRegN src, immN0 zero) %{
  match(Set dst (CMoveN (Binary cmp cr) (Binary src zero)));

  ins_cost(INSN_COST * 2);
  format %{ "cselw $dst, zr, $src $cmp\t# unsigned, compressed ptr"  %}

  ins_encode %{
    __ cselw(as_Register($dst$$reg),
             zr,
             as_Register($src$$reg),
             (Assembler::Condition)$cmp$$cmpcode);
  %}

  ins_pipe(icond_reg);
%}

// dst = (cmp holds) ? src : 0 — zero is the first CMoveN input here.
instruct cmovN_zero_reg(cmpOp cmp, rFlagsReg cr, iRegNNoSp dst, immN0 zero, iRegN src) %{
  match(Set dst (CMoveN (Binary cmp cr) (Binary zero src)));

  ins_cost(INSN_COST * 2);
  format %{ "cselw $dst, $src, zr $cmp\t# signed, compressed ptr"  %}

  ins_encode %{
    __ cselw(as_Register($dst$$reg),
             as_Register($src$$reg),
             zr,
             (Assembler::Condition)$cmp$$cmpcode);
  %}

  ins_pipe(icond_reg);
%}

// As cmovN_zero_reg, for unsigned compares.
instruct cmovUN_zero_reg(cmpOpU cmp, rFlagsRegU cr, iRegNNoSp dst, immN0 zero, iRegN src) %{
  match(Set dst (CMoveN (Binary cmp cr) (Binary zero src)));

  ins_cost(INSN_COST * 2);
  format %{ "cselw $dst, $src, zr $cmp\t# unsigned, compressed ptr"  %}

  ins_encode %{
    __ cselw(as_Register($dst$$reg),
             as_Register($src$$reg),
             zr,
             (Assembler::Condition)$cmp$$cmpcode);
  %}

  ins_pipe(icond_reg);
%}
8884 
// Conditional move, float: dst = (cmp holds on cr) ? src2 : src1.
// fcsels selects its first FP source when the condition holds, hence the
// src2/src1 swap relative to the CMoveF operand order.
instruct cmovF_reg(cmpOp cmp, rFlagsReg cr, vRegF dst, vRegF src1,  vRegF src2)
%{
  match(Set dst (CMoveF (Binary cmp cr) (Binary src1 src2)));

  ins_cost(INSN_COST * 3);

  format %{ "fcsels $dst, $src1, $src2, $cmp\t# signed cmove float\n\t" %}
  ins_encode %{
    Assembler::Condition cond = (Assembler::Condition)$cmp$$cmpcode;
    __ fcsels(as_FloatRegister($dst$$reg),
              as_FloatRegister($src2$$reg),
              as_FloatRegister($src1$$reg),
              cond);
  %}

  ins_pipe(fp_cond_reg_reg_s);
%}

// As cmovF_reg, for unsigned compares.
instruct cmovUF_reg(cmpOpU cmp, rFlagsRegU cr, vRegF dst, vRegF src1,  vRegF src2)
%{
  match(Set dst (CMoveF (Binary cmp cr) (Binary src1 src2)));

  ins_cost(INSN_COST * 3);

  format %{ "fcsels $dst, $src1, $src2, $cmp\t# unsigned cmove float\n\t" %}
  ins_encode %{
    Assembler::Condition cond = (Assembler::Condition)$cmp$$cmpcode;
    __ fcsels(as_FloatRegister($dst$$reg),
              as_FloatRegister($src2$$reg),
              as_FloatRegister($src1$$reg),
              cond);
  %}

  ins_pipe(fp_cond_reg_reg_s);
%}
8920 
// Conditional move, double: dst = (cmp holds on cr) ? src2 : src1.
instruct cmovD_reg(cmpOp cmp, rFlagsReg cr, vRegD dst, vRegD src1,  vRegD src2)
%{
  match(Set dst (CMoveD (Binary cmp cr) (Binary src1 src2)));

  ins_cost(INSN_COST * 3);

  // Fixed format text: this is the double variant (fcseld); the comment
  // previously said "cmove float".
  format %{ "fcseld $dst, $src1, $src2, $cmp\t# signed cmove double\n\t" %}
  ins_encode %{
    Assembler::Condition cond = (Assembler::Condition)$cmp$$cmpcode;
    __ fcseld(as_FloatRegister($dst$$reg),
              as_FloatRegister($src2$$reg),
              as_FloatRegister($src1$$reg),
              cond);
  %}

  ins_pipe(fp_cond_reg_reg_d);
%}
8938 
// Conditional move, double, unsigned compare: dst = (cmp holds) ? src2 : src1.
instruct cmovUD_reg(cmpOpU cmp, rFlagsRegU cr, vRegD dst, vRegD src1,  vRegD src2)
%{
  match(Set dst (CMoveD (Binary cmp cr) (Binary src1 src2)));

  ins_cost(INSN_COST * 3);

  // Fixed format text: this is the double variant (fcseld); the comment
  // previously said "cmove float".
  format %{ "fcseld $dst, $src1, $src2, $cmp\t# unsigned cmove double\n\t" %}
  ins_encode %{
    Assembler::Condition cond = (Assembler::Condition)$cmp$$cmpcode;
    __ fcseld(as_FloatRegister($dst$$reg),
              as_FloatRegister($src2$$reg),
              as_FloatRegister($src1$$reg),
              cond);
  %}

  ins_pipe(fp_cond_reg_reg_d);
%}
8956 
8957 // ============================================================================
8958 // Arithmetic Instructions
8959 //
8960 
8961 // Integer Addition
8962 
8963 // TODO
8964 // these currently employ operations which do not set CR and hence are
8965 // not flagged as killing CR but we would like to isolate the cases
8966 // where we want to set flags from those where we don't. need to work
8967 // out how to do that.
8968 
8969 instruct addI_reg_reg(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2) %{
8970   match(Set dst (AddI src1 src2));
8971 
8972   ins_cost(INSN_COST);
8973   format %{ "addw  $dst, $src1, $src2" %}
8974 
8975   ins_encode %{
8976     __ addw(as_Register($dst$$reg),
8977             as_Register($src1$$reg),
8978             as_Register($src2$$reg));
8979   %}
8980 
8981   ins_pipe(ialu_reg_reg);
8982 %}
8983 
8984 instruct addI_reg_imm(iRegINoSp dst, iRegIorL2I src1, immIAddSub src2) %{
8985   match(Set dst (AddI src1 src2));
8986 
8987   ins_cost(INSN_COST);
8988   format %{ "addw $dst, $src1, $src2" %}
8989 
8990   // use opcode to indicate that this is an add not a sub
8991   opcode(0x0);
8992 
8993   ins_encode(aarch64_enc_addsubw_imm(dst, src1, src2));
8994 
8995   ins_pipe(ialu_reg_imm);
8996 %}
8997 
8998 instruct addI_reg_imm_i2l(iRegINoSp dst, iRegL src1, immIAddSub src2) %{
8999   match(Set dst (AddI (ConvL2I src1) src2));
9000 
9001   ins_cost(INSN_COST);
9002   format %{ "addw $dst, $src1, $src2" %}
9003 
9004   // use opcode to indicate that this is an add not a sub
9005   opcode(0x0);
9006 
9007   ins_encode(aarch64_enc_addsubw_imm(dst, src1, src2));
9008 
9009   ins_pipe(ialu_reg_imm);
9010 %}
9011 
// Pointer Addition
// Plain 64-bit add of a long offset to a pointer base.
instruct addP_reg_reg(iRegPNoSp dst, iRegP src1, iRegL src2) %{
  match(Set dst (AddP src1 src2));

  ins_cost(INSN_COST);
  format %{ "add $dst, $src1, $src2\t# ptr" %}

  ins_encode %{
    __ add(as_Register($dst$$reg),
           as_Register($src1$$reg),
           as_Register($src2$$reg));
  %}

  ins_pipe(ialu_reg_reg);
%}

// Pointer plus sign-extended int offset, folded into add's sxtw extend.
instruct addP_reg_reg_ext(iRegPNoSp dst, iRegP src1, iRegIorL2I src2) %{
  match(Set dst (AddP src1 (ConvI2L src2)));

  ins_cost(1.9 * INSN_COST);
  format %{ "add $dst, $src1, $src2, sxtw\t# ptr" %}

  ins_encode %{
    __ add(as_Register($dst$$reg),
           as_Register($src1$$reg),
           as_Register($src2$$reg), ext::sxtw);
  %}

  ins_pipe(ialu_reg_reg);
%}

// Pointer plus scaled long index, folded into a single lea (base + reg<<scale).
instruct addP_reg_reg_lsl(iRegPNoSp dst, iRegP src1, iRegL src2, immIScale scale) %{
  match(Set dst (AddP src1 (LShiftL src2 scale)));

  ins_cost(1.9 * INSN_COST);
  format %{ "add $dst, $src1, $src2, LShiftL $scale\t# ptr" %}

  ins_encode %{
    __ lea(as_Register($dst$$reg),
           Address(as_Register($src1$$reg), as_Register($src2$$reg),
                   Address::lsl($scale$$constant)));
  %}

  ins_pipe(ialu_reg_reg_shift);
%}

// Pointer plus sign-extended, scaled int index: the ConvI2L and shift both
// fold into the addressing mode (sxtw #scale).
instruct addP_reg_reg_ext_shift(iRegPNoSp dst, iRegP src1, iRegIorL2I src2, immIScale scale) %{
  match(Set dst (AddP src1 (LShiftL (ConvI2L src2) scale)));

  ins_cost(1.9 * INSN_COST);
  format %{ "add $dst, $src1, $src2, I2L $scale\t# ptr" %}

  ins_encode %{
    __ lea(as_Register($dst$$reg),
           Address(as_Register($src1$$reg), as_Register($src2$$reg),
                   Address::sxtw($scale$$constant)));
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
9072 
// LShiftL of a sign-extended int: a single sbfiz performs both the
// sign-extension and the shift. The width argument is capped at 32 because
// the source value only carries 32 significant bits.
instruct lshift_ext(iRegLNoSp dst, iRegIorL2I src, immI scale, rFlagsReg cr) %{
  match(Set dst (LShiftL (ConvI2L src) scale));

  ins_cost(INSN_COST);
  format %{ "sbfiz $dst, $src, $scale & 63, -$scale & 63\t" %}

  ins_encode %{
    __ sbfiz(as_Register($dst$$reg),
          as_Register($src$$reg),
          $scale$$constant & 63, MIN(32, (-$scale$$constant) & 63));
  %}

  ins_pipe(ialu_reg_shift);
%}
9087 
// Pointer Immediate Addition
// n.b. this needs to be more expensive than using an indirect memory
// operand
instruct addP_reg_imm(iRegPNoSp dst, iRegP src1, immLAddSub src2) %{
  match(Set dst (AddP src1 src2));

  ins_cost(INSN_COST);
  format %{ "add $dst, $src1, $src2\t# ptr" %}

  // use opcode to indicate that this is an add not a sub
  opcode(0x0);

  ins_encode( aarch64_enc_addsub_imm(dst, src1, src2) );

  ins_pipe(ialu_reg_imm);
%}

// Long Addition
// 64-bit register-register add.
instruct addL_reg_reg(iRegLNoSp dst, iRegL src1, iRegL src2) %{

  match(Set dst (AddL src1 src2));

  ins_cost(INSN_COST);
  format %{ "add  $dst, $src1, $src2" %}

  ins_encode %{
    __ add(as_Register($dst$$reg),
           as_Register($src1$$reg),
           as_Register($src2$$reg));
  %}

  ins_pipe(ialu_reg_reg);
%}
9121 
// Long Immediate Addition. No constant pool entries required.
// 64-bit add of an add/sub-encodable immediate, via the shared encoder.
instruct addL_reg_imm(iRegLNoSp dst, iRegL src1, immLAddSub src2) %{
  match(Set dst (AddL src1 src2));

  ins_cost(INSN_COST);
  format %{ "add $dst, $src1, $src2" %}

  // use opcode to indicate that this is an add not a sub
  opcode(0x0);

  ins_encode( aarch64_enc_addsub_imm(dst, src1, src2) );

  ins_pipe(ialu_reg_imm);
%}
9136 
// Integer Subtraction
// 32-bit register-register subtract.
instruct subI_reg_reg(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2) %{
  match(Set dst (SubI src1 src2));

  ins_cost(INSN_COST);
  format %{ "subw  $dst, $src1, $src2" %}

  ins_encode %{
    __ subw(as_Register($dst$$reg),
            as_Register($src1$$reg),
            as_Register($src2$$reg));
  %}

  ins_pipe(ialu_reg_reg);
%}

// Immediate Subtraction
// 32-bit subtract of an add/sub-encodable immediate, via the shared encoder.
instruct subI_reg_imm(iRegINoSp dst, iRegIorL2I src1, immIAddSub src2) %{
  match(Set dst (SubI src1 src2));

  ins_cost(INSN_COST);
  format %{ "subw $dst, $src1, $src2" %}

  // use opcode to indicate that this is a sub not an add
  opcode(0x1);

  ins_encode(aarch64_enc_addsubw_imm(dst, src1, src2));

  ins_pipe(ialu_reg_imm);
%}

// Long Subtraction
// 64-bit register-register subtract.
instruct subL_reg_reg(iRegLNoSp dst, iRegL src1, iRegL src2) %{

  match(Set dst (SubL src1 src2));

  ins_cost(INSN_COST);
  format %{ "sub  $dst, $src1, $src2" %}

  ins_encode %{
    __ sub(as_Register($dst$$reg),
           as_Register($src1$$reg),
           as_Register($src2$$reg));
  %}

  ins_pipe(ialu_reg_reg);
%}
9184 
// Long Immediate Subtraction. No constant pool entries required.
// 64-bit subtract of an add/sub-encodable immediate, via the shared encoder.
instruct subL_reg_imm(iRegLNoSp dst, iRegL src1, immLAddSub src2) %{
  match(Set dst (SubL src1 src2));

  ins_cost(INSN_COST);
  // Fixed format text: missing space between mnemonic and operands
  // ("sub$dst" -> "sub $dst").
  format %{ "sub $dst, $src1, $src2" %}

  // use opcode to indicate that this is a sub not an add
  opcode(0x1);

  ins_encode( aarch64_enc_addsub_imm(dst, src1, src2) );

  ins_pipe(ialu_reg_imm);
%}
9199 
// Integer Negation (special case for sub)

// dst = 0 - src (32-bit).
instruct negI_reg(iRegINoSp dst, iRegIorL2I src, immI0 zero, rFlagsReg cr) %{
  match(Set dst (SubI zero src));

  ins_cost(INSN_COST);
  format %{ "negw $dst, $src\t# int" %}

  ins_encode %{
    __ negw(as_Register($dst$$reg),
            as_Register($src$$reg));
  %}

  ins_pipe(ialu_reg);
%}

// Long Negation

// dst = 0 - src (64-bit).
// NOTE(review): the src operand is declared iRegIorL2I although SubL takes a
// long input — looks like it should be iRegL; confirm against the matcher.
instruct negL_reg(iRegLNoSp dst, iRegIorL2I src, immL0 zero, rFlagsReg cr) %{
  match(Set dst (SubL zero src));

  ins_cost(INSN_COST);
  format %{ "neg $dst, $src\t# long" %}

  ins_encode %{
    __ neg(as_Register($dst$$reg),
           as_Register($src$$reg));
  %}

  ins_pipe(ialu_reg);
%}
9231 
// Integer Multiply

// 32-bit multiply.
instruct mulI(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2) %{
  match(Set dst (MulI src1 src2));

  ins_cost(INSN_COST * 3);
  format %{ "mulw  $dst, $src1, $src2" %}

  ins_encode %{
    __ mulw(as_Register($dst$$reg),
            as_Register($src1$$reg),
            as_Register($src2$$reg));
  %}

  ins_pipe(imul_reg_reg);
%}

// Widening 32x32->64 multiply: both ConvI2L nodes fold into smull.
instruct smulI(iRegLNoSp dst, iRegIorL2I src1, iRegIorL2I src2) %{
  match(Set dst (MulL (ConvI2L src1) (ConvI2L src2)));

  ins_cost(INSN_COST * 3);
  format %{ "smull  $dst, $src1, $src2" %}

  ins_encode %{
    __ smull(as_Register($dst$$reg),
             as_Register($src1$$reg),
             as_Register($src2$$reg));
  %}

  ins_pipe(imul_reg_reg);
%}

// Long Multiply

// 64-bit multiply (low half of the product).
instruct mulL(iRegLNoSp dst, iRegL src1, iRegL src2) %{
  match(Set dst (MulL src1 src2));

  ins_cost(INSN_COST * 5);
  format %{ "mul  $dst, $src1, $src2" %}

  ins_encode %{
    __ mul(as_Register($dst$$reg),
           as_Register($src1$$reg),
           as_Register($src2$$reg));
  %}

  ins_pipe(lmul_reg_reg);
%}
9280 
// High 64 bits of the signed 64x64->128 product.
instruct mulHiL_rReg(iRegLNoSp dst, iRegL src1, iRegL src2, rFlagsReg cr)
%{
  match(Set dst (MulHiL src1 src2));

  ins_cost(INSN_COST * 7);
  // Fixed format text: dropped the stray trailing comma after $src2.
  format %{ "smulh   $dst, $src1, $src2\t# mulhi" %}

  ins_encode %{
    __ smulh(as_Register($dst$$reg),
             as_Register($src1$$reg),
             as_Register($src2$$reg));
  %}

  ins_pipe(lmul_reg_reg);
%}
9296 
9297 // Combined Integer Multiply & Add/Sub
9298 
// dst = src3 + src1 * src2 (32-bit multiply-add).
instruct maddI(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2, iRegIorL2I src3) %{
  match(Set dst (AddI src3 (MulI src1 src2)));

  ins_cost(INSN_COST * 3);
  // Fixed format text: the encoding emits the 32-bit maddw, not madd
  // (matches the addw/subw naming convention used elsewhere in this file).
  format %{ "maddw $dst, $src1, $src2, $src3" %}

  ins_encode %{
    __ maddw(as_Register($dst$$reg),
             as_Register($src1$$reg),
             as_Register($src2$$reg),
             as_Register($src3$$reg));
  %}

  ins_pipe(imac_reg_reg);
%}
9314 
// dst = src3 - src1 * src2 (32-bit multiply-subtract).
instruct msubI(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2, iRegIorL2I src3) %{
  match(Set dst (SubI src3 (MulI src1 src2)));

  ins_cost(INSN_COST * 3);
  // Fixed format text: the encoding emits the 32-bit msubw, not msub.
  format %{ "msubw $dst, $src1, $src2, $src3" %}

  ins_encode %{
    __ msubw(as_Register($dst$$reg),
             as_Register($src1$$reg),
             as_Register($src2$$reg),
             as_Register($src3$$reg));
  %}

  ins_pipe(imac_reg_reg);
%}
9330 
// Combined Long Multiply & Add/Sub

// dst = src3 + src1 * src2 (64-bit multiply-add).
instruct maddL(iRegLNoSp dst, iRegL src1, iRegL src2, iRegL src3) %{
  match(Set dst (AddL src3 (MulL src1 src2)));

  ins_cost(INSN_COST * 5);
  format %{ "madd  $dst, $src1, $src2, $src3" %}

  ins_encode %{
    __ madd(as_Register($dst$$reg),
            as_Register($src1$$reg),
            as_Register($src2$$reg),
            as_Register($src3$$reg));
  %}

  ins_pipe(lmac_reg_reg);
%}

// dst = src3 - src1 * src2 (64-bit multiply-subtract).
instruct msubL(iRegLNoSp dst, iRegL src1, iRegL src2, iRegL src3) %{
  match(Set dst (SubL src3 (MulL src1 src2)));

  ins_cost(INSN_COST * 5);
  format %{ "msub  $dst, $src1, $src2, $src3" %}

  ins_encode %{
    __ msub(as_Register($dst$$reg),
            as_Register($src1$$reg),
            as_Register($src2$$reg),
            as_Register($src3$$reg));
  %}

  ins_pipe(lmac_reg_reg);
%}
9364 
// Integer Divide

instruct divI(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2) %{
  match(Set dst (DivI src1 src2));

  ins_cost(INSN_COST * 19);
  format %{ "sdivw  $dst, $src1, $src2" %}

  ins_encode(aarch64_enc_divw(dst, src1, src2));
  ins_pipe(idiv_reg_reg);
%}

// (src1 >> 31) >>> 31 extracts the sign bit as 0/1; a single unsigned
// shift by 31 produces the same result. Part of C2's strength-reduced
// divide-by-power-of-two sequence.
instruct signExtract(iRegINoSp dst, iRegIorL2I src1, immI_31 div1, immI_31 div2) %{
  match(Set dst (URShiftI (RShiftI src1 div1) div2));
  ins_cost(INSN_COST);
  format %{ "lsrw $dst, $src1, $div1" %}
  ins_encode %{
    __ lsrw(as_Register($dst$$reg), as_Register($src1$$reg), 31);
  %}
  ins_pipe(ialu_reg_shift);
%}

// src + sign-bit(src): the round-toward-zero adjustment for division by 2,
// folded into one addw with a shifted-register operand.
instruct div2Round(iRegINoSp dst, iRegIorL2I src, immI_31 div1, immI_31 div2) %{
  match(Set dst (AddI src (URShiftI (RShiftI src div1) div2)));
  ins_cost(INSN_COST);
  format %{ "addw $dst, $src, LSR $div1" %}

  ins_encode %{
    __ addw(as_Register($dst$$reg),
              as_Register($src$$reg),
              as_Register($src$$reg),
              Assembler::LSR, 31);
  %}
  ins_pipe(ialu_reg);
%}
9400 
// Long Divide

instruct divL(iRegLNoSp dst, iRegL src1, iRegL src2) %{
  match(Set dst (DivL src1 src2));

  ins_cost(INSN_COST * 35);
  format %{ "sdiv   $dst, $src1, $src2" %}

  ins_encode(aarch64_enc_div(dst, src1, src2));
  ins_pipe(ldiv_reg_reg);
%}

// 64-bit sign-bit extraction: (src1 >> 63) >>> 63 == src1 >>> 63.
instruct signExtractL(iRegLNoSp dst, iRegL src1, immL_63 div1, immL_63 div2) %{
  match(Set dst (URShiftL (RShiftL src1 div1) div2));
  ins_cost(INSN_COST);
  format %{ "lsr $dst, $src1, $div1" %}
  ins_encode %{
    __ lsr(as_Register($dst$$reg), as_Register($src1$$reg), 63);
  %}
  ins_pipe(ialu_reg_shift);
%}
9422 
// src + sign-bit(src): 64-bit round-toward-zero adjustment for division
// by 2, folded into one add with a shifted-register operand.
instruct div2RoundL(iRegLNoSp dst, iRegL src, immL_63 div1, immL_63 div2) %{
  match(Set dst (AddL src (URShiftL (RShiftL src div1) div2)));
  ins_cost(INSN_COST);
  // Fixed format text: show the shifted-register operand ("LSR $div1"),
  // matching the 32-bit div2Round format.
  format %{ "add $dst, $src, LSR $div1" %}

  ins_encode %{
    __ add(as_Register($dst$$reg),
              as_Register($src$$reg),
              as_Register($src$$reg),
              Assembler::LSR, 63);
  %}
  ins_pipe(ialu_reg);
%}
9436 
9437 // Integer Remainder
9438 
// 32-bit remainder: sdivw then msubw to recover dst = src1 - (src1/src2)*src2.
instruct modI(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2) %{
  match(Set dst (ModI src1 src2));

  ins_cost(INSN_COST * 22);
  // Fixed format text: removed the stray '(' after msubw.
  format %{ "sdivw  rscratch1, $src1, $src2\n\t"
            "msubw  $dst, rscratch1, $src2, $src1" %}

  ins_encode(aarch64_enc_modw(dst, src1, src2));
  ins_pipe(idiv_reg_reg);
%}
9449 
9450 // Long Remainder
9451 
// 64-bit remainder: sdiv then msub to recover dst = src1 - (src1/src2)*src2.
instruct modL(iRegLNoSp dst, iRegL src1, iRegL src2) %{
  match(Set dst (ModL src1 src2));

  ins_cost(INSN_COST * 38);
  // Fixed format text: removed the stray '(' after msub and used "\n\t"
  // so the second line is indented like modI's format.
  format %{ "sdiv   rscratch1, $src1, $src2\n\t"
            "msub   $dst, rscratch1, $src2, $src1" %}

  ins_encode(aarch64_enc_mod(dst, src1, src2));
  ins_pipe(ldiv_reg_reg);
%}
9462 
// Integer Shifts

// Shift Left Register
// Variable shift: lslvw takes the count from a register.
instruct lShiftI_reg_reg(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2) %{
  match(Set dst (LShiftI src1 src2));

  ins_cost(INSN_COST * 2);
  format %{ "lslvw  $dst, $src1, $src2" %}

  ins_encode %{
    __ lslvw(as_Register($dst$$reg),
             as_Register($src1$$reg),
             as_Register($src2$$reg));
  %}

  ins_pipe(ialu_reg_reg_vshift);
%}

// Shift Left Immediate
// Count is masked to 0..31, matching Java's int shift semantics.
instruct lShiftI_reg_imm(iRegINoSp dst, iRegIorL2I src1, immI src2) %{
  match(Set dst (LShiftI src1 src2));

  ins_cost(INSN_COST);
  format %{ "lslw $dst, $src1, ($src2 & 0x1f)" %}

  ins_encode %{
    __ lslw(as_Register($dst$$reg),
            as_Register($src1$$reg),
            $src2$$constant & 0x1f);
  %}

  ins_pipe(ialu_reg_shift);
%}

// Shift Right Logical Register
instruct urShiftI_reg_reg(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2) %{
  match(Set dst (URShiftI src1 src2));

  ins_cost(INSN_COST * 2);
  format %{ "lsrvw  $dst, $src1, $src2" %}

  ins_encode %{
    __ lsrvw(as_Register($dst$$reg),
             as_Register($src1$$reg),
             as_Register($src2$$reg));
  %}

  ins_pipe(ialu_reg_reg_vshift);
%}

// Shift Right Logical Immediate
instruct urShiftI_reg_imm(iRegINoSp dst, iRegIorL2I src1, immI src2) %{
  match(Set dst (URShiftI src1 src2));

  ins_cost(INSN_COST);
  format %{ "lsrw $dst, $src1, ($src2 & 0x1f)" %}

  ins_encode %{
    __ lsrw(as_Register($dst$$reg),
            as_Register($src1$$reg),
            $src2$$constant & 0x1f);
  %}

  ins_pipe(ialu_reg_shift);
%}

// Shift Right Arithmetic Register
instruct rShiftI_reg_reg(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2) %{
  match(Set dst (RShiftI src1 src2));

  ins_cost(INSN_COST * 2);
  format %{ "asrvw  $dst, $src1, $src2" %}

  ins_encode %{
    __ asrvw(as_Register($dst$$reg),
             as_Register($src1$$reg),
             as_Register($src2$$reg));
  %}

  ins_pipe(ialu_reg_reg_vshift);
%}

// Shift Right Arithmetic Immediate
instruct rShiftI_reg_imm(iRegINoSp dst, iRegIorL2I src1, immI src2) %{
  match(Set dst (RShiftI src1 src2));

  ins_cost(INSN_COST);
  format %{ "asrw $dst, $src1, ($src2 & 0x1f)" %}

  ins_encode %{
    __ asrw(as_Register($dst$$reg),
            as_Register($src1$$reg),
            $src2$$constant & 0x1f);
  %}

  ins_pipe(ialu_reg_shift);
%}
9560 
// Combined Int Mask and Right Shift (using UBFM)
// TODO

// Long Shifts

// Shift Left Register
// Variable shift: lslv takes the count from a register.
instruct lShiftL_reg_reg(iRegLNoSp dst, iRegL src1, iRegIorL2I src2) %{
  match(Set dst (LShiftL src1 src2));

  ins_cost(INSN_COST * 2);
  format %{ "lslv  $dst, $src1, $src2" %}

  ins_encode %{
    __ lslv(as_Register($dst$$reg),
            as_Register($src1$$reg),
            as_Register($src2$$reg));
  %}

  ins_pipe(ialu_reg_reg_vshift);
%}

// Shift Left Immediate
// Count is masked to 0..63, matching Java's long shift semantics.
instruct lShiftL_reg_imm(iRegLNoSp dst, iRegL src1, immI src2) %{
  match(Set dst (LShiftL src1 src2));

  ins_cost(INSN_COST);
  format %{ "lsl $dst, $src1, ($src2 & 0x3f)" %}

  ins_encode %{
    __ lsl(as_Register($dst$$reg),
            as_Register($src1$$reg),
            $src2$$constant & 0x3f);
  %}

  ins_pipe(ialu_reg_shift);
%}

// Shift Right Logical Register
instruct urShiftL_reg_reg(iRegLNoSp dst, iRegL src1, iRegIorL2I src2) %{
  match(Set dst (URShiftL src1 src2));

  ins_cost(INSN_COST * 2);
  format %{ "lsrv  $dst, $src1, $src2" %}

  ins_encode %{
    __ lsrv(as_Register($dst$$reg),
            as_Register($src1$$reg),
            as_Register($src2$$reg));
  %}

  ins_pipe(ialu_reg_reg_vshift);
%}

// Shift Right Logical Immediate
instruct urShiftL_reg_imm(iRegLNoSp dst, iRegL src1, immI src2) %{
  match(Set dst (URShiftL src1 src2));

  ins_cost(INSN_COST);
  format %{ "lsr $dst, $src1, ($src2 & 0x3f)" %}

  ins_encode %{
    __ lsr(as_Register($dst$$reg),
           as_Register($src1$$reg),
           $src2$$constant & 0x3f);
  %}

  ins_pipe(ialu_reg_shift);
%}

// A special-case pattern for card table stores.
// URShiftL of a pointer reinterpreted as a long (CastP2X) — same lsr,
// but the source operand is a pointer register.
instruct urShiftP_reg_imm(iRegLNoSp dst, iRegP src1, immI src2) %{
  match(Set dst (URShiftL (CastP2X src1) src2));

  ins_cost(INSN_COST);
  format %{ "lsr $dst, p2x($src1), ($src2 & 0x3f)" %}

  ins_encode %{
    __ lsr(as_Register($dst$$reg),
           as_Register($src1$$reg),
           $src2$$constant & 0x3f);
  %}

  ins_pipe(ialu_reg_shift);
%}

// Shift Right Arithmetic Register
instruct rShiftL_reg_reg(iRegLNoSp dst, iRegL src1, iRegIorL2I src2) %{
  match(Set dst (RShiftL src1 src2));

  ins_cost(INSN_COST * 2);
  format %{ "asrv  $dst, $src1, $src2" %}

  ins_encode %{
    __ asrv(as_Register($dst$$reg),
            as_Register($src1$$reg),
            as_Register($src2$$reg));
  %}

  ins_pipe(ialu_reg_reg_vshift);
%}

// Shift Right Arithmetic Immediate
instruct rShiftL_reg_imm(iRegLNoSp dst, iRegL src1, immI src2) %{
  match(Set dst (RShiftL src1 src2));

  ins_cost(INSN_COST);
  format %{ "asr $dst, $src1, ($src2 & 0x3f)" %}

  ins_encode %{
    __ asr(as_Register($dst$$reg),
           as_Register($src1$$reg),
           $src2$$constant & 0x3f);
  %}

  ins_pipe(ialu_reg_shift);
%}
9677 
// BEGIN This section of the file is automatically generated. Do not edit --------------
// NOTE(review): comments in this section are annotations only; they will be
// lost if the section is regenerated.

// dst = ~src1 (bitwise NOT, matched as src1 ^ -1), 64-bit.
instruct regL_not_reg(iRegLNoSp dst,
                         iRegL src1, immL_M1 m1,
                         rFlagsReg cr) %{
  match(Set dst (XorL src1 m1));
  ins_cost(INSN_COST);
  format %{ "eon  $dst, $src1, zr" %}

  ins_encode %{
    __ eon(as_Register($dst$$reg),
              as_Register($src1$$reg),
              zr,
              Assembler::LSL, 0);
  %}

  ins_pipe(ialu_reg);
%}
// dst = ~src1, 32-bit.
instruct regI_not_reg(iRegINoSp dst,
                         iRegIorL2I src1, immI_M1 m1,
                         rFlagsReg cr) %{
  match(Set dst (XorI src1 m1));
  ins_cost(INSN_COST);
  format %{ "eonw  $dst, $src1, zr" %}

  ins_encode %{
    __ eonw(as_Register($dst$$reg),
              as_Register($src1$$reg),
              zr,
              Assembler::LSL, 0);
  %}

  ins_pipe(ialu_reg);
%}

// dst = src1 & ~src2 (bic), 32-bit.
instruct AndI_reg_not_reg(iRegINoSp dst,
                         iRegIorL2I src1, iRegIorL2I src2, immI_M1 m1,
                         rFlagsReg cr) %{
  match(Set dst (AndI src1 (XorI src2 m1)));
  ins_cost(INSN_COST);
  format %{ "bicw  $dst, $src1, $src2" %}

  ins_encode %{
    __ bicw(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSL, 0);
  %}

  ins_pipe(ialu_reg_reg);
%}

// dst = src1 & ~src2 (bic), 64-bit.
instruct AndL_reg_not_reg(iRegLNoSp dst,
                         iRegL src1, iRegL src2, immL_M1 m1,
                         rFlagsReg cr) %{
  match(Set dst (AndL src1 (XorL src2 m1)));
  ins_cost(INSN_COST);
  format %{ "bic  $dst, $src1, $src2" %}

  ins_encode %{
    __ bic(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSL, 0);
  %}

  ins_pipe(ialu_reg_reg);
%}
9746 
// dst = src1 | ~src2 (orn), 32-bit. (Auto-generated section — annotation only.)
instruct OrI_reg_not_reg(iRegINoSp dst,
                         iRegIorL2I src1, iRegIorL2I src2, immI_M1 m1,
                         rFlagsReg cr) %{
  match(Set dst (OrI src1 (XorI src2 m1)));
  ins_cost(INSN_COST);
  format %{ "ornw  $dst, $src1, $src2" %}

  ins_encode %{
    __ ornw(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSL, 0);
  %}

  ins_pipe(ialu_reg_reg);
%}

// dst = src1 | ~src2 (orn), 64-bit.
instruct OrL_reg_not_reg(iRegLNoSp dst,
                         iRegL src1, iRegL src2, immL_M1 m1,
                         rFlagsReg cr) %{
  match(Set dst (OrL src1 (XorL src2 m1)));
  ins_cost(INSN_COST);
  format %{ "orn  $dst, $src1, $src2" %}

  ins_encode %{
    __ orn(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSL, 0);
  %}

  ins_pipe(ialu_reg_reg);
%}

// dst = ~(src2 ^ src1) (eon), 32-bit — matched as -1 ^ (src2 ^ src1).
instruct XorI_reg_not_reg(iRegINoSp dst,
                         iRegIorL2I src1, iRegIorL2I src2, immI_M1 m1,
                         rFlagsReg cr) %{
  match(Set dst (XorI m1 (XorI src2 src1)));
  ins_cost(INSN_COST);
  format %{ "eonw  $dst, $src1, $src2" %}

  ins_encode %{
    __ eonw(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSL, 0);
  %}

  ins_pipe(ialu_reg_reg);
%}

// dst = ~(src2 ^ src1) (eon), 64-bit.
instruct XorL_reg_not_reg(iRegLNoSp dst,
                         iRegL src1, iRegL src2, immL_M1 m1,
                         rFlagsReg cr) %{
  match(Set dst (XorL m1 (XorL src2 src1)));
  ins_cost(INSN_COST);
  format %{ "eon  $dst, $src1, $src2" %}

  ins_encode %{
    __ eon(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSL, 0);
  %}

  ins_pipe(ialu_reg_reg);
%}
9814 
// dst = src1 & ~(src2 >>> src3) — bic with an LSR shifted-register operand,
// 32-bit. (Auto-generated section — annotation only.)
instruct AndI_reg_URShift_not_reg(iRegINoSp dst,
                         iRegIorL2I src1, iRegIorL2I src2,
                         immI src3, immI_M1 src4, rFlagsReg cr) %{
  match(Set dst (AndI src1 (XorI(URShiftI src2 src3) src4)));
  ins_cost(1.9 * INSN_COST);
  format %{ "bicw  $dst, $src1, $src2, LSR $src3" %}

  ins_encode %{
    __ bicw(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSR,
              $src3$$constant & 0x1f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}

// 64-bit variant of the pattern above.
instruct AndL_reg_URShift_not_reg(iRegLNoSp dst,
                         iRegL src1, iRegL src2,
                         immI src3, immL_M1 src4, rFlagsReg cr) %{
  match(Set dst (AndL src1 (XorL(URShiftL src2 src3) src4)));
  ins_cost(1.9 * INSN_COST);
  format %{ "bic  $dst, $src1, $src2, LSR $src3" %}

  ins_encode %{
    __ bic(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSR,
              $src3$$constant & 0x3f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}

// dst = src1 & ~(src2 >> src3) — bic with ASR shifted operand, 32-bit.
instruct AndI_reg_RShift_not_reg(iRegINoSp dst,
                         iRegIorL2I src1, iRegIorL2I src2,
                         immI src3, immI_M1 src4, rFlagsReg cr) %{
  match(Set dst (AndI src1 (XorI(RShiftI src2 src3) src4)));
  ins_cost(1.9 * INSN_COST);
  format %{ "bicw  $dst, $src1, $src2, ASR $src3" %}

  ins_encode %{
    __ bicw(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::ASR,
              $src3$$constant & 0x1f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}

// 64-bit variant of the pattern above.
instruct AndL_reg_RShift_not_reg(iRegLNoSp dst,
                         iRegL src1, iRegL src2,
                         immI src3, immL_M1 src4, rFlagsReg cr) %{
  match(Set dst (AndL src1 (XorL(RShiftL src2 src3) src4)));
  ins_cost(1.9 * INSN_COST);
  format %{ "bic  $dst, $src1, $src2, ASR $src3" %}

  ins_encode %{
    __ bic(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::ASR,
              $src3$$constant & 0x3f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}

// dst = src1 & ~(src2 << src3) — bic with LSL shifted operand, 32-bit.
instruct AndI_reg_LShift_not_reg(iRegINoSp dst,
                         iRegIorL2I src1, iRegIorL2I src2,
                         immI src3, immI_M1 src4, rFlagsReg cr) %{
  match(Set dst (AndI src1 (XorI(LShiftI src2 src3) src4)));
  ins_cost(1.9 * INSN_COST);
  format %{ "bicw  $dst, $src1, $src2, LSL $src3" %}

  ins_encode %{
    __ bicw(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSL,
              $src3$$constant & 0x1f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}

// 64-bit variant of the pattern above.
instruct AndL_reg_LShift_not_reg(iRegLNoSp dst,
                         iRegL src1, iRegL src2,
                         immI src3, immL_M1 src4, rFlagsReg cr) %{
  match(Set dst (AndL src1 (XorL(LShiftL src2 src3) src4)));
  ins_cost(1.9 * INSN_COST);
  format %{ "bic  $dst, $src1, $src2, LSL $src3" %}

  ins_encode %{
    __ bic(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSL,
              $src3$$constant & 0x3f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
9922 
// ===== Xor-with-complement: dst = src1 ^ ~(src2 <shift> src3) =====
// Matched as src4 ^ ((src2 <shift> src3) ^ src1) with src4 == -1 (the
// shape C2 canonicalizes to), which is the same value; folded into one
// EON(W) (exclusive-OR-NOT) with a shifted-register operand.
// NOTE(review): the rFlagsReg cr operand is unused by these rules --
// apparently vestigial; confirm before removing.

// 32-bit: dst = src1 ^ ~(src2 >>> src3)
instruct XorI_reg_URShift_not_reg(iRegINoSp dst,
                         iRegIorL2I src1, iRegIorL2I src2,
                         immI src3, immI_M1 src4, rFlagsReg cr) %{
  match(Set dst (XorI src4 (XorI(URShiftI src2 src3) src1)));
  ins_cost(1.9 * INSN_COST);
  format %{ "eonw  $dst, $src1, $src2, LSR $src3" %}

  ins_encode %{
    __ eonw(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSR,
              $src3$$constant & 0x1f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}

// 64-bit: dst = src1 ^ ~(src2 >>> src3)
instruct XorL_reg_URShift_not_reg(iRegLNoSp dst,
                         iRegL src1, iRegL src2,
                         immI src3, immL_M1 src4, rFlagsReg cr) %{
  match(Set dst (XorL src4 (XorL(URShiftL src2 src3) src1)));
  ins_cost(1.9 * INSN_COST);
  format %{ "eon  $dst, $src1, $src2, LSR $src3" %}

  ins_encode %{
    __ eon(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSR,
              $src3$$constant & 0x3f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}

// 32-bit: dst = src1 ^ ~(src2 >> src3)   (arithmetic shift)
instruct XorI_reg_RShift_not_reg(iRegINoSp dst,
                         iRegIorL2I src1, iRegIorL2I src2,
                         immI src3, immI_M1 src4, rFlagsReg cr) %{
  match(Set dst (XorI src4 (XorI(RShiftI src2 src3) src1)));
  ins_cost(1.9 * INSN_COST);
  format %{ "eonw  $dst, $src1, $src2, ASR $src3" %}

  ins_encode %{
    __ eonw(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::ASR,
              $src3$$constant & 0x1f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}

// 64-bit: dst = src1 ^ ~(src2 >> src3)   (arithmetic shift)
instruct XorL_reg_RShift_not_reg(iRegLNoSp dst,
                         iRegL src1, iRegL src2,
                         immI src3, immL_M1 src4, rFlagsReg cr) %{
  match(Set dst (XorL src4 (XorL(RShiftL src2 src3) src1)));
  ins_cost(1.9 * INSN_COST);
  format %{ "eon  $dst, $src1, $src2, ASR $src3" %}

  ins_encode %{
    __ eon(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::ASR,
              $src3$$constant & 0x3f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}

// 32-bit: dst = src1 ^ ~(src2 << src3)
instruct XorI_reg_LShift_not_reg(iRegINoSp dst,
                         iRegIorL2I src1, iRegIorL2I src2,
                         immI src3, immI_M1 src4, rFlagsReg cr) %{
  match(Set dst (XorI src4 (XorI(LShiftI src2 src3) src1)));
  ins_cost(1.9 * INSN_COST);
  format %{ "eonw  $dst, $src1, $src2, LSL $src3" %}

  ins_encode %{
    __ eonw(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSL,
              $src3$$constant & 0x1f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}

// 64-bit: dst = src1 ^ ~(src2 << src3)
instruct XorL_reg_LShift_not_reg(iRegLNoSp dst,
                         iRegL src1, iRegL src2,
                         immI src3, immL_M1 src4, rFlagsReg cr) %{
  match(Set dst (XorL src4 (XorL(LShiftL src2 src3) src1)));
  ins_cost(1.9 * INSN_COST);
  format %{ "eon  $dst, $src1, $src2, LSL $src3" %}

  ins_encode %{
    __ eon(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSL,
              $src3$$constant & 0x3f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
10030 
// ===== Or-with-complement: dst = src1 | ~(src2 <shift> src3) =====
// As with the BIC rules above, the NOT is matched as Xor with -1
// (immI_M1/immL_M1 operand src4); the whole expression folds into one
// ORN(W) (OR-NOT) with a shifted-register operand.
// NOTE(review): the rFlagsReg cr operand is unused by these rules --
// apparently vestigial; confirm before removing.

// 32-bit: dst = src1 | ~(src2 >>> src3)
instruct OrI_reg_URShift_not_reg(iRegINoSp dst,
                         iRegIorL2I src1, iRegIorL2I src2,
                         immI src3, immI_M1 src4, rFlagsReg cr) %{
  match(Set dst (OrI src1 (XorI(URShiftI src2 src3) src4)));
  ins_cost(1.9 * INSN_COST);
  format %{ "ornw  $dst, $src1, $src2, LSR $src3" %}

  ins_encode %{
    __ ornw(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSR,
              $src3$$constant & 0x1f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}

// 64-bit: dst = src1 | ~(src2 >>> src3)
instruct OrL_reg_URShift_not_reg(iRegLNoSp dst,
                         iRegL src1, iRegL src2,
                         immI src3, immL_M1 src4, rFlagsReg cr) %{
  match(Set dst (OrL src1 (XorL(URShiftL src2 src3) src4)));
  ins_cost(1.9 * INSN_COST);
  format %{ "orn  $dst, $src1, $src2, LSR $src3" %}

  ins_encode %{
    __ orn(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSR,
              $src3$$constant & 0x3f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}

// 32-bit: dst = src1 | ~(src2 >> src3)   (arithmetic shift)
instruct OrI_reg_RShift_not_reg(iRegINoSp dst,
                         iRegIorL2I src1, iRegIorL2I src2,
                         immI src3, immI_M1 src4, rFlagsReg cr) %{
  match(Set dst (OrI src1 (XorI(RShiftI src2 src3) src4)));
  ins_cost(1.9 * INSN_COST);
  format %{ "ornw  $dst, $src1, $src2, ASR $src3" %}

  ins_encode %{
    __ ornw(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::ASR,
              $src3$$constant & 0x1f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}

// 64-bit: dst = src1 | ~(src2 >> src3)   (arithmetic shift)
instruct OrL_reg_RShift_not_reg(iRegLNoSp dst,
                         iRegL src1, iRegL src2,
                         immI src3, immL_M1 src4, rFlagsReg cr) %{
  match(Set dst (OrL src1 (XorL(RShiftL src2 src3) src4)));
  ins_cost(1.9 * INSN_COST);
  format %{ "orn  $dst, $src1, $src2, ASR $src3" %}

  ins_encode %{
    __ orn(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::ASR,
              $src3$$constant & 0x3f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}

// 32-bit: dst = src1 | ~(src2 << src3)
instruct OrI_reg_LShift_not_reg(iRegINoSp dst,
                         iRegIorL2I src1, iRegIorL2I src2,
                         immI src3, immI_M1 src4, rFlagsReg cr) %{
  match(Set dst (OrI src1 (XorI(LShiftI src2 src3) src4)));
  ins_cost(1.9 * INSN_COST);
  format %{ "ornw  $dst, $src1, $src2, LSL $src3" %}

  ins_encode %{
    __ ornw(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSL,
              $src3$$constant & 0x1f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}

// 64-bit: dst = src1 | ~(src2 << src3)
instruct OrL_reg_LShift_not_reg(iRegLNoSp dst,
                         iRegL src1, iRegL src2,
                         immI src3, immL_M1 src4, rFlagsReg cr) %{
  match(Set dst (OrL src1 (XorL(LShiftL src2 src3) src4)));
  ins_cost(1.9 * INSN_COST);
  format %{ "orn  $dst, $src1, $src2, LSL $src3" %}

  ins_encode %{
    __ orn(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSL,
              $src3$$constant & 0x3f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
10138 
// ===== AND with shifted-register operand: dst = src1 & (src2 <shift> src3) =====
// Folds a constant shift of the second operand into the AND itself, using
// the shifted-register form of the instruction.  'andr' is the
// MacroAssembler's spelling of the 64-bit AND.
// NOTE(review): the rFlagsReg cr operand is unused by these rules --
// apparently vestigial; confirm before removing.

// 32-bit: dst = src1 & (src2 >>> src3)
instruct AndI_reg_URShift_reg(iRegINoSp dst,
                         iRegIorL2I src1, iRegIorL2I src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (AndI src1 (URShiftI src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "andw  $dst, $src1, $src2, LSR $src3" %}

  ins_encode %{
    __ andw(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSR,
              $src3$$constant & 0x1f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}

// 64-bit: dst = src1 & (src2 >>> src3)
instruct AndL_reg_URShift_reg(iRegLNoSp dst,
                         iRegL src1, iRegL src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (AndL src1 (URShiftL src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "andr  $dst, $src1, $src2, LSR $src3" %}

  ins_encode %{
    __ andr(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSR,
              $src3$$constant & 0x3f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}

// 32-bit: dst = src1 & (src2 >> src3)   (arithmetic shift)
instruct AndI_reg_RShift_reg(iRegINoSp dst,
                         iRegIorL2I src1, iRegIorL2I src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (AndI src1 (RShiftI src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "andw  $dst, $src1, $src2, ASR $src3" %}

  ins_encode %{
    __ andw(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::ASR,
              $src3$$constant & 0x1f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}

// 64-bit: dst = src1 & (src2 >> src3)   (arithmetic shift)
instruct AndL_reg_RShift_reg(iRegLNoSp dst,
                         iRegL src1, iRegL src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (AndL src1 (RShiftL src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "andr  $dst, $src1, $src2, ASR $src3" %}

  ins_encode %{
    __ andr(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::ASR,
              $src3$$constant & 0x3f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}

// 32-bit: dst = src1 & (src2 << src3)
instruct AndI_reg_LShift_reg(iRegINoSp dst,
                         iRegIorL2I src1, iRegIorL2I src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (AndI src1 (LShiftI src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "andw  $dst, $src1, $src2, LSL $src3" %}

  ins_encode %{
    __ andw(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSL,
              $src3$$constant & 0x1f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}

// 64-bit: dst = src1 & (src2 << src3)
instruct AndL_reg_LShift_reg(iRegLNoSp dst,
                         iRegL src1, iRegL src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (AndL src1 (LShiftL src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "andr  $dst, $src1, $src2, LSL $src3" %}

  ins_encode %{
    __ andr(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSL,
              $src3$$constant & 0x3f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
10252 
// ===== EOR with shifted-register operand: dst = src1 ^ (src2 <shift> src3) =====
// NOTE(review): the rFlagsReg cr operand is unused by these rules --
// apparently vestigial; confirm before removing.

// 32-bit: dst = src1 ^ (src2 >>> src3)
instruct XorI_reg_URShift_reg(iRegINoSp dst,
                         iRegIorL2I src1, iRegIorL2I src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (XorI src1 (URShiftI src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "eorw  $dst, $src1, $src2, LSR $src3" %}

  ins_encode %{
    __ eorw(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSR,
              $src3$$constant & 0x1f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}

// 64-bit: dst = src1 ^ (src2 >>> src3)
instruct XorL_reg_URShift_reg(iRegLNoSp dst,
                         iRegL src1, iRegL src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (XorL src1 (URShiftL src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "eor  $dst, $src1, $src2, LSR $src3" %}

  ins_encode %{
    __ eor(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSR,
              $src3$$constant & 0x3f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}

// 32-bit: dst = src1 ^ (src2 >> src3)   (arithmetic shift)
instruct XorI_reg_RShift_reg(iRegINoSp dst,
                         iRegIorL2I src1, iRegIorL2I src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (XorI src1 (RShiftI src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "eorw  $dst, $src1, $src2, ASR $src3" %}

  ins_encode %{
    __ eorw(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::ASR,
              $src3$$constant & 0x1f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}

// 64-bit: dst = src1 ^ (src2 >> src3)   (arithmetic shift)
instruct XorL_reg_RShift_reg(iRegLNoSp dst,
                         iRegL src1, iRegL src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (XorL src1 (RShiftL src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "eor  $dst, $src1, $src2, ASR $src3" %}

  ins_encode %{
    __ eor(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::ASR,
              $src3$$constant & 0x3f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}

// 32-bit: dst = src1 ^ (src2 << src3)
instruct XorI_reg_LShift_reg(iRegINoSp dst,
                         iRegIorL2I src1, iRegIorL2I src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (XorI src1 (LShiftI src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "eorw  $dst, $src1, $src2, LSL $src3" %}

  ins_encode %{
    __ eorw(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSL,
              $src3$$constant & 0x1f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}

// 64-bit: dst = src1 ^ (src2 << src3)
instruct XorL_reg_LShift_reg(iRegLNoSp dst,
                         iRegL src1, iRegL src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (XorL src1 (LShiftL src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "eor  $dst, $src1, $src2, LSL $src3" %}

  ins_encode %{
    __ eor(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSL,
              $src3$$constant & 0x3f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
10366 
// ===== ORR with shifted-register operand: dst = src1 | (src2 <shift> src3) =====
// NOTE(review): the rFlagsReg cr operand is unused by these rules --
// apparently vestigial; confirm before removing.

// 32-bit: dst = src1 | (src2 >>> src3)
instruct OrI_reg_URShift_reg(iRegINoSp dst,
                         iRegIorL2I src1, iRegIorL2I src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (OrI src1 (URShiftI src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "orrw  $dst, $src1, $src2, LSR $src3" %}

  ins_encode %{
    __ orrw(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSR,
              $src3$$constant & 0x1f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}

// 64-bit: dst = src1 | (src2 >>> src3)
instruct OrL_reg_URShift_reg(iRegLNoSp dst,
                         iRegL src1, iRegL src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (OrL src1 (URShiftL src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "orr  $dst, $src1, $src2, LSR $src3" %}

  ins_encode %{
    __ orr(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSR,
              $src3$$constant & 0x3f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}

// 32-bit: dst = src1 | (src2 >> src3)   (arithmetic shift)
instruct OrI_reg_RShift_reg(iRegINoSp dst,
                         iRegIorL2I src1, iRegIorL2I src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (OrI src1 (RShiftI src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "orrw  $dst, $src1, $src2, ASR $src3" %}

  ins_encode %{
    __ orrw(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::ASR,
              $src3$$constant & 0x1f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}

// 64-bit: dst = src1 | (src2 >> src3)   (arithmetic shift)
instruct OrL_reg_RShift_reg(iRegLNoSp dst,
                         iRegL src1, iRegL src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (OrL src1 (RShiftL src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "orr  $dst, $src1, $src2, ASR $src3" %}

  ins_encode %{
    __ orr(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::ASR,
              $src3$$constant & 0x3f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}

// 32-bit: dst = src1 | (src2 << src3)
instruct OrI_reg_LShift_reg(iRegINoSp dst,
                         iRegIorL2I src1, iRegIorL2I src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (OrI src1 (LShiftI src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "orrw  $dst, $src1, $src2, LSL $src3" %}

  ins_encode %{
    __ orrw(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSL,
              $src3$$constant & 0x1f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}

// 64-bit: dst = src1 | (src2 << src3)
instruct OrL_reg_LShift_reg(iRegLNoSp dst,
                         iRegL src1, iRegL src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (OrL src1 (LShiftL src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "orr  $dst, $src1, $src2, LSL $src3" %}

  ins_encode %{
    __ orr(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSL,
              $src3$$constant & 0x3f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
10480 
// ===== ADD with shifted-register operand: dst = src1 + (src2 <shift> src3) =====
// NOTE(review): the rFlagsReg cr operand is unused by these rules --
// apparently vestigial; confirm before removing.

// 32-bit: dst = src1 + (src2 >>> src3)
instruct AddI_reg_URShift_reg(iRegINoSp dst,
                         iRegIorL2I src1, iRegIorL2I src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (AddI src1 (URShiftI src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "addw  $dst, $src1, $src2, LSR $src3" %}

  ins_encode %{
    __ addw(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSR,
              $src3$$constant & 0x1f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}

// 64-bit: dst = src1 + (src2 >>> src3)
instruct AddL_reg_URShift_reg(iRegLNoSp dst,
                         iRegL src1, iRegL src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (AddL src1 (URShiftL src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "add  $dst, $src1, $src2, LSR $src3" %}

  ins_encode %{
    __ add(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSR,
              $src3$$constant & 0x3f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}

// 32-bit: dst = src1 + (src2 >> src3)   (arithmetic shift)
instruct AddI_reg_RShift_reg(iRegINoSp dst,
                         iRegIorL2I src1, iRegIorL2I src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (AddI src1 (RShiftI src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "addw  $dst, $src1, $src2, ASR $src3" %}

  ins_encode %{
    __ addw(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::ASR,
              $src3$$constant & 0x1f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}

// 64-bit: dst = src1 + (src2 >> src3)   (arithmetic shift)
instruct AddL_reg_RShift_reg(iRegLNoSp dst,
                         iRegL src1, iRegL src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (AddL src1 (RShiftL src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "add  $dst, $src1, $src2, ASR $src3" %}

  ins_encode %{
    __ add(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::ASR,
              $src3$$constant & 0x3f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}

// 32-bit: dst = src1 + (src2 << src3)
instruct AddI_reg_LShift_reg(iRegINoSp dst,
                         iRegIorL2I src1, iRegIorL2I src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (AddI src1 (LShiftI src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "addw  $dst, $src1, $src2, LSL $src3" %}

  ins_encode %{
    __ addw(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSL,
              $src3$$constant & 0x1f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}

// 64-bit: dst = src1 + (src2 << src3)
instruct AddL_reg_LShift_reg(iRegLNoSp dst,
                         iRegL src1, iRegL src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (AddL src1 (LShiftL src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "add  $dst, $src1, $src2, LSL $src3" %}

  ins_encode %{
    __ add(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSL,
              $src3$$constant & 0x3f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
10594 
// ===== SUB with shifted-register operand: dst = src1 - (src2 <shift> src3) =====
// NOTE(review): the rFlagsReg cr operand is unused by these rules --
// apparently vestigial; confirm before removing.

// 32-bit: dst = src1 - (src2 >>> src3)
instruct SubI_reg_URShift_reg(iRegINoSp dst,
                         iRegIorL2I src1, iRegIorL2I src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (SubI src1 (URShiftI src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "subw  $dst, $src1, $src2, LSR $src3" %}

  ins_encode %{
    __ subw(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSR,
              $src3$$constant & 0x1f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}

// 64-bit: dst = src1 - (src2 >>> src3)
instruct SubL_reg_URShift_reg(iRegLNoSp dst,
                         iRegL src1, iRegL src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (SubL src1 (URShiftL src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "sub  $dst, $src1, $src2, LSR $src3" %}

  ins_encode %{
    __ sub(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSR,
              $src3$$constant & 0x3f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}

// 32-bit: dst = src1 - (src2 >> src3)   (arithmetic shift)
instruct SubI_reg_RShift_reg(iRegINoSp dst,
                         iRegIorL2I src1, iRegIorL2I src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (SubI src1 (RShiftI src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "subw  $dst, $src1, $src2, ASR $src3" %}

  ins_encode %{
    __ subw(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::ASR,
              $src3$$constant & 0x1f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}

// 64-bit: dst = src1 - (src2 >> src3)   (arithmetic shift)
instruct SubL_reg_RShift_reg(iRegLNoSp dst,
                         iRegL src1, iRegL src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (SubL src1 (RShiftL src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "sub  $dst, $src1, $src2, ASR $src3" %}

  ins_encode %{
    __ sub(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::ASR,
              $src3$$constant & 0x3f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}

// 32-bit: dst = src1 - (src2 << src3)
instruct SubI_reg_LShift_reg(iRegINoSp dst,
                         iRegIorL2I src1, iRegIorL2I src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (SubI src1 (LShiftI src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "subw  $dst, $src1, $src2, LSL $src3" %}

  ins_encode %{
    __ subw(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSL,
              $src3$$constant & 0x1f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}

// 64-bit: dst = src1 - (src2 << src3)
instruct SubL_reg_LShift_reg(iRegLNoSp dst,
                         iRegL src1, iRegL src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (SubL src1 (LShiftL src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "sub  $dst, $src1, $src2, LSL $src3" %}

  ins_encode %{
    __ sub(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSL,
              $src3$$constant & 0x3f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
10708 
10709 
10710 
// Shift Left followed by Shift Right.
// This idiom is used by the compiler for the i2b bytecode etc.
// Matches (src << lshift) >> rshift (64-bit, arithmetic final shift) and
// implements it as a single SBFM (signed bitfield move).
instruct sbfmL(iRegLNoSp dst, iRegL src, immI lshift_count, immI rshift_count)
%{
  match(Set dst (RShiftL (LShiftL src lshift_count) rshift_count));
  // Make sure we are not going to exceed what sbfm can do.
  predicate((unsigned int)n->in(2)->get_int() <= 63
            && (unsigned int)n->in(1)->in(2)->get_int() <= 63);

  ins_cost(INSN_COST * 2);
  format %{ "sbfm  $dst, $src, $rshift_count - $lshift_count, #63 - $lshift_count" %}
  ins_encode %{
    int lshift = $lshift_count$$constant, rshift = $rshift_count$$constant;
    // SBFM immr/imms fields: rotate by (rshift - lshift) mod 64 and
    // sign-extend from bit (63 - lshift).
    int s = 63 - lshift;
    int r = (rshift - lshift) & 63;
    __ sbfm(as_Register($dst$$reg),
            as_Register($src$$reg),
            r, s);
  %}

  ins_pipe(ialu_reg_shift);
%}
10733 
// Shift Left followed by Shift Right.
// This idiom is used by the compiler for the i2b bytecode etc.
// 32-bit variant of sbfmL: matches (src << lshift) >> rshift with an
// arithmetic final shift and emits a single SBFMW.
instruct sbfmwI(iRegINoSp dst, iRegIorL2I src, immI lshift_count, immI rshift_count)
%{
  match(Set dst (RShiftI (LShiftI src lshift_count) rshift_count));
  // Make sure we are not going to exceed what sbfmw can do.
  predicate((unsigned int)n->in(2)->get_int() <= 31
            && (unsigned int)n->in(1)->in(2)->get_int() <= 31);

  ins_cost(INSN_COST * 2);
  format %{ "sbfmw  $dst, $src, $rshift_count - $lshift_count, #31 - $lshift_count" %}
  ins_encode %{
    int lshift = $lshift_count$$constant, rshift = $rshift_count$$constant;
    // SBFMW immr/imms fields: rotate by (rshift - lshift) mod 32 and
    // sign-extend from bit (31 - lshift).
    int s = 31 - lshift;
    int r = (rshift - lshift) & 31;
    __ sbfmw(as_Register($dst$$reg),
            as_Register($src$$reg),
            r, s);
  %}

  ins_pipe(ialu_reg_shift);
%}
10756 
// Shift Left followed by Shift Right.
// This idiom is used by the compiler for the i2b bytecode etc.
// Unsigned variant of sbfmL: matches (src << lshift) >>> rshift (64-bit,
// logical final shift) and emits a single UBFM (unsigned bitfield move).
instruct ubfmL(iRegLNoSp dst, iRegL src, immI lshift_count, immI rshift_count)
%{
  match(Set dst (URShiftL (LShiftL src lshift_count) rshift_count));
  // Make sure we are not going to exceed what ubfm can do.
  predicate((unsigned int)n->in(2)->get_int() <= 63
            && (unsigned int)n->in(1)->in(2)->get_int() <= 63);

  ins_cost(INSN_COST * 2);
  format %{ "ubfm  $dst, $src, $rshift_count - $lshift_count, #63 - $lshift_count" %}
  ins_encode %{
    int lshift = $lshift_count$$constant, rshift = $rshift_count$$constant;
    // UBFM immr/imms fields: rotate by (rshift - lshift) mod 64 and
    // zero-extend from bit (63 - lshift).
    int s = 63 - lshift;
    int r = (rshift - lshift) & 63;
    __ ubfm(as_Register($dst$$reg),
            as_Register($src$$reg),
            r, s);
  %}

  ins_pipe(ialu_reg_shift);
%}
10779 
10780 // Shift Left followed by Shift Right.
10781 // This idiom is used by the compiler for the i2b bytecode etc.
// Fold (src << lshift) >>> rshift (logical shift) into one 32-bit UBFMW.
instruct ubfmwI(iRegINoSp dst, iRegIorL2I src, immI lshift_count, immI rshift_count)
%{
  match(Set dst (URShiftI (LShiftI src lshift_count) rshift_count));
  // Make sure we are not going to exceed what ubfmw can do.
  predicate((unsigned int)n->in(2)->get_int() <= 31
            && (unsigned int)n->in(1)->in(2)->get_int() <= 31);

  ins_cost(INSN_COST * 2);
  format %{ "ubfmw  $dst, $src, $rshift_count - $lshift_count, #31 - $lshift_count" %}
  ins_encode %{
    int lshift = $lshift_count$$constant, rshift = $rshift_count$$constant;
    // UBFM encoding: imms (s) marks the top source bit, immr (r) the rotation.
    int s = 31 - lshift;
    int r = (rshift - lshift) & 31;
    __ ubfmw(as_Register($dst$$reg),
            as_Register($src$$reg),
            r, s);
  %}

  ins_pipe(ialu_reg_shift);
%}
10802 // Bitfield extract with shift & mask
10803 
// Fold (src >>> rshift) & mask into a single 32-bit unsigned bitfield extract.
// NOTE(review): the format string omits $rshift, so the debug listing does not
// show the extract position — generated section, flagging rather than editing.
instruct ubfxwI(iRegINoSp dst, iRegIorL2I src, immI rshift, immI_bitmask mask)
%{
  match(Set dst (AndI (URShiftI src rshift) mask));
  // Make sure we are not going to exceed what ubfxw can do.
  predicate((exact_log2(n->in(2)->get_int() + 1) + (n->in(1)->in(2)->get_int() & 31)) <= (31 + 1));

  ins_cost(INSN_COST);
  format %{ "ubfxw $dst, $src, $mask" %}
  ins_encode %{
    int rshift = $rshift$$constant & 31;
    long mask = $mask$$constant;
    // immI_bitmask guarantees mask+1 is a power of two, so this is the field width.
    int width = exact_log2(mask+1);
    __ ubfxw(as_Register($dst$$reg),
            as_Register($src$$reg), rshift, width);
  %}
  ins_pipe(ialu_reg_shift);
%}
// Fold (src >>> rshift) & mask into a single 64-bit unsigned bitfield extract.
// NOTE(review): the format string omits $rshift (see ubfxwI above).
instruct ubfxL(iRegLNoSp dst, iRegL src, immI rshift, immL_bitmask mask)
%{
  match(Set dst (AndL (URShiftL src rshift) mask));
  // Make sure we are not going to exceed what ubfx can do.
  predicate((exact_log2_long(n->in(2)->get_long() + 1) + (n->in(1)->in(2)->get_int() & 63)) <= (63 + 1));

  ins_cost(INSN_COST);
  format %{ "ubfx $dst, $src, $mask" %}
  ins_encode %{
    int rshift = $rshift$$constant & 63;
    long mask = $mask$$constant;
    // immL_bitmask guarantees mask+1 is a power of two, so this is the field width.
    int width = exact_log2_long(mask+1);
    __ ubfx(as_Register($dst$$reg),
            as_Register($src$$reg), rshift, width);
  %}
  ins_pipe(ialu_reg_shift);
%}
10838 
10839 // We can use ubfx when extending an And with a mask when we know mask
10840 // is positive.  We know that because immI_bitmask guarantees it.
// I2L of a masked extract: the 64-bit ubfx zero-extends, so the ConvI2L is free.
instruct ubfxIConvI2L(iRegLNoSp dst, iRegIorL2I src, immI rshift, immI_bitmask mask)
%{
  match(Set dst (ConvI2L (AndI (URShiftI src rshift) mask)));
  // Make sure we are not going to exceed what ubfxw can do.
  predicate((exact_log2(n->in(1)->in(2)->get_int() + 1) + (n->in(1)->in(1)->in(2)->get_int() & 31)) <= (31 + 1));

  ins_cost(INSN_COST * 2);
  format %{ "ubfx $dst, $src, $mask" %}
  ins_encode %{
    int rshift = $rshift$$constant & 31;
    long mask = $mask$$constant;
    // immI_bitmask guarantees mask+1 is a power of two, so this is the field width.
    int width = exact_log2(mask+1);
    __ ubfx(as_Register($dst$$reg),
            as_Register($src$$reg), rshift, width);
  %}
  ins_pipe(ialu_reg_shift);
%}
10858 
10859 // Rotations
10860 
// 64-bit rotate built from OR of complementary shifts: (x << l) | (y >>> r)
// with l + r == 64 maps onto a single EXTR.
instruct extrOrL(iRegLNoSp dst, iRegL src1, iRegL src2, immI lshift, immI rshift, rFlagsReg cr)
%{
  match(Set dst (OrL (LShiftL src1 lshift) (URShiftL src2 rshift)));
  // Shift counts must sum to the register width (mod 64).
  predicate(0 == ((n->in(1)->in(2)->get_int() + n->in(2)->in(2)->get_int()) & 63));

  ins_cost(INSN_COST);
  format %{ "extr $dst, $src1, $src2, #$rshift" %}

  ins_encode %{
    __ extr(as_Register($dst$$reg), as_Register($src1$$reg), as_Register($src2$$reg),
            $rshift$$constant & 63);
  %}
  ins_pipe(ialu_reg_reg_extr);
%}
10875 
// 32-bit rotate built from OR of complementary shifts; single EXTRW.
// NOTE(review): the format string prints "extr" but the 32-bit extrw is emitted.
instruct extrOrI(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2, immI lshift, immI rshift, rFlagsReg cr)
%{
  match(Set dst (OrI (LShiftI src1 lshift) (URShiftI src2 rshift)));
  // Shift counts must sum to the register width (mod 32).
  predicate(0 == ((n->in(1)->in(2)->get_int() + n->in(2)->in(2)->get_int()) & 31));

  ins_cost(INSN_COST);
  format %{ "extr $dst, $src1, $src2, #$rshift" %}

  ins_encode %{
    __ extrw(as_Register($dst$$reg), as_Register($src1$$reg), as_Register($src2$$reg),
            $rshift$$constant & 31);
  %}
  ins_pipe(ialu_reg_reg_extr);
%}
10890 
// Same as extrOrL but with ADD combining the halves: when the shift counts
// sum to 64 the shifted fields cannot overlap, so ADD == OR and EXTR applies.
instruct extrAddL(iRegLNoSp dst, iRegL src1, iRegL src2, immI lshift, immI rshift, rFlagsReg cr)
%{
  match(Set dst (AddL (LShiftL src1 lshift) (URShiftL src2 rshift)));
  predicate(0 == ((n->in(1)->in(2)->get_int() + n->in(2)->in(2)->get_int()) & 63));

  ins_cost(INSN_COST);
  format %{ "extr $dst, $src1, $src2, #$rshift" %}

  ins_encode %{
    __ extr(as_Register($dst$$reg), as_Register($src1$$reg), as_Register($src2$$reg),
            $rshift$$constant & 63);
  %}
  ins_pipe(ialu_reg_reg_extr);
%}
10905 
// Same as extrOrI but with ADD: non-overlapping fields make ADD == OR.
// NOTE(review): the format string prints "extr" but the 32-bit extrw is emitted.
instruct extrAddI(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2, immI lshift, immI rshift, rFlagsReg cr)
%{
  match(Set dst (AddI (LShiftI src1 lshift) (URShiftI src2 rshift)));
  predicate(0 == ((n->in(1)->in(2)->get_int() + n->in(2)->in(2)->get_int()) & 31));

  ins_cost(INSN_COST);
  format %{ "extr $dst, $src1, $src2, #$rshift" %}

  ins_encode %{
    __ extrw(as_Register($dst$$reg), as_Register($src1$$reg), as_Register($src2$$reg),
            $rshift$$constant & 31);
  %}
  ins_pipe(ialu_reg_reg_extr);
%}
10920 
10921 
10922 // rol expander
10923 
// 64-bit rotate-left by variable count.  AArch64 has no ROL, so it is
// synthesized as rorv(src, -shift); hence two instructions and rscratch1.
instruct rolL_rReg(iRegLNoSp dst, iRegL src, iRegI shift, rFlagsReg cr)
%{
  effect(DEF dst, USE src, USE shift);

  format %{ "rol    $dst, $src, $shift" %}
  ins_cost(INSN_COST * 3);
  ins_encode %{
    // Negate the count: rol(x, n) == ror(x, -n mod 64).
    __ subw(rscratch1, zr, as_Register($shift$$reg));
    __ rorv(as_Register($dst$$reg), as_Register($src$$reg),
            rscratch1);
    %}
  ins_pipe(ialu_reg_reg_vshift);
%}
10937 
10938 // rol expander
10939 
// 32-bit rotate-left by variable count, synthesized as rorvw(src, -shift).
instruct rolI_rReg(iRegINoSp dst, iRegI src, iRegI shift, rFlagsReg cr)
%{
  effect(DEF dst, USE src, USE shift);

  format %{ "rol    $dst, $src, $shift" %}
  ins_cost(INSN_COST * 3);
  ins_encode %{
    // Negate the count: rol(x, n) == ror(x, -n mod 32).
    __ subw(rscratch1, zr, as_Register($shift$$reg));
    __ rorvw(as_Register($dst$$reg), as_Register($src$$reg),
            rscratch1);
    %}
  ins_pipe(ialu_reg_reg_vshift);
%}
10953 
// Variable rol idiom (x << s) | (x >>> (64 - s)); expands into rolL_rReg.
instruct rolL_rReg_Var_C_64(iRegLNoSp dst, iRegL src, iRegI shift, immI_64 c_64, rFlagsReg cr)
%{
  match(Set dst (OrL (LShiftL src shift) (URShiftL src (SubI c_64 shift))));

  expand %{
    rolL_rReg(dst, src, shift, cr);
  %}
%}
10962 
// Variable rol idiom with (0 - s); equivalent mod 64, expands into rolL_rReg.
instruct rolL_rReg_Var_C0(iRegLNoSp dst, iRegL src, iRegI shift, immI0 c0, rFlagsReg cr)
%{
  match(Set dst (OrL (LShiftL src shift) (URShiftL src (SubI c0 shift))));

  expand %{
    rolL_rReg(dst, src, shift, cr);
  %}
%}
10971 
// Variable rol idiom (x << s) | (x >>> (32 - s)); expands into rolI_rReg.
instruct rolI_rReg_Var_C_32(iRegINoSp dst, iRegI src, iRegI shift, immI_32 c_32, rFlagsReg cr)
%{
  match(Set dst (OrI (LShiftI src shift) (URShiftI src (SubI c_32 shift))));

  expand %{
    rolI_rReg(dst, src, shift, cr);
  %}
%}
10980 
// Variable rol idiom with (0 - s); equivalent mod 32, expands into rolI_rReg.
instruct rolI_rReg_Var_C0(iRegINoSp dst, iRegI src, iRegI shift, immI0 c0, rFlagsReg cr)
%{
  match(Set dst (OrI (LShiftI src shift) (URShiftI src (SubI c0 shift))));

  expand %{
    rolI_rReg(dst, src, shift, cr);
  %}
%}
10989 
10990 // ror expander
10991 
// 64-bit rotate-right by variable count; maps directly onto RORV.
instruct rorL_rReg(iRegLNoSp dst, iRegL src, iRegI shift, rFlagsReg cr)
%{
  effect(DEF dst, USE src, USE shift);

  format %{ "ror    $dst, $src, $shift" %}
  ins_cost(INSN_COST);
  ins_encode %{
    __ rorv(as_Register($dst$$reg), as_Register($src$$reg),
            as_Register($shift$$reg));
    %}
  ins_pipe(ialu_reg_reg_vshift);
%}
11004 
11005 // ror expander
11006 
// 32-bit rotate-right by variable count; maps directly onto RORVW.
instruct rorI_rReg(iRegINoSp dst, iRegI src, iRegI shift, rFlagsReg cr)
%{
  effect(DEF dst, USE src, USE shift);

  format %{ "ror    $dst, $src, $shift" %}
  ins_cost(INSN_COST);
  ins_encode %{
    __ rorvw(as_Register($dst$$reg), as_Register($src$$reg),
            as_Register($shift$$reg));
    %}
  ins_pipe(ialu_reg_reg_vshift);
%}
11019 
// Variable ror idiom (x >>> s) | (x << (64 - s)); expands into rorL_rReg.
instruct rorL_rReg_Var_C_64(iRegLNoSp dst, iRegL src, iRegI shift, immI_64 c_64, rFlagsReg cr)
%{
  match(Set dst (OrL (URShiftL src shift) (LShiftL src (SubI c_64 shift))));

  expand %{
    rorL_rReg(dst, src, shift, cr);
  %}
%}
11028 
// Variable ror idiom with (0 - s); equivalent mod 64, expands into rorL_rReg.
instruct rorL_rReg_Var_C0(iRegLNoSp dst, iRegL src, iRegI shift, immI0 c0, rFlagsReg cr)
%{
  match(Set dst (OrL (URShiftL src shift) (LShiftL src (SubI c0 shift))));

  expand %{
    rorL_rReg(dst, src, shift, cr);
  %}
%}
11037 
// Variable ror idiom (x >>> s) | (x << (32 - s)); expands into rorI_rReg.
instruct rorI_rReg_Var_C_32(iRegINoSp dst, iRegI src, iRegI shift, immI_32 c_32, rFlagsReg cr)
%{
  match(Set dst (OrI (URShiftI src shift) (LShiftI src (SubI c_32 shift))));

  expand %{
    rorI_rReg(dst, src, shift, cr);
  %}
%}
11046 
// Variable ror idiom with (0 - s); equivalent mod 32, expands into rorI_rReg.
instruct rorI_rReg_Var_C0(iRegINoSp dst, iRegI src, iRegI shift, immI0 c0, rFlagsReg cr)
%{
  match(Set dst (OrI (URShiftI src shift) (LShiftI src (SubI c0 shift))));

  expand %{
    rorI_rReg(dst, src, shift, cr);
  %}
%}
11055 
11056 // Add/subtract (extended)
11057 
// long + (long)int folded into add with sxtw register extension.
// NOTE(review): stray ';' after the closing '%}' — harmless to adlc but
// inconsistent with the rest of the file; generated section, flagging only.
instruct AddExtI(iRegLNoSp dst, iRegL src1, iRegIorL2I src2, rFlagsReg cr)
%{
  match(Set dst (AddL src1 (ConvI2L src2)));
  ins_cost(INSN_COST);
  format %{ "add  $dst, $src1, sxtw $src2" %}

   ins_encode %{
     __ add(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::sxtw);
   %}
  ins_pipe(ialu_reg_reg);
%};
11070 
// long - (long)int folded into sub with sxtw register extension.
// NOTE(review): stray ';' after the closing '%}' (see AddExtI).
instruct SubExtI(iRegLNoSp dst, iRegL src1, iRegIorL2I src2, rFlagsReg cr)
%{
  match(Set dst (SubL src1 (ConvI2L src2)));
  ins_cost(INSN_COST);
  format %{ "sub  $dst, $src1, sxtw $src2" %}

   ins_encode %{
     __ sub(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::sxtw);
   %}
  ins_pipe(ialu_reg_reg);
%};
11083 
11084 
// src1 + ((src2 << 16) >> 16): the shift pair is a short sign-extension,
// folded into add with sxth.
instruct AddExtI_sxth(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2, immI_16 lshift, immI_16 rshift, rFlagsReg cr)
%{
  match(Set dst (AddI src1 (RShiftI (LShiftI src2 lshift) rshift)));
  ins_cost(INSN_COST);
  format %{ "add  $dst, $src1, sxth $src2" %}

   ins_encode %{
     __ add(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::sxth);
   %}
  ins_pipe(ialu_reg_reg);
%}
11097 
// src1 + ((src2 << 24) >> 24): byte sign-extension folded into add/sxtb.
instruct AddExtI_sxtb(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2, immI_24 lshift, immI_24 rshift, rFlagsReg cr)
%{
  match(Set dst (AddI src1 (RShiftI (LShiftI src2 lshift) rshift)));
  ins_cost(INSN_COST);
  format %{ "add  $dst, $src1, sxtb $src2" %}

   ins_encode %{
     __ add(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::sxtb);
   %}
  ins_pipe(ialu_reg_reg);
%}
11110 
// src1 + ((src2 << 24) >>> 24): byte zero-extension folded into add/uxtb.
instruct AddExtI_uxtb(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2, immI_24 lshift, immI_24 rshift, rFlagsReg cr)
%{
  match(Set dst (AddI src1 (URShiftI (LShiftI src2 lshift) rshift)));
  ins_cost(INSN_COST);
  format %{ "add  $dst, $src1, uxtb $src2" %}

   ins_encode %{
     __ add(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::uxtb);
   %}
  ins_pipe(ialu_reg_reg);
%}
11123 
// Long variant: src1 + ((src2 << 48) >> 48), short sign-extension via add/sxth.
instruct AddExtL_sxth(iRegLNoSp dst, iRegL src1, iRegL src2, immI_48 lshift, immI_48 rshift, rFlagsReg cr)
%{
  match(Set dst (AddL src1 (RShiftL (LShiftL src2 lshift) rshift)));
  ins_cost(INSN_COST);
  format %{ "add  $dst, $src1, sxth $src2" %}

   ins_encode %{
     __ add(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::sxth);
   %}
  ins_pipe(ialu_reg_reg);
%}
11136 
// Long variant: src1 + ((src2 << 32) >> 32), word sign-extension via add/sxtw.
instruct AddExtL_sxtw(iRegLNoSp dst, iRegL src1, iRegL src2, immI_32 lshift, immI_32 rshift, rFlagsReg cr)
%{
  match(Set dst (AddL src1 (RShiftL (LShiftL src2 lshift) rshift)));
  ins_cost(INSN_COST);
  format %{ "add  $dst, $src1, sxtw $src2" %}

   ins_encode %{
     __ add(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::sxtw);
   %}
  ins_pipe(ialu_reg_reg);
%}
11149 
// Long variant: src1 + ((src2 << 56) >> 56), byte sign-extension via add/sxtb.
instruct AddExtL_sxtb(iRegLNoSp dst, iRegL src1, iRegL src2, immI_56 lshift, immI_56 rshift, rFlagsReg cr)
%{
  match(Set dst (AddL src1 (RShiftL (LShiftL src2 lshift) rshift)));
  ins_cost(INSN_COST);
  format %{ "add  $dst, $src1, sxtb $src2" %}

   ins_encode %{
     __ add(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::sxtb);
   %}
  ins_pipe(ialu_reg_reg);
%}
11162 
// Long variant: src1 + ((src2 << 56) >>> 56), byte zero-extension via add/uxtb.
instruct AddExtL_uxtb(iRegLNoSp dst, iRegL src1, iRegL src2, immI_56 lshift, immI_56 rshift, rFlagsReg cr)
%{
  match(Set dst (AddL src1 (URShiftL (LShiftL src2 lshift) rshift)));
  ins_cost(INSN_COST);
  format %{ "add  $dst, $src1, uxtb $src2" %}

   ins_encode %{
     __ add(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::uxtb);
   %}
  ins_pipe(ialu_reg_reg);
%}
11175 
11176 
// src1 + (src2 & 0xff): mask is a byte zero-extension, folded into addw/uxtb.
instruct AddExtI_uxtb_and(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2, immI_255 mask, rFlagsReg cr)
%{
  match(Set dst (AddI src1 (AndI src2 mask)));
  ins_cost(INSN_COST);
  format %{ "addw  $dst, $src1, $src2, uxtb" %}

   ins_encode %{
     __ addw(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::uxtb);
   %}
  ins_pipe(ialu_reg_reg);
%}
11189 
// src1 + (src2 & 0xffff): mask is a short zero-extension, folded into addw/uxth.
instruct AddExtI_uxth_and(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2, immI_65535 mask, rFlagsReg cr)
%{
  match(Set dst (AddI src1 (AndI src2 mask)));
  ins_cost(INSN_COST);
  format %{ "addw  $dst, $src1, $src2, uxth" %}

   ins_encode %{
     __ addw(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::uxth);
   %}
  ins_pipe(ialu_reg_reg);
%}
11202 
// Long variant: src1 + (src2 & 0xffL) folded into add/uxtb.
instruct AddExtL_uxtb_and(iRegLNoSp dst, iRegL src1, iRegL src2, immL_255 mask, rFlagsReg cr)
%{
  match(Set dst (AddL src1 (AndL src2 mask)));
  ins_cost(INSN_COST);
  format %{ "add  $dst, $src1, $src2, uxtb" %}

   ins_encode %{
     __ add(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::uxtb);
   %}
  ins_pipe(ialu_reg_reg);
%}
11215 
// Long variant: src1 + (src2 & 0xffffL) folded into add/uxth.
instruct AddExtL_uxth_and(iRegLNoSp dst, iRegL src1, iRegL src2, immL_65535 mask, rFlagsReg cr)
%{
  match(Set dst (AddL src1 (AndL src2 mask)));
  ins_cost(INSN_COST);
  format %{ "add  $dst, $src1, $src2, uxth" %}

   ins_encode %{
     __ add(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::uxth);
   %}
  ins_pipe(ialu_reg_reg);
%}
11228 
// Long variant: src1 + (src2 & 0xffffffffL) folded into add/uxtw.
instruct AddExtL_uxtw_and(iRegLNoSp dst, iRegL src1, iRegL src2, immL_4294967295 mask, rFlagsReg cr)
%{
  match(Set dst (AddL src1 (AndL src2 mask)));
  ins_cost(INSN_COST);
  format %{ "add  $dst, $src1, $src2, uxtw" %}

   ins_encode %{
     __ add(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::uxtw);
   %}
  ins_pipe(ialu_reg_reg);
%}
11241 
// src1 - (src2 & 0xff) folded into subw/uxtb.
instruct SubExtI_uxtb_and(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2, immI_255 mask, rFlagsReg cr)
%{
  match(Set dst (SubI src1 (AndI src2 mask)));
  ins_cost(INSN_COST);
  format %{ "subw  $dst, $src1, $src2, uxtb" %}

   ins_encode %{
     __ subw(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::uxtb);
   %}
  ins_pipe(ialu_reg_reg);
%}
11254 
// src1 - (src2 & 0xffff) folded into subw/uxth.
instruct SubExtI_uxth_and(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2, immI_65535 mask, rFlagsReg cr)
%{
  match(Set dst (SubI src1 (AndI src2 mask)));
  ins_cost(INSN_COST);
  format %{ "subw  $dst, $src1, $src2, uxth" %}

   ins_encode %{
     __ subw(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::uxth);
   %}
  ins_pipe(ialu_reg_reg);
%}
11267 
// Long variant: src1 - (src2 & 0xffL) folded into sub/uxtb.
instruct SubExtL_uxtb_and(iRegLNoSp dst, iRegL src1, iRegL src2, immL_255 mask, rFlagsReg cr)
%{
  match(Set dst (SubL src1 (AndL src2 mask)));
  ins_cost(INSN_COST);
  format %{ "sub  $dst, $src1, $src2, uxtb" %}

   ins_encode %{
     __ sub(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::uxtb);
   %}
  ins_pipe(ialu_reg_reg);
%}
11280 
// Long variant: src1 - (src2 & 0xffffL) folded into sub/uxth.
instruct SubExtL_uxth_and(iRegLNoSp dst, iRegL src1, iRegL src2, immL_65535 mask, rFlagsReg cr)
%{
  match(Set dst (SubL src1 (AndL src2 mask)));
  ins_cost(INSN_COST);
  format %{ "sub  $dst, $src1, $src2, uxth" %}

   ins_encode %{
     __ sub(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::uxth);
   %}
  ins_pipe(ialu_reg_reg);
%}
11293 
// Long variant: src1 - (src2 & 0xffffffffL) folded into sub/uxtw.
instruct SubExtL_uxtw_and(iRegLNoSp dst, iRegL src1, iRegL src2, immL_4294967295 mask, rFlagsReg cr)
%{
  match(Set dst (SubL src1 (AndL src2 mask)));
  ins_cost(INSN_COST);
  format %{ "sub  $dst, $src1, $src2, uxtw" %}

   ins_encode %{
     __ sub(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::uxtw);
   %}
  ins_pipe(ialu_reg_reg);
%}
11306 
11307 // END This section of the file is automatically generated. Do not edit --------------
11308 
11309 // ============================================================================
11310 // Floating Point Arithmetic Instructions
11311 
// Single-precision float add -> fadds.
instruct addF_reg_reg(vRegF dst, vRegF src1, vRegF src2) %{
  match(Set dst (AddF src1 src2));

  ins_cost(INSN_COST * 5);
  format %{ "fadds   $dst, $src1, $src2" %}

  ins_encode %{
    __ fadds(as_FloatRegister($dst$$reg),
             as_FloatRegister($src1$$reg),
             as_FloatRegister($src2$$reg));
  %}

  ins_pipe(fp_dop_reg_reg_s);
%}
11326 
// Double-precision float add -> faddd.
instruct addD_reg_reg(vRegD dst, vRegD src1, vRegD src2) %{
  match(Set dst (AddD src1 src2));

  ins_cost(INSN_COST * 5);
  format %{ "faddd   $dst, $src1, $src2" %}

  ins_encode %{
    __ faddd(as_FloatRegister($dst$$reg),
             as_FloatRegister($src1$$reg),
             as_FloatRegister($src2$$reg));
  %}

  ins_pipe(fp_dop_reg_reg_d);
%}
11341 
// Single-precision float subtract -> fsubs.
instruct subF_reg_reg(vRegF dst, vRegF src1, vRegF src2) %{
  match(Set dst (SubF src1 src2));

  ins_cost(INSN_COST * 5);
  format %{ "fsubs   $dst, $src1, $src2" %}

  ins_encode %{
    __ fsubs(as_FloatRegister($dst$$reg),
             as_FloatRegister($src1$$reg),
             as_FloatRegister($src2$$reg));
  %}

  ins_pipe(fp_dop_reg_reg_s);
%}
11356 
// Double-precision float subtract -> fsubd.
instruct subD_reg_reg(vRegD dst, vRegD src1, vRegD src2) %{
  match(Set dst (SubD src1 src2));

  ins_cost(INSN_COST * 5);
  format %{ "fsubd   $dst, $src1, $src2" %}

  ins_encode %{
    __ fsubd(as_FloatRegister($dst$$reg),
             as_FloatRegister($src1$$reg),
             as_FloatRegister($src2$$reg));
  %}

  ins_pipe(fp_dop_reg_reg_d);
%}
11371 
// Single-precision float multiply -> fmuls.
instruct mulF_reg_reg(vRegF dst, vRegF src1, vRegF src2) %{
  match(Set dst (MulF src1 src2));

  ins_cost(INSN_COST * 6);
  format %{ "fmuls   $dst, $src1, $src2" %}

  ins_encode %{
    __ fmuls(as_FloatRegister($dst$$reg),
             as_FloatRegister($src1$$reg),
             as_FloatRegister($src2$$reg));
  %}

  ins_pipe(fp_dop_reg_reg_s);
%}
11386 
// Double-precision float multiply -> fmuld.
instruct mulD_reg_reg(vRegD dst, vRegD src1, vRegD src2) %{
  match(Set dst (MulD src1 src2));

  ins_cost(INSN_COST * 6);
  format %{ "fmuld   $dst, $src1, $src2" %}

  ins_encode %{
    __ fmuld(as_FloatRegister($dst$$reg),
             as_FloatRegister($src1$$reg),
             as_FloatRegister($src2$$reg));
  %}

  ins_pipe(fp_dop_reg_reg_d);
%}
11401 
// We cannot use these fused mul w/ add/sub ops because they don't
// produce the same result as the equivalent separated ops
// (essentially they don't round the intermediate result). That's a
// shame. Leaving them here in case we can identify cases where it is
// legitimate to use them.
11407 
11408 
11409 // instruct maddF_reg_reg(vRegF dst, vRegF src1, vRegF src2, vRegF src3) %{
11410 //   match(Set dst (AddF (MulF src1 src2) src3));
11411 
11412 //   format %{ "fmadds   $dst, $src1, $src2, $src3" %}
11413 
11414 //   ins_encode %{
11415 //     __ fmadds(as_FloatRegister($dst$$reg),
11416 //              as_FloatRegister($src1$$reg),
11417 //              as_FloatRegister($src2$$reg),
11418 //              as_FloatRegister($src3$$reg));
11419 //   %}
11420 
11421 //   ins_pipe(pipe_class_default);
11422 // %}
11423 
11424 // instruct maddD_reg_reg(vRegD dst, vRegD src1, vRegD src2, vRegD src3) %{
11425 //   match(Set dst (AddD (MulD src1 src2) src3));
11426 
11427 //   format %{ "fmaddd   $dst, $src1, $src2, $src3" %}
11428 
11429 //   ins_encode %{
11430 //     __ fmaddd(as_FloatRegister($dst$$reg),
11431 //              as_FloatRegister($src1$$reg),
11432 //              as_FloatRegister($src2$$reg),
11433 //              as_FloatRegister($src3$$reg));
11434 //   %}
11435 
11436 //   ins_pipe(pipe_class_default);
11437 // %}
11438 
11439 // instruct msubF_reg_reg(vRegF dst, vRegF src1, vRegF src2, vRegF src3) %{
11440 //   match(Set dst (AddF (MulF (NegF src1) src2) src3));
11441 //   match(Set dst (AddF (NegF (MulF src1 src2)) src3));
11442 
11443 //   format %{ "fmsubs   $dst, $src1, $src2, $src3" %}
11444 
11445 //   ins_encode %{
11446 //     __ fmsubs(as_FloatRegister($dst$$reg),
11447 //               as_FloatRegister($src1$$reg),
11448 //               as_FloatRegister($src2$$reg),
11449 //              as_FloatRegister($src3$$reg));
11450 //   %}
11451 
11452 //   ins_pipe(pipe_class_default);
11453 // %}
11454 
11455 // instruct msubD_reg_reg(vRegD dst, vRegD src1, vRegD src2, vRegD src3) %{
11456 //   match(Set dst (AddD (MulD (NegD src1) src2) src3));
11457 //   match(Set dst (AddD (NegD (MulD src1 src2)) src3));
11458 
11459 //   format %{ "fmsubd   $dst, $src1, $src2, $src3" %}
11460 
11461 //   ins_encode %{
11462 //     __ fmsubd(as_FloatRegister($dst$$reg),
11463 //               as_FloatRegister($src1$$reg),
11464 //               as_FloatRegister($src2$$reg),
11465 //               as_FloatRegister($src3$$reg));
11466 //   %}
11467 
11468 //   ins_pipe(pipe_class_default);
11469 // %}
11470 
11471 // instruct mnaddF_reg_reg(vRegF dst, vRegF src1, vRegF src2, vRegF src3) %{
11472 //   match(Set dst (SubF (MulF (NegF src1) src2) src3));
11473 //   match(Set dst (SubF (NegF (MulF src1 src2)) src3));
11474 
11475 //   format %{ "fnmadds  $dst, $src1, $src2, $src3" %}
11476 
11477 //   ins_encode %{
11478 //     __ fnmadds(as_FloatRegister($dst$$reg),
11479 //                as_FloatRegister($src1$$reg),
11480 //                as_FloatRegister($src2$$reg),
11481 //                as_FloatRegister($src3$$reg));
11482 //   %}
11483 
11484 //   ins_pipe(pipe_class_default);
11485 // %}
11486 
11487 // instruct mnaddD_reg_reg(vRegD dst, vRegD src1, vRegD src2, vRegD src3) %{
11488 //   match(Set dst (SubD (MulD (NegD src1) src2) src3));
11489 //   match(Set dst (SubD (NegD (MulD src1 src2)) src3));
11490 
11491 //   format %{ "fnmaddd   $dst, $src1, $src2, $src3" %}
11492 
11493 //   ins_encode %{
11494 //     __ fnmaddd(as_FloatRegister($dst$$reg),
11495 //                as_FloatRegister($src1$$reg),
11496 //                as_FloatRegister($src2$$reg),
11497 //                as_FloatRegister($src3$$reg));
11498 //   %}
11499 
11500 //   ins_pipe(pipe_class_default);
11501 // %}
11502 
11503 // instruct mnsubF_reg_reg(vRegF dst, vRegF src1, vRegF src2, vRegF src3, immF0 zero) %{
11504 //   match(Set dst (SubF (MulF src1 src2) src3));
11505 
11506 //   format %{ "fnmsubs  $dst, $src1, $src2, $src3" %}
11507 
11508 //   ins_encode %{
11509 //     __ fnmsubs(as_FloatRegister($dst$$reg),
11510 //                as_FloatRegister($src1$$reg),
11511 //                as_FloatRegister($src2$$reg),
11512 //                as_FloatRegister($src3$$reg));
11513 //   %}
11514 
11515 //   ins_pipe(pipe_class_default);
11516 // %}
11517 
11518 // instruct mnsubD_reg_reg(vRegD dst, vRegD src1, vRegD src2, vRegD src3, immD0 zero) %{
11519 //   match(Set dst (SubD (MulD src1 src2) src3));
11520 
11521 //   format %{ "fnmsubd   $dst, $src1, $src2, $src3" %}
11522 
11523 //   ins_encode %{
11524 //   // n.b. insn name should be fnmsubd
11525 //     __ fnmsub(as_FloatRegister($dst$$reg),
11526 //                as_FloatRegister($src1$$reg),
11527 //                as_FloatRegister($src2$$reg),
11528 //                as_FloatRegister($src3$$reg));
11529 //   %}
11530 
11531 //   ins_pipe(pipe_class_default);
11532 // %}
11533 
11534 
// Single-precision float divide -> fdivs.
instruct divF_reg_reg(vRegF dst, vRegF src1, vRegF src2) %{
  match(Set dst (DivF src1  src2));

  ins_cost(INSN_COST * 18);
  format %{ "fdivs   $dst, $src1, $src2" %}

  ins_encode %{
    __ fdivs(as_FloatRegister($dst$$reg),
             as_FloatRegister($src1$$reg),
             as_FloatRegister($src2$$reg));
  %}

  ins_pipe(fp_div_s);
%}
11549 
// Double-precision float divide -> fdivd.
instruct divD_reg_reg(vRegD dst, vRegD src1, vRegD src2) %{
  match(Set dst (DivD src1  src2));

  ins_cost(INSN_COST * 32);
  format %{ "fdivd   $dst, $src1, $src2" %}

  ins_encode %{
    __ fdivd(as_FloatRegister($dst$$reg),
             as_FloatRegister($src1$$reg),
             as_FloatRegister($src2$$reg));
  %}

  ins_pipe(fp_div_d);
%}
11564 
// Single-precision float negate -> fnegs.
instruct negF_reg_reg(vRegF dst, vRegF src) %{
  match(Set dst (NegF src));

  ins_cost(INSN_COST * 3);
  // The format previously printed "fneg"; it now names the instruction
  // actually emitted (fnegs), consistent with the fnegd/fabss listings below.
  format %{ "fnegs   $dst, $src" %}

  ins_encode %{
    __ fnegs(as_FloatRegister($dst$$reg),
             as_FloatRegister($src$$reg));
  %}

  ins_pipe(fp_uop_s);
%}
11578 
// Double-precision float negate -> fnegd.
instruct negD_reg_reg(vRegD dst, vRegD src) %{
  match(Set dst (NegD src));

  ins_cost(INSN_COST * 3);
  format %{ "fnegd   $dst, $src" %}

  ins_encode %{
    __ fnegd(as_FloatRegister($dst$$reg),
             as_FloatRegister($src$$reg));
  %}

  ins_pipe(fp_uop_d);
%}
11592 
// Single-precision float absolute value -> fabss.
instruct absF_reg(vRegF dst, vRegF src) %{
  match(Set dst (AbsF src));

  ins_cost(INSN_COST * 3);
  format %{ "fabss   $dst, $src" %}
  ins_encode %{
    __ fabss(as_FloatRegister($dst$$reg),
             as_FloatRegister($src$$reg));
  %}

  ins_pipe(fp_uop_s);
%}
11605 
// Double-precision float absolute value -> fabsd.
instruct absD_reg(vRegD dst, vRegD src) %{
  match(Set dst (AbsD src));

  ins_cost(INSN_COST * 3);
  format %{ "fabsd   $dst, $src" %}
  ins_encode %{
    __ fabsd(as_FloatRegister($dst$$reg),
             as_FloatRegister($src$$reg));
  %}

  ins_pipe(fp_uop_d);
%}
11618 
// Double-precision square root -> fsqrtd.
instruct sqrtD_reg(vRegD dst, vRegD src) %{
  match(Set dst (SqrtD src));

  ins_cost(INSN_COST * 50);
  format %{ "fsqrtd  $dst, $src" %}
  ins_encode %{
    __ fsqrtd(as_FloatRegister($dst$$reg),
             as_FloatRegister($src$$reg));
  %}

  // Fixed: this double-precision op was scheduled on the single-precision
  // divide pipe (fp_div_s) — the s/d pipe classes were swapped with sqrtF_reg.
  // Scheduling cost model only; the emitted instruction is unchanged.
  ins_pipe(fp_div_d);
%}
11631 
// Single-precision square root.  The ideal graph only has SqrtD (Java's
// Math.sqrt is double), so the float form is matched as the round-trip
// ConvD2F(SqrtD(ConvF2D src)) and collapsed into one fsqrts.
instruct sqrtF_reg(vRegF dst, vRegF src) %{
  match(Set dst (ConvD2F (SqrtD (ConvF2D src))));

  ins_cost(INSN_COST * 50);
  format %{ "fsqrts  $dst, $src" %}
  ins_encode %{
    __ fsqrts(as_FloatRegister($dst$$reg),
             as_FloatRegister($src$$reg));
  %}

  // Fixed: this single-precision op was scheduled on the double-precision
  // divide pipe (fp_div_d) — swapped with sqrtD_reg.  Cost model only.
  ins_pipe(fp_div_s);
%}
11644 
11645 // ============================================================================
11646 // Logical Instructions
11647 
11648 // Integer Logical Instructions
11649 
11650 // And Instructions
11651 
11652 
// 32-bit bitwise AND, register-register -> andw.
instruct andI_reg_reg(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2, rFlagsReg cr) %{
  match(Set dst (AndI src1 src2));

  format %{ "andw  $dst, $src1, $src2\t# int" %}

  ins_cost(INSN_COST);
  ins_encode %{
    __ andw(as_Register($dst$$reg),
            as_Register($src1$$reg),
            as_Register($src2$$reg));
  %}

  ins_pipe(ialu_reg_reg);
%}
11667 
// 32-bit bitwise AND with a logical immediate (immILog guarantees the value
// is encodable as an AArch64 bitmask immediate) -> andw.
instruct andI_reg_imm(iRegINoSp dst, iRegIorL2I src1, immILog src2, rFlagsReg cr) %{
  match(Set dst (AndI src1 src2));

  // Fixed: the format said "andsw" (flag-setting) but the encoder emits the
  // non-flag-setting andw; the debug listing now matches the emitted insn.
  format %{ "andw  $dst, $src1, $src2\t# int" %}

  ins_cost(INSN_COST);
  ins_encode %{
    __ andw(as_Register($dst$$reg),
            as_Register($src1$$reg),
            (unsigned long)($src2$$constant));
  %}

  ins_pipe(ialu_reg_imm);
%}
11682 
11683 // Or Instructions
11684 
// 32-bit bitwise OR, register-register -> orrw.
instruct orI_reg_reg(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2) %{
  match(Set dst (OrI src1 src2));

  format %{ "orrw  $dst, $src1, $src2\t# int" %}

  ins_cost(INSN_COST);
  ins_encode %{
    __ orrw(as_Register($dst$$reg),
            as_Register($src1$$reg),
            as_Register($src2$$reg));
  %}

  ins_pipe(ialu_reg_reg);
%}
11699 
// 32-bit bitwise OR with a logical (bitmask-encodable) immediate -> orrw.
instruct orI_reg_imm(iRegINoSp dst, iRegIorL2I src1, immILog src2) %{
  match(Set dst (OrI src1 src2));

  format %{ "orrw  $dst, $src1, $src2\t# int" %}

  ins_cost(INSN_COST);
  ins_encode %{
    __ orrw(as_Register($dst$$reg),
            as_Register($src1$$reg),
            (unsigned long)($src2$$constant));
  %}

  ins_pipe(ialu_reg_imm);
%}
11714 
11715 // Xor Instructions
11716 
// 32-bit bitwise XOR, register-register -> eorw.
instruct xorI_reg_reg(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2) %{
  match(Set dst (XorI src1 src2));

  format %{ "eorw  $dst, $src1, $src2\t# int" %}

  ins_cost(INSN_COST);
  ins_encode %{
    __ eorw(as_Register($dst$$reg),
            as_Register($src1$$reg),
            as_Register($src2$$reg));
  %}

  ins_pipe(ialu_reg_reg);
%}
11731 
11732 instruct xorI_reg_imm(iRegINoSp dst, iRegIorL2I src1, immILog src2) %{
11733   match(Set dst (XorI src1 src2));
11734 
11735   format %{ "eorw  $dst, $src1, $src2\t# int" %}
11736 
11737   ins_cost(INSN_COST);
11738   ins_encode %{
11739     __ eorw(as_Register($dst$$reg),
11740             as_Register($src1$$reg),
11741             (unsigned long)($src2$$constant));
11742   %}
11743 
11744   ins_pipe(ialu_reg_imm);
11745 %}
11746 
11747 // Long Logical Instructions
11748 // TODO
11749 
// And long: dst = src1 & src2 (64-bit andr, register-register form).
// Fix: format comment said "# int" for a 64-bit (long) operation.
instruct andL_reg_reg(iRegLNoSp dst, iRegL src1, iRegL src2, rFlagsReg cr) %{
  match(Set dst (AndL src1 src2));

  format %{ "and  $dst, $src1, $src2\t# long" %}

  ins_cost(INSN_COST);
  ins_encode %{
    __ andr(as_Register($dst$$reg),
            as_Register($src1$$reg),
            as_Register($src2$$reg));
  %}

  ins_pipe(ialu_reg_reg);
%}
11764 
// And long with logical immediate: dst = src1 & src2.
// Fix: format comment said "# int" for a 64-bit (long) operation.
instruct andL_reg_imm(iRegLNoSp dst, iRegL src1, immLLog src2, rFlagsReg cr) %{
  match(Set dst (AndL src1 src2));

  format %{ "and  $dst, $src1, $src2\t# long" %}

  ins_cost(INSN_COST);
  ins_encode %{
    __ andr(as_Register($dst$$reg),
            as_Register($src1$$reg),
            (unsigned long)($src2$$constant));
  %}

  ins_pipe(ialu_reg_imm);
%}
11779 
11780 // Or Instructions
11781 
// Or long: dst = src1 | src2 (64-bit orr, register-register form).
// Fix: format comment said "# int" for a 64-bit (long) operation.
instruct orL_reg_reg(iRegLNoSp dst, iRegL src1, iRegL src2) %{
  match(Set dst (OrL src1 src2));

  format %{ "orr  $dst, $src1, $src2\t# long" %}

  ins_cost(INSN_COST);
  ins_encode %{
    __ orr(as_Register($dst$$reg),
           as_Register($src1$$reg),
           as_Register($src2$$reg));
  %}

  ins_pipe(ialu_reg_reg);
%}
11796 
// Or long with logical immediate: dst = src1 | src2.
// Fix: format comment said "# int" for a 64-bit (long) operation.
instruct orL_reg_imm(iRegLNoSp dst, iRegL src1, immLLog src2) %{
  match(Set dst (OrL src1 src2));

  format %{ "orr  $dst, $src1, $src2\t# long" %}

  ins_cost(INSN_COST);
  ins_encode %{
    __ orr(as_Register($dst$$reg),
           as_Register($src1$$reg),
           (unsigned long)($src2$$constant));
  %}

  ins_pipe(ialu_reg_imm);
%}
11811 
11812 // Xor Instructions
11813 
// Xor long: dst = src1 ^ src2 (64-bit eor, register-register form).
// Fix: format comment said "# int" for a 64-bit (long) operation.
instruct xorL_reg_reg(iRegLNoSp dst, iRegL src1, iRegL src2) %{
  match(Set dst (XorL src1 src2));

  format %{ "eor  $dst, $src1, $src2\t# long" %}

  ins_cost(INSN_COST);
  ins_encode %{
    __ eor(as_Register($dst$$reg),
           as_Register($src1$$reg),
           as_Register($src2$$reg));
  %}

  ins_pipe(ialu_reg_reg);
%}
11828 
// Xor long with logical immediate: dst = src1 ^ src2.
// Fixes: format comment said "# int" for a 64-bit (long) operation, and
// ins_cost preceded format, inconsistent with every sibling rule.
instruct xorL_reg_imm(iRegLNoSp dst, iRegL src1, immLLog src2) %{
  match(Set dst (XorL src1 src2));

  format %{ "eor  $dst, $src1, $src2\t# long" %}

  ins_cost(INSN_COST);
  ins_encode %{
    __ eor(as_Register($dst$$reg),
           as_Register($src1$$reg),
           (unsigned long)($src2$$constant));
  %}

  ins_pipe(ialu_reg_imm);
%}
11843 
// Signed int -> long: sign-extend the low 32 bits (sbfm #0, #31 == sxtw).
instruct convI2L_reg_reg(iRegLNoSp dst, iRegIorL2I src)
%{
  match(Set dst (ConvI2L src));

  ins_cost(INSN_COST);
  format %{ "sxtw  $dst, $src\t# i2l" %}
  ins_encode %{
    __ sbfm($dst$$Register, $src$$Register, 0, 31);
  %}
  ins_pipe(ialu_reg_shift);
%}

// this pattern occurs in bigmath arithmetic
// Unsigned int -> long: (ConvI2L src) & 0xFFFFFFFF collapses to a single
// zero-extend (ubfm #0, #31 == uxtw).
instruct convUI2L_reg_reg(iRegLNoSp dst, iRegIorL2I src, immL_32bits mask)
%{
  match(Set dst (AndL (ConvI2L src) mask));

  ins_cost(INSN_COST);
  format %{ "ubfm  $dst, $src, 0, 31\t# ui2l" %}
  ins_encode %{
    __ ubfm($dst$$Register, $src$$Register, 0, 31);
  %}

  ins_pipe(ialu_reg_shift);
%}

// Long -> int: a 32-bit register move keeps only the low word.
instruct convL2I_reg(iRegINoSp dst, iRegL src) %{
  match(Set dst (ConvL2I src));

  ins_cost(INSN_COST);
  format %{ "movw  $dst, $src \t// l2i" %}

  ins_encode %{
    __ movw(as_Register($dst$$reg), as_Register($src$$reg));
  %}

  ins_pipe(ialu_reg);
%}

// Int -> boolean: dst = (src != 0) ? 1 : 0, via compare-with-zero + cset.
instruct convI2B(iRegINoSp dst, iRegIorL2I src, rFlagsReg cr)
%{
  match(Set dst (Conv2B src));
  effect(KILL cr);

  format %{
    "cmpw $src, zr\n\t"
    "cset $dst, ne"
  %}

  ins_encode %{
    __ cmpw(as_Register($src$$reg), zr);
    __ cset(as_Register($dst$$reg), Assembler::NE);
  %}

  ins_pipe(ialu_reg);
%}

// Pointer -> boolean: dst = (src != NULL) ? 1 : 0 (64-bit compare).
instruct convP2B(iRegINoSp dst, iRegP src, rFlagsReg cr)
%{
  match(Set dst (Conv2B src));
  effect(KILL cr);

  format %{
    "cmp  $src, zr\n\t"
    "cset $dst, ne"
  %}

  ins_encode %{
    __ cmp(as_Register($src$$reg), zr);
    __ cset(as_Register($dst$$reg), Assembler::NE);
  %}

  ins_pipe(ialu_reg);
%}

// Double -> float narrowing conversion.
instruct convD2F_reg(vRegF dst, vRegD src) %{
  match(Set dst (ConvD2F src));

  ins_cost(INSN_COST * 5);
  format %{ "fcvtd  $dst, $src \t// d2f" %}

  ins_encode %{
    __ fcvtd(as_FloatRegister($dst$$reg), as_FloatRegister($src$$reg));
  %}

  ins_pipe(fp_d2f);
%}

// Float -> double widening conversion.
instruct convF2D_reg(vRegD dst, vRegF src) %{
  match(Set dst (ConvF2D src));

  ins_cost(INSN_COST * 5);
  format %{ "fcvts  $dst, $src \t// f2d" %}

  ins_encode %{
    __ fcvts(as_FloatRegister($dst$$reg), as_FloatRegister($src$$reg));
  %}

  ins_pipe(fp_f2d);
%}

// Float -> int, signed, round toward zero.
instruct convF2I_reg_reg(iRegINoSp dst, vRegF src) %{
  match(Set dst (ConvF2I src));

  ins_cost(INSN_COST * 5);
  format %{ "fcvtzsw  $dst, $src \t// f2i" %}

  ins_encode %{
    __ fcvtzsw(as_Register($dst$$reg), as_FloatRegister($src$$reg));
  %}

  ins_pipe(fp_f2i);
%}

// Float -> long, signed, round toward zero.
instruct convF2L_reg_reg(iRegLNoSp dst, vRegF src) %{
  match(Set dst (ConvF2L src));

  ins_cost(INSN_COST * 5);
  format %{ "fcvtzs  $dst, $src \t// f2l" %}

  ins_encode %{
    __ fcvtzs(as_Register($dst$$reg), as_FloatRegister($src$$reg));
  %}

  ins_pipe(fp_f2l);
%}

// Signed int -> float.
instruct convI2F_reg_reg(vRegF dst, iRegIorL2I src) %{
  match(Set dst (ConvI2F src));

  ins_cost(INSN_COST * 5);
  format %{ "scvtfws  $dst, $src \t// i2f" %}

  ins_encode %{
    __ scvtfws(as_FloatRegister($dst$$reg), as_Register($src$$reg));
  %}

  ins_pipe(fp_i2f);
%}

// Signed long -> float.
instruct convL2F_reg_reg(vRegF dst, iRegL src) %{
  match(Set dst (ConvL2F src));

  ins_cost(INSN_COST * 5);
  format %{ "scvtfs  $dst, $src \t// l2f" %}

  ins_encode %{
    __ scvtfs(as_FloatRegister($dst$$reg), as_Register($src$$reg));
  %}

  ins_pipe(fp_l2f);
%}

// Double -> int, signed, round toward zero.
instruct convD2I_reg_reg(iRegINoSp dst, vRegD src) %{
  match(Set dst (ConvD2I src));

  ins_cost(INSN_COST * 5);
  format %{ "fcvtzdw  $dst, $src \t// d2i" %}

  ins_encode %{
    __ fcvtzdw(as_Register($dst$$reg), as_FloatRegister($src$$reg));
  %}

  ins_pipe(fp_d2i);
%}

// Double -> long, signed, round toward zero.
instruct convD2L_reg_reg(iRegLNoSp dst, vRegD src) %{
  match(Set dst (ConvD2L src));

  ins_cost(INSN_COST * 5);
  format %{ "fcvtzd  $dst, $src \t// d2l" %}

  ins_encode %{
    __ fcvtzd(as_Register($dst$$reg), as_FloatRegister($src$$reg));
  %}

  ins_pipe(fp_d2l);
%}

// Signed int -> double.
instruct convI2D_reg_reg(vRegD dst, iRegIorL2I src) %{
  match(Set dst (ConvI2D src));

  ins_cost(INSN_COST * 5);
  format %{ "scvtfwd  $dst, $src \t// i2d" %}

  ins_encode %{
    __ scvtfwd(as_FloatRegister($dst$$reg), as_Register($src$$reg));
  %}

  ins_pipe(fp_i2d);
%}

// Signed long -> double.
instruct convL2D_reg_reg(vRegD dst, iRegL src) %{
  match(Set dst (ConvL2D src));

  ins_cost(INSN_COST * 5);
  format %{ "scvtfd  $dst, $src \t// l2d" %}

  ins_encode %{
    __ scvtfd(as_FloatRegister($dst$$reg), as_Register($src$$reg));
  %}

  ins_pipe(fp_l2d);
%}
12048 
// stack <-> reg and reg <-> reg shuffles with no conversion

// Reinterpret a float stack slot as an int register (raw bit move via load).
instruct MoveF2I_stack_reg(iRegINoSp dst, stackSlotF src) %{

  match(Set dst (MoveF2I src));

  effect(DEF dst, USE src);

  ins_cost(4 * INSN_COST);

  format %{ "ldrw $dst, $src\t# MoveF2I_stack_reg" %}

  ins_encode %{
    __ ldrw($dst$$Register, Address(sp, $src$$disp));
  %}

  ins_pipe(iload_reg_reg);

%}

// Reinterpret an int stack slot as a float register.
instruct MoveI2F_stack_reg(vRegF dst, stackSlotI src) %{

  match(Set dst (MoveI2F src));

  effect(DEF dst, USE src);

  ins_cost(4 * INSN_COST);

  format %{ "ldrs $dst, $src\t# MoveI2F_stack_reg" %}

  ins_encode %{
    __ ldrs(as_FloatRegister($dst$$reg), Address(sp, $src$$disp));
  %}

  ins_pipe(pipe_class_memory);

%}

// Reinterpret a double stack slot as a long register.
instruct MoveD2L_stack_reg(iRegLNoSp dst, stackSlotD src) %{

  match(Set dst (MoveD2L src));

  effect(DEF dst, USE src);

  ins_cost(4 * INSN_COST);

  format %{ "ldr $dst, $src\t# MoveD2L_stack_reg" %}

  ins_encode %{
    __ ldr($dst$$Register, Address(sp, $src$$disp));
  %}

  ins_pipe(iload_reg_reg);

%}

// Reinterpret a long stack slot as a double register.
instruct MoveL2D_stack_reg(vRegD dst, stackSlotL src) %{

  match(Set dst (MoveL2D src));

  effect(DEF dst, USE src);

  ins_cost(4 * INSN_COST);

  format %{ "ldrd $dst, $src\t# MoveL2D_stack_reg" %}

  ins_encode %{
    __ ldrd(as_FloatRegister($dst$$reg), Address(sp, $src$$disp));
  %}

  ins_pipe(pipe_class_memory);

%}

// Spill a float register to an int stack slot (raw bits, no conversion).
instruct MoveF2I_reg_stack(stackSlotI dst, vRegF src) %{

  match(Set dst (MoveF2I src));

  effect(DEF dst, USE src);

  ins_cost(INSN_COST);

  format %{ "strs $src, $dst\t# MoveF2I_reg_stack" %}

  ins_encode %{
    __ strs(as_FloatRegister($src$$reg), Address(sp, $dst$$disp));
  %}

  ins_pipe(pipe_class_memory);

%}

// Spill an int register to a float stack slot (raw bits, no conversion).
instruct MoveI2F_reg_stack(stackSlotF dst, iRegI src) %{

  match(Set dst (MoveI2F src));

  effect(DEF dst, USE src);

  ins_cost(INSN_COST);

  format %{ "strw $src, $dst\t# MoveI2F_reg_stack" %}

  ins_encode %{
    __ strw($src$$Register, Address(sp, $dst$$disp));
  %}

  ins_pipe(istore_reg_reg);

%}
12158 
// Spill a double register to a long stack slot (raw bits, no conversion).
// Fix: the format previously printed "strd $dst, $src" with the operands
// reversed; the encoding stores $src to the stack slot $dst, matching the
// sibling Move*_reg_stack rules ("str<t> $src, $dst").
instruct MoveD2L_reg_stack(stackSlotL dst, vRegD src) %{

  match(Set dst (MoveD2L src));

  effect(DEF dst, USE src);

  ins_cost(INSN_COST);

  format %{ "strd $src, $dst\t# MoveD2L_reg_stack" %}

  ins_encode %{
    __ strd(as_FloatRegister($src$$reg), Address(sp, $dst$$disp));
  %}

  ins_pipe(pipe_class_memory);

%}
12176 
// Spill a long register to a double stack slot (raw bits, no conversion).
instruct MoveL2D_reg_stack(stackSlotD dst, iRegL src) %{

  match(Set dst (MoveL2D src));

  effect(DEF dst, USE src);

  ins_cost(INSN_COST);

  format %{ "str $src, $dst\t# MoveL2D_reg_stack" %}

  ins_encode %{
    __ str($src$$Register, Address(sp, $dst$$disp));
  %}

  ins_pipe(istore_reg_reg);

%}

// Move float bits to an int register (fmov, no conversion).
instruct MoveF2I_reg_reg(iRegINoSp dst, vRegF src) %{

  match(Set dst (MoveF2I src));

  effect(DEF dst, USE src);

  ins_cost(INSN_COST);

  format %{ "fmovs $dst, $src\t# MoveF2I_reg_reg" %}

  ins_encode %{
    __ fmovs($dst$$Register, as_FloatRegister($src$$reg));
  %}

  ins_pipe(fp_f2i);

%}

// Move int bits to a float register (fmov, no conversion).
instruct MoveI2F_reg_reg(vRegF dst, iRegI src) %{

  match(Set dst (MoveI2F src));

  effect(DEF dst, USE src);

  ins_cost(INSN_COST);

  format %{ "fmovs $dst, $src\t# MoveI2F_reg_reg" %}

  ins_encode %{
    __ fmovs(as_FloatRegister($dst$$reg), $src$$Register);
  %}

  ins_pipe(fp_i2f);

%}

// Move double bits to a long register (fmov, no conversion).
instruct MoveD2L_reg_reg(iRegLNoSp dst, vRegD src) %{

  match(Set dst (MoveD2L src));

  effect(DEF dst, USE src);

  ins_cost(INSN_COST);

  format %{ "fmovd $dst, $src\t# MoveD2L_reg_reg" %}

  ins_encode %{
    __ fmovd($dst$$Register, as_FloatRegister($src$$reg));
  %}

  ins_pipe(fp_d2l);

%}

// Move long bits to a double register (fmov, no conversion).
instruct MoveL2D_reg_reg(vRegD dst, iRegL src) %{

  match(Set dst (MoveL2D src));

  effect(DEF dst, USE src);

  ins_cost(INSN_COST);

  format %{ "fmovd $dst, $src\t# MoveL2D_reg_reg" %}

  ins_encode %{
    __ fmovd(as_FloatRegister($dst$$reg), $src$$Register);
  %}

  ins_pipe(fp_l2d);

%}
12266 
12267 // ============================================================================
12268 // clearing of an array
12269 
12270 instruct clearArray_reg_reg(iRegL_R11 cnt, iRegP_R10 base, Universe dummy, rFlagsReg cr)
12271 %{
12272   match(Set dummy (ClearArray cnt base));
12273   effect(USE_KILL cnt, USE_KILL base, KILL cr);
12274 
12275   ins_cost(4 * INSN_COST);
12276   format %{ "ClearArray $cnt, $base" %}
12277 
12278   ins_encode %{
12279     __ zero_words($base$$Register, $cnt$$Register);
12280   %}
12281 
12282   ins_pipe(pipe_class_memory);
12283 %}
12284 
12285 instruct clearArray_imm_reg(immL cnt, iRegP_R10 base, iRegL_R11 tmp, Universe dummy, rFlagsReg cr)
12286 %{
12287   match(Set dummy (ClearArray cnt base));
12288   effect(USE_KILL base, TEMP tmp, KILL cr);
12289 
12290   ins_cost(4 * INSN_COST);
12291   format %{ "ClearArray $cnt, $base" %}
12292 
12293   ins_encode %{
12294     __ zero_words($base$$Register, (u_int64_t)$cnt$$constant);
12295   %}
12296 
12297   ins_pipe(pipe_class_memory);
12298 %}
12299 
12300 // ============================================================================
12301 // Overflow Math Instructions
12302 
12303 instruct overflowAddI_reg_reg(rFlagsReg cr, iRegIorL2I op1, iRegIorL2I op2)
12304 %{
12305   match(Set cr (OverflowAddI op1 op2));
12306 
12307   format %{ "cmnw  $op1, $op2\t# overflow check int" %}
12308   ins_cost(INSN_COST);
12309   ins_encode %{
12310     __ cmnw($op1$$Register, $op2$$Register);
12311   %}
12312 
12313   ins_pipe(icmp_reg_reg);
12314 %}
12315 
12316 instruct overflowAddI_reg_imm(rFlagsReg cr, iRegIorL2I op1, immIAddSub op2)
12317 %{
12318   match(Set cr (OverflowAddI op1 op2));
12319 
12320   format %{ "cmnw  $op1, $op2\t# overflow check int" %}
12321   ins_cost(INSN_COST);
12322   ins_encode %{
12323     __ cmnw($op1$$Register, $op2$$constant);
12324   %}
12325 
12326   ins_pipe(icmp_reg_imm);
12327 %}
12328 
12329 instruct overflowAddL_reg_reg(rFlagsReg cr, iRegL op1, iRegL op2)
12330 %{
12331   match(Set cr (OverflowAddL op1 op2));
12332 
12333   format %{ "cmn   $op1, $op2\t# overflow check long" %}
12334   ins_cost(INSN_COST);
12335   ins_encode %{
12336     __ cmn($op1$$Register, $op2$$Register);
12337   %}
12338 
12339   ins_pipe(icmp_reg_reg);
12340 %}
12341 
12342 instruct overflowAddL_reg_imm(rFlagsReg cr, iRegL op1, immLAddSub op2)
12343 %{
12344   match(Set cr (OverflowAddL op1 op2));
12345 
12346   format %{ "cmn   $op1, $op2\t# overflow check long" %}
12347   ins_cost(INSN_COST);
12348   ins_encode %{
12349     __ cmn($op1$$Register, $op2$$constant);
12350   %}
12351 
12352   ins_pipe(icmp_reg_imm);
12353 %}
12354 
12355 instruct overflowSubI_reg_reg(rFlagsReg cr, iRegIorL2I op1, iRegIorL2I op2)
12356 %{
12357   match(Set cr (OverflowSubI op1 op2));
12358 
12359   format %{ "cmpw  $op1, $op2\t# overflow check int" %}
12360   ins_cost(INSN_COST);
12361   ins_encode %{
12362     __ cmpw($op1$$Register, $op2$$Register);
12363   %}
12364 
12365   ins_pipe(icmp_reg_reg);
12366 %}
12367 
12368 instruct overflowSubI_reg_imm(rFlagsReg cr, iRegIorL2I op1, immIAddSub op2)
12369 %{
12370   match(Set cr (OverflowSubI op1 op2));
12371 
12372   format %{ "cmpw  $op1, $op2\t# overflow check int" %}
12373   ins_cost(INSN_COST);
12374   ins_encode %{
12375     __ cmpw($op1$$Register, $op2$$constant);
12376   %}
12377 
12378   ins_pipe(icmp_reg_imm);
12379 %}
12380 
12381 instruct overflowSubL_reg_reg(rFlagsReg cr, iRegL op1, iRegL op2)
12382 %{
12383   match(Set cr (OverflowSubL op1 op2));
12384 
12385   format %{ "cmp   $op1, $op2\t# overflow check long" %}
12386   ins_cost(INSN_COST);
12387   ins_encode %{
12388     __ cmp($op1$$Register, $op2$$Register);
12389   %}
12390 
12391   ins_pipe(icmp_reg_reg);
12392 %}
12393 
12394 instruct overflowSubL_reg_imm(rFlagsReg cr, iRegL op1, immLAddSub op2)
12395 %{
12396   match(Set cr (OverflowSubL op1 op2));
12397 
12398   format %{ "cmp   $op1, $op2\t# overflow check long" %}
12399   ins_cost(INSN_COST);
12400   ins_encode %{
12401     __ cmp($op1$$Register, $op2$$constant);
12402   %}
12403 
12404   ins_pipe(icmp_reg_imm);
12405 %}
12406 
12407 instruct overflowNegI_reg(rFlagsReg cr, immI0 zero, iRegIorL2I op1)
12408 %{
12409   match(Set cr (OverflowSubI zero op1));
12410 
12411   format %{ "cmpw  zr, $op1\t# overflow check int" %}
12412   ins_cost(INSN_COST);
12413   ins_encode %{
12414     __ cmpw(zr, $op1$$Register);
12415   %}
12416 
12417   ins_pipe(icmp_reg_imm);
12418 %}
12419 
12420 instruct overflowNegL_reg(rFlagsReg cr, immI0 zero, iRegL op1)
12421 %{
12422   match(Set cr (OverflowSubL zero op1));
12423 
12424   format %{ "cmp   zr, $op1\t# overflow check long" %}
12425   ins_cost(INSN_COST);
12426   ins_encode %{
12427     __ cmp(zr, $op1$$Register);
12428   %}
12429 
12430   ins_pipe(icmp_reg_imm);
12431 %}
12432 
12433 instruct overflowMulI_reg(rFlagsReg cr, iRegIorL2I op1, iRegIorL2I op2)
12434 %{
12435   match(Set cr (OverflowMulI op1 op2));
12436 
12437   format %{ "smull rscratch1, $op1, $op2\t# overflow check int\n\t"
12438             "cmp   rscratch1, rscratch1, sxtw\n\t"
12439             "movw  rscratch1, #0x80000000\n\t"
12440             "cselw rscratch1, rscratch1, zr, NE\n\t"
12441             "cmpw  rscratch1, #1" %}
12442   ins_cost(5 * INSN_COST);
12443   ins_encode %{
12444     __ smull(rscratch1, $op1$$Register, $op2$$Register);
12445     __ subs(zr, rscratch1, rscratch1, ext::sxtw);      // NE => overflow
12446     __ movw(rscratch1, 0x80000000);                    // Develop 0 (EQ),
12447     __ cselw(rscratch1, rscratch1, zr, Assembler::NE); // or 0x80000000 (NE)
12448     __ cmpw(rscratch1, 1);                             // 0x80000000 - 1 => VS
12449   %}
12450 
12451   ins_pipe(pipe_slow);
12452 %}
12453 
12454 instruct overflowMulI_reg_branch(cmpOp cmp, iRegIorL2I op1, iRegIorL2I op2, label labl, rFlagsReg cr)
12455 %{
12456   match(If cmp (OverflowMulI op1 op2));
12457   predicate(n->in(1)->as_Bool()->_test._test == BoolTest::overflow
12458             || n->in(1)->as_Bool()->_test._test == BoolTest::no_overflow);
12459   effect(USE labl, KILL cr);
12460 
12461   format %{ "smull rscratch1, $op1, $op2\t# overflow check int\n\t"
12462             "cmp   rscratch1, rscratch1, sxtw\n\t"
12463             "b$cmp   $labl" %}
12464   ins_cost(3 * INSN_COST); // Branch is rare so treat as INSN_COST
12465   ins_encode %{
12466     Label* L = $labl$$label;
12467     Assembler::Condition cond = (Assembler::Condition)$cmp$$cmpcode;
12468     __ smull(rscratch1, $op1$$Register, $op2$$Register);
12469     __ subs(zr, rscratch1, rscratch1, ext::sxtw);      // NE => overflow
12470     __ br(cond == Assembler::VS ? Assembler::NE : Assembler::EQ, *L);
12471   %}
12472 
12473   ins_pipe(pipe_serial);
12474 %}
12475 
12476 instruct overflowMulL_reg(rFlagsReg cr, iRegL op1, iRegL op2)
12477 %{
12478   match(Set cr (OverflowMulL op1 op2));
12479 
12480   format %{ "mul   rscratch1, $op1, $op2\t#overflow check long\n\t"
12481             "smulh rscratch2, $op1, $op2\n\t"
12482             "cmp   rscratch2, rscratch1, ASR #63\n\t"
12483             "movw  rscratch1, #0x80000000\n\t"
12484             "cselw rscratch1, rscratch1, zr, NE\n\t"
12485             "cmpw  rscratch1, #1" %}
12486   ins_cost(6 * INSN_COST);
12487   ins_encode %{
12488     __ mul(rscratch1, $op1$$Register, $op2$$Register);   // Result bits 0..63
12489     __ smulh(rscratch2, $op1$$Register, $op2$$Register); // Result bits 64..127
12490     __ cmp(rscratch2, rscratch1, Assembler::ASR, 63);    // Top is pure sign ext
12491     __ movw(rscratch1, 0x80000000);                    // Develop 0 (EQ),
12492     __ cselw(rscratch1, rscratch1, zr, Assembler::NE); // or 0x80000000 (NE)
12493     __ cmpw(rscratch1, 1);                             // 0x80000000 - 1 => VS
12494   %}
12495 
12496   ins_pipe(pipe_slow);
12497 %}
12498 
12499 instruct overflowMulL_reg_branch(cmpOp cmp, iRegL op1, iRegL op2, label labl, rFlagsReg cr)
12500 %{
12501   match(If cmp (OverflowMulL op1 op2));
12502   predicate(n->in(1)->as_Bool()->_test._test == BoolTest::overflow
12503             || n->in(1)->as_Bool()->_test._test == BoolTest::no_overflow);
12504   effect(USE labl, KILL cr);
12505 
12506   format %{ "mul   rscratch1, $op1, $op2\t#overflow check long\n\t"
12507             "smulh rscratch2, $op1, $op2\n\t"
12508             "cmp   rscratch2, rscratch1, ASR #63\n\t"
12509             "b$cmp $labl" %}
12510   ins_cost(4 * INSN_COST); // Branch is rare so treat as INSN_COST
12511   ins_encode %{
12512     Label* L = $labl$$label;
12513     Assembler::Condition cond = (Assembler::Condition)$cmp$$cmpcode;
12514     __ mul(rscratch1, $op1$$Register, $op2$$Register);   // Result bits 0..63
12515     __ smulh(rscratch2, $op1$$Register, $op2$$Register); // Result bits 64..127
12516     __ cmp(rscratch2, rscratch1, Assembler::ASR, 63);    // Top is pure sign ext
12517     __ br(cond == Assembler::VS ? Assembler::NE : Assembler::EQ, *L);
12518   %}
12519 
12520   ins_pipe(pipe_serial);
12521 %}
12522 
12523 // ============================================================================
12524 // Compare Instructions
12525 
12526 instruct compI_reg_reg(rFlagsReg cr, iRegI op1, iRegI op2)
12527 %{
12528   match(Set cr (CmpI op1 op2));
12529 
12530   effect(DEF cr, USE op1, USE op2);
12531 
12532   ins_cost(INSN_COST);
12533   format %{ "cmpw  $op1, $op2" %}
12534 
12535   ins_encode(aarch64_enc_cmpw(op1, op2));
12536 
12537   ins_pipe(icmp_reg_reg);
12538 %}
12539 
12540 instruct compI_reg_immI0(rFlagsReg cr, iRegI op1, immI0 zero)
12541 %{
12542   match(Set cr (CmpI op1 zero));
12543 
12544   effect(DEF cr, USE op1);
12545 
12546   ins_cost(INSN_COST);
12547   format %{ "cmpw $op1, 0" %}
12548 
12549   ins_encode(aarch64_enc_cmpw_imm_addsub(op1, zero));
12550 
12551   ins_pipe(icmp_reg_imm);
12552 %}
12553 
12554 instruct compI_reg_immIAddSub(rFlagsReg cr, iRegI op1, immIAddSub op2)
12555 %{
12556   match(Set cr (CmpI op1 op2));
12557 
12558   effect(DEF cr, USE op1);
12559 
12560   ins_cost(INSN_COST);
12561   format %{ "cmpw  $op1, $op2" %}
12562 
12563   ins_encode(aarch64_enc_cmpw_imm_addsub(op1, op2));
12564 
12565   ins_pipe(icmp_reg_imm);
12566 %}
12567 
12568 instruct compI_reg_immI(rFlagsReg cr, iRegI op1, immI op2)
12569 %{
12570   match(Set cr (CmpI op1 op2));
12571 
12572   effect(DEF cr, USE op1);
12573 
12574   ins_cost(INSN_COST * 2);
12575   format %{ "cmpw  $op1, $op2" %}
12576 
12577   ins_encode(aarch64_enc_cmpw_imm(op1, op2));
12578 
12579   ins_pipe(icmp_reg_imm);
12580 %}
12581 
12582 // Unsigned compare Instructions; really, same as signed compare
12583 // except it should only be used to feed an If or a CMovI which takes a
12584 // cmpOpU.
12585 
12586 instruct compU_reg_reg(rFlagsRegU cr, iRegI op1, iRegI op2)
12587 %{
12588   match(Set cr (CmpU op1 op2));
12589 
12590   effect(DEF cr, USE op1, USE op2);
12591 
12592   ins_cost(INSN_COST);
12593   format %{ "cmpw  $op1, $op2\t# unsigned" %}
12594 
12595   ins_encode(aarch64_enc_cmpw(op1, op2));
12596 
12597   ins_pipe(icmp_reg_reg);
12598 %}
12599 
12600 instruct compU_reg_immI0(rFlagsRegU cr, iRegI op1, immI0 zero)
12601 %{
12602   match(Set cr (CmpU op1 zero));
12603 
12604   effect(DEF cr, USE op1);
12605 
12606   ins_cost(INSN_COST);
12607   format %{ "cmpw $op1, #0\t# unsigned" %}
12608 
12609   ins_encode(aarch64_enc_cmpw_imm_addsub(op1, zero));
12610 
12611   ins_pipe(icmp_reg_imm);
12612 %}
12613 
12614 instruct compU_reg_immIAddSub(rFlagsRegU cr, iRegI op1, immIAddSub op2)
12615 %{
12616   match(Set cr (CmpU op1 op2));
12617 
12618   effect(DEF cr, USE op1);
12619 
12620   ins_cost(INSN_COST);
12621   format %{ "cmpw  $op1, $op2\t# unsigned" %}
12622 
12623   ins_encode(aarch64_enc_cmpw_imm_addsub(op1, op2));
12624 
12625   ins_pipe(icmp_reg_imm);
12626 %}
12627 
12628 instruct compU_reg_immI(rFlagsRegU cr, iRegI op1, immI op2)
12629 %{
12630   match(Set cr (CmpU op1 op2));
12631 
12632   effect(DEF cr, USE op1);
12633 
12634   ins_cost(INSN_COST * 2);
12635   format %{ "cmpw  $op1, $op2\t# unsigned" %}
12636 
12637   ins_encode(aarch64_enc_cmpw_imm(op1, op2));
12638 
12639   ins_pipe(icmp_reg_imm);
12640 %}
12641 
12642 instruct compL_reg_reg(rFlagsReg cr, iRegL op1, iRegL op2)
12643 %{
12644   match(Set cr (CmpL op1 op2));
12645 
12646   effect(DEF cr, USE op1, USE op2);
12647 
12648   ins_cost(INSN_COST);
12649   format %{ "cmp  $op1, $op2" %}
12650 
12651   ins_encode(aarch64_enc_cmp(op1, op2));
12652 
12653   ins_pipe(icmp_reg_reg);
12654 %}
12655 
12656 instruct compL_reg_immL0(rFlagsReg cr, iRegL op1, immL0 zero)
12657 %{
12658   match(Set cr (CmpL op1 zero));
12659 
12660   effect(DEF cr, USE op1);
12661 
12662   ins_cost(INSN_COST);
12663   format %{ "tst  $op1" %}
12664 
12665   ins_encode(aarch64_enc_cmp_imm_addsub(op1, zero));
12666 
12667   ins_pipe(icmp_reg_imm);
12668 %}
12669 
12670 instruct compL_reg_immLAddSub(rFlagsReg cr, iRegL op1, immLAddSub op2)
12671 %{
12672   match(Set cr (CmpL op1 op2));
12673 
12674   effect(DEF cr, USE op1);
12675 
12676   ins_cost(INSN_COST);
12677   format %{ "cmp  $op1, $op2" %}
12678 
12679   ins_encode(aarch64_enc_cmp_imm_addsub(op1, op2));
12680 
12681   ins_pipe(icmp_reg_imm);
12682 %}
12683 
12684 instruct compL_reg_immL(rFlagsReg cr, iRegL op1, immL op2)
12685 %{
12686   match(Set cr (CmpL op1 op2));
12687 
12688   effect(DEF cr, USE op1);
12689 
12690   ins_cost(INSN_COST * 2);
12691   format %{ "cmp  $op1, $op2" %}
12692 
12693   ins_encode(aarch64_enc_cmp_imm(op1, op2));
12694 
12695   ins_pipe(icmp_reg_imm);
12696 %}
12697 
12698 instruct compUL_reg_reg(rFlagsRegU cr, iRegL op1, iRegL op2)
12699 %{
12700   match(Set cr (CmpUL op1 op2));
12701 
12702   effect(DEF cr, USE op1, USE op2);
12703 
12704   ins_cost(INSN_COST);
12705   format %{ "cmp  $op1, $op2" %}
12706 
12707   ins_encode(aarch64_enc_cmp(op1, op2));
12708 
12709   ins_pipe(icmp_reg_reg);
12710 %}
12711 
12712 instruct compUL_reg_immL0(rFlagsRegU cr, iRegL op1, immL0 zero)
12713 %{
12714   match(Set cr (CmpUL op1 zero));
12715 
12716   effect(DEF cr, USE op1);
12717 
12718   ins_cost(INSN_COST);
12719   format %{ "tst  $op1" %}
12720 
12721   ins_encode(aarch64_enc_cmp_imm_addsub(op1, zero));
12722 
12723   ins_pipe(icmp_reg_imm);
12724 %}
12725 
12726 instruct compUL_reg_immLAddSub(rFlagsRegU cr, iRegL op1, immLAddSub op2)
12727 %{
12728   match(Set cr (CmpUL op1 op2));
12729 
12730   effect(DEF cr, USE op1);
12731 
12732   ins_cost(INSN_COST);
12733   format %{ "cmp  $op1, $op2" %}
12734 
12735   ins_encode(aarch64_enc_cmp_imm_addsub(op1, op2));
12736 
12737   ins_pipe(icmp_reg_imm);
12738 %}
12739 
12740 instruct compUL_reg_immL(rFlagsRegU cr, iRegL op1, immL op2)
12741 %{
12742   match(Set cr (CmpUL op1 op2));
12743 
12744   effect(DEF cr, USE op1);
12745 
12746   ins_cost(INSN_COST * 2);
12747   format %{ "cmp  $op1, $op2" %}
12748 
12749   ins_encode(aarch64_enc_cmp_imm(op1, op2));
12750 
12751   ins_pipe(icmp_reg_imm);
12752 %}
12753 
12754 instruct compP_reg_reg(rFlagsRegU cr, iRegP op1, iRegP op2)
12755 %{
12756   match(Set cr (CmpP op1 op2));
12757 
12758   effect(DEF cr, USE op1, USE op2);
12759 
12760   ins_cost(INSN_COST);
12761   format %{ "cmp  $op1, $op2\t // ptr" %}
12762 
12763   ins_encode(aarch64_enc_cmpp(op1, op2));
12764 
12765   ins_pipe(icmp_reg_reg);
12766 %}
12767 
12768 instruct compN_reg_reg(rFlagsRegU cr, iRegN op1, iRegN op2)
12769 %{
12770   match(Set cr (CmpN op1 op2));
12771 
12772   effect(DEF cr, USE op1, USE op2);
12773 
12774   ins_cost(INSN_COST);
12775   format %{ "cmp  $op1, $op2\t // compressed ptr" %}
12776 
12777   ins_encode(aarch64_enc_cmpn(op1, op2));
12778 
12779   ins_pipe(icmp_reg_reg);
12780 %}
12781 
12782 instruct testP_reg(rFlagsRegU cr, iRegP op1, immP0 zero)
12783 %{
12784   match(Set cr (CmpP op1 zero));
12785 
12786   effect(DEF cr, USE op1, USE zero);
12787 
12788   ins_cost(INSN_COST);
12789   format %{ "cmp  $op1, 0\t // ptr" %}
12790 
12791   ins_encode(aarch64_enc_testp(op1));
12792 
12793   ins_pipe(icmp_reg_imm);
12794 %}
12795 
12796 instruct testN_reg(rFlagsRegU cr, iRegN op1, immN0 zero)
12797 %{
12798   match(Set cr (CmpN op1 zero));
12799 
12800   effect(DEF cr, USE op1, USE zero);
12801 
12802   ins_cost(INSN_COST);
12803   format %{ "cmp  $op1, 0\t // compressed ptr" %}
12804 
12805   ins_encode(aarch64_enc_testn(op1));
12806 
12807   ins_pipe(icmp_reg_imm);
12808 %}
12809 
12810 // FP comparisons
12811 //
12812 // n.b. CmpF/CmpD set a normal flags reg which then gets compared
12813 // using normal cmpOp. See declaration of rFlagsReg for details.
12814 
12815 instruct compF_reg_reg(rFlagsReg cr, vRegF src1, vRegF src2)
12816 %{
12817   match(Set cr (CmpF src1 src2));
12818 
12819   ins_cost(3 * INSN_COST);
12820   format %{ "fcmps $src1, $src2" %}
12821 
12822   ins_encode %{
12823     __ fcmps(as_FloatRegister($src1$$reg), as_FloatRegister($src2$$reg));
12824   %}
12825 
12826   ins_pipe(pipe_class_compare);
12827 %}
12828 
12829 instruct compF_reg_zero(rFlagsReg cr, vRegF src1, immF0 src2)
12830 %{
12831   match(Set cr (CmpF src1 src2));
12832 
12833   ins_cost(3 * INSN_COST);
12834   format %{ "fcmps $src1, 0.0" %}
12835 
12836   ins_encode %{
12837     __ fcmps(as_FloatRegister($src1$$reg), 0.0);
12838   %}
12839 
12840   ins_pipe(pipe_class_compare);
12841 %}
// Double (64-bit) FP comparisons
12843 
12844 instruct compD_reg_reg(rFlagsReg cr, vRegD src1, vRegD src2)
12845 %{
12846   match(Set cr (CmpD src1 src2));
12847 
12848   ins_cost(3 * INSN_COST);
12849   format %{ "fcmpd $src1, $src2" %}
12850 
12851   ins_encode %{
12852     __ fcmpd(as_FloatRegister($src1$$reg), as_FloatRegister($src2$$reg));
12853   %}
12854 
12855   ins_pipe(pipe_class_compare);
12856 %}
12857 
12858 instruct compD_reg_zero(rFlagsReg cr, vRegD src1, immD0 src2)
12859 %{
12860   match(Set cr (CmpD src1 src2));
12861 
12862   ins_cost(3 * INSN_COST);
12863   format %{ "fcmpd $src1, 0.0" %}
12864 
12865   ins_encode %{
12866     __ fcmpd(as_FloatRegister($src1$$reg), 0.0);
12867   %}
12868 
12869   ins_pipe(pipe_class_compare);
12870 %}
12871 
// Manifest a CmpF3 result in an integer register:
// -1 if less or unordered, 0 if equal, +1 if greater.
instruct compF3_reg_reg(iRegINoSp dst, vRegF src1, vRegF src2, rFlagsReg cr)
%{
  match(Set dst (CmpF3 src1 src2));
  effect(KILL cr);

  ins_cost(5 * INSN_COST);
  format %{ "fcmps $src1, $src2\n\t"
            "csinvw($dst, zr, zr, eq)\n\t"
            "csnegw($dst, $dst, $dst, lt)"
  %}

  ins_encode %{
    FloatRegister s1 = as_FloatRegister($src1$$reg);
    FloatRegister s2 = as_FloatRegister($src2$$reg);
    Register d = as_Register($dst$$reg);
    __ fcmps(s1, s2);
    // installs 0 if EQ else -1
    __ csinvw(d, zr, zr, Assembler::EQ);
    // keeps -1 if less or unordered else installs 1
    __ csnegw(d, d, d, Assembler::LT);
  %}

  ins_pipe(pipe_class_default);
%}
12899 
// Manifest a CmpD3 result in an integer register:
// -1 if less or unordered, 0 if equal, +1 if greater.
instruct compD3_reg_reg(iRegINoSp dst, vRegD src1, vRegD src2, rFlagsReg cr)
%{
  match(Set dst (CmpD3 src1 src2));
  effect(KILL cr);

  ins_cost(5 * INSN_COST);
  format %{ "fcmpd $src1, $src2\n\t"
            "csinvw($dst, zr, zr, eq)\n\t"
            "csnegw($dst, $dst, $dst, lt)"
  %}

  ins_encode %{
    FloatRegister s1 = as_FloatRegister($src1$$reg);
    FloatRegister s2 = as_FloatRegister($src2$$reg);
    Register d = as_Register($dst$$reg);
    __ fcmpd(s1, s2);
    // installs 0 if EQ else -1
    __ csinvw(d, zr, zr, Assembler::EQ);
    // keeps -1 if less or unordered else installs 1
    __ csnegw(d, d, d, Assembler::LT);
  %}

  ins_pipe(pipe_class_default);
%}
12926 
// Manifest a CmpF3 result against constant 0.0 in an integer register:
// -1 if less or unordered, 0 if equal, +1 if greater.
instruct compF3_reg_immF0(iRegINoSp dst, vRegF src1, immF0 zero, rFlagsReg cr)
%{
  match(Set dst (CmpF3 src1 zero));
  effect(KILL cr);

  ins_cost(5 * INSN_COST);
  format %{ "fcmps $src1, 0.0\n\t"
            "csinvw($dst, zr, zr, eq)\n\t"
            "csnegw($dst, $dst, $dst, lt)"
  %}

  ins_encode %{
    FloatRegister s1 = as_FloatRegister($src1$$reg);
    Register d = as_Register($dst$$reg);
    __ fcmps(s1, 0.0);
    // installs 0 if EQ else -1
    __ csinvw(d, zr, zr, Assembler::EQ);
    // keeps -1 if less or unordered else installs 1
    __ csnegw(d, d, d, Assembler::LT);
  %}

  ins_pipe(pipe_class_default);
%}
12953 
// Manifest a CmpD3 result against constant 0.0 in an integer register:
// -1 if less or unordered, 0 if equal, +1 if greater.
instruct compD3_reg_immD0(iRegINoSp dst, vRegD src1, immD0 zero, rFlagsReg cr)
%{
  match(Set dst (CmpD3 src1 zero));
  effect(KILL cr);

  ins_cost(5 * INSN_COST);
  format %{ "fcmpd $src1, 0.0\n\t"
            "csinvw($dst, zr, zr, eq)\n\t"
            "csnegw($dst, $dst, $dst, lt)"
  %}

  ins_encode %{
    FloatRegister s1 = as_FloatRegister($src1$$reg);
    Register d = as_Register($dst$$reg);
    __ fcmpd(s1, 0.0);
    // installs 0 if EQ else -1
    __ csinvw(d, zr, zr, Assembler::EQ);
    // keeps -1 if less or unordered else installs 1
    __ csnegw(d, d, d, Assembler::LT);
  %}

  ins_pipe(pipe_class_default);
%}
12979 
// Manifest a CmpL result in an integer register.
// (src1 < src2) ? -1 : ((src1 > src2) ? 1 : 0)
instruct cmpL3_reg_reg(iRegINoSp dst, iRegL src1, iRegL src2, rFlagsReg flags)
%{
  match(Set dst (CmpL3 src1 src2));
  effect(KILL flags);

  ins_cost(INSN_COST * 6);
  format %{ "cmp $src1, $src2\n\t"
            "csetw $dst, ne\n\t"
            "cnegw $dst, $dst, lt"
  %}
  ins_encode %{
    __ cmp($src1$$Register, $src2$$Register);
    // dst = (src1 != src2) ? 1 : 0
    __ csetw($dst$$Register, Assembler::NE);
    // negate dst when src1 < src2, yielding -1/0/+1
    __ cnegw($dst$$Register, $dst$$Register, Assembler::LT);
  %}

  ins_pipe(ialu_reg_reg);
%}
13002 
// CmpLTMask: dst = (p < q) ? -1 : 0 (an all-ones mask when less-than).
instruct cmpLTMask_reg_reg(iRegINoSp dst, iRegIorL2I p, iRegIorL2I q, rFlagsReg cr)
%{
  match(Set dst (CmpLTMask p q));
  effect(KILL cr);

  ins_cost(3 * INSN_COST);

  format %{ "cmpw $p, $q\t# cmpLTMask\n\t"
            "csetw $dst, lt\n\t"
            "subw $dst, zr, $dst"
  %}

  ins_encode %{
    __ cmpw(as_Register($p$$reg), as_Register($q$$reg));
    // dst = (p < q) ? 1 : 0, then dst = 0 - dst gives -1/0
    __ csetw(as_Register($dst$$reg), Assembler::LT);
    __ subw(as_Register($dst$$reg), zr, as_Register($dst$$reg));
  %}

  ins_pipe(ialu_reg_reg);
%}

// CmpLTMask against zero: arithmetic shift right by 31 replicates the
// sign bit, giving -1 when src < 0 and 0 otherwise in one instruction.
instruct cmpLTMask_reg_zero(iRegINoSp dst, iRegIorL2I src, immI0 zero, rFlagsReg cr)
%{
  match(Set dst (CmpLTMask src zero));
  effect(KILL cr);

  ins_cost(INSN_COST);

  format %{ "asrw $dst, $src, #31\t# cmpLTMask0" %}

  ins_encode %{
    __ asrw(as_Register($dst$$reg), as_Register($src$$reg), 31);
  %}

  ins_pipe(ialu_reg_shift);
%}
13039 
13040 // ============================================================================
13041 // Max and Min
13042 
// Signed int minimum: dst = (src1 < src2) ? src1 : src2 via cmpw + cselw.
instruct minI_rReg(iRegINoSp dst, iRegI src1, iRegI src2, rFlagsReg cr)
%{
  match(Set dst (MinI src1 src2));

  effect(DEF dst, USE src1, USE src2, KILL cr);
  size(8);

  ins_cost(INSN_COST * 3);
  format %{
    "cmpw $src1, $src2\t# signed int\n\t"
    "cselw $dst, $src1, $src2, lt"
  %}

  ins_encode %{
    __ cmpw(as_Register($src1$$reg),
            as_Register($src2$$reg));
    __ cselw(as_Register($dst$$reg),
             as_Register($src1$$reg),
             as_Register($src2$$reg),
             Assembler::LT);
  %}

  ins_pipe(ialu_reg_reg);
%}
13067 // FROM HERE
13068 
// Signed int maximum: dst = (src1 > src2) ? src1 : src2 via cmpw + cselw.
instruct maxI_rReg(iRegINoSp dst, iRegI src1, iRegI src2, rFlagsReg cr)
%{
  match(Set dst (MaxI src1 src2));

  effect(DEF dst, USE src1, USE src2, KILL cr);
  size(8);

  ins_cost(INSN_COST * 3);
  format %{
    "cmpw $src1, $src2\t# signed int\n\t"
    "cselw $dst, $src1, $src2, gt"
  %}

  ins_encode %{
    __ cmpw(as_Register($src1$$reg),
            as_Register($src2$$reg));
    __ cselw(as_Register($dst$$reg),
             as_Register($src1$$reg),
             as_Register($src2$$reg),
             Assembler::GT);
  %}

  ins_pipe(ialu_reg_reg);
%}
13093 
13094 // ============================================================================
13095 // Branch Instructions
13096 
13097 // Direct Branch.
// Unconditional direct branch to a label.
instruct branch(label lbl)
%{
  match(Goto);

  effect(USE lbl);

  ins_cost(BRANCH_COST);
  format %{ "b  $lbl" %}

  ins_encode(aarch64_enc_b(lbl));

  ins_pipe(pipe_branch);
%}

// Conditional Near Branch
instruct branchCon(cmpOp cmp, rFlagsReg cr, label lbl)
%{
  // Same match rule as `branchConFar'.
  match(If cmp cr);

  effect(USE lbl);

  ins_cost(BRANCH_COST);
  // If set to 1 this indicates that the current instruction is a
  // short variant of a long branch. This avoids using this
  // instruction in first-pass matching. It will then only be used in
  // the `Shorten_branches' pass.
  // ins_short_branch(1);
  format %{ "b$cmp  $lbl" %}

  ins_encode(aarch64_enc_br_con(cmp, lbl));

  ins_pipe(pipe_branch_cond);
%}

// Conditional Near Branch Unsigned
instruct branchConU(cmpOpU cmp, rFlagsRegU cr, label lbl)
%{
  // Same match rule as `branchConFar'.
  match(If cmp cr);

  effect(USE lbl);

  ins_cost(BRANCH_COST);
  // If set to 1 this indicates that the current instruction is a
  // short variant of a long branch. This avoids using this
  // instruction in first-pass matching. It will then only be used in
  // the `Shorten_branches' pass.
  // ins_short_branch(1);
  format %{ "b$cmp  $lbl\t# unsigned" %}

  ins_encode(aarch64_enc_br_conU(cmp, lbl));

  ins_pipe(pipe_branch_cond);
%}
13153 
13154 // Make use of CBZ and CBNZ.  These instructions, as well as being
13155 // shorter than (cmp; branch), have the additional benefit of not
13156 // killing the flags.
13157 
// Int eq/ne compare with zero fused into one cbzw/cbnzw.
instruct cmpI_imm0_branch(cmpOp cmp, iRegIorL2I op1, immI0 op2, label labl, rFlagsReg cr) %{
  match(If cmp (CmpI op1 op2));
  predicate(n->in(1)->as_Bool()->_test._test == BoolTest::ne
            || n->in(1)->as_Bool()->_test._test == BoolTest::eq);
  effect(USE labl);

  ins_cost(BRANCH_COST);
  format %{ "cbw$cmp   $op1, $labl" %}
  ins_encode %{
    Label* L = $labl$$label;
    Assembler::Condition cond = (Assembler::Condition)$cmp$$cmpcode;
    if (cond == Assembler::EQ)
      __ cbzw($op1$$Register, *L);
    else
      __ cbnzw($op1$$Register, *L);
  %}
  ins_pipe(pipe_cmp_branch);
%}

// Long eq/ne compare with zero fused into one cbz/cbnz.
instruct cmpL_imm0_branch(cmpOp cmp, iRegL op1, immL0 op2, label labl, rFlagsReg cr) %{
  match(If cmp (CmpL op1 op2));
  predicate(n->in(1)->as_Bool()->_test._test == BoolTest::ne
            || n->in(1)->as_Bool()->_test._test == BoolTest::eq);
  effect(USE labl);

  ins_cost(BRANCH_COST);
  format %{ "cb$cmp   $op1, $labl" %}
  ins_encode %{
    Label* L = $labl$$label;
    Assembler::Condition cond = (Assembler::Condition)$cmp$$cmpcode;
    if (cond == Assembler::EQ)
      __ cbz($op1$$Register, *L);
    else
      __ cbnz($op1$$Register, *L);
  %}
  ins_pipe(pipe_cmp_branch);
%}

// Pointer eq/ne compare with null fused into one cbz/cbnz.
instruct cmpP_imm0_branch(cmpOp cmp, iRegP op1, immP0 op2, label labl, rFlagsReg cr) %{
  match(If cmp (CmpP op1 op2));
  predicate(n->in(1)->as_Bool()->_test._test == BoolTest::ne
            || n->in(1)->as_Bool()->_test._test == BoolTest::eq);
  effect(USE labl);

  ins_cost(BRANCH_COST);
  format %{ "cb$cmp   $op1, $labl" %}
  ins_encode %{
    Label* L = $labl$$label;
    Assembler::Condition cond = (Assembler::Condition)$cmp$$cmpcode;
    if (cond == Assembler::EQ)
      __ cbz($op1$$Register, *L);
    else
      __ cbnz($op1$$Register, *L);
  %}
  ins_pipe(pipe_cmp_branch);
%}

// Narrow-oop eq/ne compare with zero fused into one cbzw/cbnzw.
instruct cmpN_imm0_branch(cmpOp cmp, iRegN op1, immN0 op2, label labl, rFlagsReg cr) %{
  match(If cmp (CmpN op1 op2));
  predicate(n->in(1)->as_Bool()->_test._test == BoolTest::ne
            || n->in(1)->as_Bool()->_test._test == BoolTest::eq);
  effect(USE labl);

  ins_cost(BRANCH_COST);
  format %{ "cbw$cmp   $op1, $labl" %}
  ins_encode %{
    Label* L = $labl$$label;
    Assembler::Condition cond = (Assembler::Condition)$cmp$$cmpcode;
    if (cond == Assembler::EQ)
      __ cbzw($op1$$Register, *L);
    else
      __ cbnzw($op1$$Register, *L);
  %}
  ins_pipe(pipe_cmp_branch);
%}

// Null check of a decoded narrow oop: test the narrow form directly,
// skipping the DecodeN (a narrow oop is zero iff the decoded oop is null).
instruct cmpP_narrowOop_imm0_branch(cmpOp cmp, iRegN oop, immP0 zero, label labl, rFlagsReg cr) %{
  match(If cmp (CmpP (DecodeN oop) zero));
  predicate(n->in(1)->as_Bool()->_test._test == BoolTest::ne
            || n->in(1)->as_Bool()->_test._test == BoolTest::eq);
  effect(USE labl);

  ins_cost(BRANCH_COST);
  format %{ "cb$cmp   $oop, $labl" %}
  ins_encode %{
    Label* L = $labl$$label;
    Assembler::Condition cond = (Assembler::Condition)$cmp$$cmpcode;
    if (cond == Assembler::EQ)
      __ cbzw($oop$$Register, *L);
    else
      __ cbnzw($oop$$Register, *L);
  %}
  ins_pipe(pipe_cmp_branch);
%}
13252 
// Unsigned int compare with zero: for unsigned x, x <= 0 is x == 0 and
// x > 0 is x != 0, so gt/le also reduce to a single cbzw/cbnzw.
// NOTE(review): the encoder treats LS like EQ — presumably cmpOpU encodes
// `le` as LS and `gt` as HI; verify against the cmpOpU operand definition.
instruct cmpUI_imm0_branch(cmpOpU cmp, iRegIorL2I op1, immI0 op2, label labl, rFlagsRegU cr) %{
  match(If cmp (CmpU op1 op2));
  predicate(n->in(1)->as_Bool()->_test._test == BoolTest::ne
            || n->in(1)->as_Bool()->_test._test == BoolTest::eq
            || n->in(1)->as_Bool()->_test._test == BoolTest::gt
            ||  n->in(1)->as_Bool()->_test._test == BoolTest::le);
  effect(USE labl);

  ins_cost(BRANCH_COST);
  format %{ "cbw$cmp   $op1, $labl" %}
  ins_encode %{
    Label* L = $labl$$label;
    Assembler::Condition cond = (Assembler::Condition)$cmp$$cmpcode;
    if (cond == Assembler::EQ || cond == Assembler::LS)
      __ cbzw($op1$$Register, *L);
    else
      __ cbnzw($op1$$Register, *L);
  %}
  ins_pipe(pipe_cmp_branch);
%}

// Unsigned long compare with zero; same reduction as cmpUI_imm0_branch.
instruct cmpUL_imm0_branch(cmpOpU cmp, iRegL op1, immL0 op2, label labl, rFlagsRegU cr) %{
  match(If cmp (CmpUL op1 op2));
  predicate(n->in(1)->as_Bool()->_test._test == BoolTest::ne
            || n->in(1)->as_Bool()->_test._test == BoolTest::eq
            || n->in(1)->as_Bool()->_test._test == BoolTest::gt
            || n->in(1)->as_Bool()->_test._test == BoolTest::le);
  effect(USE labl);

  ins_cost(BRANCH_COST);
  format %{ "cb$cmp   $op1, $labl" %}
  ins_encode %{
    Label* L = $labl$$label;
    Assembler::Condition cond = (Assembler::Condition)$cmp$$cmpcode;
    if (cond == Assembler::EQ || cond == Assembler::LS)
      __ cbz($op1$$Register, *L);
    else
      __ cbnz($op1$$Register, *L);
  %}
  ins_pipe(pipe_cmp_branch);
%}
13294 
13295 // Test bit and Branch
13296 
13297 // Patterns for short (< 32KiB) variants
// Long sign test: x < 0 iff bit 63 is set, so lt/ge against zero become
// a single tbnz/tbz on the sign bit (NE = bit set, EQ = bit clear).
instruct cmpL_branch_sign(cmpOp cmp, iRegL op1, immL0 op2, label labl) %{
  match(If cmp (CmpL op1 op2));
  predicate(n->in(1)->as_Bool()->_test._test == BoolTest::lt
            || n->in(1)->as_Bool()->_test._test == BoolTest::ge);
  effect(USE labl);

  ins_cost(BRANCH_COST);
  format %{ "cb$cmp   $op1, $labl # long" %}
  ins_encode %{
    Label* L = $labl$$label;
    Assembler::Condition cond =
      ((Assembler::Condition)$cmp$$cmpcode == Assembler::LT) ? Assembler::NE : Assembler::EQ;
    __ tbr(cond, $op1$$Register, 63, *L);
  %}
  ins_pipe(pipe_cmp_branch);
  ins_short_branch(1);
%}

// Int sign test: same trick as cmpL_branch_sign, on bit 31.
instruct cmpI_branch_sign(cmpOp cmp, iRegIorL2I op1, immI0 op2, label labl) %{
  match(If cmp (CmpI op1 op2));
  predicate(n->in(1)->as_Bool()->_test._test == BoolTest::lt
            || n->in(1)->as_Bool()->_test._test == BoolTest::ge);
  effect(USE labl);

  ins_cost(BRANCH_COST);
  format %{ "cb$cmp   $op1, $labl # int" %}
  ins_encode %{
    Label* L = $labl$$label;
    Assembler::Condition cond =
      ((Assembler::Condition)$cmp$$cmpcode == Assembler::LT) ? Assembler::NE : Assembler::EQ;
    __ tbr(cond, $op1$$Register, 31, *L);
  %}
  ins_pipe(pipe_cmp_branch);
  ins_short_branch(1);
%}

// Single-bit test of a long: (op1 & (1 << bit)) eq/ne 0 becomes tbz/tbnz.
// The predicate requires the AND mask to be a power of two.
instruct cmpL_branch_bit(cmpOp cmp, iRegL op1, immL op2, immL0 op3, label labl) %{
  match(If cmp (CmpL (AndL op1 op2) op3));
  predicate((n->in(1)->as_Bool()->_test._test == BoolTest::ne
            || n->in(1)->as_Bool()->_test._test == BoolTest::eq)
            && is_power_of_2(n->in(2)->in(1)->in(2)->get_long()));
  effect(USE labl);

  ins_cost(BRANCH_COST);
  format %{ "tb$cmp   $op1, $op2, $labl" %}
  ins_encode %{
    Label* L = $labl$$label;
    Assembler::Condition cond = (Assembler::Condition)$cmp$$cmpcode;
    int bit = exact_log2($op2$$constant);
    __ tbr(cond, $op1$$Register, bit, *L);
  %}
  ins_pipe(pipe_cmp_branch);
  ins_short_branch(1);
%}

// Single-bit test of an int; see cmpL_branch_bit.
instruct cmpI_branch_bit(cmpOp cmp, iRegIorL2I op1, immI op2, immI0 op3, label labl) %{
  match(If cmp (CmpI (AndI op1 op2) op3));
  predicate((n->in(1)->as_Bool()->_test._test == BoolTest::ne
            || n->in(1)->as_Bool()->_test._test == BoolTest::eq)
            && is_power_of_2(n->in(2)->in(1)->in(2)->get_int()));
  effect(USE labl);

  ins_cost(BRANCH_COST);
  format %{ "tb$cmp   $op1, $op2, $labl" %}
  ins_encode %{
    Label* L = $labl$$label;
    Assembler::Condition cond = (Assembler::Condition)$cmp$$cmpcode;
    int bit = exact_log2($op2$$constant);
    __ tbr(cond, $op1$$Register, bit, *L);
  %}
  ins_pipe(pipe_cmp_branch);
  ins_short_branch(1);
%}
13371 
13372 // And far variants
// Far variant of cmpL_branch_sign: the /*far*/true flag lets the
// assembler emit an out-of-range-safe branch sequence.
instruct far_cmpL_branch_sign(cmpOp cmp, iRegL op1, immL0 op2, label labl) %{
  match(If cmp (CmpL op1 op2));
  predicate(n->in(1)->as_Bool()->_test._test == BoolTest::lt
            || n->in(1)->as_Bool()->_test._test == BoolTest::ge);
  effect(USE labl);

  ins_cost(BRANCH_COST);
  format %{ "cb$cmp   $op1, $labl # long" %}
  ins_encode %{
    Label* L = $labl$$label;
    Assembler::Condition cond =
      ((Assembler::Condition)$cmp$$cmpcode == Assembler::LT) ? Assembler::NE : Assembler::EQ;
    __ tbr(cond, $op1$$Register, 63, *L, /*far*/true);
  %}
  ins_pipe(pipe_cmp_branch);
%}

// Far variant of cmpI_branch_sign (sign bit 31).
instruct far_cmpI_branch_sign(cmpOp cmp, iRegIorL2I op1, immI0 op2, label labl) %{
  match(If cmp (CmpI op1 op2));
  predicate(n->in(1)->as_Bool()->_test._test == BoolTest::lt
            || n->in(1)->as_Bool()->_test._test == BoolTest::ge);
  effect(USE labl);

  ins_cost(BRANCH_COST);
  format %{ "cb$cmp   $op1, $labl # int" %}
  ins_encode %{
    Label* L = $labl$$label;
    Assembler::Condition cond =
      ((Assembler::Condition)$cmp$$cmpcode == Assembler::LT) ? Assembler::NE : Assembler::EQ;
    __ tbr(cond, $op1$$Register, 31, *L, /*far*/true);
  %}
  ins_pipe(pipe_cmp_branch);
%}

// Far variant of cmpL_branch_bit (power-of-two mask bit test).
instruct far_cmpL_branch_bit(cmpOp cmp, iRegL op1, immL op2, immL0 op3, label labl) %{
  match(If cmp (CmpL (AndL op1 op2) op3));
  predicate((n->in(1)->as_Bool()->_test._test == BoolTest::ne
            || n->in(1)->as_Bool()->_test._test == BoolTest::eq)
            && is_power_of_2(n->in(2)->in(1)->in(2)->get_long()));
  effect(USE labl);

  ins_cost(BRANCH_COST);
  format %{ "tb$cmp   $op1, $op2, $labl" %}
  ins_encode %{
    Label* L = $labl$$label;
    Assembler::Condition cond = (Assembler::Condition)$cmp$$cmpcode;
    int bit = exact_log2($op2$$constant);
    __ tbr(cond, $op1$$Register, bit, *L, /*far*/true);
  %}
  ins_pipe(pipe_cmp_branch);
%}

// Far variant of cmpI_branch_bit.
instruct far_cmpI_branch_bit(cmpOp cmp, iRegIorL2I op1, immI op2, immI0 op3, label labl) %{
  match(If cmp (CmpI (AndI op1 op2) op3));
  predicate((n->in(1)->as_Bool()->_test._test == BoolTest::ne
            || n->in(1)->as_Bool()->_test._test == BoolTest::eq)
            && is_power_of_2(n->in(2)->in(1)->in(2)->get_int()));
  effect(USE labl);

  ins_cost(BRANCH_COST);
  format %{ "tb$cmp   $op1, $op2, $labl" %}
  ins_encode %{
    Label* L = $labl$$label;
    Assembler::Condition cond = (Assembler::Condition)$cmp$$cmpcode;
    int bit = exact_log2($op2$$constant);
    __ tbr(cond, $op1$$Register, bit, *L, /*far*/true);
  %}
  ins_pipe(pipe_cmp_branch);
%}
13442 
13443 // Test bits
13444 
// Set flags from (op1 & imm) vs 0 using a single tst; the predicate
// requires the mask to be encodable as a 64-bit logical immediate.
instruct cmpL_and(cmpOp cmp, iRegL op1, immL op2, immL0 op3, rFlagsReg cr) %{
  match(Set cr (CmpL (AndL op1 op2) op3));
  predicate(Assembler::operand_valid_for_logical_immediate
            (/*is_32*/false, n->in(1)->in(2)->get_long()));

  ins_cost(INSN_COST);
  format %{ "tst $op1, $op2 # long" %}
  ins_encode %{
    __ tst($op1$$Register, $op2$$constant);
  %}
  ins_pipe(ialu_reg_reg);
%}
13457 
// Set flags from (op1 & imm) vs 0 using a single 32-bit tstw; the
// predicate requires the mask to be a valid 32-bit logical immediate.
instruct cmpI_and(cmpOp cmp, iRegIorL2I op1, immI op2, immI0 op3, rFlagsReg cr) %{
  match(Set cr (CmpI (AndI op1 op2) op3));
  predicate(Assembler::operand_valid_for_logical_immediate
            (/*is_32*/true, n->in(1)->in(2)->get_int()));

  ins_cost(INSN_COST);
  format %{ "tstw $op1, $op2 # int" %}
  ins_encode %{
    __ tstw($op1$$Register, $op2$$constant);
  %}
  ins_pipe(ialu_reg_reg);
%}
13470 
// Register-register form: flags from (op1 & op2) vs 0 via tst.
instruct cmpL_and_reg(cmpOp cmp, iRegL op1, iRegL op2, immL0 op3, rFlagsReg cr) %{
  match(Set cr (CmpL (AndL op1 op2) op3));

  ins_cost(INSN_COST);
  format %{ "tst $op1, $op2 # long" %}
  ins_encode %{
    __ tst($op1$$Register, $op2$$Register);
  %}
  ins_pipe(ialu_reg_reg);
%}

// 32-bit register-register form: flags from (op1 & op2) vs 0 via tstw.
instruct cmpI_and_reg(cmpOp cmp, iRegIorL2I op1, iRegIorL2I op2, immI0 op3, rFlagsReg cr) %{
  match(Set cr (CmpI (AndI op1 op2) op3));

  ins_cost(INSN_COST);
  format %{ "tstw $op1, $op2 # int" %}
  ins_encode %{
    __ tstw($op1$$Register, $op2$$Register);
  %}
  ins_pipe(ialu_reg_reg);
%}
13492 
13493 
13494 // Conditional Far Branch
13495 // Conditional Far Branch Unsigned
13496 // TODO: fixme
13497 
13498 // counted loop end branch near
// Conditional branch closing a counted loop (signed condition).
instruct branchLoopEnd(cmpOp cmp, rFlagsReg cr, label lbl)
%{
  match(CountedLoopEnd cmp cr);

  effect(USE lbl);

  ins_cost(BRANCH_COST);
  // short variant.
  // ins_short_branch(1);
  format %{ "b$cmp $lbl \t// counted loop end" %}

  ins_encode(aarch64_enc_br_con(cmp, lbl));

  ins_pipe(pipe_branch);
%}

// Conditional branch closing a counted loop (unsigned condition).
instruct branchLoopEndU(cmpOpU cmp, rFlagsRegU cr, label lbl)
%{
  match(CountedLoopEnd cmp cr);

  effect(USE lbl);

  ins_cost(BRANCH_COST);
  // short variant.
  // ins_short_branch(1);
  format %{ "b$cmp $lbl \t// counted loop end unsigned" %}

  ins_encode(aarch64_enc_br_conU(cmp, lbl));

  ins_pipe(pipe_branch);
%}
13531 
13532 // counted loop end branch far
13533 // counted loop end branch far unsigned
13534 // TODO: fixme
13535 
13536 // ============================================================================
13537 // inlined locking and unlocking
13538 
// Inlined fast-path monitor enter; sets flags for the slow-path branch.
// tmp and tmp2 are scratch registers clobbered by the lock sequence.
instruct cmpFastLock(rFlagsReg cr, iRegP object, iRegP box, iRegPNoSp tmp, iRegPNoSp tmp2)
%{
  match(Set cr (FastLock object box));
  effect(TEMP tmp, TEMP tmp2);

  // TODO
  // identify correct cost
  ins_cost(5 * INSN_COST);
  format %{ "fastlock $object,$box\t! kills $tmp,$tmp2" %}

  ins_encode(aarch64_enc_fast_lock(object, box, tmp, tmp2));

  ins_pipe(pipe_serial);
%}

// Inlined fast-path monitor exit; mirror of cmpFastLock.
instruct cmpFastUnlock(rFlagsReg cr, iRegP object, iRegP box, iRegPNoSp tmp, iRegPNoSp tmp2)
%{
  match(Set cr (FastUnlock object box));
  effect(TEMP tmp, TEMP tmp2);

  ins_cost(5 * INSN_COST);
  format %{ "fastunlock $object,$box\t! kills $tmp, $tmp2" %}

  ins_encode(aarch64_enc_fast_unlock(object, box, tmp, tmp2));

  ins_pipe(pipe_serial);
%}
13566 
13567 
13568 // ============================================================================
13569 // Safepoint Instructions
13570 
13571 // TODO
13572 // provide a near and far version of this code
13573 
// Safepoint poll: load from the polling page; the read faults when a
// safepoint is pending, trapping into the VM.
instruct safePoint(rFlagsReg cr, iRegP poll)
%{
  match(SafePoint poll);
  effect(KILL cr);

  format %{
    "ldrw zr, [$poll]\t# Safepoint: poll for GC"
  %}
  ins_encode %{
    __ read_polling_page(as_Register($poll$$reg), relocInfo::poll_type);
  %}
  ins_pipe(pipe_serial); // ins_pipe(iload_reg_mem);
%}
13587 
13588 
13589 // ============================================================================
13590 // Procedure Call/Return Instructions
13591 
13592 // Call Java Static Instruction
13593 
// Direct static Java call (non-method-handle invokes only; the method
// handle case is handled by CallStaticJavaDirectHandle).
instruct CallStaticJavaDirect(method meth)
%{
  match(CallStaticJava);

  effect(USE meth);

  predicate(!((CallStaticJavaNode*)n)->is_method_handle_invoke());

  ins_cost(CALL_COST);

  format %{ "call,static $meth \t// ==> " %}

  ins_encode( aarch64_enc_java_static_call(meth),
              aarch64_enc_call_epilog );

  ins_pipe(pipe_class_call);
%}
13611 
13612 // TO HERE
13613 
13614 // Call Java Static Instruction (method handle version)
13615 
// Static Java call for method-handle invokes; complement of the
// predicate on CallStaticJavaDirect.
instruct CallStaticJavaDirectHandle(method meth, iRegP_FP reg_mh_save)
%{
  match(CallStaticJava);

  effect(USE meth);

  predicate(((CallStaticJavaNode*)n)->is_method_handle_invoke());

  ins_cost(CALL_COST);

  format %{ "call,static $meth \t// (methodhandle) ==> " %}

  ins_encode( aarch64_enc_java_handle_call(meth),
              aarch64_enc_call_epilog );

  ins_pipe(pipe_class_call);
%}

// Call Java Dynamic Instruction
instruct CallDynamicJavaDirect(method meth)
%{
  match(CallDynamicJava);

  effect(USE meth);

  ins_cost(CALL_COST);

  format %{ "CALL,dynamic $meth \t// ==> " %}

  ins_encode( aarch64_enc_java_dynamic_call(meth),
               aarch64_enc_call_epilog );

  ins_pipe(pipe_class_call);
%}

// Call into the VM runtime.
instruct CallRuntimeDirect(method meth)
%{
  match(CallRuntime);

  effect(USE meth);

  ins_cost(CALL_COST);

  format %{ "CALL, runtime $meth" %}

  ins_encode( aarch64_enc_java_to_runtime(meth) );

  ins_pipe(pipe_class_call);
%}

// Call a runtime leaf routine (no safepoint, no Java frame).
instruct CallLeafDirect(method meth)
%{
  match(CallLeaf);

  effect(USE meth);

  ins_cost(CALL_COST);

  format %{ "CALL, runtime leaf $meth" %}

  ins_encode( aarch64_enc_java_to_runtime(meth) );

  ins_pipe(pipe_class_call);
%}

// Call a runtime leaf routine that does not use FP state.
instruct CallLeafNoFPDirect(method meth)
%{
  match(CallLeafNoFP);

  effect(USE meth);

  ins_cost(CALL_COST);

  format %{ "CALL, runtime leaf nofp $meth" %}

  ins_encode( aarch64_enc_java_to_runtime(meth) );

  ins_pipe(pipe_class_call);
%}
13701 
13702 // Tail Call; Jump from runtime stub to Java code.
13703 // Also known as an 'interprocedural jump'.
13704 // Target of jump will eventually return to caller.
13705 // TailJump below removes the return address.
// Indirect tail call: jump (not call) into Java code; the target will
// eventually return to this frame's caller.
instruct TailCalljmpInd(iRegPNoSp jump_target, inline_cache_RegP method_oop)
%{
  match(TailCall jump_target method_oop);

  ins_cost(CALL_COST);

  format %{ "br $jump_target\t# $method_oop holds method oop" %}

  ins_encode(aarch64_enc_tail_call(jump_target));

  ins_pipe(pipe_class_call);
%}

// Indirect tail jump used for exception forwarding; r0 carries the
// exception oop.
instruct TailjmpInd(iRegPNoSp jump_target, iRegP_R0 ex_oop)
%{
  match(TailJump jump_target ex_oop);

  ins_cost(CALL_COST);

  format %{ "br $jump_target\t# $ex_oop holds exception oop" %}

  ins_encode(aarch64_enc_tail_jmp(jump_target));

  ins_pipe(pipe_class_call);
%}
13731 
13732 // Create exception oop: created by stack-crawling runtime code.
13733 // Created exception is now available to this handler, and is setup
13734 // just prior to jumping to this handler. No code emitted.
13735 // TODO check
13736 // should ex_oop be in r0? intel uses rax, ppc cannot use r0 so uses rarg1
// Bind the exception oop (already placed in r0 by the stack-crawling
// runtime) to the matcher; emits no code.
instruct CreateException(iRegP_R0 ex_oop)
%{
  match(Set ex_oop (CreateEx));

  format %{ " -- \t// exception oop; no code emitted" %}

  size(0);

  ins_encode( /*empty*/ );

  ins_pipe(pipe_class_empty);
%}

// Rethrow exception: The exception oop will come in the first
// argument position. Then JUMP (not call) to the rethrow stub code.
instruct RethrowException() %{
  match(Rethrow);
  ins_cost(CALL_COST);

  format %{ "b rethrow_stub" %}

  ins_encode( aarch64_enc_rethrow() );

  ins_pipe(pipe_class_call);
%}
13762 
13763 
13764 // Return Instruction
13765 // epilog node loads ret address into lr as part of frame pop
// Method return; the epilog has already restored lr from the frame.
instruct Ret()
%{
  match(Return);

  format %{ "ret\t// return register" %}

  ins_encode( aarch64_enc_ret() );

  ins_pipe(pipe_branch);
%}

// Halt: emit a breakpoint trap for paths that must never execute.
instruct ShouldNotReachHere() %{
  match(Halt);

  ins_cost(CALL_COST);
  format %{ "ShouldNotReachHere" %}

  ins_encode %{
    // TODO
    // implement proper trap call here
    __ brk(999);
  %}

  ins_pipe(pipe_class_default);
%}
13792 
13793 // ============================================================================
// Partial Subtype Check
//
// Search the subklass's secondary superclass array for an instance of
// the superklass.  Set a hidden internal cache on a hit (cache is
// checked with exposed code in gen_subtype_check()).  Return NZ for a
// miss or zero for a hit.  The encoding ALSO sets flags.
13800 
// Subtype check producing a result register (zeroed on hit, per the
// opcode(0x1) flag consumed by the encoding).
instruct partialSubtypeCheck(iRegP_R4 sub, iRegP_R0 super, iRegP_R2 temp, iRegP_R5 result, rFlagsReg cr)
%{
  match(Set result (PartialSubtypeCheck sub super));
  effect(KILL cr, KILL temp);

  ins_cost(1100);  // slightly larger than the next version
  format %{ "partialSubtypeCheck $result, $sub, $super" %}

  ins_encode(aarch64_enc_partial_subtype_check(sub, super, temp, result));

  opcode(0x1); // Force zero of result reg on hit

  ins_pipe(pipe_class_memory);
%}

// Subtype check used only for its flags (comparison of the check result
// against null); result and temp are clobbered scratch.
instruct partialSubtypeCheckVsZero(iRegP_R4 sub, iRegP_R0 super, iRegP_R2 temp, iRegP_R5 result, immP0 zero, rFlagsReg cr)
%{
  match(Set cr (CmpP (PartialSubtypeCheck sub super) zero));
  effect(KILL temp, KILL result);

  ins_cost(1100);  // slightly larger than the next version
  format %{ "partialSubtypeCheck $result, $sub, $super == 0" %}

  ins_encode(aarch64_enc_partial_subtype_check(sub, super, temp, result));

  opcode(0x0); // Don't zero result reg on hit

  ins_pipe(pipe_class_memory);
%}
13830 
// Lexicographic comparison of two strings via
// MacroAssembler::string_compare.  All fixed input registers are
// consumed by the helper (USE_KILL); tmp1 and the flags are clobbered.
instruct string_compare(iRegP_R1 str1, iRegI_R2 cnt1, iRegP_R3 str2, iRegI_R4 cnt2,
                        iRegI_R0 result, iRegP_R10 tmp1, rFlagsReg cr)
%{
  match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
  effect(KILL tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);

  format %{ "String Compare $str1,$cnt1,$str2,$cnt2 -> $result   # KILL $tmp1" %}
  ins_encode %{
    __ string_compare($str1$$Register, $str2$$Register,
                      $cnt1$$Register, $cnt2$$Register, $result$$Register,
                      $tmp1$$Register);
  %}
  ins_pipe(pipe_class_memory);
%}
13845 
// Search for $str2 (length $cnt2) within $str1 (length $cnt1).
// The -1 passed as the constant-count argument selects the
// variable-length path of MacroAssembler::string_indexof; contrast
// string_indexof_con below, which passes the constant directly.
instruct string_indexof(iRegP_R1 str1, iRegI_R4 cnt1, iRegP_R3 str2, iRegI_R2 cnt2,
       iRegI_R0 result, iRegINoSp tmp1, iRegINoSp tmp2, iRegINoSp tmp3, iRegINoSp tmp4, rFlagsReg cr)
%{
  match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 cnt2)));
  effect(USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2,
         TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, KILL cr);
  format %{ "String IndexOf $str1,$cnt1,$str2,$cnt2 -> $result" %}

  ins_encode %{
    __ string_indexof($str1$$Register, $str2$$Register,
                      $cnt1$$Register, $cnt2$$Register,
                      $tmp1$$Register, $tmp2$$Register,
                      $tmp3$$Register, $tmp4$$Register,
                      -1, $result$$Register);
  %}
  ins_pipe(pipe_class_memory);
%}
13863 
// IndexOf specialization for a small constant needle length
// (immI_le_4, i.e. at most 4).  Because the count is a compile-time
// constant, zr is passed in place of a cnt2 register and the constant
// is forwarded as icnt2.
instruct string_indexof_con(iRegP_R1 str1, iRegI_R4 cnt1, iRegP_R3 str2,
                 immI_le_4 int_cnt2, iRegI_R0 result, iRegINoSp tmp1, iRegINoSp tmp2,
                 iRegINoSp tmp3, iRegINoSp tmp4, rFlagsReg cr)
%{
  match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 int_cnt2)));
  effect(USE_KILL str1, USE_KILL str2, USE_KILL cnt1,
         TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, KILL cr);
  format %{ "String IndexOf $str1,$cnt1,$str2,$int_cnt2 -> $result" %}

  ins_encode %{
    int icnt2 = (int)$int_cnt2$$constant;
    __ string_indexof($str1$$Register, $str2$$Register,
                      $cnt1$$Register, zr,
                      $tmp1$$Register, $tmp2$$Register,
                      $tmp3$$Register, $tmp4$$Register,
                      icnt2, $result$$Register);
  %}
  ins_pipe(pipe_class_memory);
%}
13883 
// Equality test of two strings of length $cnt via
// MacroAssembler::string_equals; tmp and the flags are clobbered and
// all fixed inputs are consumed.
instruct string_equals(iRegP_R1 str1, iRegP_R3 str2, iRegI_R4 cnt,
                        iRegI_R0 result, iRegP_R10 tmp, rFlagsReg cr)
%{
  match(Set result (StrEquals (Binary str1 str2) cnt));
  effect(KILL tmp, USE_KILL str1, USE_KILL str2, USE_KILL cnt, KILL cr);

  format %{ "String Equals $str1,$str2,$cnt -> $result    // KILL $tmp" %}
  ins_encode %{
    __ string_equals($str1$$Register, $str2$$Register,
                      $cnt$$Register, $result$$Register,
                      $tmp$$Register);
  %}
  ins_pipe(pipe_class_memory);
%}
13898 
// Equality test of two char arrays via
// MacroAssembler::char_arrays_equals; tmp and the flags are clobbered
// and both array pointers are consumed.
instruct array_equals(iRegP_R1 ary1, iRegP_R2 ary2, iRegI_R0 result,
                      iRegP_R10 tmp, rFlagsReg cr)
%{
  match(Set result (AryEq ary1 ary2));
  effect(KILL tmp, USE_KILL ary1, USE_KILL ary2, KILL cr);

  // Fix: "ary2" was missing its '$', so the second operand was never
  // substituted in the debug format output.
  format %{ "Array Equals $ary1,$ary2 -> $result    // KILL $tmp" %}
  ins_encode %{
    __ char_arrays_equals($ary1$$Register, $ary2$$Register,
                          $result$$Register, $tmp$$Register);
  %}
  ins_pipe(pipe_class_memory);
%}
13912 
// encode char[] to byte[] in ISO_8859_1
// Uses four vector temporaries (V0-V3) and consumes src/dst/len.
// NOTE(review): $result presumably holds the number of characters
// encoded — confirm against MacroAssembler::encode_iso_array.
instruct encode_iso_array(iRegP_R2 src, iRegP_R1 dst, iRegI_R3 len,
                          vRegD_V0 Vtmp1, vRegD_V1 Vtmp2,
                          vRegD_V2 Vtmp3, vRegD_V3 Vtmp4,
                          iRegI_R0 result, rFlagsReg cr)
%{
  match(Set result (EncodeISOArray src (Binary dst len)));
  effect(USE_KILL src, USE_KILL dst, USE_KILL len,
         KILL Vtmp1, KILL Vtmp2, KILL Vtmp3, KILL Vtmp4, KILL cr);

  format %{ "Encode array $src,$dst,$len -> $result" %}
  ins_encode %{
    __ encode_iso_array($src$$Register, $dst$$Register, $len$$Register,
         $result$$Register, $Vtmp1$$FloatRegister,  $Vtmp2$$FloatRegister,
         $Vtmp3$$FloatRegister,  $Vtmp4$$FloatRegister);
  %}
  ins_pipe( pipe_class_memory );
%}
13931 
13932 // ============================================================================
13933 // This name is KNOWN by the ADLC and cannot be changed.
13934 // The ADLC forces a 'TypeRawPtr::BOTTOM' output type
13935 // for this guy.
// Zero-size instruction: the thread_RegP operand class pins $dst to
// the dedicated thread register, which already holds Thread::current(),
// so no code needs to be emitted (size(0), empty encoding).
instruct tlsLoadP(thread_RegP dst)
%{
  match(Set dst (ThreadLocal));

  ins_cost(0);

  format %{ " -- \t// $dst=Thread::current(), empty" %}

  size(0);

  ins_encode( /*empty*/ );

  ins_pipe(pipe_class_empty);
%}
13950 
13951 // ====================VECTOR INSTRUCTIONS=====================================
13952 
// Load vector (32 bits)
// ldrs loads a single word into the low 32 bits of the destination.
instruct loadV4(vecD dst, vmem4 mem)
%{
  predicate(n->as_LoadVector()->memory_size() == 4);
  match(Set dst (LoadVector mem));
  ins_cost(4 * INSN_COST);
  format %{ "ldrs   $dst,$mem\t# vector (32 bits)" %}
  ins_encode( aarch64_enc_ldrvS(dst, mem) );
  ins_pipe(vload_reg_mem64);
%}

// Load vector (64 bits)
// ldrd fills the whole vecD register.
instruct loadV8(vecD dst, vmem8 mem)
%{
  predicate(n->as_LoadVector()->memory_size() == 8);
  match(Set dst (LoadVector mem));
  ins_cost(4 * INSN_COST);
  format %{ "ldrd   $dst,$mem\t# vector (64 bits)" %}
  ins_encode( aarch64_enc_ldrvD(dst, mem) );
  ins_pipe(vload_reg_mem64);
%}

// Load Vector (128 bits)
// ldrq fills the whole vecX (Q) register.
instruct loadV16(vecX dst, vmem16 mem)
%{
  predicate(n->as_LoadVector()->memory_size() == 16);
  match(Set dst (LoadVector mem));
  ins_cost(4 * INSN_COST);
  format %{ "ldrq   $dst,$mem\t# vector (128 bits)" %}
  ins_encode( aarch64_enc_ldrvQ(dst, mem) );
  ins_pipe(vload_reg_mem128);
%}
13985 
// Store Vector (32 bits)
// strs stores the low word of the source vector.
instruct storeV4(vecD src, vmem4 mem)
%{
  predicate(n->as_StoreVector()->memory_size() == 4);
  match(Set mem (StoreVector mem src));
  ins_cost(4 * INSN_COST);
  format %{ "strs   $mem,$src\t# vector (32 bits)" %}
  ins_encode( aarch64_enc_strvS(src, mem) );
  ins_pipe(vstore_reg_mem64);
%}

// Store Vector (64 bits)
// strd stores the whole vecD register.
instruct storeV8(vecD src, vmem8 mem)
%{
  predicate(n->as_StoreVector()->memory_size() == 8);
  match(Set mem (StoreVector mem src));
  ins_cost(4 * INSN_COST);
  format %{ "strd   $mem,$src\t# vector (64 bits)" %}
  ins_encode( aarch64_enc_strvD(src, mem) );
  ins_pipe(vstore_reg_mem64);
%}

// Store Vector (128 bits)
// strq stores the whole vecX (Q) register.
instruct storeV16(vecX src, vmem16 mem)
%{
  predicate(n->as_StoreVector()->memory_size() == 16);
  match(Set mem (StoreVector mem src));
  ins_cost(4 * INSN_COST);
  format %{ "strq   $mem,$src\t# vector (128 bits)" %}
  ins_encode( aarch64_enc_strvQ(src, mem) );
  ins_pipe(vstore_reg_mem128);
%}
14018 
// Broadcast a GP-register byte across an 8B vector; the predicate also
// accepts 4-element byte vectors, which fit in the same D register.
instruct replicate8B(vecD dst, iRegIorL2I src)
%{
  predicate(n->as_Vector()->length() == 4 ||
            n->as_Vector()->length() == 8);
  match(Set dst (ReplicateB src));
  ins_cost(INSN_COST);
  format %{ "dup  $dst, $src\t# vector (8B)" %}
  ins_encode %{
    __ dup(as_FloatRegister($dst$$reg), __ T8B, as_Register($src$$reg));
  %}
  ins_pipe(vdup_reg_reg64);
%}

// Broadcast a GP-register byte across a full 16B (Q) vector.
instruct replicate16B(vecX dst, iRegIorL2I src)
%{
  predicate(n->as_Vector()->length() == 16);
  match(Set dst (ReplicateB src));
  ins_cost(INSN_COST);
  format %{ "dup  $dst, $src\t# vector (16B)" %}
  ins_encode %{
    __ dup(as_FloatRegister($dst$$reg), __ T16B, as_Register($src$$reg));
  %}
  ins_pipe(vdup_reg_reg128);
%}

// Broadcast an immediate byte across an 8B vector; only the low 8 bits
// of the constant are used (& 0xff).
instruct replicate8B_imm(vecD dst, immI con)
%{
  predicate(n->as_Vector()->length() == 4 ||
            n->as_Vector()->length() == 8);
  match(Set dst (ReplicateB con));
  ins_cost(INSN_COST);
  format %{ "movi  $dst, $con\t# vector(8B)" %}
  ins_encode %{
    __ mov(as_FloatRegister($dst$$reg), __ T8B, $con$$constant & 0xff);
  %}
  ins_pipe(vmovi_reg_imm64);
%}

// Broadcast an immediate byte across a 16B vector (low 8 bits only).
instruct replicate16B_imm(vecX dst, immI con)
%{
  predicate(n->as_Vector()->length() == 16);
  match(Set dst (ReplicateB con));
  ins_cost(INSN_COST);
  format %{ "movi  $dst, $con\t# vector(16B)" %}
  ins_encode %{
    __ mov(as_FloatRegister($dst$$reg), __ T16B, $con$$constant & 0xff);
  %}
  ins_pipe(vmovi_reg_imm128);
%}
14068 
// Broadcast a GP-register halfword across a 4H vector; the predicate
// also accepts 2-element short vectors, which fit in the same D register.
instruct replicate4S(vecD dst, iRegIorL2I src)
%{
  predicate(n->as_Vector()->length() == 2 ||
            n->as_Vector()->length() == 4);
  match(Set dst (ReplicateS src));
  ins_cost(INSN_COST);
  format %{ "dup  $dst, $src\t# vector (4S)" %}
  ins_encode %{
    __ dup(as_FloatRegister($dst$$reg), __ T4H, as_Register($src$$reg));
  %}
  ins_pipe(vdup_reg_reg64);
%}

// Broadcast a GP-register halfword across a full 8H (Q) vector.
instruct replicate8S(vecX dst, iRegIorL2I src)
%{
  predicate(n->as_Vector()->length() == 8);
  match(Set dst (ReplicateS src));
  ins_cost(INSN_COST);
  format %{ "dup  $dst, $src\t# vector (8S)" %}
  ins_encode %{
    __ dup(as_FloatRegister($dst$$reg), __ T8H, as_Register($src$$reg));
  %}
  ins_pipe(vdup_reg_reg128);
%}

// Broadcast an immediate halfword across a 4H vector; only the low
// 16 bits of the constant are used (& 0xffff).
instruct replicate4S_imm(vecD dst, immI con)
%{
  predicate(n->as_Vector()->length() == 2 ||
            n->as_Vector()->length() == 4);
  match(Set dst (ReplicateS con));
  ins_cost(INSN_COST);
  format %{ "movi  $dst, $con\t# vector(4H)" %}
  ins_encode %{
    __ mov(as_FloatRegister($dst$$reg), __ T4H, $con$$constant & 0xffff);
  %}
  ins_pipe(vmovi_reg_imm64);
%}

// Broadcast an immediate halfword across an 8H vector (low 16 bits only).
instruct replicate8S_imm(vecX dst, immI con)
%{
  predicate(n->as_Vector()->length() == 8);
  match(Set dst (ReplicateS con));
  ins_cost(INSN_COST);
  format %{ "movi  $dst, $con\t# vector(8H)" %}
  ins_encode %{
    __ mov(as_FloatRegister($dst$$reg), __ T8H, $con$$constant & 0xffff);
  %}
  ins_pipe(vmovi_reg_imm128);
%}
14118 
// Broadcast a GP-register word across a 2S vector.
instruct replicate2I(vecD dst, iRegIorL2I src)
%{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (ReplicateI src));
  ins_cost(INSN_COST);
  format %{ "dup  $dst, $src\t# vector (2I)" %}
  ins_encode %{
    __ dup(as_FloatRegister($dst$$reg), __ T2S, as_Register($src$$reg));
  %}
  ins_pipe(vdup_reg_reg64);
%}

// Broadcast a GP-register word across a full 4S (Q) vector.
instruct replicate4I(vecX dst, iRegIorL2I src)
%{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (ReplicateI src));
  ins_cost(INSN_COST);
  format %{ "dup  $dst, $src\t# vector (4I)" %}
  ins_encode %{
    __ dup(as_FloatRegister($dst$$reg), __ T4S, as_Register($src$$reg));
  %}
  ins_pipe(vdup_reg_reg128);
%}

// Broadcast an immediate word across a 2S vector; the constant is
// passed unmasked (words occupy the full 32-bit lane).
instruct replicate2I_imm(vecD dst, immI con)
%{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (ReplicateI con));
  ins_cost(INSN_COST);
  format %{ "movi  $dst, $con\t# vector(2I)" %}
  ins_encode %{
    __ mov(as_FloatRegister($dst$$reg), __ T2S, $con$$constant);
  %}
  ins_pipe(vmovi_reg_imm64);
%}

// Broadcast an immediate word across a 4S vector.
instruct replicate4I_imm(vecX dst, immI con)
%{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (ReplicateI con));
  ins_cost(INSN_COST);
  format %{ "movi  $dst, $con\t# vector(4I)" %}
  ins_encode %{
    __ mov(as_FloatRegister($dst$$reg), __ T4S, $con$$constant);
  %}
  ins_pipe(vmovi_reg_imm128);
%}
14166 
// Broadcast a GP-register long across a 2D vector.
instruct replicate2L(vecX dst, iRegL src)
%{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (ReplicateL src));
  ins_cost(INSN_COST);
  format %{ "dup  $dst, $src\t# vector (2L)" %}
  ins_encode %{
    __ dup(as_FloatRegister($dst$$reg), __ T2D, as_Register($src$$reg));
  %}
  ins_pipe(vdup_reg_reg128);
%}

// Zero a 128-bit vector by xoring it with itself.
// NOTE(review): this matches ReplicateI (not ReplicateL) and the format
// says "(4I)" despite the 2L name — apparently intentional since the
// all-zero bit pattern is identical regardless of lane size, but
// confirm before changing.
instruct replicate2L_zero(vecX dst, immI0 zero)
%{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (ReplicateI zero));
  ins_cost(INSN_COST);
  format %{ "movi  $dst, $zero\t# vector(4I)" %}
  ins_encode %{
    __ eor(as_FloatRegister($dst$$reg), __ T16B,
           as_FloatRegister($dst$$reg),
           as_FloatRegister($dst$$reg));
  %}
  ins_pipe(vmovi_reg_imm128);
%}
14192 
// Broadcast an FP-register float across a 2S vector.
instruct replicate2F(vecD dst, vRegF src)
%{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (ReplicateF src));
  ins_cost(INSN_COST);
  format %{ "dup  $dst, $src\t# vector (2F)" %}
  ins_encode %{
    __ dup(as_FloatRegister($dst$$reg), __ T2S,
           as_FloatRegister($src$$reg));
  %}
  ins_pipe(vdup_reg_freg64);
%}

// Broadcast an FP-register float across a full 4S (Q) vector.
instruct replicate4F(vecX dst, vRegF src)
%{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (ReplicateF src));
  ins_cost(INSN_COST);
  format %{ "dup  $dst, $src\t# vector (4F)" %}
  ins_encode %{
    __ dup(as_FloatRegister($dst$$reg), __ T4S,
           as_FloatRegister($src$$reg));
  %}
  ins_pipe(vdup_reg_freg128);
%}

// Broadcast an FP-register double across a 2D vector.
instruct replicate2D(vecX dst, vRegD src)
%{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (ReplicateD src));
  ins_cost(INSN_COST);
  format %{ "dup  $dst, $src\t# vector (2D)" %}
  ins_encode %{
    __ dup(as_FloatRegister($dst$$reg), __ T2D,
           as_FloatRegister($src$$reg));
  %}
  ins_pipe(vdup_reg_dreg128);
%}
14231 
14232 // ====================VECTOR ARITHMETIC=======================================
14233 
14234 // --------------------------------- ADD --------------------------------------
14235 
// Byte vector add, D-sized; also used for 4-byte vectors (predicate
// accepts length 4 or 8).
instruct vadd8B(vecD dst, vecD src1, vecD src2)
%{
  predicate(n->as_Vector()->length() == 4 ||
            n->as_Vector()->length() == 8);
  match(Set dst (AddVB src1 src2));
  ins_cost(INSN_COST);
  format %{ "addv  $dst,$src1,$src2\t# vector (8B)" %}
  ins_encode %{
    __ addv(as_FloatRegister($dst$$reg), __ T8B,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vdop64);
%}

// Byte vector add, Q-sized (16 lanes).
instruct vadd16B(vecX dst, vecX src1, vecX src2)
%{
  predicate(n->as_Vector()->length() == 16);
  match(Set dst (AddVB src1 src2));
  ins_cost(INSN_COST);
  format %{ "addv  $dst,$src1,$src2\t# vector (16B)" %}
  ins_encode %{
    __ addv(as_FloatRegister($dst$$reg), __ T16B,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vdop128);
%}

// Short vector add, D-sized; also used for 2-element short vectors.
instruct vadd4S(vecD dst, vecD src1, vecD src2)
%{
  predicate(n->as_Vector()->length() == 2 ||
            n->as_Vector()->length() == 4);
  match(Set dst (AddVS src1 src2));
  ins_cost(INSN_COST);
  format %{ "addv  $dst,$src1,$src2\t# vector (4H)" %}
  ins_encode %{
    __ addv(as_FloatRegister($dst$$reg), __ T4H,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vdop64);
%}

// Short vector add, Q-sized (8 lanes).
instruct vadd8S(vecX dst, vecX src1, vecX src2)
%{
  predicate(n->as_Vector()->length() == 8);
  match(Set dst (AddVS src1 src2));
  ins_cost(INSN_COST);
  format %{ "addv  $dst,$src1,$src2\t# vector (8H)" %}
  ins_encode %{
    __ addv(as_FloatRegister($dst$$reg), __ T8H,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vdop128);
%}

// Int vector add, D-sized (2 lanes).
instruct vadd2I(vecD dst, vecD src1, vecD src2)
%{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (AddVI src1 src2));
  ins_cost(INSN_COST);
  format %{ "addv  $dst,$src1,$src2\t# vector (2S)" %}
  ins_encode %{
    __ addv(as_FloatRegister($dst$$reg), __ T2S,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vdop64);
%}

// Int vector add, Q-sized (4 lanes).
instruct vadd4I(vecX dst, vecX src1, vecX src2)
%{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (AddVI src1 src2));
  ins_cost(INSN_COST);
  format %{ "addv  $dst,$src1,$src2\t# vector (4S)" %}
  ins_encode %{
    __ addv(as_FloatRegister($dst$$reg), __ T4S,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vdop128);
%}

// Long vector add, Q-sized (2 lanes).
instruct vadd2L(vecX dst, vecX src1, vecX src2)
%{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (AddVL src1 src2));
  ins_cost(INSN_COST);
  format %{ "addv  $dst,$src1,$src2\t# vector (2L)" %}
  ins_encode %{
    __ addv(as_FloatRegister($dst$$reg), __ T2D,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vdop128);
%}

// Float vector add, D-sized (2 lanes).
instruct vadd2F(vecD dst, vecD src1, vecD src2)
%{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (AddVF src1 src2));
  ins_cost(INSN_COST);
  format %{ "fadd  $dst,$src1,$src2\t# vector (2S)" %}
  ins_encode %{
    __ fadd(as_FloatRegister($dst$$reg), __ T2S,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vdop_fp64);
%}

// Float vector add, Q-sized (4 lanes).
instruct vadd4F(vecX dst, vecX src1, vecX src2)
%{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (AddVF src1 src2));
  ins_cost(INSN_COST);
  format %{ "fadd  $dst,$src1,$src2\t# vector (4S)" %}
  ins_encode %{
    __ fadd(as_FloatRegister($dst$$reg), __ T4S,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vdop_fp128);
%}
14363 
// Double vector add, Q-sized (2 lanes).
instruct vadd2D(vecX dst, vecX src1, vecX src2)
%{
  // Fix: the length predicate was missing, making this the only 2D
  // vector-arithmetic rule without one (cf. vsub2D, vmul2D, vdiv2D).
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (AddVD src1 src2));
  ins_cost(INSN_COST);
  format %{ "fadd  $dst,$src1,$src2\t# vector (2D)" %}
  ins_encode %{
    __ fadd(as_FloatRegister($dst$$reg), __ T2D,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vdop_fp128);
%}
14376 
14377 // --------------------------------- SUB --------------------------------------
14378 
// Byte vector subtract, D-sized; also used for 4-byte vectors.
instruct vsub8B(vecD dst, vecD src1, vecD src2)
%{
  predicate(n->as_Vector()->length() == 4 ||
            n->as_Vector()->length() == 8);
  match(Set dst (SubVB src1 src2));
  ins_cost(INSN_COST);
  format %{ "subv  $dst,$src1,$src2\t# vector (8B)" %}
  ins_encode %{
    __ subv(as_FloatRegister($dst$$reg), __ T8B,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vdop64);
%}

// Byte vector subtract, Q-sized (16 lanes).
instruct vsub16B(vecX dst, vecX src1, vecX src2)
%{
  predicate(n->as_Vector()->length() == 16);
  match(Set dst (SubVB src1 src2));
  ins_cost(INSN_COST);
  format %{ "subv  $dst,$src1,$src2\t# vector (16B)" %}
  ins_encode %{
    __ subv(as_FloatRegister($dst$$reg), __ T16B,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vdop128);
%}

// Short vector subtract, D-sized; also used for 2-element vectors.
instruct vsub4S(vecD dst, vecD src1, vecD src2)
%{
  predicate(n->as_Vector()->length() == 2 ||
            n->as_Vector()->length() == 4);
  match(Set dst (SubVS src1 src2));
  ins_cost(INSN_COST);
  format %{ "subv  $dst,$src1,$src2\t# vector (4H)" %}
  ins_encode %{
    __ subv(as_FloatRegister($dst$$reg), __ T4H,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vdop64);
%}

// Short vector subtract, Q-sized (8 lanes).
instruct vsub8S(vecX dst, vecX src1, vecX src2)
%{
  predicate(n->as_Vector()->length() == 8);
  match(Set dst (SubVS src1 src2));
  ins_cost(INSN_COST);
  format %{ "subv  $dst,$src1,$src2\t# vector (8H)" %}
  ins_encode %{
    __ subv(as_FloatRegister($dst$$reg), __ T8H,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vdop128);
%}

// Int vector subtract, D-sized (2 lanes).
instruct vsub2I(vecD dst, vecD src1, vecD src2)
%{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (SubVI src1 src2));
  ins_cost(INSN_COST);
  format %{ "subv  $dst,$src1,$src2\t# vector (2S)" %}
  ins_encode %{
    __ subv(as_FloatRegister($dst$$reg), __ T2S,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vdop64);
%}

// Int vector subtract, Q-sized (4 lanes).
instruct vsub4I(vecX dst, vecX src1, vecX src2)
%{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (SubVI src1 src2));
  ins_cost(INSN_COST);
  format %{ "subv  $dst,$src1,$src2\t# vector (4S)" %}
  ins_encode %{
    __ subv(as_FloatRegister($dst$$reg), __ T4S,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vdop128);
%}

// Long vector subtract, Q-sized (2 lanes).
instruct vsub2L(vecX dst, vecX src1, vecX src2)
%{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (SubVL src1 src2));
  ins_cost(INSN_COST);
  format %{ "subv  $dst,$src1,$src2\t# vector (2L)" %}
  ins_encode %{
    __ subv(as_FloatRegister($dst$$reg), __ T2D,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vdop128);
%}

// Float vector subtract, D-sized (2 lanes).
instruct vsub2F(vecD dst, vecD src1, vecD src2)
%{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (SubVF src1 src2));
  ins_cost(INSN_COST);
  format %{ "fsub  $dst,$src1,$src2\t# vector (2S)" %}
  ins_encode %{
    __ fsub(as_FloatRegister($dst$$reg), __ T2S,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vdop_fp64);
%}

// Float vector subtract, Q-sized (4 lanes).
instruct vsub4F(vecX dst, vecX src1, vecX src2)
%{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (SubVF src1 src2));
  ins_cost(INSN_COST);
  format %{ "fsub  $dst,$src1,$src2\t# vector (4S)" %}
  ins_encode %{
    __ fsub(as_FloatRegister($dst$$reg), __ T4S,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vdop_fp128);
%}

// Double vector subtract, Q-sized (2 lanes).
instruct vsub2D(vecX dst, vecX src1, vecX src2)
%{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (SubVD src1 src2));
  ins_cost(INSN_COST);
  format %{ "fsub  $dst,$src1,$src2\t# vector (2D)" %}
  ins_encode %{
    __ fsub(as_FloatRegister($dst$$reg), __ T2D,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vdop_fp128);
%}
14520 
14521 // --------------------------------- MUL --------------------------------------
14522 
// Short vector multiply, D-sized; also used for 2-element vectors.
instruct vmul4S(vecD dst, vecD src1, vecD src2)
%{
  predicate(n->as_Vector()->length() == 2 ||
            n->as_Vector()->length() == 4);
  match(Set dst (MulVS src1 src2));
  ins_cost(INSN_COST);
  format %{ "mulv  $dst,$src1,$src2\t# vector (4H)" %}
  ins_encode %{
    __ mulv(as_FloatRegister($dst$$reg), __ T4H,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vmul64);
%}

// Short vector multiply, Q-sized (8 lanes).
instruct vmul8S(vecX dst, vecX src1, vecX src2)
%{
  predicate(n->as_Vector()->length() == 8);
  match(Set dst (MulVS src1 src2));
  ins_cost(INSN_COST);
  format %{ "mulv  $dst,$src1,$src2\t# vector (8H)" %}
  ins_encode %{
    __ mulv(as_FloatRegister($dst$$reg), __ T8H,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vmul128);
%}

// Int vector multiply, D-sized (2 lanes).
instruct vmul2I(vecD dst, vecD src1, vecD src2)
%{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (MulVI src1 src2));
  ins_cost(INSN_COST);
  format %{ "mulv  $dst,$src1,$src2\t# vector (2S)" %}
  ins_encode %{
    __ mulv(as_FloatRegister($dst$$reg), __ T2S,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vmul64);
%}

// Int vector multiply, Q-sized (4 lanes).
instruct vmul4I(vecX dst, vecX src1, vecX src2)
%{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (MulVI src1 src2));
  ins_cost(INSN_COST);
  format %{ "mulv  $dst,$src1,$src2\t# vector (4S)" %}
  ins_encode %{
    __ mulv(as_FloatRegister($dst$$reg), __ T4S,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vmul128);
%}

// Float vector multiply, D-sized (2 lanes).
instruct vmul2F(vecD dst, vecD src1, vecD src2)
%{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (MulVF src1 src2));
  ins_cost(INSN_COST);
  format %{ "fmul  $dst,$src1,$src2\t# vector (2S)" %}
  ins_encode %{
    __ fmul(as_FloatRegister($dst$$reg), __ T2S,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vmuldiv_fp64);
%}

// Float vector multiply, Q-sized (4 lanes).
instruct vmul4F(vecX dst, vecX src1, vecX src2)
%{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (MulVF src1 src2));
  ins_cost(INSN_COST);
  format %{ "fmul  $dst,$src1,$src2\t# vector (4S)" %}
  ins_encode %{
    __ fmul(as_FloatRegister($dst$$reg), __ T4S,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vmuldiv_fp128);
%}

// Double vector multiply, Q-sized (2 lanes).
instruct vmul2D(vecX dst, vecX src1, vecX src2)
%{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (MulVD src1 src2));
  ins_cost(INSN_COST);
  format %{ "fmul  $dst,$src1,$src2\t# vector (2D)" %}
  ins_encode %{
    __ fmul(as_FloatRegister($dst$$reg), __ T2D,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vmuldiv_fp128);
%}
14621 
14622 // --------------------------------- MLA --------------------------------------
14623 
// Fused multiply-accumulate (dst += src1 * src2), short lanes,
// D-sized; also used for 2-element vectors.
instruct vmla4S(vecD dst, vecD src1, vecD src2)
%{
  predicate(n->as_Vector()->length() == 2 ||
            n->as_Vector()->length() == 4);
  match(Set dst (AddVS dst (MulVS src1 src2)));
  ins_cost(INSN_COST);
  format %{ "mlav  $dst,$src1,$src2\t# vector (4H)" %}
  ins_encode %{
    __ mlav(as_FloatRegister($dst$$reg), __ T4H,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vmla64);
%}

// Multiply-accumulate, short lanes, Q-sized (8 lanes).
instruct vmla8S(vecX dst, vecX src1, vecX src2)
%{
  predicate(n->as_Vector()->length() == 8);
  match(Set dst (AddVS dst (MulVS src1 src2)));
  ins_cost(INSN_COST);
  format %{ "mlav  $dst,$src1,$src2\t# vector (8H)" %}
  ins_encode %{
    __ mlav(as_FloatRegister($dst$$reg), __ T8H,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vmla128);
%}

// Multiply-accumulate, int lanes, D-sized (2 lanes).
instruct vmla2I(vecD dst, vecD src1, vecD src2)
%{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (AddVI dst (MulVI src1 src2)));
  ins_cost(INSN_COST);
  format %{ "mlav  $dst,$src1,$src2\t# vector (2S)" %}
  ins_encode %{
    __ mlav(as_FloatRegister($dst$$reg), __ T2S,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vmla64);
%}

// Multiply-accumulate, int lanes, Q-sized (4 lanes).
instruct vmla4I(vecX dst, vecX src1, vecX src2)
%{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (AddVI dst (MulVI src1 src2)));
  ins_cost(INSN_COST);
  format %{ "mlav  $dst,$src1,$src2\t# vector (4S)" %}
  ins_encode %{
    __ mlav(as_FloatRegister($dst$$reg), __ T4S,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vmla128);
%}
14680 
14681 // --------------------------------- MLS --------------------------------------
14682 
// Fused multiply-subtract (dst -= src1 * src2), short lanes,
// D-sized; also used for 2-element vectors.
instruct vmls4S(vecD dst, vecD src1, vecD src2)
%{
  predicate(n->as_Vector()->length() == 2 ||
            n->as_Vector()->length() == 4);
  match(Set dst (SubVS dst (MulVS src1 src2)));
  ins_cost(INSN_COST);
  format %{ "mlsv  $dst,$src1,$src2\t# vector (4H)" %}
  ins_encode %{
    __ mlsv(as_FloatRegister($dst$$reg), __ T4H,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vmla64);
%}

// Multiply-subtract, short lanes, Q-sized (8 lanes).
instruct vmls8S(vecX dst, vecX src1, vecX src2)
%{
  predicate(n->as_Vector()->length() == 8);
  match(Set dst (SubVS dst (MulVS src1 src2)));
  ins_cost(INSN_COST);
  format %{ "mlsv  $dst,$src1,$src2\t# vector (8H)" %}
  ins_encode %{
    __ mlsv(as_FloatRegister($dst$$reg), __ T8H,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vmla128);
%}

// Multiply-subtract, int lanes, D-sized (2 lanes).
instruct vmls2I(vecD dst, vecD src1, vecD src2)
%{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (SubVI dst (MulVI src1 src2)));
  ins_cost(INSN_COST);
  format %{ "mlsv  $dst,$src1,$src2\t# vector (2S)" %}
  ins_encode %{
    __ mlsv(as_FloatRegister($dst$$reg), __ T2S,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vmla64);
%}

// Multiply-subtract, int lanes, Q-sized (4 lanes).
instruct vmls4I(vecX dst, vecX src1, vecX src2)
%{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (SubVI dst (MulVI src1 src2)));
  ins_cost(INSN_COST);
  format %{ "mlsv  $dst,$src1,$src2\t# vector (4S)" %}
  ins_encode %{
    __ mlsv(as_FloatRegister($dst$$reg), __ T4S,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vmla128);
%}
14739 
14740 // --------------------------------- DIV --------------------------------------
14741 
// Vector single-precision FP divide, two lanes (64-bit): dst = src1 / src2.
instruct vdiv2F(vecD dst, vecD src1, vecD src2)
%{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (DivVF src1 src2));
  ins_cost(INSN_COST);
  format %{ "fdiv  $dst,$src1,$src2\t# vector (2S)" %}
  ins_encode %{
    __ fdiv(as_FloatRegister($dst$$reg), __ T2S,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vmuldiv_fp64);
%}
14755 
// Vector single-precision FP divide, four lanes (128-bit): dst = src1 / src2.
instruct vdiv4F(vecX dst, vecX src1, vecX src2)
%{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (DivVF src1 src2));
  ins_cost(INSN_COST);
  format %{ "fdiv  $dst,$src1,$src2\t# vector (4S)" %}
  ins_encode %{
    __ fdiv(as_FloatRegister($dst$$reg), __ T4S,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vmuldiv_fp128);
%}
14769 
// Vector double-precision FP divide, two lanes (128-bit): dst = src1 / src2.
instruct vdiv2D(vecX dst, vecX src1, vecX src2)
%{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (DivVD src1 src2));
  ins_cost(INSN_COST);
  format %{ "fdiv  $dst,$src1,$src2\t# vector (2D)" %}
  ins_encode %{
    __ fdiv(as_FloatRegister($dst$$reg), __ T2D,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vmuldiv_fp128);
%}
14783 
14784 // --------------------------------- AND --------------------------------------
14785 
// Vector bitwise AND, 64-bit register.  The predicate also accepts 4-byte
// vectors; operating on the full 8B register is harmless for a lanewise
// bitwise op since the unused upper lanes are simply ignored.
instruct vand8B(vecD dst, vecD src1, vecD src2)
%{
  predicate(n->as_Vector()->length_in_bytes() == 4 ||
            n->as_Vector()->length_in_bytes() == 8);
  match(Set dst (AndV src1 src2));
  ins_cost(INSN_COST);
  format %{ "and  $dst,$src1,$src2\t# vector (8B)" %}
  ins_encode %{
    __ andr(as_FloatRegister($dst$$reg), __ T8B,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vlogical64);
%}
14800 
// Vector bitwise AND, full 128-bit register.
instruct vand16B(vecX dst, vecX src1, vecX src2)
%{
  predicate(n->as_Vector()->length_in_bytes() == 16);
  match(Set dst (AndV src1 src2));
  ins_cost(INSN_COST);
  format %{ "and  $dst,$src1,$src2\t# vector (16B)" %}
  ins_encode %{
    __ andr(as_FloatRegister($dst$$reg), __ T16B,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vlogical128);
%}
14814 
14815 // --------------------------------- OR ---------------------------------------
14816 
// Vector bitwise OR, 64-bit register.  The predicate also accepts 4-byte
// vectors; operating on the full 8B register is harmless for a lanewise
// bitwise op since the unused upper lanes are simply ignored.
instruct vor8B(vecD dst, vecD src1, vecD src2)
%{
  predicate(n->as_Vector()->length_in_bytes() == 4 ||
            n->as_Vector()->length_in_bytes() == 8);
  match(Set dst (OrV src1 src2));
  ins_cost(INSN_COST);
  // Fix: the format string previously read "and" (copy-paste from vand8B)
  // although the emitted instruction is orr; it now matches vor16B.
  format %{ "orr  $dst,$src1,$src2\t# vector (8B)" %}
  ins_encode %{
    __ orr(as_FloatRegister($dst$$reg), __ T8B,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vlogical64);
%}
14831 
// Vector bitwise OR, full 128-bit register.
instruct vor16B(vecX dst, vecX src1, vecX src2)
%{
  predicate(n->as_Vector()->length_in_bytes() == 16);
  match(Set dst (OrV src1 src2));
  ins_cost(INSN_COST);
  format %{ "orr  $dst,$src1,$src2\t# vector (16B)" %}
  ins_encode %{
    __ orr(as_FloatRegister($dst$$reg), __ T16B,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vlogical128);
%}
14845 
14846 // --------------------------------- XOR --------------------------------------
14847 
// Vector bitwise XOR (AArch64 mnemonic: eor), 64-bit register.  The
// predicate also accepts 4-byte vectors; see vand8B for why that is safe.
instruct vxor8B(vecD dst, vecD src1, vecD src2)
%{
  predicate(n->as_Vector()->length_in_bytes() == 4 ||
            n->as_Vector()->length_in_bytes() == 8);
  match(Set dst (XorV src1 src2));
  ins_cost(INSN_COST);
  format %{ "xor  $dst,$src1,$src2\t# vector (8B)" %}
  ins_encode %{
    __ eor(as_FloatRegister($dst$$reg), __ T8B,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vlogical64);
%}
14862 
// Vector bitwise XOR (AArch64 mnemonic: eor), full 128-bit register.
instruct vxor16B(vecX dst, vecX src1, vecX src2)
%{
  predicate(n->as_Vector()->length_in_bytes() == 16);
  match(Set dst (XorV src1 src2));
  ins_cost(INSN_COST);
  format %{ "xor  $dst,$src1,$src2\t# vector (16B)" %}
  ins_encode %{
    __ eor(as_FloatRegister($dst$$reg), __ T16B,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vlogical128);
%}
14876 
14877 // ------------------------------ Shift ---------------------------------------
// Materialize a variable shift count: replicate the low byte of a GP
// register into every byte lane of a 64-bit vector register.  Used as the
// shift operand of the variable-shift rules below.
instruct vshiftcnt8B(vecD dst, iRegIorL2I cnt) %{
  predicate(n->as_Vector()->length_in_bytes() == 8);
  match(Set dst (LShiftCntV cnt));
  match(Set dst (RShiftCntV cnt));
  format %{ "dup  $dst, $cnt\t# shift count vector (8B)" %}
  ins_encode %{
    __ dup(as_FloatRegister($dst$$reg), __ T8B, as_Register($cnt$$reg));
  %}
  ins_pipe(vdup_reg_reg64);
%}
14888 
// Materialize a variable shift count: replicate the low byte of a GP
// register into every byte lane of a 128-bit vector register.
instruct vshiftcnt16B(vecX dst, iRegIorL2I cnt) %{
  predicate(n->as_Vector()->length_in_bytes() == 16);
  match(Set dst (LShiftCntV cnt));
  match(Set dst (RShiftCntV cnt));
  format %{ "dup  $dst, $cnt\t# shift count vector (16B)" %}
  ins_encode %{
    __ dup(as_FloatRegister($dst$$reg), __ T16B, as_Register($cnt$$reg));
  %}
  ins_pipe(vdup_reg_reg128);
%}
14899 
// Vector left shift by a per-lane variable count, byte lanes (64-bit).
// The predicate also accepts 4-byte vectors (shared 8B encoding).
instruct vsll8B(vecD dst, vecD src, vecD shift) %{
  predicate(n->as_Vector()->length() == 4 ||
            n->as_Vector()->length() == 8);
  match(Set dst (LShiftVB src shift));
  ins_cost(INSN_COST);
  format %{ "sshl  $dst,$src,$shift\t# vector (8B)" %}
  ins_encode %{
    __ sshl(as_FloatRegister($dst$$reg), __ T8B,
            as_FloatRegister($src$$reg),
            as_FloatRegister($shift$$reg));
  %}
  ins_pipe(vshift64);
%}
14913 
// Vector left shift by a per-lane variable count, 16 byte lanes (128-bit).
instruct vsll16B(vecX dst, vecX src, vecX shift) %{
  predicate(n->as_Vector()->length() == 16);
  match(Set dst (LShiftVB src shift));
  ins_cost(INSN_COST);
  format %{ "sshl  $dst,$src,$shift\t# vector (16B)" %}
  ins_encode %{
    __ sshl(as_FloatRegister($dst$$reg), __ T16B,
            as_FloatRegister($src$$reg),
            as_FloatRegister($shift$$reg));
  %}
  ins_pipe(vshift128);
%}
14926 
14927 // Right shifts with vector shift count on aarch64 SIMD are implemented
14928 // as left shift by negative shift count.
14929 // There are two cases for vector shift count.
14930 //
14931 // Case 1: The vector shift count is from replication.
14932 //        |            |
14933 //    LoadVector  RShiftCntV
14934 //        |       /
14935 //     RShiftVI
// Note: In the inner loop, multiple neg instructions are used; they can be
// moved to the outer loop and merged into one neg instruction.
14938 //
14939 // Case 2: The vector shift count is from loading.
14940 // This case isn't supported by middle-end now. But it's supported by
14941 // panama/vectorIntrinsics(JEP 338: Vector API).
14942 //        |            |
14943 //    LoadVector  LoadVector
14944 //        |       /
14945 //     RShiftVI
14946 //
14947 
// Vector arithmetic right shift by variable count, byte lanes (64-bit).
// AArch64 has no right-shift-by-register; implemented as sshl by the
// negated count (see comment block above).
instruct vsra8B(vecD dst, vecD src, vecD shift, vecD tmp) %{
  predicate(n->as_Vector()->length() == 4 ||
            n->as_Vector()->length() == 8);
  match(Set dst (RShiftVB src shift));
  ins_cost(INSN_COST);
  effect(TEMP tmp);
  format %{ "negr  $tmp,$shift\t"
            "sshl  $dst,$src,$tmp\t# vector (8B)" %}
  ins_encode %{
    __ negr(as_FloatRegister($tmp$$reg), __ T8B,
            as_FloatRegister($shift$$reg));
    __ sshl(as_FloatRegister($dst$$reg), __ T8B,
            as_FloatRegister($src$$reg),
            as_FloatRegister($tmp$$reg));
  %}
  ins_pipe(vshift64);
%}
14965 
// Vector arithmetic right shift by variable count, 16 byte lanes (128-bit);
// emitted as sshl by the negated count.
instruct vsra16B(vecX dst, vecX src, vecX shift, vecX tmp) %{
  predicate(n->as_Vector()->length() == 16);
  match(Set dst (RShiftVB src shift));
  ins_cost(INSN_COST);
  effect(TEMP tmp);
  format %{ "negr  $tmp,$shift\t"
            "sshl  $dst,$src,$tmp\t# vector (16B)" %}
  ins_encode %{
    __ negr(as_FloatRegister($tmp$$reg), __ T16B,
            as_FloatRegister($shift$$reg));
    __ sshl(as_FloatRegister($dst$$reg), __ T16B,
            as_FloatRegister($src$$reg),
            as_FloatRegister($tmp$$reg));
  %}
  ins_pipe(vshift128);
%}
14982 
// Vector logical (unsigned) right shift by variable count, byte lanes
// (64-bit); emitted as ushl by the negated count.
instruct vsrl8B(vecD dst, vecD src, vecD shift, vecD tmp) %{
  predicate(n->as_Vector()->length() == 4 ||
            n->as_Vector()->length() == 8);
  match(Set dst (URShiftVB src shift));
  ins_cost(INSN_COST);
  effect(TEMP tmp);
  format %{ "negr  $tmp,$shift\t"
            "ushl  $dst,$src,$tmp\t# vector (8B)" %}
  ins_encode %{
    __ negr(as_FloatRegister($tmp$$reg), __ T8B,
            as_FloatRegister($shift$$reg));
    __ ushl(as_FloatRegister($dst$$reg), __ T8B,
            as_FloatRegister($src$$reg),
            as_FloatRegister($tmp$$reg));
  %}
  ins_pipe(vshift64);
%}
15000 
// Vector logical (unsigned) right shift by variable count, 16 byte lanes
// (128-bit); emitted as ushl by the negated count.
instruct vsrl16B(vecX dst, vecX src, vecX shift, vecX tmp) %{
  predicate(n->as_Vector()->length() == 16);
  match(Set dst (URShiftVB src shift));
  ins_cost(INSN_COST);
  effect(TEMP tmp);
  format %{ "negr  $tmp,$shift\t"
            "ushl  $dst,$src,$tmp\t# vector (16B)" %}
  ins_encode %{
    __ negr(as_FloatRegister($tmp$$reg), __ T16B,
            as_FloatRegister($shift$$reg));
    __ ushl(as_FloatRegister($dst$$reg), __ T16B,
            as_FloatRegister($src$$reg),
            as_FloatRegister($tmp$$reg));
  %}
  ins_pipe(vshift128);
%}
15017 
// Vector left shift by immediate, byte lanes (64-bit).  The count is
// masked with 31 (Java int shift semantics); a count >= lane width (8)
// would shift out every bit, so the result is materialized as zero via
// eor of the register with itself.
instruct vsll8B_imm(vecD dst, vecD src, immI shift) %{
  predicate(n->as_Vector()->length() == 4 ||
            n->as_Vector()->length() == 8);
  match(Set dst (LShiftVB src shift));
  ins_cost(INSN_COST);
  format %{ "shl    $dst, $src, $shift\t# vector (8B)" %}
  ins_encode %{
    int sh = (int)$shift$$constant & 31;
    if (sh >= 8) {
      __ eor(as_FloatRegister($dst$$reg), __ T8B,
             as_FloatRegister($src$$reg),
             as_FloatRegister($src$$reg));
    } else {
      __ shl(as_FloatRegister($dst$$reg), __ T8B,
             as_FloatRegister($src$$reg), sh);
    }
  %}
  ins_pipe(vshift64_imm);
%}
15037 
// Vector left shift by immediate, 16 byte lanes (128-bit).  Counts >= 8
// zero the result (via self-eor); see vsll8B_imm.
instruct vsll16B_imm(vecX dst, vecX src, immI shift) %{
  predicate(n->as_Vector()->length() == 16);
  match(Set dst (LShiftVB src shift));
  ins_cost(INSN_COST);
  format %{ "shl    $dst, $src, $shift\t# vector (16B)" %}
  ins_encode %{
    int sh = (int)$shift$$constant & 31;
    if (sh >= 8) {
      __ eor(as_FloatRegister($dst$$reg), __ T16B,
             as_FloatRegister($src$$reg),
             as_FloatRegister($src$$reg));
    } else {
      __ shl(as_FloatRegister($dst$$reg), __ T16B,
             as_FloatRegister($src$$reg), sh);
    }
  %}
  ins_pipe(vshift128_imm);
%}
15056 
// Vector arithmetic right shift by immediate, byte lanes (64-bit).
// Counts >= 8 are clamped to 7 (arithmetic shift saturates to sign fill).
// NOTE(review): the value handed to sshr is pre-encoded as (-sh & 7);
// presumably the assembler's sshr expects this form — it matches the
// sibling *_imm rules below, but confirm against assembler_aarch64.
instruct vsra8B_imm(vecD dst, vecD src, immI shift) %{
  predicate(n->as_Vector()->length() == 4 ||
            n->as_Vector()->length() == 8);
  match(Set dst (RShiftVB src shift));
  ins_cost(INSN_COST);
  format %{ "sshr    $dst, $src, $shift\t# vector (8B)" %}
  ins_encode %{
    int sh = (int)$shift$$constant & 31;
    if (sh >= 8) sh = 7;
    sh = -sh & 7;
    __ sshr(as_FloatRegister($dst$$reg), __ T8B,
           as_FloatRegister($src$$reg), sh);
  %}
  ins_pipe(vshift64_imm);
%}
15072 
// Vector arithmetic right shift by immediate, 16 byte lanes (128-bit).
// Counts >= 8 clamp to 7; shift passed to sshr pre-encoded as (-sh & 7)
// (see vsra8B_imm).
instruct vsra16B_imm(vecX dst, vecX src, immI shift) %{
  predicate(n->as_Vector()->length() == 16);
  match(Set dst (RShiftVB src shift));
  ins_cost(INSN_COST);
  format %{ "sshr    $dst, $src, $shift\t# vector (16B)" %}
  ins_encode %{
    int sh = (int)$shift$$constant & 31;
    if (sh >= 8) sh = 7;
    sh = -sh & 7;
    __ sshr(as_FloatRegister($dst$$reg), __ T16B,
           as_FloatRegister($src$$reg), sh);
  %}
  ins_pipe(vshift128_imm);
%}
15087 
// Vector logical right shift by immediate, byte lanes (64-bit).
// Counts >= 8 zero the result (via self-eor); otherwise the shift is
// passed to ushr pre-encoded as (-sh & 7), as in vsra8B_imm.
instruct vsrl8B_imm(vecD dst, vecD src, immI shift) %{
  predicate(n->as_Vector()->length() == 4 ||
            n->as_Vector()->length() == 8);
  match(Set dst (URShiftVB src shift));
  ins_cost(INSN_COST);
  format %{ "ushr    $dst, $src, $shift\t# vector (8B)" %}
  ins_encode %{
    int sh = (int)$shift$$constant & 31;
    if (sh >= 8) {
      __ eor(as_FloatRegister($dst$$reg), __ T8B,
             as_FloatRegister($src$$reg),
             as_FloatRegister($src$$reg));
    } else {
      __ ushr(as_FloatRegister($dst$$reg), __ T8B,
             as_FloatRegister($src$$reg), -sh & 7);
    }
  %}
  ins_pipe(vshift64_imm);
%}
15107 
// Vector logical right shift by immediate, 16 byte lanes (128-bit).
// Counts >= 8 zero the result; see vsrl8B_imm.
instruct vsrl16B_imm(vecX dst, vecX src, immI shift) %{
  predicate(n->as_Vector()->length() == 16);
  match(Set dst (URShiftVB src shift));
  ins_cost(INSN_COST);
  format %{ "ushr    $dst, $src, $shift\t# vector (16B)" %}
  ins_encode %{
    int sh = (int)$shift$$constant & 31;
    if (sh >= 8) {
      __ eor(as_FloatRegister($dst$$reg), __ T16B,
             as_FloatRegister($src$$reg),
             as_FloatRegister($src$$reg));
    } else {
      __ ushr(as_FloatRegister($dst$$reg), __ T16B,
             as_FloatRegister($src$$reg), -sh & 7);
    }
  %}
  ins_pipe(vshift128_imm);
%}
15126 
// Vector left shift by variable count, 16-bit lanes (64-bit register).
// Predicate also covers 2-lane vectors (shared 4H encoding).
instruct vsll4S(vecD dst, vecD src, vecD shift) %{
  predicate(n->as_Vector()->length() == 2 ||
            n->as_Vector()->length() == 4);
  match(Set dst (LShiftVS src shift));
  ins_cost(INSN_COST);
  format %{ "sshl  $dst,$src,$shift\t# vector (4H)" %}
  ins_encode %{
    __ sshl(as_FloatRegister($dst$$reg), __ T4H,
            as_FloatRegister($src$$reg),
            as_FloatRegister($shift$$reg));
  %}
  ins_pipe(vshift64);
%}
15140 
// Vector left shift by variable count, eight 16-bit lanes (128-bit).
instruct vsll8S(vecX dst, vecX src, vecX shift) %{
  predicate(n->as_Vector()->length() == 8);
  match(Set dst (LShiftVS src shift));
  ins_cost(INSN_COST);
  format %{ "sshl  $dst,$src,$shift\t# vector (8H)" %}
  ins_encode %{
    __ sshl(as_FloatRegister($dst$$reg), __ T8H,
            as_FloatRegister($src$$reg),
            as_FloatRegister($shift$$reg));
  %}
  ins_pipe(vshift128);
%}
15153 
// Vector arithmetic right shift by variable count, 16-bit lanes (64-bit);
// sshl by the negated count.  negr uses the byte arrangement (T8B) even
// though lanes are halfwords: sshl consults only the low byte of each lane
// for the shift amount (per the A64 ISA), so byte-wise negation suffices.
instruct vsra4S(vecD dst, vecD src, vecD shift, vecD tmp) %{
  predicate(n->as_Vector()->length() == 2 ||
            n->as_Vector()->length() == 4);
  match(Set dst (RShiftVS src shift));
  ins_cost(INSN_COST);
  effect(TEMP tmp);
  format %{ "negr  $tmp,$shift\t"
            "sshl  $dst,$src,$tmp\t# vector (4H)" %}
  ins_encode %{
    __ negr(as_FloatRegister($tmp$$reg), __ T8B,
            as_FloatRegister($shift$$reg));
    __ sshl(as_FloatRegister($dst$$reg), __ T4H,
            as_FloatRegister($src$$reg),
            as_FloatRegister($tmp$$reg));
  %}
  ins_pipe(vshift64);
%}
15171 
// Vector arithmetic right shift by variable count, eight 16-bit lanes
// (128-bit); sshl by the byte-wise negated count (see vsra4S).
instruct vsra8S(vecX dst, vecX src, vecX shift, vecX tmp) %{
  predicate(n->as_Vector()->length() == 8);
  match(Set dst (RShiftVS src shift));
  ins_cost(INSN_COST);
  effect(TEMP tmp);
  format %{ "negr  $tmp,$shift\t"
            "sshl  $dst,$src,$tmp\t# vector (8H)" %}
  ins_encode %{
    __ negr(as_FloatRegister($tmp$$reg), __ T16B,
            as_FloatRegister($shift$$reg));
    __ sshl(as_FloatRegister($dst$$reg), __ T8H,
            as_FloatRegister($src$$reg),
            as_FloatRegister($tmp$$reg));
  %}
  ins_pipe(vshift128);
%}
15188 
// Vector logical right shift by variable count, 16-bit lanes (64-bit);
// ushl by the byte-wise negated count (see vsra4S for the T8B negr note).
instruct vsrl4S(vecD dst, vecD src, vecD shift, vecD tmp) %{
  predicate(n->as_Vector()->length() == 2 ||
            n->as_Vector()->length() == 4);
  match(Set dst (URShiftVS src shift));
  ins_cost(INSN_COST);
  effect(TEMP tmp);
  format %{ "negr  $tmp,$shift\t"
            "ushl  $dst,$src,$tmp\t# vector (4H)" %}
  ins_encode %{
    __ negr(as_FloatRegister($tmp$$reg), __ T8B,
            as_FloatRegister($shift$$reg));
    __ ushl(as_FloatRegister($dst$$reg), __ T4H,
            as_FloatRegister($src$$reg),
            as_FloatRegister($tmp$$reg));
  %}
  ins_pipe(vshift64);
%}
15206 
// Vector logical right shift by variable count, eight 16-bit lanes
// (128-bit); ushl by the byte-wise negated count.
instruct vsrl8S(vecX dst, vecX src, vecX shift, vecX tmp) %{
  predicate(n->as_Vector()->length() == 8);
  match(Set dst (URShiftVS src shift));
  ins_cost(INSN_COST);
  effect(TEMP tmp);
  format %{ "negr  $tmp,$shift\t"
            "ushl  $dst,$src,$tmp\t# vector (8H)" %}
  ins_encode %{
    __ negr(as_FloatRegister($tmp$$reg), __ T16B,
            as_FloatRegister($shift$$reg));
    __ ushl(as_FloatRegister($dst$$reg), __ T8H,
            as_FloatRegister($src$$reg),
            as_FloatRegister($tmp$$reg));
  %}
  ins_pipe(vshift128);
%}
15223 
// Vector left shift by immediate, 16-bit lanes (64-bit).  Counts >= lane
// width (16) zero the result via self-eor.
instruct vsll4S_imm(vecD dst, vecD src, immI shift) %{
  predicate(n->as_Vector()->length() == 2 ||
            n->as_Vector()->length() == 4);
  match(Set dst (LShiftVS src shift));
  ins_cost(INSN_COST);
  format %{ "shl    $dst, $src, $shift\t# vector (4H)" %}
  ins_encode %{
    int sh = (int)$shift$$constant & 31;
    if (sh >= 16) {
      __ eor(as_FloatRegister($dst$$reg), __ T8B,
             as_FloatRegister($src$$reg),
             as_FloatRegister($src$$reg));
    } else {
      __ shl(as_FloatRegister($dst$$reg), __ T4H,
             as_FloatRegister($src$$reg), sh);
    }
  %}
  ins_pipe(vshift64_imm);
%}
15243 
// Vector left shift by immediate, eight 16-bit lanes (128-bit).
// Counts >= 16 zero the result via self-eor.
instruct vsll8S_imm(vecX dst, vecX src, immI shift) %{
  predicate(n->as_Vector()->length() == 8);
  match(Set dst (LShiftVS src shift));
  ins_cost(INSN_COST);
  format %{ "shl    $dst, $src, $shift\t# vector (8H)" %}
  ins_encode %{
    int sh = (int)$shift$$constant & 31;
    if (sh >= 16) {
      __ eor(as_FloatRegister($dst$$reg), __ T16B,
             as_FloatRegister($src$$reg),
             as_FloatRegister($src$$reg));
    } else {
      __ shl(as_FloatRegister($dst$$reg), __ T8H,
             as_FloatRegister($src$$reg), sh);
    }
  %}
  ins_pipe(vshift128_imm);
%}
15262 
// Vector arithmetic right shift by immediate, 16-bit lanes (64-bit).
// Counts >= 16 clamp to 15 (sign fill); shift passed to sshr pre-encoded
// as (-sh & 15), same convention as vsra8B_imm.
instruct vsra4S_imm(vecD dst, vecD src, immI shift) %{
  predicate(n->as_Vector()->length() == 2 ||
            n->as_Vector()->length() == 4);
  match(Set dst (RShiftVS src shift));
  ins_cost(INSN_COST);
  format %{ "sshr    $dst, $src, $shift\t# vector (4H)" %}
  ins_encode %{
    int sh = (int)$shift$$constant & 31;
    if (sh >= 16) sh = 15;
    sh = -sh & 15;
    __ sshr(as_FloatRegister($dst$$reg), __ T4H,
           as_FloatRegister($src$$reg), sh);
  %}
  ins_pipe(vshift64_imm);
%}
15278 
// Vector arithmetic right shift by immediate, eight 16-bit lanes
// (128-bit).  Counts >= 16 clamp to 15; shift pre-encoded as (-sh & 15).
instruct vsra8S_imm(vecX dst, vecX src, immI shift) %{
  predicate(n->as_Vector()->length() == 8);
  match(Set dst (RShiftVS src shift));
  ins_cost(INSN_COST);
  format %{ "sshr    $dst, $src, $shift\t# vector (8H)" %}
  ins_encode %{
    int sh = (int)$shift$$constant & 31;
    if (sh >= 16) sh = 15;
    sh = -sh & 15;
    __ sshr(as_FloatRegister($dst$$reg), __ T8H,
           as_FloatRegister($src$$reg), sh);
  %}
  ins_pipe(vshift128_imm);
%}
15293 
// Vector logical right shift by immediate, 16-bit lanes (64-bit).
// Counts >= 16 zero the result; otherwise shift passed to ushr
// pre-encoded as (-sh & 15).
instruct vsrl4S_imm(vecD dst, vecD src, immI shift) %{
  predicate(n->as_Vector()->length() == 2 ||
            n->as_Vector()->length() == 4);
  match(Set dst (URShiftVS src shift));
  ins_cost(INSN_COST);
  format %{ "ushr    $dst, $src, $shift\t# vector (4H)" %}
  ins_encode %{
    int sh = (int)$shift$$constant & 31;
    if (sh >= 16) {
      __ eor(as_FloatRegister($dst$$reg), __ T8B,
             as_FloatRegister($src$$reg),
             as_FloatRegister($src$$reg));
    } else {
      __ ushr(as_FloatRegister($dst$$reg), __ T4H,
             as_FloatRegister($src$$reg), -sh & 15);
    }
  %}
  ins_pipe(vshift64_imm);
%}
15313 
// Vector logical right shift by immediate, eight 16-bit lanes (128-bit).
// Counts >= 16 zero the result; see vsrl4S_imm.
instruct vsrl8S_imm(vecX dst, vecX src, immI shift) %{
  predicate(n->as_Vector()->length() == 8);
  match(Set dst (URShiftVS src shift));
  ins_cost(INSN_COST);
  format %{ "ushr    $dst, $src, $shift\t# vector (8H)" %}
  ins_encode %{
    int sh = (int)$shift$$constant & 31;
    if (sh >= 16) {
      __ eor(as_FloatRegister($dst$$reg), __ T16B,
             as_FloatRegister($src$$reg),
             as_FloatRegister($src$$reg));
    } else {
      __ ushr(as_FloatRegister($dst$$reg), __ T8H,
             as_FloatRegister($src$$reg), -sh & 15);
    }
  %}
  ins_pipe(vshift128_imm);
%}
15332 
// Vector left shift by variable count, two 32-bit lanes (64-bit).
instruct vsll2I(vecD dst, vecD src, vecD shift) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (LShiftVI src shift));
  ins_cost(INSN_COST);
  format %{ "sshl  $dst,$src,$shift\t# vector (2S)" %}
  ins_encode %{
    __ sshl(as_FloatRegister($dst$$reg), __ T2S,
            as_FloatRegister($src$$reg),
            as_FloatRegister($shift$$reg));
  %}
  ins_pipe(vshift64);
%}
15345 
// Vector left shift by variable count, four 32-bit lanes (128-bit).
instruct vsll4I(vecX dst, vecX src, vecX shift) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (LShiftVI src shift));
  ins_cost(INSN_COST);
  format %{ "sshl  $dst,$src,$shift\t# vector (4S)" %}
  ins_encode %{
    __ sshl(as_FloatRegister($dst$$reg), __ T4S,
            as_FloatRegister($src$$reg),
            as_FloatRegister($shift$$reg));
  %}
  ins_pipe(vshift128);
%}
15358 
// Vector arithmetic right shift by variable count, two 32-bit lanes
// (64-bit); sshl by the byte-wise negated count (see vsra4S note).
instruct vsra2I(vecD dst, vecD src, vecD shift, vecD tmp) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (RShiftVI src shift));
  ins_cost(INSN_COST);
  effect(TEMP tmp);
  format %{ "negr  $tmp,$shift\t"
            "sshl  $dst,$src,$tmp\t# vector (2S)" %}
  ins_encode %{
    __ negr(as_FloatRegister($tmp$$reg), __ T8B,
            as_FloatRegister($shift$$reg));
    __ sshl(as_FloatRegister($dst$$reg), __ T2S,
            as_FloatRegister($src$$reg),
            as_FloatRegister($tmp$$reg));
  %}
  ins_pipe(vshift64);
%}
15375 
// Vector arithmetic right shift by variable count, four 32-bit lanes
// (128-bit); sshl by the byte-wise negated count.
instruct vsra4I(vecX dst, vecX src, vecX shift, vecX tmp) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (RShiftVI src shift));
  ins_cost(INSN_COST);
  effect(TEMP tmp);
  format %{ "negr  $tmp,$shift\t"
            "sshl  $dst,$src,$tmp\t# vector (4S)" %}
  ins_encode %{
    __ negr(as_FloatRegister($tmp$$reg), __ T16B,
            as_FloatRegister($shift$$reg));
    __ sshl(as_FloatRegister($dst$$reg), __ T4S,
            as_FloatRegister($src$$reg),
            as_FloatRegister($tmp$$reg));
  %}
  ins_pipe(vshift128);
%}
15392 
// Vector logical right shift by variable count, two 32-bit lanes
// (64-bit); ushl by the byte-wise negated count.
instruct vsrl2I(vecD dst, vecD src, vecD shift, vecD tmp) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (URShiftVI src shift));
  ins_cost(INSN_COST);
  effect(TEMP tmp);
  format %{ "negr  $tmp,$shift\t"
            "ushl  $dst,$src,$tmp\t# vector (2S)" %}
  ins_encode %{
    __ negr(as_FloatRegister($tmp$$reg), __ T8B,
            as_FloatRegister($shift$$reg));
    __ ushl(as_FloatRegister($dst$$reg), __ T2S,
            as_FloatRegister($src$$reg),
            as_FloatRegister($tmp$$reg));
  %}
  ins_pipe(vshift64);
%}
15409 
// Vector logical right shift by variable count, four 32-bit lanes
// (128-bit); ushl by the byte-wise negated count.
instruct vsrl4I(vecX dst, vecX src, vecX shift, vecX tmp) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (URShiftVI src shift));
  ins_cost(INSN_COST);
  effect(TEMP tmp);
  format %{ "negr  $tmp,$shift\t"
            "ushl  $dst,$src,$tmp\t# vector (4S)" %}
  ins_encode %{
    __ negr(as_FloatRegister($tmp$$reg), __ T16B,
            as_FloatRegister($shift$$reg));
    __ ushl(as_FloatRegister($dst$$reg), __ T4S,
            as_FloatRegister($src$$reg),
            as_FloatRegister($tmp$$reg));
  %}
  ins_pipe(vshift128);
%}
15426 
// Vector left shift by immediate, two 32-bit lanes (64-bit).  Mask with
// 31 matches Java int shift semantics, and a masked count always fits the
// 32-bit lane, so no zeroing special case is needed here.
instruct vsll2I_imm(vecD dst, vecD src, immI shift) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (LShiftVI src shift));
  ins_cost(INSN_COST);
  format %{ "shl    $dst, $src, $shift\t# vector (2S)" %}
  ins_encode %{
    __ shl(as_FloatRegister($dst$$reg), __ T2S,
           as_FloatRegister($src$$reg),
           (int)$shift$$constant & 31);
  %}
  ins_pipe(vshift64_imm);
%}
15439 
// Vector left shift by immediate, four 32-bit lanes (128-bit).
instruct vsll4I_imm(vecX dst, vecX src, immI shift) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (LShiftVI src shift));
  ins_cost(INSN_COST);
  format %{ "shl    $dst, $src, $shift\t# vector (4S)" %}
  ins_encode %{
    __ shl(as_FloatRegister($dst$$reg), __ T4S,
           as_FloatRegister($src$$reg),
           (int)$shift$$constant & 31);
  %}
  ins_pipe(vshift128_imm);
%}
15452 
// Vector arithmetic right shift by immediate, two 32-bit lanes (64-bit).
// Shift passed to sshr pre-encoded as (-c & 31), same convention as the
// other *_imm right-shift rules.
instruct vsra2I_imm(vecD dst, vecD src, immI shift) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (RShiftVI src shift));
  ins_cost(INSN_COST);
  format %{ "sshr    $dst, $src, $shift\t# vector (2S)" %}
  ins_encode %{
    __ sshr(as_FloatRegister($dst$$reg), __ T2S,
            as_FloatRegister($src$$reg),
            -(int)$shift$$constant & 31);
  %}
  ins_pipe(vshift64_imm);
%}
15465 
// Vector arithmetic right shift by immediate, four 32-bit lanes
// (128-bit); shift pre-encoded as (-c & 31).
instruct vsra4I_imm(vecX dst, vecX src, immI shift) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (RShiftVI src shift));
  ins_cost(INSN_COST);
  format %{ "sshr    $dst, $src, $shift\t# vector (4S)" %}
  ins_encode %{
    __ sshr(as_FloatRegister($dst$$reg), __ T4S,
            as_FloatRegister($src$$reg),
            -(int)$shift$$constant & 31);
  %}
  ins_pipe(vshift128_imm);
%}
15478 
// Vector logical right shift by immediate, two 32-bit lanes (64-bit);
// shift pre-encoded as (-c & 31).
instruct vsrl2I_imm(vecD dst, vecD src, immI shift) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (URShiftVI src shift));
  ins_cost(INSN_COST);
  format %{ "ushr    $dst, $src, $shift\t# vector (2S)" %}
  ins_encode %{
    __ ushr(as_FloatRegister($dst$$reg), __ T2S,
            as_FloatRegister($src$$reg),
            -(int)$shift$$constant & 31);
  %}
  ins_pipe(vshift64_imm);
%}
15491 
// Vector logical right shift by immediate, four 32-bit lanes (128-bit);
// shift pre-encoded as (-c & 31).
instruct vsrl4I_imm(vecX dst, vecX src, immI shift) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (URShiftVI src shift));
  ins_cost(INSN_COST);
  format %{ "ushr    $dst, $src, $shift\t# vector (4S)" %}
  ins_encode %{
    __ ushr(as_FloatRegister($dst$$reg), __ T4S,
            as_FloatRegister($src$$reg),
            -(int)$shift$$constant & 31);
  %}
  ins_pipe(vshift128_imm);
%}
15504 
// Vector left shift by variable count, two 64-bit lanes (128-bit).
instruct vsll2L(vecX dst, vecX src, vecX shift) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (LShiftVL src shift));
  ins_cost(INSN_COST);
  format %{ "sshl  $dst,$src,$shift\t# vector (2D)" %}
  ins_encode %{
    __ sshl(as_FloatRegister($dst$$reg), __ T2D,
            as_FloatRegister($src$$reg),
            as_FloatRegister($shift$$reg));
  %}
  ins_pipe(vshift128);
%}
15517 
// Vector arithmetic right shift by variable count, two 64-bit lanes
// (128-bit); sshl by the byte-wise negated count (see vsra4S note).
instruct vsra2L(vecX dst, vecX src, vecX shift, vecX tmp) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (RShiftVL src shift));
  ins_cost(INSN_COST);
  effect(TEMP tmp);
  format %{ "negr  $tmp,$shift\t"
            "sshl  $dst,$src,$tmp\t# vector (2D)" %}
  ins_encode %{
    __ negr(as_FloatRegister($tmp$$reg), __ T16B,
            as_FloatRegister($shift$$reg));
    __ sshl(as_FloatRegister($dst$$reg), __ T2D,
            as_FloatRegister($src$$reg),
            as_FloatRegister($tmp$$reg));
  %}
  ins_pipe(vshift128);
%}
15534 
// Vector logical right shift by variable count, two 64-bit lanes
// (128-bit); ushl by the byte-wise negated count.
instruct vsrl2L(vecX dst, vecX src, vecX shift, vecX tmp) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (URShiftVL src shift));
  ins_cost(INSN_COST);
  effect(TEMP tmp);
  format %{ "negr  $tmp,$shift\t"
            "ushl  $dst,$src,$tmp\t# vector (2D)" %}
  ins_encode %{
    __ negr(as_FloatRegister($tmp$$reg), __ T16B,
            as_FloatRegister($shift$$reg));
    __ ushl(as_FloatRegister($dst$$reg), __ T2D,
            as_FloatRegister($src$$reg),
            as_FloatRegister($tmp$$reg));
  %}
  ins_pipe(vshift128);
%}
15551 
// Vector left shift by immediate, two 64-bit lanes (128-bit).  Mask with
// 63 matches Java long shift semantics.
instruct vsll2L_imm(vecX dst, vecX src, immI shift) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (LShiftVL src shift));
  ins_cost(INSN_COST);
  format %{ "shl    $dst, $src, $shift\t# vector (2D)" %}
  ins_encode %{
    __ shl(as_FloatRegister($dst$$reg), __ T2D,
           as_FloatRegister($src$$reg),
           (int)$shift$$constant & 63);
  %}
  ins_pipe(vshift128_imm);
%}
15564 
// Vector arithmetic right shift by immediate, two 64-bit lanes (128-bit);
// shift pre-encoded as (-c & 63).
instruct vsra2L_imm(vecX dst, vecX src, immI shift) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (RShiftVL src shift));
  ins_cost(INSN_COST);
  format %{ "sshr    $dst, $src, $shift\t# vector (2D)" %}
  ins_encode %{
    __ sshr(as_FloatRegister($dst$$reg), __ T2D,
            as_FloatRegister($src$$reg),
            -(int)$shift$$constant & 63);
  %}
  ins_pipe(vshift128_imm);
%}
15577 
// Vector logical right shift by immediate, two 64-bit lanes (128-bit);
// shift pre-encoded as (-c & 63).
instruct vsrl2L_imm(vecX dst, vecX src, immI shift) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (URShiftVL src shift));
  ins_cost(INSN_COST);
  format %{ "ushr    $dst, $src, $shift\t# vector (2D)" %}
  ins_encode %{
    __ ushr(as_FloatRegister($dst$$reg), __ T2D,
            as_FloatRegister($src$$reg),
            -(int)$shift$$constant & 63);
  %}
  ins_pipe(vshift128_imm);
%}
15590 
15591 //----------PEEPHOLE RULES-----------------------------------------------------
15592 // These must follow all instruction definitions as they use the names
15593 // defined in the instructions definitions.
15594 //
15595 // peepmatch ( root_instr_name [preceding_instruction]* );
15596 //
15597 // peepconstraint %{
15598 // (instruction_number.operand_name relational_op instruction_number.operand_name
15599 //  [, ...] );
15600 // // instruction numbers are zero-based using left to right order in peepmatch
15601 //
15602 // peepreplace ( instr_name  ( [instruction_number.operand_name]* ) );
15603 // // provide an instruction_number.operand_name for each operand that appears
15604 // // in the replacement instruction's match rule
15605 //
15606 // ---------VM FLAGS---------------------------------------------------------
15607 //
15608 // All peephole optimizations can be turned off using -XX:-OptoPeephole
15609 //
15610 // Each peephole rule is given an identifying number starting with zero and
15611 // increasing by one in the order seen by the parser.  An individual peephole
15612 // can be enabled, and all others disabled, by using -XX:OptoPeepholeAt=#
15613 // on the command-line.
15614 //
15615 // ---------CURRENT LIMITATIONS----------------------------------------------
15616 //
15617 // Only match adjacent instructions in same basic block
15618 // Only equality constraints
15619 // Only constraints between operands, not (0.dest_reg == RAX_enc)
15620 // Only one replacement instruction
15621 //
15622 // ---------EXAMPLE----------------------------------------------------------
15623 //
15624 // // pertinent parts of existing instructions in architecture description
15625 // instruct movI(iRegINoSp dst, iRegI src)
15626 // %{
15627 //   match(Set dst (CopyI src));
15628 // %}
15629 //
15630 // instruct incI_iReg(iRegINoSp dst, immI1 src, rFlagsReg cr)
15631 // %{
15632 //   match(Set dst (AddI dst src));
15633 //   effect(KILL cr);
15634 // %}
15635 //
15636 // // Change (inc mov) to lea
15637 // peephole %{
//   // increment preceded by register-register move
15639 //   peepmatch ( incI_iReg movI );
15640 //   // require that the destination register of the increment
15641 //   // match the destination register of the move
15642 //   peepconstraint ( 0.dst == 1.dst );
15643 //   // construct a replacement instruction that sets
15644 //   // the destination to ( move's source register + one )
15645 //   peepreplace ( leaI_iReg_immI( 0.dst 1.src 0.src ) );
15646 // %}
15647 //
15648 
15649 // Implementation no longer uses movX instructions since
15650 // machine-independent system no longer uses CopyX nodes.
15651 //
15652 // peephole
15653 // %{
15654 //   peepmatch (incI_iReg movI);
15655 //   peepconstraint (0.dst == 1.dst);
15656 //   peepreplace (leaI_iReg_immI(0.dst 1.src 0.src));
15657 // %}
15658 
15659 // peephole
15660 // %{
15661 //   peepmatch (decI_iReg movI);
15662 //   peepconstraint (0.dst == 1.dst);
15663 //   peepreplace (leaI_iReg_immI(0.dst 1.src 0.src));
15664 // %}
15665 
15666 // peephole
15667 // %{
15668 //   peepmatch (addI_iReg_imm movI);
15669 //   peepconstraint (0.dst == 1.dst);
15670 //   peepreplace (leaI_iReg_immI(0.dst 1.src 0.src));
15671 // %}
15672 
15673 // peephole
15674 // %{
15675 //   peepmatch (incL_iReg movL);
15676 //   peepconstraint (0.dst == 1.dst);
15677 //   peepreplace (leaL_iReg_immL(0.dst 1.src 0.src));
15678 // %}
15679 
15680 // peephole
15681 // %{
15682 //   peepmatch (decL_iReg movL);
15683 //   peepconstraint (0.dst == 1.dst);
15684 //   peepreplace (leaL_iReg_immL(0.dst 1.src 0.src));
15685 // %}
15686 
15687 // peephole
15688 // %{
15689 //   peepmatch (addL_iReg_imm movL);
15690 //   peepconstraint (0.dst == 1.dst);
15691 //   peepreplace (leaL_iReg_immL(0.dst 1.src 0.src));
15692 // %}
15693 
15694 // peephole
15695 // %{
15696 //   peepmatch (addP_iReg_imm movP);
15697 //   peepconstraint (0.dst == 1.dst);
15698 //   peepreplace (leaP_iReg_imm(0.dst 1.src 0.src));
15699 // %}
15700 
15701 // // Change load of spilled value to only a spill
15702 // instruct storeI(memory mem, iRegI src)
15703 // %{
15704 //   match(Set mem (StoreI mem src));
15705 // %}
15706 //
15707 // instruct loadI(iRegINoSp dst, memory mem)
15708 // %{
15709 //   match(Set dst (LoadI mem));
15710 // %}
15711 //
15712 
15713 //----------SMARTSPILL RULES---------------------------------------------------
15714 // These must follow all instruction definitions as they use the names
// defined in the instruction definitions.
15716 
15717 // Local Variables:
15718 // mode: c++
15719 // End: