1 //
   2 // Copyright (c) 2003, 2019, Oracle and/or its affiliates. All rights reserved.
   3 // Copyright (c) 2014, 2019, Red Hat Inc.
   4 // All rights reserved.
   5 // DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   6 //
   7 // This code is free software; you can redistribute it and/or modify it
   8 // under the terms of the GNU General Public License version 2 only, as
   9 // published by the Free Software Foundation.
  10 //
  11 // This code is distributed in the hope that it will be useful, but WITHOUT
  12 // ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  13 // FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  14 // version 2 for more details (a copy is included in the LICENSE file that
  15 // accompanied this code).
  16 //
  17 // You should have received a copy of the GNU General Public License version
  18 // 2 along with this work; if not, write to the Free Software Foundation,
  19 // Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  20 //
  21 // Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  22 // or visit www.oracle.com if you need additional information or have any
  23 // questions.
  24 //
  25 //
  26 
  27 // AArch64 Architecture Description File
  28 
  29 //----------REGISTER DEFINITION BLOCK------------------------------------------
  30 // This information is used by the matcher and the register allocator to
  31 // describe individual registers and classes of registers within the target
// architecture.
  33 
  34 register %{
  35 //----------Architecture Description Register Definitions----------------------
  36 // General Registers
  37 // "reg_def"  name ( register save type, C convention save type,
  38 //                   ideal register type, encoding );
  39 // Register Save Types:
  40 //
  41 // NS  = No-Save:       The register allocator assumes that these registers
  42 //                      can be used without saving upon entry to the method, &
  43 //                      that they do not need to be saved at call sites.
  44 //
  45 // SOC = Save-On-Call:  The register allocator assumes that these registers
  46 //                      can be used without saving upon entry to the method,
  47 //                      but that they must be saved at call sites.
  48 //
  49 // SOE = Save-On-Entry: The register allocator assumes that these registers
  50 //                      must be saved before using them upon entry to the
  51 //                      method, but they do not need to be saved at call
  52 //                      sites.
  53 //
  54 // AS  = Always-Save:   The register allocator assumes that these registers
  55 //                      must be saved before using them upon entry to the
  56 //                      method, & that they must be saved at call sites.
  57 //
  58 // Ideal Register Type is used to determine how to save & restore a
  59 // register.  Op_RegI will get spilled with LoadI/StoreI, Op_RegP will get
  60 // spilled with LoadP/StoreP.  If the register supports both, use Op_RegI.
  61 //
  62 // The encoding number is the actual bit-pattern placed into the opcodes.
  63 
  64 // We must define the 64 bit int registers in two 32 bit halves, the
  65 // real lower register and a virtual upper half register. upper halves
  66 // are used by the register allocator but are not actually supplied as
  67 // operands to memory ops.
  68 //
  69 // follow the C1 compiler in making registers
  70 //
  71 //   r0-r7,r10-r26 volatile (caller save)
  72 //   r27-r32 system (no save, no allocate)
  73 //   r8-r9 invisible to the allocator (so we can use them as scratch regs)
  74 //
  75 // as regards Java usage. we don't use any callee save registers
  76 // because this makes it difficult to de-optimise a frame (see comment
  77 // in x86 implementation of Deoptimization::unwind_callee_save_values)
  78 //
  79 
  80 // General Registers
  81 
// Integer argument registers r0-r7: volatile (SOC) for both the Java
// and C conventions; each 64-bit register is described as a real low
// half plus a virtual high half (see comment above).
reg_def R0      ( SOC, SOC, Op_RegI,  0, r0->as_VMReg()         );
reg_def R0_H    ( SOC, SOC, Op_RegI,  0, r0->as_VMReg()->next() );
reg_def R1      ( SOC, SOC, Op_RegI,  1, r1->as_VMReg()         );
reg_def R1_H    ( SOC, SOC, Op_RegI,  1, r1->as_VMReg()->next() );
reg_def R2      ( SOC, SOC, Op_RegI,  2, r2->as_VMReg()         );
reg_def R2_H    ( SOC, SOC, Op_RegI,  2, r2->as_VMReg()->next() );
reg_def R3      ( SOC, SOC, Op_RegI,  3, r3->as_VMReg()         );
reg_def R3_H    ( SOC, SOC, Op_RegI,  3, r3->as_VMReg()->next() );
reg_def R4      ( SOC, SOC, Op_RegI,  4, r4->as_VMReg()         );
reg_def R4_H    ( SOC, SOC, Op_RegI,  4, r4->as_VMReg()->next() );
reg_def R5      ( SOC, SOC, Op_RegI,  5, r5->as_VMReg()         );
reg_def R5_H    ( SOC, SOC, Op_RegI,  5, r5->as_VMReg()->next() );
reg_def R6      ( SOC, SOC, Op_RegI,  6, r6->as_VMReg()         );
reg_def R6_H    ( SOC, SOC, Op_RegI,  6, r6->as_VMReg()->next() );
reg_def R7      ( SOC, SOC, Op_RegI,  7, r7->as_VMReg()         );
reg_def R7_H    ( SOC, SOC, Op_RegI,  7, r7->as_VMReg()->next() );
// r8 and r9 are deliberately not defined here: they are kept
// invisible to the register allocator for use as scratch registers
// (see comment above).
reg_def R10     ( SOC, SOC, Op_RegI, 10, r10->as_VMReg()        );
reg_def R10_H   ( SOC, SOC, Op_RegI, 10, r10->as_VMReg()->next());
reg_def R11     ( SOC, SOC, Op_RegI, 11, r11->as_VMReg()        );
reg_def R11_H   ( SOC, SOC, Op_RegI, 11, r11->as_VMReg()->next());
reg_def R12     ( SOC, SOC, Op_RegI, 12, r12->as_VMReg()        );
reg_def R12_H   ( SOC, SOC, Op_RegI, 12, r12->as_VMReg()->next());
reg_def R13     ( SOC, SOC, Op_RegI, 13, r13->as_VMReg()        );
reg_def R13_H   ( SOC, SOC, Op_RegI, 13, r13->as_VMReg()->next());
reg_def R14     ( SOC, SOC, Op_RegI, 14, r14->as_VMReg()        );
reg_def R14_H   ( SOC, SOC, Op_RegI, 14, r14->as_VMReg()->next());
reg_def R15     ( SOC, SOC, Op_RegI, 15, r15->as_VMReg()        );
reg_def R15_H   ( SOC, SOC, Op_RegI, 15, r15->as_VMReg()->next());
reg_def R16     ( SOC, SOC, Op_RegI, 16, r16->as_VMReg()        );
reg_def R16_H   ( SOC, SOC, Op_RegI, 16, r16->as_VMReg()->next());
reg_def R17     ( SOC, SOC, Op_RegI, 17, r17->as_VMReg()        );
reg_def R17_H   ( SOC, SOC, Op_RegI, 17, r17->as_VMReg()->next());
reg_def R18     ( SOC, SOC, Op_RegI, 18, r18->as_VMReg()        );
reg_def R18_H   ( SOC, SOC, Op_RegI, 18, r18->as_VMReg()->next());
// r19-r26: save-on-entry (SOE) under the C convention but treated as
// save-on-call (SOC) for Java use -- we use no Java callee-saves
// (see comment above).
reg_def R19     ( SOC, SOE, Op_RegI, 19, r19->as_VMReg()        );
reg_def R19_H   ( SOC, SOE, Op_RegI, 19, r19->as_VMReg()->next());
reg_def R20     ( SOC, SOE, Op_RegI, 20, r20->as_VMReg()        ); // caller esp
reg_def R20_H   ( SOC, SOE, Op_RegI, 20, r20->as_VMReg()->next());
reg_def R21     ( SOC, SOE, Op_RegI, 21, r21->as_VMReg()        );
reg_def R21_H   ( SOC, SOE, Op_RegI, 21, r21->as_VMReg()->next());
reg_def R22     ( SOC, SOE, Op_RegI, 22, r22->as_VMReg()        );
reg_def R22_H   ( SOC, SOE, Op_RegI, 22, r22->as_VMReg()->next());
reg_def R23     ( SOC, SOE, Op_RegI, 23, r23->as_VMReg()        );
reg_def R23_H   ( SOC, SOE, Op_RegI, 23, r23->as_VMReg()->next());
reg_def R24     ( SOC, SOE, Op_RegI, 24, r24->as_VMReg()        );
reg_def R24_H   ( SOC, SOE, Op_RegI, 24, r24->as_VMReg()->next());
reg_def R25     ( SOC, SOE, Op_RegI, 25, r25->as_VMReg()        );
reg_def R25_H   ( SOC, SOE, Op_RegI, 25, r25->as_VMReg()->next());
reg_def R26     ( SOC, SOE, Op_RegI, 26, r26->as_VMReg()        );
reg_def R26_H   ( SOC, SOE, Op_RegI, 26, r26->as_VMReg()->next());
// r27-r31: no-save (NS) for Java -- never allocated (see alloc_class
// chunk0 below, where they are listed as non-allocatable).
reg_def R27     (  NS, SOE, Op_RegI, 27, r27->as_VMReg()        ); // heapbase
reg_def R27_H   (  NS, SOE, Op_RegI, 27, r27->as_VMReg()->next());
reg_def R28     (  NS, SOE, Op_RegI, 28, r28->as_VMReg()        ); // thread
reg_def R28_H   (  NS, SOE, Op_RegI, 28, r28->as_VMReg()->next());
reg_def R29     (  NS,  NS, Op_RegI, 29, r29->as_VMReg()        ); // fp
reg_def R29_H   (  NS,  NS, Op_RegI, 29, r29->as_VMReg()->next());
reg_def R30     (  NS,  NS, Op_RegI, 30, r30->as_VMReg()        ); // lr
reg_def R30_H   (  NS,  NS, Op_RegI, 30, r30->as_VMReg()->next());
reg_def R31     (  NS,  NS, Op_RegI, 31, r31_sp->as_VMReg()     ); // sp
reg_def R31_H   (  NS,  NS, Op_RegI, 31, r31_sp->as_VMReg()->next());
 142 
 143 // ----------------------------
 144 // Float/Double Registers
 145 // ----------------------------
 146 
 147 // Double Registers
 148 
 149 // The rules of ADL require that double registers be defined in pairs.
 150 // Each pair must be two 32-bit values, but not necessarily a pair of
 151 // single float registers. In each pair, ADLC-assigned register numbers
 152 // must be adjacent, with the lower number even. Finally, when the
 153 // CPU stores such a register pair to memory, the word associated with
 154 // the lower ADLC-assigned number must be stored to the lower address.
 155 
 156 // AArch64 has 32 floating-point registers. Each can store a vector of
 157 // single or double precision floating-point values up to 8 * 32
 158 // floats, 4 * 64 bit floats or 2 * 128 bit floats.  We currently only
 159 // use the first float or double element of the vector.
 160 
// for Java use, float registers v0-v15 are always save on call (whereas
// the platform ABI treats v8-v15 as callee save). float registers
// v16-v31 are SOC as per the platform spec
 164 
  // Floating-point/SIMD registers v0-v31: each 128-bit register is
  // described by four 32-bit ADLC slots (Vn, Vn_H, Vn_J, Vn_K,
  // mapped via ->next(), ->next(2), ->next(3)). All are SOC for
  // Java use even though the platform ABI treats v8-v15 as
  // callee-save (see comment above).
  reg_def V0   ( SOC, SOC, Op_RegF,  0, v0->as_VMReg()          );
  reg_def V0_H ( SOC, SOC, Op_RegF,  0, v0->as_VMReg()->next()  );
  reg_def V0_J ( SOC, SOC, Op_RegF,  0, v0->as_VMReg()->next(2) );
  reg_def V0_K ( SOC, SOC, Op_RegF,  0, v0->as_VMReg()->next(3) );

  reg_def V1   ( SOC, SOC, Op_RegF,  1, v1->as_VMReg()          );
  reg_def V1_H ( SOC, SOC, Op_RegF,  1, v1->as_VMReg()->next()  );
  reg_def V1_J ( SOC, SOC, Op_RegF,  1, v1->as_VMReg()->next(2) );
  reg_def V1_K ( SOC, SOC, Op_RegF,  1, v1->as_VMReg()->next(3) );

  reg_def V2   ( SOC, SOC, Op_RegF,  2, v2->as_VMReg()          );
  reg_def V2_H ( SOC, SOC, Op_RegF,  2, v2->as_VMReg()->next()  );
  reg_def V2_J ( SOC, SOC, Op_RegF,  2, v2->as_VMReg()->next(2) );
  reg_def V2_K ( SOC, SOC, Op_RegF,  2, v2->as_VMReg()->next(3) );

  reg_def V3   ( SOC, SOC, Op_RegF,  3, v3->as_VMReg()          );
  reg_def V3_H ( SOC, SOC, Op_RegF,  3, v3->as_VMReg()->next()  );
  reg_def V3_J ( SOC, SOC, Op_RegF,  3, v3->as_VMReg()->next(2) );
  reg_def V3_K ( SOC, SOC, Op_RegF,  3, v3->as_VMReg()->next(3) );

  reg_def V4   ( SOC, SOC, Op_RegF,  4, v4->as_VMReg()          );
  reg_def V4_H ( SOC, SOC, Op_RegF,  4, v4->as_VMReg()->next()  );
  reg_def V4_J ( SOC, SOC, Op_RegF,  4, v4->as_VMReg()->next(2) );
  reg_def V4_K ( SOC, SOC, Op_RegF,  4, v4->as_VMReg()->next(3) );

  reg_def V5   ( SOC, SOC, Op_RegF,  5, v5->as_VMReg()          );
  reg_def V5_H ( SOC, SOC, Op_RegF,  5, v5->as_VMReg()->next()  );
  reg_def V5_J ( SOC, SOC, Op_RegF,  5, v5->as_VMReg()->next(2) );
  reg_def V5_K ( SOC, SOC, Op_RegF,  5, v5->as_VMReg()->next(3) );

  reg_def V6   ( SOC, SOC, Op_RegF,  6, v6->as_VMReg()          );
  reg_def V6_H ( SOC, SOC, Op_RegF,  6, v6->as_VMReg()->next()  );
  reg_def V6_J ( SOC, SOC, Op_RegF,  6, v6->as_VMReg()->next(2) );
  reg_def V6_K ( SOC, SOC, Op_RegF,  6, v6->as_VMReg()->next(3) );

  reg_def V7   ( SOC, SOC, Op_RegF,  7, v7->as_VMReg()          );
  reg_def V7_H ( SOC, SOC, Op_RegF,  7, v7->as_VMReg()->next()  );
  reg_def V7_J ( SOC, SOC, Op_RegF,  7, v7->as_VMReg()->next(2) );
  reg_def V7_K ( SOC, SOC, Op_RegF,  7, v7->as_VMReg()->next(3) );

  reg_def V8   ( SOC, SOC, Op_RegF,  8, v8->as_VMReg()          );
  reg_def V8_H ( SOC, SOC, Op_RegF,  8, v8->as_VMReg()->next()  );
  reg_def V8_J ( SOC, SOC, Op_RegF,  8, v8->as_VMReg()->next(2) );
  reg_def V8_K ( SOC, SOC, Op_RegF,  8, v8->as_VMReg()->next(3) );

  reg_def V9   ( SOC, SOC, Op_RegF,  9, v9->as_VMReg()          );
  reg_def V9_H ( SOC, SOC, Op_RegF,  9, v9->as_VMReg()->next()  );
  reg_def V9_J ( SOC, SOC, Op_RegF,  9, v9->as_VMReg()->next(2) );
  reg_def V9_K ( SOC, SOC, Op_RegF,  9, v9->as_VMReg()->next(3) );

  reg_def V10  ( SOC, SOC, Op_RegF, 10, v10->as_VMReg()         );
  reg_def V10_H( SOC, SOC, Op_RegF, 10, v10->as_VMReg()->next() );
  reg_def V10_J( SOC, SOC, Op_RegF, 10, v10->as_VMReg()->next(2));
  reg_def V10_K( SOC, SOC, Op_RegF, 10, v10->as_VMReg()->next(3));

  reg_def V11  ( SOC, SOC, Op_RegF, 11, v11->as_VMReg()         );
  reg_def V11_H( SOC, SOC, Op_RegF, 11, v11->as_VMReg()->next() );
  reg_def V11_J( SOC, SOC, Op_RegF, 11, v11->as_VMReg()->next(2));
  reg_def V11_K( SOC, SOC, Op_RegF, 11, v11->as_VMReg()->next(3));

  reg_def V12  ( SOC, SOC, Op_RegF, 12, v12->as_VMReg()         );
  reg_def V12_H( SOC, SOC, Op_RegF, 12, v12->as_VMReg()->next() );
  reg_def V12_J( SOC, SOC, Op_RegF, 12, v12->as_VMReg()->next(2));
  reg_def V12_K( SOC, SOC, Op_RegF, 12, v12->as_VMReg()->next(3));

  reg_def V13  ( SOC, SOC, Op_RegF, 13, v13->as_VMReg()         );
  reg_def V13_H( SOC, SOC, Op_RegF, 13, v13->as_VMReg()->next() );
  reg_def V13_J( SOC, SOC, Op_RegF, 13, v13->as_VMReg()->next(2));
  reg_def V13_K( SOC, SOC, Op_RegF, 13, v13->as_VMReg()->next(3));

  reg_def V14  ( SOC, SOC, Op_RegF, 14, v14->as_VMReg()         );
  reg_def V14_H( SOC, SOC, Op_RegF, 14, v14->as_VMReg()->next() );
  reg_def V14_J( SOC, SOC, Op_RegF, 14, v14->as_VMReg()->next(2));
  reg_def V14_K( SOC, SOC, Op_RegF, 14, v14->as_VMReg()->next(3));

  reg_def V15  ( SOC, SOC, Op_RegF, 15, v15->as_VMReg()         );
  reg_def V15_H( SOC, SOC, Op_RegF, 15, v15->as_VMReg()->next() );
  reg_def V15_J( SOC, SOC, Op_RegF, 15, v15->as_VMReg()->next(2));
  reg_def V15_K( SOC, SOC, Op_RegF, 15, v15->as_VMReg()->next(3));

  reg_def V16  ( SOC, SOC, Op_RegF, 16, v16->as_VMReg()         );
  reg_def V16_H( SOC, SOC, Op_RegF, 16, v16->as_VMReg()->next() );
  reg_def V16_J( SOC, SOC, Op_RegF, 16, v16->as_VMReg()->next(2));
  reg_def V16_K( SOC, SOC, Op_RegF, 16, v16->as_VMReg()->next(3));

  reg_def V17  ( SOC, SOC, Op_RegF, 17, v17->as_VMReg()         );
  reg_def V17_H( SOC, SOC, Op_RegF, 17, v17->as_VMReg()->next() );
  reg_def V17_J( SOC, SOC, Op_RegF, 17, v17->as_VMReg()->next(2));
  reg_def V17_K( SOC, SOC, Op_RegF, 17, v17->as_VMReg()->next(3));

  reg_def V18  ( SOC, SOC, Op_RegF, 18, v18->as_VMReg()         );
  reg_def V18_H( SOC, SOC, Op_RegF, 18, v18->as_VMReg()->next() );
  reg_def V18_J( SOC, SOC, Op_RegF, 18, v18->as_VMReg()->next(2));
  reg_def V18_K( SOC, SOC, Op_RegF, 18, v18->as_VMReg()->next(3));

  reg_def V19  ( SOC, SOC, Op_RegF, 19, v19->as_VMReg()         );
  reg_def V19_H( SOC, SOC, Op_RegF, 19, v19->as_VMReg()->next() );
  reg_def V19_J( SOC, SOC, Op_RegF, 19, v19->as_VMReg()->next(2));
  reg_def V19_K( SOC, SOC, Op_RegF, 19, v19->as_VMReg()->next(3));

  reg_def V20  ( SOC, SOC, Op_RegF, 20, v20->as_VMReg()         );
  reg_def V20_H( SOC, SOC, Op_RegF, 20, v20->as_VMReg()->next() );
  reg_def V20_J( SOC, SOC, Op_RegF, 20, v20->as_VMReg()->next(2));
  reg_def V20_K( SOC, SOC, Op_RegF, 20, v20->as_VMReg()->next(3));

  reg_def V21  ( SOC, SOC, Op_RegF, 21, v21->as_VMReg()         );
  reg_def V21_H( SOC, SOC, Op_RegF, 21, v21->as_VMReg()->next() );
  reg_def V21_J( SOC, SOC, Op_RegF, 21, v21->as_VMReg()->next(2));
  reg_def V21_K( SOC, SOC, Op_RegF, 21, v21->as_VMReg()->next(3));

  reg_def V22  ( SOC, SOC, Op_RegF, 22, v22->as_VMReg()         );
  reg_def V22_H( SOC, SOC, Op_RegF, 22, v22->as_VMReg()->next() );
  reg_def V22_J( SOC, SOC, Op_RegF, 22, v22->as_VMReg()->next(2));
  reg_def V22_K( SOC, SOC, Op_RegF, 22, v22->as_VMReg()->next(3));

  reg_def V23  ( SOC, SOC, Op_RegF, 23, v23->as_VMReg()         );
  reg_def V23_H( SOC, SOC, Op_RegF, 23, v23->as_VMReg()->next() );
  reg_def V23_J( SOC, SOC, Op_RegF, 23, v23->as_VMReg()->next(2));
  reg_def V23_K( SOC, SOC, Op_RegF, 23, v23->as_VMReg()->next(3));

  reg_def V24  ( SOC, SOC, Op_RegF, 24, v24->as_VMReg()         );
  reg_def V24_H( SOC, SOC, Op_RegF, 24, v24->as_VMReg()->next() );
  reg_def V24_J( SOC, SOC, Op_RegF, 24, v24->as_VMReg()->next(2));
  reg_def V24_K( SOC, SOC, Op_RegF, 24, v24->as_VMReg()->next(3));

  reg_def V25  ( SOC, SOC, Op_RegF, 25, v25->as_VMReg()         );
  reg_def V25_H( SOC, SOC, Op_RegF, 25, v25->as_VMReg()->next() );
  reg_def V25_J( SOC, SOC, Op_RegF, 25, v25->as_VMReg()->next(2));
  reg_def V25_K( SOC, SOC, Op_RegF, 25, v25->as_VMReg()->next(3));

  reg_def V26  ( SOC, SOC, Op_RegF, 26, v26->as_VMReg()         );
  reg_def V26_H( SOC, SOC, Op_RegF, 26, v26->as_VMReg()->next() );
  reg_def V26_J( SOC, SOC, Op_RegF, 26, v26->as_VMReg()->next(2));
  reg_def V26_K( SOC, SOC, Op_RegF, 26, v26->as_VMReg()->next(3));

  reg_def V27  ( SOC, SOC, Op_RegF, 27, v27->as_VMReg()         );
  reg_def V27_H( SOC, SOC, Op_RegF, 27, v27->as_VMReg()->next() );
  reg_def V27_J( SOC, SOC, Op_RegF, 27, v27->as_VMReg()->next(2));
  reg_def V27_K( SOC, SOC, Op_RegF, 27, v27->as_VMReg()->next(3));

  reg_def V28  ( SOC, SOC, Op_RegF, 28, v28->as_VMReg()         );
  reg_def V28_H( SOC, SOC, Op_RegF, 28, v28->as_VMReg()->next() );
  reg_def V28_J( SOC, SOC, Op_RegF, 28, v28->as_VMReg()->next(2));
  reg_def V28_K( SOC, SOC, Op_RegF, 28, v28->as_VMReg()->next(3));

  reg_def V29  ( SOC, SOC, Op_RegF, 29, v29->as_VMReg()         );
  reg_def V29_H( SOC, SOC, Op_RegF, 29, v29->as_VMReg()->next() );
  reg_def V29_J( SOC, SOC, Op_RegF, 29, v29->as_VMReg()->next(2));
  reg_def V29_K( SOC, SOC, Op_RegF, 29, v29->as_VMReg()->next(3));

  reg_def V30  ( SOC, SOC, Op_RegF, 30, v30->as_VMReg()         );
  reg_def V30_H( SOC, SOC, Op_RegF, 30, v30->as_VMReg()->next() );
  reg_def V30_J( SOC, SOC, Op_RegF, 30, v30->as_VMReg()->next(2));
  reg_def V30_K( SOC, SOC, Op_RegF, 30, v30->as_VMReg()->next(3));

  reg_def V31  ( SOC, SOC, Op_RegF, 31, v31->as_VMReg()         );
  reg_def V31_H( SOC, SOC, Op_RegF, 31, v31->as_VMReg()->next() );
  reg_def V31_J( SOC, SOC, Op_RegF, 31, v31->as_VMReg()->next(2));
  reg_def V31_K( SOC, SOC, Op_RegF, 31, v31->as_VMReg()->next(3));
 324 
 325 // ----------------------------
 326 // Special Registers
 327 // ----------------------------
 328 
// the AArch64 CPSR status flag register is not directly accessible as
// an instruction operand. the FPSR status flag register is a system
// register which can be written/read using MSR/MRS but again does not
// appear as an operand (a code identifying the FPSR occurs as an
// immediate value in the instruction).
 334 
 335 reg_def RFLAGS(SOC, SOC, 0, 32, VMRegImpl::Bad());
 336 
 337 
 338 // Specify priority of register selection within phases of register
 339 // allocation.  Highest priority is first.  A useful heuristic is to
 340 // give registers a low priority when they are required by machine
 341 // instructions, like EAX and EDX on I486, and choose no-save registers
 342 // before save-on-call, & save-on-call before save-on-entry.  Registers
 343 // which participate in fixed calling sequences should come last.
 344 // Registers which are used as pairs must fall on an even boundary.
 345 
// Allocation priority for the general registers: freely-usable
// volatiles first, then the argument registers, then the (Java-SOC)
// non-volatiles, and the non-allocatable special registers last.
alloc_class chunk0(
    // volatiles
    R10, R10_H,
    R11, R11_H,
    R12, R12_H,
    R13, R13_H,
    R14, R14_H,
    R15, R15_H,
    R16, R16_H,
    R17, R17_H,
    R18, R18_H,

    // arg registers
    R0, R0_H,
    R1, R1_H,
    R2, R2_H,
    R3, R3_H,
    R4, R4_H,
    R5, R5_H,
    R6, R6_H,
    R7, R7_H,

    // non-volatiles
    R19, R19_H,
    R20, R20_H,
    R21, R21_H,
    R22, R22_H,
    R23, R23_H,
    R24, R24_H,
    R25, R25_H,
    R26, R26_H,

    // non-allocatable registers

    R27, R27_H, // heapbase
    R28, R28_H, // thread
    R29, R29_H, // fp
    R30, R30_H, // lr
    R31, R31_H, // sp
);
 386 
// Allocation priority for the FP/SIMD registers: v16-v31 (SOC per the
// platform spec) first, then argument registers v0-v7, then v8-v15
// (ABI callee-saves, but SOC for Java use -- see comment above).
alloc_class chunk1(

    // no save
    V16, V16_H, V16_J, V16_K,
    V17, V17_H, V17_J, V17_K,
    V18, V18_H, V18_J, V18_K,
    V19, V19_H, V19_J, V19_K,
    V20, V20_H, V20_J, V20_K,
    V21, V21_H, V21_J, V21_K,
    V22, V22_H, V22_J, V22_K,
    V23, V23_H, V23_J, V23_K,
    V24, V24_H, V24_J, V24_K,
    V25, V25_H, V25_J, V25_K,
    V26, V26_H, V26_J, V26_K,
    V27, V27_H, V27_J, V27_K,
    V28, V28_H, V28_J, V28_K,
    V29, V29_H, V29_J, V29_K,
    V30, V30_H, V30_J, V30_K,
    V31, V31_H, V31_J, V31_K,

    // arg registers
    V0, V0_H, V0_J, V0_K,
    V1, V1_H, V1_J, V1_K,
    V2, V2_H, V2_J, V2_K,
    V3, V3_H, V3_J, V3_K,
    V4, V4_H, V4_J, V4_K,
    V5, V5_H, V5_J, V5_K,
    V6, V6_H, V6_J, V6_K,
    V7, V7_H, V7_J, V7_K,

    // non-volatiles
    V8, V8_H, V8_J, V8_K,
    V9, V9_H, V9_J, V9_K,
    V10, V10_H, V10_J, V10_K,
    V11, V11_H, V11_J, V11_K,
    V12, V12_H, V12_J, V12_K,
    V13, V13_H, V13_J, V13_K,
    V14, V14_H, V14_J, V14_K,
    V15, V15_H, V15_J, V15_K,
);
 427 
 428 alloc_class chunk2(RFLAGS);
 429 
 430 //----------Architecture Description Register Classes--------------------------
 431 // Several register classes are automatically defined based upon information in
 432 // this architecture description.
 433 // 1) reg_class inline_cache_reg           ( /* as def'd in frame section */ )
 434 // 2) reg_class compiler_method_oop_reg    ( /* as def'd in frame section */ )
// 3) reg_class interpreter_method_oop_reg ( /* as def'd in frame section */ )
// 4) reg_class stack_slots( /* one chunk of stack-based "registers" */ )
 437 //
 438 
 439 // Class for all 32 bit integer registers -- excludes SP which will
 440 // never be used as an integer register
reg_class any_reg32(
    R0,
    R1,
    R2,
    R3,
    R4,
    R5,
    R6,
    R7,
    R10,
    R11,
    R12,
    R13,
    R14,
    R15,
    R16,
    R17,
    R18,
    R19,
    R20,
    R21,
    R22,
    R23,
    R24,
    R25,
    R26,
    R27,                        // heapbase
    R28,                        // thread
    R29,                        // fp
    R30                         // lr; R31 (sp) is excluded -- see above
);
 472 
// Singleton classes: each contains exactly one register, so an
// operand declared with one of these is pinned to that register.

// Singleton class for R0 int register
reg_class int_r0_reg(R0);

// Singleton class for R2 int register
reg_class int_r2_reg(R2);

// Singleton class for R3 int register
reg_class int_r3_reg(R3);

// Singleton class for R4 int register
reg_class int_r4_reg(R4);
 484 
 485 // Class for all long integer registers (including RSP)
reg_class any_reg(
    R0, R0_H,
    R1, R1_H,
    R2, R2_H,
    R3, R3_H,
    R4, R4_H,
    R5, R5_H,
    R6, R6_H,
    R7, R7_H,
    R10, R10_H,
    R11, R11_H,
    R12, R12_H,
    R13, R13_H,
    R14, R14_H,
    R15, R15_H,
    R16, R16_H,
    R17, R17_H,
    R18, R18_H,
    R19, R19_H,
    R20, R20_H,
    R21, R21_H,
    R22, R22_H,
    R23, R23_H,
    R24, R24_H,
    R25, R25_H,
    R26, R26_H,
    R27, R27_H,                 // heapbase
    R28, R28_H,                 // thread
    R29, R29_H,                 // fp
    R30, R30_H,                 // lr
    R31, R31_H                  // sp
);
 518 
 519 // Class for all non-special integer registers
// (r8/r9 are never defined to the allocator at all, so they appear in
// no register class -- see comment at the top of this file)
reg_class no_special_reg32(
    R0,
    R1,
    R2,
    R3,
    R4,
    R5,
    R6,
    R7,
    R10,
    R11,
    R12,                        // rmethod
    R13,
    R14,
    R15,
    R16,
    R17,
    R18,
    R19,
    R20,
    R21,
    R22,
    R23,
    R24,
    R25,
    R26
 /* R27, */                     // heapbase
 /* R28, */                     // thread
 /* R29, */                     // fp
 /* R30, */                     // lr
 /* R31 */                      // sp
);
 552 
 553 // Class for all non-special long integer registers
// 64-bit pair version of no_special_reg32: same registers, same
// exclusions (heapbase, thread, fp, lr, sp commented out below).
reg_class no_special_reg(
    R0, R0_H,
    R1, R1_H,
    R2, R2_H,
    R3, R3_H,
    R4, R4_H,
    R5, R5_H,
    R6, R6_H,
    R7, R7_H,
    R10, R10_H,
    R11, R11_H,
    R12, R12_H,                 // rmethod
    R13, R13_H,
    R14, R14_H,
    R15, R15_H,
    R16, R16_H,
    R17, R17_H,
    R18, R18_H,
    R19, R19_H,
    R20, R20_H,
    R21, R21_H,
    R22, R22_H,
    R23, R23_H,
    R24, R24_H,
    R25, R25_H,
    R26, R26_H,
 /* R27, R27_H, */              // heapbase
 /* R28, R28_H, */              // thread
 /* R29, R29_H, */              // fp
 /* R30, R30_H, */              // lr
 /* R31, R31_H */               // sp
);
 586 
// Fixed 64-bit register classes: each pairs a real low half with its
// virtual high half (see comment at the top of this file).

// Class for 64 bit register r0
reg_class r0_reg(
    R0, R0_H
);

// Class for 64 bit register r1
reg_class r1_reg(
    R1, R1_H
);

// Class for 64 bit register r2
reg_class r2_reg(
    R2, R2_H
);

// Class for 64 bit register r3
reg_class r3_reg(
    R3, R3_H
);

// Class for 64 bit register r4
reg_class r4_reg(
    R4, R4_H
);

// Class for 64 bit register r5
reg_class r5_reg(
    R5, R5_H
);

// Class for 64 bit register r10
reg_class r10_reg(
    R10, R10_H
);

// Class for 64 bit register r11
reg_class r11_reg(
    R11, R11_H
);

// Class for method register (r12, annotated "rmethod" above)
reg_class method_reg(
    R12, R12_H
);

// Class for heapbase register (r27)
reg_class heapbase_reg(
    R27, R27_H
);

// Class for thread register (r28)
reg_class thread_reg(
    R28, R28_H
);

// Class for frame pointer register (r29)
reg_class fp_reg(
    R29, R29_H
);

// Class for link register (r30)
reg_class lr_reg(
    R30, R30_H
);

// Class for long sp register (r31)
reg_class sp_reg(
  R31, R31_H
);
 656 
 657 // Class for all pointer registers
reg_class ptr_reg(
    R0, R0_H,
    R1, R1_H,
    R2, R2_H,
    R3, R3_H,
    R4, R4_H,
    R5, R5_H,
    R6, R6_H,
    R7, R7_H,
    R10, R10_H,
    R11, R11_H,
    R12, R12_H,
    R13, R13_H,
    R14, R14_H,
    R15, R15_H,
    R16, R16_H,
    R17, R17_H,
    R18, R18_H,
    R19, R19_H,
    R20, R20_H,
    R21, R21_H,
    R22, R22_H,
    R23, R23_H,
    R24, R24_H,
    R25, R25_H,
    R26, R26_H,
    R27, R27_H,                 // heapbase
    R28, R28_H,                 // thread
    R29, R29_H,                 // fp
    R30, R30_H,                 // lr
    R31, R31_H                  // sp
);
 690 
 691 // Class for all non_special pointer registers
// Pointer version of no_special_reg: heapbase, thread, fp, lr and sp
// are excluded (commented out below).
reg_class no_special_ptr_reg(
    R0, R0_H,
    R1, R1_H,
    R2, R2_H,
    R3, R3_H,
    R4, R4_H,
    R5, R5_H,
    R6, R6_H,
    R7, R7_H,
    R10, R10_H,
    R11, R11_H,
    R12, R12_H,
    R13, R13_H,
    R14, R14_H,
    R15, R15_H,
    R16, R16_H,
    R17, R17_H,
    R18, R18_H,
    R19, R19_H,
    R20, R20_H,
    R21, R21_H,
    R22, R22_H,
    R23, R23_H,
    R24, R24_H,
    R25, R25_H,
    R26, R26_H,
 /* R27, R27_H, */              // heapbase
 /* R28, R28_H, */              // thread
 /* R29, R29_H, */              // fp
 /* R30, R30_H, */              // lr
 /* R31, R31_H */               // sp
);
 724 
 725 // Class for all float registers
// Single-precision floats use only the first 32-bit slot (Vn) of each
// vector register (see comment above).
reg_class float_reg(
    V0,
    V1,
    V2,
    V3,
    V4,
    V5,
    V6,
    V7,
    V8,
    V9,
    V10,
    V11,
    V12,
    V13,
    V14,
    V15,
    V16,
    V17,
    V18,
    V19,
    V20,
    V21,
    V22,
    V23,
    V24,
    V25,
    V26,
    V27,
    V28,
    V29,
    V30,
    V31
);
 760 
 761 // Double precision float registers have virtual `high halves' that
 762 // are needed by the allocator.
 763 // Class for all double registers
// Doubles use the first two 32-bit slots (Vn, Vn_H) of each vector
// register.
reg_class double_reg(
    V0, V0_H,
    V1, V1_H,
    V2, V2_H,
    V3, V3_H,
    V4, V4_H,
    V5, V5_H,
    V6, V6_H,
    V7, V7_H,
    V8, V8_H,
    V9, V9_H,
    V10, V10_H,
    V11, V11_H,
    V12, V12_H,
    V13, V13_H,
    V14, V14_H,
    V15, V15_H,
    V16, V16_H,
    V17, V17_H,
    V18, V18_H,
    V19, V19_H,
    V20, V20_H,
    V21, V21_H,
    V22, V22_H,
    V23, V23_H,
    V24, V24_H,
    V25, V25_H,
    V26, V26_H,
    V27, V27_H,
    V28, V28_H,
    V29, V29_H,
    V30, V30_H,
    V31, V31_H
);
 798 
 799 // Class for all 64bit vector registers
// 64-bit vectors likewise occupy slots Vn and Vn_H only.
reg_class vectord_reg(
    V0, V0_H,
    V1, V1_H,
    V2, V2_H,
    V3, V3_H,
    V4, V4_H,
    V5, V5_H,
    V6, V6_H,
    V7, V7_H,
    V8, V8_H,
    V9, V9_H,
    V10, V10_H,
    V11, V11_H,
    V12, V12_H,
    V13, V13_H,
    V14, V14_H,
    V15, V15_H,
    V16, V16_H,
    V17, V17_H,
    V18, V18_H,
    V19, V19_H,
    V20, V20_H,
    V21, V21_H,
    V22, V22_H,
    V23, V23_H,
    V24, V24_H,
    V25, V25_H,
    V26, V26_H,
    V27, V27_H,
    V28, V28_H,
    V29, V29_H,
    V30, V30_H,
    V31, V31_H
);
 834 
 835 // Class for all 128bit vector registers
// 128-bit vectors occupy all four 32-bit slots (Vn, Vn_H, Vn_J, Vn_K).
reg_class vectorx_reg(
    V0, V0_H, V0_J, V0_K,
    V1, V1_H, V1_J, V1_K,
    V2, V2_H, V2_J, V2_K,
    V3, V3_H, V3_J, V3_K,
    V4, V4_H, V4_J, V4_K,
    V5, V5_H, V5_J, V5_K,
    V6, V6_H, V6_J, V6_K,
    V7, V7_H, V7_J, V7_K,
    V8, V8_H, V8_J, V8_K,
    V9, V9_H, V9_J, V9_K,
    V10, V10_H, V10_J, V10_K,
    V11, V11_H, V11_J, V11_K,
    V12, V12_H, V12_J, V12_K,
    V13, V13_H, V13_J, V13_K,
    V14, V14_H, V14_J, V14_K,
    V15, V15_H, V15_J, V15_K,
    V16, V16_H, V16_J, V16_K,
    V17, V17_H, V17_J, V17_K,
    V18, V18_H, V18_J, V18_K,
    V19, V19_H, V19_J, V19_K,
    V20, V20_H, V20_J, V20_K,
    V21, V21_H, V21_J, V21_K,
    V22, V22_H, V22_J, V22_K,
    V23, V23_H, V23_J, V23_K,
    V24, V24_H, V24_J, V24_K,
    V25, V25_H, V25_J, V25_K,
    V26, V26_H, V26_J, V26_K,
    V27, V27_H, V27_J, V27_K,
    V28, V28_H, V28_J, V28_K,
    V29, V29_H, V29_J, V29_K,
    V30, V30_H, V30_J, V30_K,
    V31, V31_H, V31_J, V31_K
);
 870 
// NOTE(review): these classes list only the Vn/Vn_H slots although the
// comments say "128 bit" -- confirm whether Vn_J/Vn_K were meant to be
// included (compare vectorx_reg above).

// Class for 128 bit register v0
reg_class v0_reg(
    V0, V0_H
);

// Class for 128 bit register v1
reg_class v1_reg(
    V1, V1_H
);

// Class for 128 bit register v2
reg_class v2_reg(
    V2, V2_H
);

// Class for 128 bit register v3
reg_class v3_reg(
    V3, V3_H
);

// Singleton class for condition codes
reg_class int_flags(RFLAGS);
 893 
 894 %}
 895 
 896 //----------DEFINITION BLOCK---------------------------------------------------
 897 // Define name --> value mappings to inform the ADLC of an integer valued name
 898 // Current support includes integer values in the range [0, 0x7FFFFFFF]
 899 // Format:
 900 //        int_def  <name>         ( <int_value>, <expression>);
 901 // Generated Code in ad_<arch>.hpp
 902 //        #define  <name>   (<expression>)
 903 //        // value == <int_value>
 904 // Generated code in ad_<arch>.cpp adlc_verification()
 905 //        assert( <name> == <int_value>, "Expect (<expression>) to equal <int_value>");
 906 //
 907 
 908 // we follow the ppc-aix port in using a simple cost model which ranks
 909 // register operations as cheap, memory ops as more expensive and
 910 // branches as most expensive. the first two have a low as well as a
 911 // normal cost. huge cost appears to be a way of saying don't do
 912 // something
 913 
definitions %{
  // The default cost (of a register move instruction).
  int_def INSN_COST            (    100,     100);
  // Branches and calls are ranked twice as expensive as a register op.
  int_def BRANCH_COST          (    200,     2 * INSN_COST);
  int_def CALL_COST            (    200,     2 * INSN_COST);
  // Volatile references (emitted with ldar/stlr or dmb sequences) are
  // an order of magnitude more expensive than a register op.
  int_def VOLATILE_REF_COST    (   1000,     10 * INSN_COST);
%}
 921 
 922 
 923 //----------SOURCE BLOCK-------------------------------------------------------
 924 // This is a block of C++ code which provides values, functions, and
 925 // definitions necessary in the rest of the architecture description
 926 
 927 source_hpp %{
 928 
 929 #include "opto/addnode.hpp"
 930 #if INCLUDE_ALL_GCS
 931 #include "shenandoahBarrierSetAssembler_aarch64.hpp"
 932 #endif
 933 
// Platform hooks queried by shared code when shortening branches.
// AArch64 uses far branches instead of call trampolines, so both
// queries report zero.
class CallStubImpl {

  //--------------------------------------------------------------
  //---<  Used for optimization in Compile::shorten_branches  >---
  //--------------------------------------------------------------

 public:
  // Size of call trampoline stub.
  static uint size_call_trampoline() {
    return 0; // no call trampolines on this platform
  }

  // number of relocations needed by a call trampoline stub
  static uint reloc_call_trampoline() {
    return 0; // no call trampolines on this platform
  }
};
 951 
// Platform hooks for emitting and sizing the exception and deopt
// handler stubs appended to each compiled method.
class HandlerImpl {

 public:

  static int emit_exception_handler(CodeBuffer &cbuf);
  static int emit_deopt_handler(CodeBuffer& cbuf);

  // The exception handler is a single far branch to the shared stub.
  static uint size_exception_handler() {
    return MacroAssembler::far_branch_size();
  }

  static uint size_deopt_handler() {
    // count one adr and one far branch instruction
    // return 4 * NativeInstruction::instruction_size;
    return NativeInstruction::instruction_size + MacroAssembler::far_branch_size();
  }
};
 969 
  // returns true if opcode is an atomic read-modify-write ideal op
  // (CompareAndSwapX / GetAndSetX / GetAndAddX)
  bool is_CAS(int opcode);

  // predicates controlling emit of ldr<x>/ldar<x> and associated dmb

  bool unnecessary_acquire(const Node *barrier);
  bool needs_acquiring_load(const Node *load);

  // predicates controlling emit of str<x>/stlr<x> and associated dmbs

  bool unnecessary_release(const Node *barrier);
  bool unnecessary_volatile(const Node *barrier);
  bool needs_releasing_store(const Node *store);

  // predicate controlling translation of CompareAndSwapX
  bool needs_acquiring_load_exclusive(const Node *load);

  // predicate controlling translation of StoreCM
  bool unnecessary_storestore(const Node *storecm);

  // predicate controlling addressing modes
  bool size_fits_all_mem_uses(AddPNode* addp, int shift);
 991 %}
 992 
 993 source %{
 994 
  // Optimization of volatile gets and puts
 996   // -------------------------------------
 997   //
 998   // AArch64 has ldar<x> and stlr<x> instructions which we can safely
 999   // use to implement volatile reads and writes. For a volatile read
1000   // we simply need
1001   //
1002   //   ldar<x>
1003   //
1004   // and for a volatile write we need
1005   //
1006   //   stlr<x>
1007   // 
1008   // Alternatively, we can implement them by pairing a normal
1009   // load/store with a memory barrier. For a volatile read we need
1010   // 
1011   //   ldr<x>
1012   //   dmb ishld
1013   //
1014   // for a volatile write
1015   //
1016   //   dmb ish
1017   //   str<x>
1018   //   dmb ish
1019   //
1020   // We can also use ldaxr and stlxr to implement compare and swap CAS
1021   // sequences. These are normally translated to an instruction
1022   // sequence like the following
1023   //
1024   //   dmb      ish
1025   // retry:
1026   //   ldxr<x>   rval raddr
1027   //   cmp       rval rold
1028   //   b.ne done
1029   //   stlxr<x>  rval, rnew, rold
1030   //   cbnz      rval retry
1031   // done:
1032   //   cset      r0, eq
1033   //   dmb ishld
1034   //
1035   // Note that the exclusive store is already using an stlxr
1036   // instruction. That is required to ensure visibility to other
1037   // threads of the exclusive write (assuming it succeeds) before that
1038   // of any subsequent writes.
1039   //
1040   // The following instruction sequence is an improvement on the above
1041   //
1042   // retry:
1043   //   ldaxr<x>  rval raddr
1044   //   cmp       rval rold
1045   //   b.ne done
1046   //   stlxr<x>  rval, rnew, rold
1047   //   cbnz      rval retry
1048   // done:
1049   //   cset      r0, eq
1050   //
1051   // We don't need the leading dmb ish since the stlxr guarantees
1052   // visibility of prior writes in the case that the swap is
1053   // successful. Crucially we don't have to worry about the case where
1054   // the swap is not successful since no valid program should be
1055   // relying on visibility of prior changes by the attempting thread
1056   // in the case where the CAS fails.
1057   //
1058   // Similarly, we don't need the trailing dmb ishld if we substitute
1059   // an ldaxr instruction since that will provide all the guarantees we
1060   // require regarding observation of changes made by other threads
1061   // before any change to the CAS address observed by the load.
1062   //
1063   // In order to generate the desired instruction sequence we need to
1064   // be able to identify specific 'signature' ideal graph node
1065   // sequences which i) occur as a translation of a volatile reads or
1066   // writes or CAS operations and ii) do not occur through any other
1067   // translation or graph transformation. We can then provide
  // alternative adlc matching rules which translate these node
1069   // sequences to the desired machine code sequences. Selection of the
1070   // alternative rules can be implemented by predicates which identify
1071   // the relevant node sequences.
1072   //
1073   // The ideal graph generator translates a volatile read to the node
1074   // sequence
1075   //
1076   //   LoadX[mo_acquire]
1077   //   MemBarAcquire
1078   //
1079   // As a special case when using the compressed oops optimization we
1080   // may also see this variant
1081   //
1082   //   LoadN[mo_acquire]
1083   //   DecodeN
1084   //   MemBarAcquire
1085   //
1086   // A volatile write is translated to the node sequence
1087   //
1088   //   MemBarRelease
1089   //   StoreX[mo_release] {CardMark}-optional
1090   //   MemBarVolatile
1091   //
1092   // n.b. the above node patterns are generated with a strict
1093   // 'signature' configuration of input and output dependencies (see
1094   // the predicates below for exact details). The card mark may be as
1095   // simple as a few extra nodes or, in a few GC configurations, may
1096   // include more complex control flow between the leading and
1097   // trailing memory barriers. However, whatever the card mark
1098   // configuration these signatures are unique to translated volatile
1099   // reads/stores -- they will not appear as a result of any other
1100   // bytecode translation or inlining nor as a consequence of
1101   // optimizing transforms.
1102   //
1103   // We also want to catch inlined unsafe volatile gets and puts and
  // be able to implement them using either ldar<x>/stlr<x> or some
  // combination of ldr<x>/str<x> and dmb instructions.
1106   //
1107   // Inlined unsafe volatiles puts manifest as a minor variant of the
1108   // normal volatile put node sequence containing an extra cpuorder
1109   // membar
1110   //
1111   //   MemBarRelease
1112   //   MemBarCPUOrder
1113   //   StoreX[mo_release] {CardMark}-optional
1114   //   MemBarVolatile
1115   //
1116   // n.b. as an aside, the cpuorder membar is not itself subject to
1117   // matching and translation by adlc rules.  However, the rule
1118   // predicates need to detect its presence in order to correctly
1119   // select the desired adlc rules.
1120   //
1121   // Inlined unsafe volatile gets manifest as a somewhat different
1122   // node sequence to a normal volatile get
1123   //
1124   //   MemBarCPUOrder
1125   //        ||       \\
1126   //   MemBarAcquire LoadX[mo_acquire]
1127   //        ||
1128   //   MemBarCPUOrder
1129   //
1130   // In this case the acquire membar does not directly depend on the
1131   // load. However, we can be sure that the load is generated from an
1132   // inlined unsafe volatile get if we see it dependent on this unique
1133   // sequence of membar nodes. Similarly, given an acquire membar we
1134   // can know that it was added because of an inlined unsafe volatile
1135   // get if it is fed and feeds a cpuorder membar and if its feed
1136   // membar also feeds an acquiring load.
1137   //
1138   // Finally an inlined (Unsafe) CAS operation is translated to the
1139   // following ideal graph
1140   //
1141   //   MemBarRelease
1142   //   MemBarCPUOrder
1143   //   CompareAndSwapX {CardMark}-optional
1144   //   MemBarCPUOrder
1145   //   MemBarAcquire
1146   //
1147   // So, where we can identify these volatile read and write
1148   // signatures we can choose to plant either of the above two code
1149   // sequences. For a volatile read we can simply plant a normal
1150   // ldr<x> and translate the MemBarAcquire to a dmb. However, we can
1151   // also choose to inhibit translation of the MemBarAcquire and
1152   // inhibit planting of the ldr<x>, instead planting an ldar<x>.
1153   //
1154   // When we recognise a volatile store signature we can choose to
1155   // plant at a dmb ish as a translation for the MemBarRelease, a
1156   // normal str<x> and then a dmb ish for the MemBarVolatile.
1157   // Alternatively, we can inhibit translation of the MemBarRelease
1158   // and MemBarVolatile and instead plant a simple stlr<x>
1159   // instruction.
1160   //
1161   // when we recognise a CAS signature we can choose to plant a dmb
1162   // ish as a translation for the MemBarRelease, the conventional
1163   // macro-instruction sequence for the CompareAndSwap node (which
1164   // uses ldxr<x>) and then a dmb ishld for the MemBarAcquire.
1165   // Alternatively, we can elide generation of the dmb instructions
1166   // and plant the alternative CompareAndSwap macro-instruction
1167   // sequence (which uses ldaxr<x>).
1168   // 
1169   // Of course, the above only applies when we see these signature
1170   // configurations. We still want to plant dmb instructions in any
1171   // other cases where we may see a MemBarAcquire, MemBarRelease or
1172   // MemBarVolatile. For example, at the end of a constructor which
1173   // writes final/volatile fields we will see a MemBarRelease
1174   // instruction and this needs a 'dmb ish' lest we risk the
1175   // constructed object being visible without making the
1176   // final/volatile field writes visible.
1177   //
1178   // n.b. the translation rules below which rely on detection of the
1179   // volatile signatures and insert ldar<x> or stlr<x> are failsafe.
1180   // If we see anything other than the signature configurations we
1181   // always just translate the loads and stores to ldr<x> and str<x>
1182   // and translate acquire, release and volatile membars to the
1183   // relevant dmb instructions.
1184   //
1185 
1186   // is_CAS(int opcode)
1187   //
1188   // return true if opcode is one of the possible CompareAndSwapX
1189   // values otherwise false.
1190 
1191   bool is_CAS(int opcode)
1192   {
1193     switch(opcode) {
1194     // We handle these
1195     case Op_CompareAndSwapI:
1196     case Op_CompareAndSwapL:
1197     case Op_CompareAndSwapP:
1198     case Op_CompareAndSwapN:
1199     case Op_GetAndSetI:
1200     case Op_GetAndSetL:
1201     case Op_GetAndSetP:
1202     case Op_GetAndSetN:
1203     case Op_GetAndAddI:
1204     case Op_GetAndAddL:
1205       return true;
1206     default:
1207       return false;
1208     }
1209   }
1210 
1211 // predicates controlling emit of ldr<x>/ldar<x> and associated dmb
1212 
// Decide whether the MemBarAcquire that trails a volatile load or a
// CAS can be elided because the load will be emitted with acquire
// semantics (ldar<x> / ldaxr<x>).
bool unnecessary_acquire(const Node *barrier)
{
  assert(barrier->is_MemBar(), "expecting a membar");

  if (UseBarriersForVolatile) {
    // we need to plant a dmb
    return false;
  }

  MemBarNode* mb = barrier->as_MemBar();

  // trailing membar of a volatile-load signature: the ldar covers it
  if (mb->trailing_load()) {
    return true;
  }

  // trailing membar of a CAS signature: elide only for the opcodes we
  // translate to ldaxr/stlxr sequences
  if (mb->trailing_load_store()) {
    Node* load_store = mb->in(MemBarNode::Precedent);
    assert(load_store->is_LoadStore(), "unexpected graph shape");
    return is_CAS(load_store->Opcode());
  }

  return false;
}
1236 
1237 bool needs_acquiring_load(const Node *n)
1238 {
1239   assert(n->is_Load(), "expecting a load");
1240   if (UseBarriersForVolatile) {
1241     // we use a normal load and a dmb
1242     return false;
1243   }
1244 
1245   LoadNode *ld = n->as_Load();
1246 
1247   return ld->is_acquire();
1248 }
1249 
1250 bool unnecessary_release(const Node *n)
1251 {
1252   assert((n->is_MemBar() &&
1253           n->Opcode() == Op_MemBarRelease),
1254          "expecting a release membar");
1255 
1256   if (UseBarriersForVolatile) {
1257     // we need to plant a dmb
1258     return false;
1259   }
1260 
1261   MemBarNode *barrier = n->as_MemBar();
1262 
1263   if (!barrier->leading()) {
1264     return false;
1265   } else {
1266     Node* trailing = barrier->trailing_membar();
1267     MemBarNode* trailing_mb = trailing->as_MemBar();
1268     assert(trailing_mb->trailing(), "Not a trailing membar?");
1269     assert(trailing_mb->leading_membar() == n, "inconsistent leading/trailing membars");
1270 
1271     Node* mem = trailing_mb->in(MemBarNode::Precedent);
1272     if (mem->is_Store()) {
1273       assert(mem->as_Store()->is_release(), "");
1274       assert(trailing_mb->Opcode() == Op_MemBarVolatile, "");
1275       return true;
1276     } else {
1277       assert(mem->is_LoadStore(), "");
1278       assert(trailing_mb->Opcode() == Op_MemBarAcquire, "");
1279       return is_CAS(mem->Opcode());
1280     }
1281   }
1282 
1283   return false;
1284 }
1285 
// Decide whether a MemBarVolatile trailing a volatile store can be
// elided because the store will be emitted as stlr<x>.
bool unnecessary_volatile(const Node *n)
{
  // assert n->is_MemBar();
  if (UseBarriersForVolatile) {
    // we need to plant a dmb
    return false;
  }

  MemBarNode *mbvol = n->as_MemBar();

  // elidable only when this is the trailing membar of a volatile
  // store signature
  bool release = mbvol->trailing_store();
  assert(!release || (mbvol->in(MemBarNode::Precedent)->is_Store() && mbvol->in(MemBarNode::Precedent)->as_Store()->is_release()), "");
#ifdef ASSERT
  // cross-check the leading/trailing membar pairing recorded in the graph
  if (release) {
    Node* leading = mbvol->leading_membar();
    assert(leading->Opcode() == Op_MemBarRelease, "");
    assert(leading->as_MemBar()->leading_store(), "");
    assert(leading->as_MemBar()->trailing_membar() == mbvol, "");
   }
#endif

  return release;
}
1309 
1310 // predicates controlling emit of str<x>/stlr<x> and associated dmbs
1311 
1312 bool needs_releasing_store(const Node *n)
1313 {
1314   // assert n->is_Store();
1315   if (UseBarriersForVolatile) {
1316     // we use a normal store and dmb combination
1317     return false;
1318   }
1319 
1320   StoreNode *st = n->as_Store();
1321 
1322   return st->trailing_membar() != NULL;
1323 }
1324 
1325 // predicate controlling translation of CAS
1326 //
1327 // returns true if CAS needs to use an acquiring load otherwise false
1328 
1329 bool needs_acquiring_load_exclusive(const Node *n)
1330 {
1331   assert(is_CAS(n->Opcode()), "expecting a compare and swap");
1332   if (UseBarriersForVolatile) {
1333     return false;
1334   }
1335 
1336   LoadStoreNode* ldst = n->as_LoadStore();
1337   assert(ldst->trailing_membar() != NULL, "expected trailing membar");
1338 
1339   // so we can just return true here
1340   return true;
1341 }
1342 
1343 // predicate controlling translation of StoreCM
1344 //
1345 // returns true if a StoreStore must precede the card write otherwise
1346 // false
1347 
1348 bool unnecessary_storestore(const Node *storecm)
1349 {
1350   assert(storecm->Opcode()  == Op_StoreCM, "expecting a StoreCM");
1351 
1352   // we need to generate a dmb ishst between an object put and the
1353   // associated card mark when we are using CMS without conditional
1354   // card marking
1355 
1356   if (UseConcMarkSweepGC && !UseCondCardMark) {
1357     return false;
1358   }
1359 
1360   // a storestore is unnecesary in all other cases
1361 
1362   return true;
1363 }
1364 
1365 
1366 #define __ _masm.
1367 
// advance declarations for helper functions to convert register
// indices to register objects
1370 
1371 // the ad file has to provide implementations of certain methods
1372 // expected by the generic code
1373 //
1374 // REQUIRED FUNCTIONALITY
1375 
1376 //=============================================================================
1377 
1378 // !!!!! Special hack to get all types of calls to specify the byte offset
1379 //       from the start of the call to the point where the return address
1380 //       will point.
1381 
1382 int MachCallStaticJavaNode::ret_addr_offset()
1383 {
1384   // call should be a simple bl
1385   // unless this is a method handle invoke in which case it is
1386   // mov(rfp, sp), bl, mov(sp, rfp)
1387   int off = 4;
1388   if (_method_handle_invoke) {
1389     off += 4;
1390   }
1391   return off;
1392 }
1393 
// Byte offset from the start of a dynamic call to its return address:
// four 4-byte instructions.
int MachCallDynamicJavaNode::ret_addr_offset()
{
  return 16; // movz, movk, movk, bl
}
1398 
// Byte offset from the start of a runtime call to its return address.
// Depends on whether the target is a generated stub (reachable with a
// far branch) or a real runtime callout (longer sequence).
int MachCallRuntimeNode::ret_addr_offset() {
  // for generated stubs the call will be
  //   bl(addr)
  // for real runtime callouts it will be six instructions
  // see aarch64_enc_java_to_runtime
  //   adr(rscratch2, retaddr)
  //   lea(rscratch1, RuntimeAddress(addr)
  //   stp(zr, rscratch2, Address(__ pre(sp, -2 * wordSize)))
  //   blr(rscratch1)
  CodeBlob *cb = CodeCache::find_blob(_entry_point);
  if (cb) {
    // target is in the code cache: a single (far) branch suffices
    return MacroAssembler::far_branch_size();
  } else {
    return 6 * NativeInstruction::instruction_size;
  }
}
1415 
1416 // Indicate if the safepoint node needs the polling page as an input
1417 
1418 // the shared code plants the oop data at the start of the generated
// code for the safepoint node and that needs to be at the load
1420 // instruction itself. so we cannot plant a mov of the safepoint poll
1421 // address followed by a load. setting this to true means the mov is
1422 // scheduled as a prior instruction. that's better for scheduling
1423 // anyway.
1424 
// See the comment above: the poll-page address is materialized as a
// separate (schedulable) instruction feeding the safepoint load.
bool SafePointNode::needs_polling_address_input()
{
  return true;
}
1429 
1430 //=============================================================================
1431 
#ifndef PRODUCT
// Debug listing for a breakpoint node.
void MachBreakpointNode::format(PhaseRegAlloc *ra_, outputStream *st) const {
  st->print("BREAKPOINT");
}
#endif

// Emit a breakpoint as a single brk #0 instruction.
void MachBreakpointNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
  MacroAssembler _masm(&cbuf);
  __ brk(0);
}

uint MachBreakpointNode::size(PhaseRegAlloc *ra_) const {
  return MachNode::size(ra_);
}
1446 
1447 //=============================================================================
1448 
#ifndef PRODUCT
  // Debug listing for a nop-padding node.
  void MachNopNode::format(PhaseRegAlloc*, outputStream* st) const {
    st->print("nop \t# %d bytes pad for loops and calls", _count);
  }
#endif

  // Emit _count nop instructions (used to pad loops and calls).
  void MachNopNode::emit(CodeBuffer &cbuf, PhaseRegAlloc*) const {
    MacroAssembler _masm(&cbuf);
    for (int i = 0; i < _count; i++) {
      __ nop();
    }
  }

  // Size is exactly _count fixed-width instructions.
  uint MachNopNode::size(PhaseRegAlloc*) const {
    return _count * NativeInstruction::instruction_size;
  }
1465 
1466 //=============================================================================
// The constant table base needs no register on AArch64 (absolute
// addressing is used), so the node has an empty output mask and an
// empty encoding.
const RegMask& MachConstantBaseNode::_out_RegMask = RegMask::Empty;

int Compile::ConstantTable::calculate_table_base_offset() const {
  return 0;  // absolute addressing, no offset
}

bool MachConstantBaseNode::requires_postalloc_expand() const { return false; }
void MachConstantBaseNode::postalloc_expand(GrowableArray <Node *> *nodes, PhaseRegAlloc *ra_) {
  // never called: requires_postalloc_expand() returns false
  ShouldNotReachHere();
}

void MachConstantBaseNode::emit(CodeBuffer& cbuf, PhaseRegAlloc* ra_) const {
  // Empty encoding
}

uint MachConstantBaseNode::size(PhaseRegAlloc* ra_) const {
  return 0;
}

#ifndef PRODUCT
void MachConstantBaseNode::format(PhaseRegAlloc* ra_, outputStream* st) const {
  st->print("-- \t// MachConstantBaseNode (empty encoding)");
}
#endif
1491 
#ifndef PRODUCT
// Debug listing of the method prolog: mirrors the three frame-size
// regimes handled by MacroAssembler when building the frame.
void MachPrologNode::format(PhaseRegAlloc *ra_, outputStream *st) const {
  Compile* C = ra_->C;

  int framesize = C->frame_slots() << LogBytesPerInt;

  if (C->need_stack_bang(framesize))
    st->print("# stack bang size=%d\n\t", framesize);

  if (framesize == 0) {
    // Is this even possible?
    st->print("stp  lr, rfp, [sp, #%d]!", -(2 * wordSize));
  } else if (framesize < ((1 << 9) + 2 * wordSize)) {
    // frame small enough for an immediate-offset sub/stp pair
    st->print("sub  sp, sp, #%d\n\t", framesize);
    st->print("stp  rfp, lr, [sp, #%d]", framesize - 2 * wordSize);
  } else {
    // large frame: adjust sp via a scratch register
    st->print("stp  lr, rfp, [sp, #%d]!\n\t", -(2 * wordSize));
    st->print("mov  rscratch1, #%d\n\t", framesize - 2 * wordSize);
    st->print("sub  sp, sp, rscratch1");
  }
}
#endif
1514 
// Emit the method prolog: patchable nop, optional stack-bang, then
// frame construction; finally record frame completion and, if needed,
// set the constant table base offset.
void MachPrologNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
  Compile* C = ra_->C;
  MacroAssembler _masm(&cbuf);

  // n.b. frame size includes space for return pc and rfp
  long framesize = ((long)C->frame_slots()) << LogBytesPerInt;
  assert(framesize%(2*wordSize) == 0, "must preserve 2*wordSize alignment");

  // insert a nop at the start of the prolog so we can patch in a
  // branch if we need to invalidate the method later
  __ nop();

  if (C->need_stack_bang(framesize))
    __ generate_stack_overflow_check(framesize);

  __ build_frame(framesize);

  if (VerifyStackAtCalls) {
    Unimplemented();
  }

  C->set_frame_complete(cbuf.insts_size());

  if (C->has_mach_constant_base_node()) {
    // NOTE: We set the table base offset here because users might be
    // emitted before MachConstantBaseNode.
    Compile::ConstantTable& constant_table = C->constant_table();
    constant_table.set_table_base_offset(constant_table.calculate_table_base_offset());
  }
}
1545 
// Prolog size varies with the frame-size regime, so delegate to the
// generic (emit-and-measure) implementation.
uint MachPrologNode::size(PhaseRegAlloc* ra_) const
{
  return MachNode::size(ra_); // too many variables; just compute it
                              // the hard way
}

// The prolog contains no relocatable values.
int MachPrologNode::reloc() const
{
  return 0;
}
1556 
1557 //=============================================================================
1558 
#ifndef PRODUCT
// Debug listing of the method epilog: frame teardown (mirroring the
// prolog's three size regimes) plus the optional return-poll.
void MachEpilogNode::format(PhaseRegAlloc *ra_, outputStream *st) const {
  Compile* C = ra_->C;
  int framesize = C->frame_slots() << LogBytesPerInt;

  st->print("# pop frame %d\n\t",framesize);

  if (framesize == 0) {
    st->print("ldp  lr, rfp, [sp],#%d\n\t", (2 * wordSize));
  } else if (framesize < ((1 << 9) + 2 * wordSize)) {
    st->print("ldp  lr, rfp, [sp,#%d]\n\t", framesize - 2 * wordSize);
    st->print("add  sp, sp, #%d\n\t", framesize);
  } else {
    // large frame: adjust sp via a scratch register
    st->print("mov  rscratch1, #%d\n\t", framesize - 2 * wordSize);
    st->print("add  sp, sp, rscratch1\n\t");
    st->print("ldp  lr, rfp, [sp],#%d\n\t", (2 * wordSize));
  }

  if (do_polling() && C->is_method_compilation()) {
    st->print("# touch polling page\n\t");
    st->print("mov  rscratch1, #" INTPTR_FORMAT "\n\t", p2i(os::get_polling_page()));
    st->print("ldr zr, [rscratch1]");
  }
}
#endif
1584 
// Emit the method epilog: tear down the frame and, for method
// compilations that poll on return, touch the safepoint polling page.
void MachEpilogNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
  Compile* C = ra_->C;
  MacroAssembler _masm(&cbuf);
  int framesize = C->frame_slots() << LogBytesPerInt;

  __ remove_frame(framesize);

  if (do_polling() && C->is_method_compilation()) {
    __ read_polling_page(rscratch1, os::get_polling_page(), relocInfo::poll_return_type);
  }
}
1596 
uint MachEpilogNode::size(PhaseRegAlloc *ra_) const {
  // Variable size. Determine dynamically.
  return MachNode::size(ra_);
}

int MachEpilogNode::reloc() const {
  // Return number of relocatable values contained in this instruction.
  return 1; // 1 for polling page.
}

// Use the default pipeline class for the epilog.
const Pipeline * MachEpilogNode::pipeline() const {
  return MachNode::pipeline_class();
}

// This method seems to be obsolete. It is declared in machnode.hpp
// and defined in all *.ad files, but it is never called. Should we
// get rid of it?
int MachEpilogNode::safepoint_offset() const {
  assert(do_polling(), "no return for this epilog node");
  return 4;
}
1618 
1619 //=============================================================================
1620 
// Figure out which register class each belongs in: rc_int, rc_float or
// rc_stack.
enum RC { rc_bad, rc_int, rc_float, rc_stack };

// Map an allocator register number onto one of the classes above.
// The slot layout mirrors the register definitions earlier in this
// file: int register slots first, then float/vector slots, then the
// flags register, then stack slots.
static enum RC rc_class(OptoReg::Name reg) {

  if (reg == OptoReg::Bad) {
    return rc_bad;
  }

  // we have 30 int registers * 2 halves
  // (rscratch1 and rscratch2 are omitted)

  if (reg < 60) {
    return rc_int;
  }

  // we have 32 float registers * 4 slots each (cf. vectorx_reg:
  // V0, V0_H, V0_J, V0_K), i.e. 128 slots in total
  if (reg < 60 + 128) {
    return rc_float;
  }

  // Between float regs & stack is the flags regs.
  assert(OptoReg::is_stack(reg), "blow up if spilling flags");

  return rc_stack;
}
1648 
1649 uint MachSpillCopyNode::implementation(CodeBuffer *cbuf, PhaseRegAlloc *ra_, bool do_size, outputStream *st) const {
1650   Compile* C = ra_->C;
1651 
1652   // Get registers to move.
1653   OptoReg::Name src_hi = ra_->get_reg_second(in(1));
1654   OptoReg::Name src_lo = ra_->get_reg_first(in(1));
1655   OptoReg::Name dst_hi = ra_->get_reg_second(this);
1656   OptoReg::Name dst_lo = ra_->get_reg_first(this);
1657 
1658   enum RC src_hi_rc = rc_class(src_hi);
1659   enum RC src_lo_rc = rc_class(src_lo);
1660   enum RC dst_hi_rc = rc_class(dst_hi);
1661   enum RC dst_lo_rc = rc_class(dst_lo);
1662 
1663   assert(src_lo != OptoReg::Bad && dst_lo != OptoReg::Bad, "must move at least 1 register");
1664 
1665   if (src_hi != OptoReg::Bad) {
1666     assert((src_lo&1)==0 && src_lo+1==src_hi &&
1667            (dst_lo&1)==0 && dst_lo+1==dst_hi,
1668            "expected aligned-adjacent pairs");
1669   }
1670 
1671   if (src_lo == dst_lo && src_hi == dst_hi) {
1672     return 0;            // Self copy, no move.
1673   }
1674 
1675   bool is64 = (src_lo & 1) == 0 && src_lo + 1 == src_hi &&
1676               (dst_lo & 1) == 0 && dst_lo + 1 == dst_hi;
1677   int src_offset = ra_->reg2offset(src_lo);
1678   int dst_offset = ra_->reg2offset(dst_lo);
1679 
1680   if (bottom_type()->isa_vect() != NULL) {
1681     uint ireg = ideal_reg();
1682     assert(ireg == Op_VecD || ireg == Op_VecX, "must be 64 bit or 128 bit vector");
1683     if (cbuf) {
1684       MacroAssembler _masm(cbuf);
1685       assert((src_lo_rc != rc_int && dst_lo_rc != rc_int), "sanity");
1686       if (src_lo_rc == rc_stack && dst_lo_rc == rc_stack) {
1687         // stack->stack
1688         assert((src_offset & 7) == 0 && (dst_offset & 7) == 0, "unaligned stack offset");
1689         if (ireg == Op_VecD) {
1690           __ unspill(rscratch1, true, src_offset);
1691           __ spill(rscratch1, true, dst_offset);
1692         } else {
1693           __ spill_copy128(src_offset, dst_offset);
1694         }
1695       } else if (src_lo_rc == rc_float && dst_lo_rc == rc_float) {
1696         __ mov(as_FloatRegister(Matcher::_regEncode[dst_lo]),
1697                ireg == Op_VecD ? __ T8B : __ T16B,
1698                as_FloatRegister(Matcher::_regEncode[src_lo]));
1699       } else if (src_lo_rc == rc_float && dst_lo_rc == rc_stack) {
1700         __ spill(as_FloatRegister(Matcher::_regEncode[src_lo]),
1701                        ireg == Op_VecD ? __ D : __ Q,
1702                        ra_->reg2offset(dst_lo));
1703       } else if (src_lo_rc == rc_stack && dst_lo_rc == rc_float) {
1704         __ unspill(as_FloatRegister(Matcher::_regEncode[dst_lo]),
1705                        ireg == Op_VecD ? __ D : __ Q,
1706                        ra_->reg2offset(src_lo));
1707       } else {
1708         ShouldNotReachHere();
1709       }
1710     }
1711   } else if (cbuf) {
1712     MacroAssembler _masm(cbuf);
1713     switch (src_lo_rc) {
1714     case rc_int:
1715       if (dst_lo_rc == rc_int) {  // gpr --> gpr copy
1716         if (is64) {
1717             __ mov(as_Register(Matcher::_regEncode[dst_lo]),
1718                    as_Register(Matcher::_regEncode[src_lo]));
1719         } else {
1720             MacroAssembler _masm(cbuf);
1721             __ movw(as_Register(Matcher::_regEncode[dst_lo]),
1722                     as_Register(Matcher::_regEncode[src_lo]));
1723         }
1724       } else if (dst_lo_rc == rc_float) { // gpr --> fpr copy
1725         if (is64) {
1726             __ fmovd(as_FloatRegister(Matcher::_regEncode[dst_lo]),
1727                      as_Register(Matcher::_regEncode[src_lo]));
1728         } else {
1729             __ fmovs(as_FloatRegister(Matcher::_regEncode[dst_lo]),
1730                      as_Register(Matcher::_regEncode[src_lo]));
1731         }
1732       } else {                    // gpr --> stack spill
1733         assert(dst_lo_rc == rc_stack, "spill to bad register class");
1734         __ spill(as_Register(Matcher::_regEncode[src_lo]), is64, dst_offset);
1735       }
1736       break;
1737     case rc_float:
1738       if (dst_lo_rc == rc_int) {  // fpr --> gpr copy
1739         if (is64) {
1740             __ fmovd(as_Register(Matcher::_regEncode[dst_lo]),
1741                      as_FloatRegister(Matcher::_regEncode[src_lo]));
1742         } else {
1743             __ fmovs(as_Register(Matcher::_regEncode[dst_lo]),
1744                      as_FloatRegister(Matcher::_regEncode[src_lo]));
1745         }
1746       } else if (dst_lo_rc == rc_float) { // fpr --> fpr copy
1747           if (cbuf) {
1748             __ fmovd(as_FloatRegister(Matcher::_regEncode[dst_lo]),
1749                      as_FloatRegister(Matcher::_regEncode[src_lo]));
1750         } else {
1751             __ fmovs(as_FloatRegister(Matcher::_regEncode[dst_lo]),
1752                      as_FloatRegister(Matcher::_regEncode[src_lo]));
1753         }
1754       } else {                    // fpr --> stack spill
1755         assert(dst_lo_rc == rc_stack, "spill to bad register class");
1756         __ spill(as_FloatRegister(Matcher::_regEncode[src_lo]),
1757                  is64 ? __ D : __ S, dst_offset);
1758       }
1759       break;
1760     case rc_stack:
1761       if (dst_lo_rc == rc_int) {  // stack --> gpr load
1762         __ unspill(as_Register(Matcher::_regEncode[dst_lo]), is64, src_offset);
1763       } else if (dst_lo_rc == rc_float) { // stack --> fpr load
1764         __ unspill(as_FloatRegister(Matcher::_regEncode[dst_lo]),
1765                    is64 ? __ D : __ S, src_offset);
1766       } else {                    // stack --> stack copy
1767         assert(dst_lo_rc == rc_stack, "spill to bad register class");
1768         __ unspill(rscratch1, is64, src_offset);
1769         __ spill(rscratch1, is64, dst_offset);
1770       }
1771       break;
1772     default:
1773       assert(false, "bad rc_class for spill");
1774       ShouldNotReachHere();
1775     }
1776   }
1777 
1778   if (st) {
1779     st->print("spill ");
1780     if (src_lo_rc == rc_stack) {
1781       st->print("[sp, #%d] -> ", ra_->reg2offset(src_lo));
1782     } else {
1783       st->print("%s -> ", Matcher::regName[src_lo]);
1784     }
1785     if (dst_lo_rc == rc_stack) {
1786       st->print("[sp, #%d]", ra_->reg2offset(dst_lo));
1787     } else {
1788       st->print("%s", Matcher::regName[dst_lo]);
1789     }
1790     if (bottom_type()->isa_vect() != NULL) {
1791       st->print("\t# vector spill size = %d", ideal_reg()==Op_VecD ? 64:128);
1792     } else {
1793       st->print("\t# spill size = %d", is64 ? 64:32);
1794     }
1795   }
1796 
1797   return 0;
1798 
1799 }
1800 
1801 #ifndef PRODUCT
1802 void MachSpillCopyNode::format(PhaseRegAlloc *ra_, outputStream *st) const {
1803   if (!ra_)
1804     st->print("N%d = SpillCopy(N%d)", _idx, in(1)->_idx);
1805   else
1806     implementation(NULL, ra_, false, st);
1807 }
1808 #endif
1809 
// Emit the actual spill/copy instructions into the code buffer by
// running implementation() with a code buffer and no output stream.
void MachSpillCopyNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
  implementation(&cbuf, ra_, false, NULL);
}

// Spill copies have no statically-known size; defer to the generic
// MachNode sizing, which measures the emitted code.
uint MachSpillCopyNode::size(PhaseRegAlloc *ra_) const {
  return MachNode::size(ra_);
}
1817 
1818 //=============================================================================
1819 
1820 #ifndef PRODUCT
1821 void BoxLockNode::format(PhaseRegAlloc *ra_, outputStream *st) const {
1822   int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
1823   int reg = ra_->get_reg_first(this);
1824   st->print("add %s, rsp, #%d]\t# box lock",
1825             Matcher::regName[reg], offset);
1826 }
1827 #endif
1828 
// Materialize the stack address of the lock box (sp + frame offset)
// into this node's allocated register.
void BoxLockNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
  MacroAssembler _masm(&cbuf);

  int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
  int reg    = ra_->get_encode(this);

  // This add will handle any 24-bit signed offset. 24 bits allows an
  // 8 megabyte stack frame.
  // NOTE(review): when the offset does not fit the add/sub immediate
  // encoding this presumably expands to two instructions — see the
  // matching logic in BoxLockNode::size().
  __ add(as_Register(reg), sp, offset);
}
1839 
1840 uint BoxLockNode::size(PhaseRegAlloc *ra_) const {
1841   // BoxLockNode is not a MachNode, so we can't just call MachNode::size(ra_).
1842   int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
1843 
1844   if (Assembler::operand_valid_for_add_sub_immediate(offset)) {
1845     return NativeInstruction::instruction_size;
1846   } else {
1847     return 2 * NativeInstruction::instruction_size;
1848   }
1849 }
1850 
1851 //=============================================================================
1852 
1853 #ifndef PRODUCT
1854 void MachUEPNode::format(PhaseRegAlloc* ra_, outputStream* st) const
1855 {
1856   st->print_cr("# MachUEPNode");
1857   if (UseCompressedClassPointers) {
1858     st->print_cr("\tldrw rscratch1, j_rarg0 + oopDesc::klass_offset_in_bytes()]\t# compressed klass");
1859     if (Universe::narrow_klass_shift() != 0) {
1860       st->print_cr("\tdecode_klass_not_null rscratch1, rscratch1");
1861     }
1862   } else {
1863    st->print_cr("\tldr rscratch1, j_rarg0 + oopDesc::klass_offset_in_bytes()]\t# compressed klass");
1864   }
1865   st->print_cr("\tcmp r0, rscratch1\t # Inline cache check");
1866   st->print_cr("\tbne, SharedRuntime::_ic_miss_stub");
1867 }
1868 #endif
1869 
void MachUEPNode::emit(CodeBuffer& cbuf, PhaseRegAlloc* ra_) const
{
  // This is the unverified entry point.
  MacroAssembler _masm(&cbuf);

  // Inline cache check: compare the receiver's klass against the
  // expected klass.  NOTE(review): cmp_klass presumably loads the klass
  // of the receiver in j_rarg0 and compares it with rscratch2 (the
  // cached klass set up by the caller), using rscratch1 as a temp —
  // confirm against MacroAssembler::cmp_klass.
  __ cmp_klass(j_rarg0, rscratch2, rscratch1);
  Label skip;
  // TODO
  // can we avoid this skip and still use a reloc?
  __ br(Assembler::EQ, skip);
  // Klass mismatch: go to the shared inline-cache miss stub, which may
  // be out of direct branch range, hence far_jump.
  __ far_jump(RuntimeAddress(SharedRuntime::get_ic_miss_stub()));
  __ bind(skip);
}
1883 
// Size of the unverified entry point code; defer to the generic
// MachNode sizing, which measures the emitted code.
uint MachUEPNode::size(PhaseRegAlloc* ra_) const
{
  return MachNode::size(ra_);
}
1888 
1889 // REQUIRED EMIT CODE
1890 
1891 //=============================================================================
1892 
1893 // Emit exception handler code.
// Emit exception handler code.
// Returns the offset of the handler within the stub section, or 0 when
// the code cache is full.
int HandlerImpl::emit_exception_handler(CodeBuffer& cbuf)
{
  // mov rscratch1 #exception_blob_entry_point
  // br rscratch1
  // Note that the code buffer's insts_mark is always relative to insts.
  // That's why we must use the macroassembler to generate a handler.
  MacroAssembler _masm(&cbuf);
  address base = __ start_a_stub(size_exception_handler());
  if (base == NULL) {
    ciEnv::current()->record_failure("CodeCache is full");
    return 0;  // CodeBuffer::expand failed
  }
  int offset = __ offset();
  // Jump (possibly out of branch range, hence far_jump) to the shared
  // exception blob entry point.
  __ far_jump(RuntimeAddress(OptoRuntime::exception_blob()->entry_point()));
  assert(__ offset() - offset <= (int) size_exception_handler(), "overflow");
  __ end_a_stub();
  return offset;
}
1912 
1913 // Emit deopt handler code.
// Emit deopt handler code.
// Returns the offset of the handler within the stub section, or 0 when
// the code cache is full.
int HandlerImpl::emit_deopt_handler(CodeBuffer& cbuf)
{
  // Note that the code buffer's insts_mark is always relative to insts.
  // That's why we must use the macroassembler to generate a handler.
  MacroAssembler _masm(&cbuf);
  address base = __ start_a_stub(size_deopt_handler());
  if (base == NULL) {
    ciEnv::current()->record_failure("CodeCache is full");
    return 0;  // CodeBuffer::expand failed
  }
  int offset = __ offset();

  // Set lr to the current pc before jumping to the deopt blob.
  // NOTE(review): this makes lr point at the adr instruction itself,
  // presumably so the unpack blob can identify this handler — confirm
  // against DeoptimizationBlob's expectations.
  __ adr(lr, __ pc());
  __ far_jump(RuntimeAddress(SharedRuntime::deopt_blob()->unpack()));

  assert(__ offset() - offset <= (int) size_deopt_handler(), "overflow");
  __ end_a_stub();
  return offset;
}
1933 
1934 // REQUIRED MATCHER CODE
1935 
1936 //=============================================================================
1937 
1938 const bool Matcher::match_rule_supported(int opcode) {
1939 
1940   // TODO 
1941   // identify extra cases that we might want to provide match rules for
1942   // e.g. Op_StrEquals and other intrinsics
1943   if (!has_match_rule(opcode)) {
1944     return false;
1945   }
1946 
1947   return true;  // Per default match rules are supported.
1948 }
1949 
// Not used on AArch64 (there is no x87-style FPU register stack);
// aborts if ever called.
int Matcher::regnum_to_fpu_offset(int regnum)
{
  Unimplemented();
  return 0;  // unreachable
}
1955 
1956 // Is this branch offset short enough that a short branch can be used?
1957 //
1958 // NOTE: If the platform does not provide any short branch variants, then
1959 //       this method should return false for offset 0.
1960 bool Matcher::is_short_branch_offset(int rule, int br_size, int offset) {
1961   // The passed offset is relative to address of the branch.
1962 
1963   return (-32768 <= offset && offset < 32768);
1964 }
1965 
const bool Matcher::isSimpleConstant64(jlong value) {
  // Will one (StoreL ConL) be cheaper than two (StoreI ConI)?.
  // Probably always true, even if a temp register is required.
  return true;
}

// true just means we have fast l2f conversion
const bool Matcher::convL2FSupported(void) {
  return true;
}
1976 
1977 // Vector width in bytes.
1978 const int Matcher::vector_width_in_bytes(BasicType bt) {
1979   int size = MIN2(16,(int)MaxVectorSize);
1980   // Minimum 2 values in vector
1981   if (size < 2*type2aelembytes(bt)) size = 0;
1982   // But never < 4
1983   if (size < 4) size = 0;
1984   return size;
1985 }
1986 
// Limits on vector size (number of elements) loaded into vector.
// Maximum element count = usable vector width / element size.
const int Matcher::max_vector_size(const BasicType bt) {
  return vector_width_in_bytes(bt)/type2aelembytes(bt);
}
1991 const int Matcher::min_vector_size(const BasicType bt) {
1992 //  For the moment limit the vector size to 8 bytes
1993     int size = 8 / type2aelembytes(bt);
1994     if (size < 2) size = 2;
1995     return size;
1996 }
1997 
1998 // Vector ideal reg.
1999 const uint Matcher::vector_ideal_reg(int len) {
2000   switch(len) {
2001     case  8: return Op_VecD;
2002     case 16: return Op_VecX;
2003   }
2004   ShouldNotReachHere();
2005   return 0;
2006 }
2007 
2008 const uint Matcher::vector_shift_count_ideal_reg(int size) {
2009   switch(size) {
2010     case  8: return Op_VecD;
2011     case 16: return Op_VecX;
2012   }
2013   ShouldNotReachHere();
2014   return 0;
2015 }
2016 
// AES support not yet implemented
const bool Matcher::pass_original_key_for_aes() {
  return false;
}

// AArch64 supports misaligned vectors store/load (the old comment said
// "x86" — stale copy from the x86 AD file).
const bool Matcher::misaligned_vectors_ok() {
  return !AlignVector; // can be changed by flag
}

// false => size gets scaled to BytesPerLong, ok.
const bool Matcher::init_array_count_is_in_bytes = false;

// Threshold size for cleararray.
const int Matcher::init_array_short_size = 4 * BytesPerLong;
2032 
// Use conditional move (CMOVL)
const int Matcher::long_cmove_cost() {
  // long cmoves are no more expensive than int cmoves
  return 0;
}

const int Matcher::float_cmove_cost() {
  // float cmoves are no more expensive than int cmoves
  return 0;
}

// Does the CPU require late expand (see block.cpp for description of late expand)?
const bool Matcher::require_postalloc_expand = false;

// Should the Matcher clone shifts on addressing modes, expecting them
// to be subsumed into complex addressing expressions or compute them
// into registers?  True for Intel but false for most RISCs
const bool Matcher::clone_shift_expressions = false;

// Do we need to mask the count passed to shift instructions or does
// the cpu only look at the lower 5/6 bits anyway?
// A64 variable shifts use only the low bits of the count register, so
// no explicit masking is required.
const bool Matcher::need_masked_shift_count = false;
2055 
// This affects two different things:
//  - how Decode nodes are matched
//  - how ImplicitNullCheck opportunities are recognized
// If true, the matcher will try to remove all Decodes and match them
// (as operands) into nodes. NullChecks are not prepared to deal with
// Decodes by final_graph_reshaping().
// If false, final_graph_reshaping() forces the decode behind the Cmp
// for a NullCheck. The matcher matches the Decode node into a register.
// Implicit_null_check optimization moves the Decode along with the
// memory operation back up before the NullCheck.
bool Matcher::narrow_oop_use_complex_address() {
  // Complex (base + shifted narrow oop) addressing is only profitable
  // when no shift is needed to decode the narrow oop.
  return Universe::narrow_oop_shift() == 0;
}

bool Matcher::narrow_klass_use_complex_address() {
// TODO
// decide whether we need to set this to true
  return false;
}
2075 
// Is it better to copy float constants, or load them directly from
// memory?  Intel can load a float constant from a direct address,
// requiring no extra registers.  Most RISCs will have to materialize
// an address into a register first, so they would do better to copy
// the constant from stack.
const bool Matcher::rematerialize_float_constants = false;

// If CPU can load and store mis-aligned doubles directly then no
// fixup is needed.  Else we split the double into 2 integer pieces
// and move it piece-by-piece.  Only happens when passing doubles into
// C code as the Java calling convention forces doubles to be aligned.
const bool Matcher::misaligned_doubles_ok = true;

// Implicit-null-check fixup is not used on AArch64 (the old "No-op on
// amd64" comment was stale); this hook aborts if it is ever called.
void Matcher::pd_implicit_null_fixup(MachNode *node, uint idx) {
  Unimplemented();
}

// Advertise here if the CPU requires explicit rounding operations to
// implement the UseStrictFP mode.
const bool Matcher::strict_fp_requires_explicit_rounding = false;

// Are floats converted to double when stored to stack during
// deoptimization?
bool Matcher::float_in_double() { return true; }

// Do ints take an entire long register or just half?
// The relevant question is how the int is callee-saved:
// the whole long is written but de-opt'ing will have to extract
// the relevant 32 bits.
const bool Matcher::int_in_long = true;
2107 
2108 // Return whether or not this register is ever used as an argument.
2109 // This function is used on startup to build the trampoline stubs in
2110 // generateOptoStub.  Registers not mentioned will be killed by the VM
2111 // call in the trampoline, and arguments in those registers not be
2112 // available to the callee.
2113 bool Matcher::can_be_java_arg(int reg)
2114 {
2115   return
2116     reg ==  R0_num || reg == R0_H_num ||
2117     reg ==  R1_num || reg == R1_H_num ||
2118     reg ==  R2_num || reg == R2_H_num ||
2119     reg ==  R3_num || reg == R3_H_num ||
2120     reg ==  R4_num || reg == R4_H_num ||
2121     reg ==  R5_num || reg == R5_H_num ||
2122     reg ==  R6_num || reg == R6_H_num ||
2123     reg ==  R7_num || reg == R7_H_num ||
2124     reg ==  V0_num || reg == V0_H_num ||
2125     reg ==  V1_num || reg == V1_H_num ||
2126     reg ==  V2_num || reg == V2_H_num ||
2127     reg ==  V3_num || reg == V3_H_num ||
2128     reg ==  V4_num || reg == V4_H_num ||
2129     reg ==  V5_num || reg == V5_H_num ||
2130     reg ==  V6_num || reg == V6_H_num ||
2131     reg ==  V7_num || reg == V7_H_num;
2132 }
2133 
// Any register that can carry a Java argument is also spillable.
bool Matcher::is_spillable_arg(int reg)
{
  return can_be_java_arg(reg);
}

// Never use hand-written assembly for long division by a constant.
bool Matcher::use_asm_for_ldiv_by_con(jlong divisor) {
  return false;
}
2142 
// Register for DIVI projection of divmodI.
// These four masks abort if requested — presumably because divmod
// nodes are never created on AArch64 (no combined div/mod
// instruction); confirm against the match rules.
RegMask Matcher::divI_proj_mask() {
  ShouldNotReachHere();
  return RegMask();
}

// Register for MODI projection of divmodI.
RegMask Matcher::modI_proj_mask() {
  ShouldNotReachHere();
  return RegMask();
}

// Register for DIVL projection of divmodL.
RegMask Matcher::divL_proj_mask() {
  ShouldNotReachHere();
  return RegMask();
}

// Register for MODL projection of divmodL.
RegMask Matcher::modL_proj_mask() {
  ShouldNotReachHere();
  return RegMask();
}

// SP-save register mask for method handle invokes: the frame pointer.
const RegMask Matcher::method_handle_invoke_SP_save_mask() {
  return FP_REG_mask();
}
2169 
2170 bool size_fits_all_mem_uses(AddPNode* addp, int shift) {
2171   for (DUIterator_Fast imax, i = addp->fast_outs(imax); i < imax; i++) {
2172     Node* u = addp->fast_out(i);
2173     if (u->is_Mem()) {
2174       int opsize = u->as_Mem()->memory_size();
2175       assert(opsize > 0, "unexpected memory operand size");
2176       if (u->as_Mem()->memory_size() != (1<<shift)) {
2177         return false;
2178       }
2179     }
2180   }
2181   return true;
2182 }
2183 
// Emit a volatile (ordered) load/store via INSN.  Volatile accesses
// only support plain base-register addressing: no index, scale or
// displacement is permitted.
#define MOV_VOLATILE(REG, BASE, INDEX, SCALE, DISP, SCRATCH, INSN)      \
  MacroAssembler _masm(&cbuf);                                              \
  {                                                                     \
    guarantee(INDEX == -1, "mode not permitted for volatile");          \
    guarantee(DISP == 0, "mode not permitted for volatile");            \
    guarantee(SCALE == 0, "mode not permitted for volatile");           \
    __ INSN(REG, as_Register(BASE));                                    \
  }

// Member-function-pointer types used by the loadStore() helpers below:
// integer-register, FP-register and SIMD-vector memory instructions.
typedef void (MacroAssembler::* mem_insn)(Register Rt, const Address &adr);
typedef void (MacroAssembler::* mem_float_insn)(FloatRegister Rt, const Address &adr);
typedef void (MacroAssembler::* mem_vector_insn)(FloatRegister Rt,
                                  MacroAssembler::SIMD_RegVariant T, const Address &adr);
2197 
  // Used for all non-volatile memory accesses.  The use of
  // $mem->opcode() to discover whether this pattern uses sign-extended
  // offsets is something of a kludge.
  static void loadStore(MacroAssembler masm, mem_insn insn,
                         Register reg, int opcode,
                         Register base, int index, int size, int disp)
  {
    Address::extend scale;

    // Hooboy, this is fugly.  We need a way to communicate to the
    // encoder that the index needs to be sign extended, so we have to
    // enumerate all the cases.
    switch (opcode) {
    case INDINDEXSCALEDOFFSETI2L:
    case INDINDEXSCALEDI2L:
    case INDINDEXSCALEDOFFSETI2LN:
    case INDINDEXSCALEDI2LN:
    case INDINDEXOFFSETI2L:
    case INDINDEXOFFSETI2LN:
      // int index converted to long: sign-extend the 32-bit index
      scale = Address::sxtw(size);
      break;
    default:
      scale = Address::lsl(size);
    }

    if (index == -1) {
      // base + displacement addressing
      (masm.*insn)(reg, Address(base, disp));
    } else {
      if (disp == 0) {
        // base + (extended/scaled) index addressing
        (masm.*insn)(reg, Address(base, as_Register(index), scale));
      } else {
        // both index and displacement: fold the displacement into
        // rscratch1 first, since A64 has no base+index+disp mode
        masm.lea(rscratch1, Address(base, disp));
        (masm.*insn)(reg, Address(rscratch1, as_Register(index), scale));
      }
    }
  }
2234 
2235   static void loadStore(MacroAssembler masm, mem_float_insn insn,
2236                          FloatRegister reg, int opcode,
2237                          Register base, int index, int size, int disp)
2238   {
2239     Address::extend scale;
2240 
2241     switch (opcode) {
2242     case INDINDEXSCALEDOFFSETI2L:
2243     case INDINDEXSCALEDI2L:
2244     case INDINDEXSCALEDOFFSETI2LN:
2245     case INDINDEXSCALEDI2LN:
2246       scale = Address::sxtw(size);
2247       break;
2248     default:
2249       scale = Address::lsl(size);
2250     }
2251 
2252      if (index == -1) {
2253       (masm.*insn)(reg, Address(base, disp));
2254     } else {
2255       if (disp == 0) {
2256         (masm.*insn)(reg, Address(base, as_Register(index), scale));
2257       } else {
2258         masm.lea(rscratch1, Address(base, disp));
2259         (masm.*insn)(reg, Address(rscratch1, as_Register(index), scale));
2260       }
2261     }
2262   }
2263 
2264   static void loadStore(MacroAssembler masm, mem_vector_insn insn,
2265                          FloatRegister reg, MacroAssembler::SIMD_RegVariant T,
2266                          int opcode, Register base, int index, int size, int disp)
2267   {
2268     if (index == -1) {
2269       (masm.*insn)(reg, T, Address(base, disp));
2270     } else {
2271       assert(disp == 0, "unsupported address mode");
2272       (masm.*insn)(reg, T, Address(base, as_Register(index), Address::lsl(size)));
2273     }
2274   }
2275 
2276 %}
2277 
2278 
2279 
2280 //----------ENCODING BLOCK-----------------------------------------------------
2281 // This block specifies the encoding classes used by the compiler to
2282 // output byte streams.  Encoding classes are parameterized macros
2283 // used by Machine Instruction Nodes in order to generate the bit
2284 // encoding of the instruction.  Operands specify their base encoding
2285 // interface with the interface keyword.  There are currently
2286 // supported four interfaces, REG_INTER, CONST_INTER, MEMORY_INTER, &
2287 // COND_INTER.  REG_INTER causes an operand to generate a function
2288 // which returns its register number when queried.  CONST_INTER causes
2289 // an operand to generate a function which returns the value of the
2290 // constant when queried.  MEMORY_INTER causes an operand to generate
2291 // four functions which return the Base Register, the Index Register,
2292 // the Scale Value, and the Offset Value of the operand when queried.
2293 // COND_INTER causes an operand to generate six functions which return
2294 // the encoding code (ie - encoding bits for the instruction)
2295 // associated with each basic boolean condition for a conditional
2296 // instruction.
2297 //
2298 // Instructions specify two basic values for encoding.  Again, a
2299 // function is available to check if the constant displacement is an
2300 // oop. They use the ins_encode keyword to specify their encoding
2301 // classes (which must be a sequence of enc_class names, and their
2302 // parameters, specified in the encoding block), and they use the
2303 // opcode keyword to specify, in order, their primary, secondary, and
2304 // tertiary opcode.  Only the opcode sections which a particular
2305 // instruction needs for encoding need to be specified.
2306 encode %{
2307   // Build emit functions for each basic byte or larger field in the
2308   // intel encoding scheme (opcode, rm, sib, immediate), and call them
2309   // from C++ code in the enc_class source block.  Emit functions will
2310   // live in the main source block for now.  In future, we can
2311   // generalize this by adding a syntax that specifies the sizes of
2312   // fields in an order, so that the adlc can build the emit functions
2313   // automagically
2314 
2315   // catch all for unimplemented encodings
  // catch all for unimplemented encodings: traps at runtime with a
  // diagnostic message rather than emitting bad code
  enc_class enc_unimplemented %{
    MacroAssembler _masm(&cbuf);
    __ unimplemented("C2 catch all");
  %}
2320 
  // BEGIN Non-volatile memory access

  // Integer loads.  Each encoding forwards to loadStore(), which
  // decodes the memory operand (base/index/scale/disp) and uses
  // $mem->opcode() to detect I2L modes needing a sign-extended index.

  // ldrsbw: load signed byte into 32-bit register
  enc_class aarch64_enc_ldrsbw(iRegI dst, memory mem) %{
    Register dst_reg = as_Register($dst$$reg);
    loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldrsbw, dst_reg, $mem->opcode(),
               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}

  // ldrsb: load signed byte, sign-extended to 64 bits
  enc_class aarch64_enc_ldrsb(iRegI dst, memory mem) %{
    Register dst_reg = as_Register($dst$$reg);
    loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldrsb, dst_reg, $mem->opcode(),
               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}

  // ldrb: load unsigned byte into int
  enc_class aarch64_enc_ldrb(iRegI dst, memory mem) %{
    Register dst_reg = as_Register($dst$$reg);
    loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldrb, dst_reg, $mem->opcode(),
               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}

  // ldrb: load unsigned byte into long
  enc_class aarch64_enc_ldrb(iRegL dst, memory mem) %{
    Register dst_reg = as_Register($dst$$reg);
    loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldrb, dst_reg, $mem->opcode(),
               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}

  // ldrshw: load signed halfword into 32-bit register
  enc_class aarch64_enc_ldrshw(iRegI dst, memory mem) %{
    Register dst_reg = as_Register($dst$$reg);
    loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldrshw, dst_reg, $mem->opcode(),
               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}

  // ldrsh: load signed halfword, sign-extended to 64 bits
  enc_class aarch64_enc_ldrsh(iRegI dst, memory mem) %{
    Register dst_reg = as_Register($dst$$reg);
    loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldrsh, dst_reg, $mem->opcode(),
               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}

  // ldrh: load unsigned halfword into int
  enc_class aarch64_enc_ldrh(iRegI dst, memory mem) %{
    Register dst_reg = as_Register($dst$$reg);
    loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldrh, dst_reg, $mem->opcode(),
               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}

  // ldrh: load unsigned halfword into long
  enc_class aarch64_enc_ldrh(iRegL dst, memory mem) %{
    Register dst_reg = as_Register($dst$$reg);
    loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldrh, dst_reg, $mem->opcode(),
               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}
2370 
  // Word, long, FP and vector loads; all routed through loadStore().

  // ldrw: load 32-bit word into int
  enc_class aarch64_enc_ldrw(iRegI dst, memory mem) %{
    Register dst_reg = as_Register($dst$$reg);
    loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldrw, dst_reg, $mem->opcode(),
               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}

  // ldrw: load 32-bit word into long (zero-extended)
  enc_class aarch64_enc_ldrw(iRegL dst, memory mem) %{
    Register dst_reg = as_Register($dst$$reg);
    loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldrw, dst_reg, $mem->opcode(),
               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}

  // ldrsw: load 32-bit word into long, sign-extended
  enc_class aarch64_enc_ldrsw(iRegL dst, memory mem) %{
    Register dst_reg = as_Register($dst$$reg);
    loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldrsw, dst_reg, $mem->opcode(),
               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}

  // ldr: load 64-bit doubleword
  enc_class aarch64_enc_ldr(iRegL dst, memory mem) %{
    Register dst_reg = as_Register($dst$$reg);
    loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldr, dst_reg, $mem->opcode(),
               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}

  // ldrs: load 32-bit float
  enc_class aarch64_enc_ldrs(vRegF dst, memory mem) %{
    FloatRegister dst_reg = as_FloatRegister($dst$$reg);
    loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldrs, dst_reg, $mem->opcode(),
               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}

  // ldrd: load 64-bit double
  enc_class aarch64_enc_ldrd(vRegD dst, memory mem) %{
    FloatRegister dst_reg = as_FloatRegister($dst$$reg);
    loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldrd, dst_reg, $mem->opcode(),
               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}

  // vector load, 32-bit (S) variant
  enc_class aarch64_enc_ldrvS(vecD dst, memory mem) %{
    FloatRegister dst_reg = as_FloatRegister($dst$$reg);
    loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldr, dst_reg, MacroAssembler::S,
       $mem->opcode(), as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}

  // vector load, 64-bit (D) variant
  enc_class aarch64_enc_ldrvD(vecD dst, memory mem) %{
    FloatRegister dst_reg = as_FloatRegister($dst$$reg);
    loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldr, dst_reg, MacroAssembler::D,
       $mem->opcode(), as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}

  // vector load, 128-bit (Q) variant
  enc_class aarch64_enc_ldrvQ(vecX dst, memory mem) %{
    FloatRegister dst_reg = as_FloatRegister($dst$$reg);
    loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldr, dst_reg, MacroAssembler::Q,
       $mem->opcode(), as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}
2424 
  // Byte, halfword and word stores.  The *0 variants store the zero
  // register (zr) directly, avoiding a constant materialization.

  // strb: store byte
  enc_class aarch64_enc_strb(iRegI src, memory mem) %{
    Register src_reg = as_Register($src$$reg);
    loadStore(MacroAssembler(&cbuf), &MacroAssembler::strb, src_reg, $mem->opcode(),
               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}

  // strb of constant zero (uses zr)
  enc_class aarch64_enc_strb0(memory mem) %{
    MacroAssembler _masm(&cbuf);
    loadStore(_masm, &MacroAssembler::strb, zr, $mem->opcode(),
               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}

  // strb of zero preceded by a StoreStore barrier
  enc_class aarch64_enc_strb0_ordered(memory mem) %{
    MacroAssembler _masm(&cbuf);
    __ membar(Assembler::StoreStore);
    loadStore(_masm, &MacroAssembler::strb, zr, $mem->opcode(),
               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}

  // strh: store halfword
  enc_class aarch64_enc_strh(iRegI src, memory mem) %{
    Register src_reg = as_Register($src$$reg);
    loadStore(MacroAssembler(&cbuf), &MacroAssembler::strh, src_reg, $mem->opcode(),
               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}

  // strh of constant zero (uses zr)
  enc_class aarch64_enc_strh0(memory mem) %{
    MacroAssembler _masm(&cbuf);
    loadStore(_masm, &MacroAssembler::strh, zr, $mem->opcode(),
               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}

  // strw: store 32-bit word
  enc_class aarch64_enc_strw(iRegI src, memory mem) %{
    Register src_reg = as_Register($src$$reg);
    loadStore(MacroAssembler(&cbuf), &MacroAssembler::strw, src_reg, $mem->opcode(),
               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}

  // strw of constant zero (uses zr)
  enc_class aarch64_enc_strw0(memory mem) %{
    MacroAssembler _masm(&cbuf);
    loadStore(_masm, &MacroAssembler::strw, zr, $mem->opcode(),
               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}
2467 
  // Long, FP and vector stores.

  // str: store 64-bit doubleword
  enc_class aarch64_enc_str(iRegL src, memory mem) %{
    Register src_reg = as_Register($src$$reg);
    // we sometimes get asked to store the stack pointer into the
    // current thread -- we cannot do that directly on AArch64
    // (sp is not a valid source for str), so copy it via rscratch2
    if (src_reg == r31_sp) {
      MacroAssembler _masm(&cbuf);
      assert(as_Register($mem$$base) == rthread, "unexpected store for sp");
      __ mov(rscratch2, sp);
      src_reg = rscratch2;
    }
    loadStore(MacroAssembler(&cbuf), &MacroAssembler::str, src_reg, $mem->opcode(),
               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}

  // str of constant zero (uses zr)
  enc_class aarch64_enc_str0(memory mem) %{
    MacroAssembler _masm(&cbuf);
    loadStore(_masm, &MacroAssembler::str, zr, $mem->opcode(),
               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}

  // strs: store 32-bit float
  enc_class aarch64_enc_strs(vRegF src, memory mem) %{
    FloatRegister src_reg = as_FloatRegister($src$$reg);
    loadStore(MacroAssembler(&cbuf), &MacroAssembler::strs, src_reg, $mem->opcode(),
               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}

  // strd: store 64-bit double
  enc_class aarch64_enc_strd(vRegD src, memory mem) %{
    FloatRegister src_reg = as_FloatRegister($src$$reg);
    loadStore(MacroAssembler(&cbuf), &MacroAssembler::strd, src_reg, $mem->opcode(),
               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}

  // vector store, 32-bit (S) variant
  enc_class aarch64_enc_strvS(vecD src, memory mem) %{
    FloatRegister src_reg = as_FloatRegister($src$$reg);
    loadStore(MacroAssembler(&cbuf), &MacroAssembler::str, src_reg, MacroAssembler::S,
       $mem->opcode(), as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}

  // vector store, 64-bit (D) variant
  enc_class aarch64_enc_strvD(vecD src, memory mem) %{
    FloatRegister src_reg = as_FloatRegister($src$$reg);
    loadStore(MacroAssembler(&cbuf), &MacroAssembler::str, src_reg, MacroAssembler::D,
       $mem->opcode(), as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}

  // vector store, 128-bit (Q) variant
  enc_class aarch64_enc_strvQ(vecX src, memory mem) %{
    FloatRegister src_reg = as_FloatRegister($src$$reg);
    loadStore(MacroAssembler(&cbuf), &MacroAssembler::str, src_reg, MacroAssembler::Q,
       $mem->opcode(), as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}
2517 
2518   // END Non-volatile memory access
2519 
2520   // this encoding writes the address of the first instruction in the
2521   // call sequence for the runtime call into the anchor pc slot. this
2522   // address allows the runtime to i) locate the code buffer for the
2523   // caller (any address in the buffer would do) and ii) find the oop
2524   // map associated with the call (has to address the instruction
2525   // following the call). note that we have to store the address which
2526   // follows the actual call.
2527   // 
2528   // the offset from the current pc can be computed by considering
2529   // what gets generated between this point up to and including the
2530   // call. it looks like this
2531   //
2532   //   movz xscratch1 0xnnnn        <-- current pc is here
2533   //   movk xscratch1 0xnnnn
2534   //   movk xscratch1 0xnnnn
2535   //   str xscratch1, [xthread,#anchor_pc_off]
2536   //   mov xscratch2, sp
  //   str xscratch2, [xthread,#anchor_sp_off]
2538   //   mov x0, x1
2539   //   . . .
2540   //   mov xn-1, xn
2541   //   mov xn, thread            <-- always passed
2542   //   mov xn+1, rfp             <-- optional iff primary == 1
2543   //   movz xscratch1 0xnnnn
2544   //   movk xscratch1 0xnnnn
2545   //   movk xscratch1 0xnnnn
2546   //   blr  xscratch1
2547   //   . . .
2548   //
2549   // where the called routine has n args (including the thread and,
2550   // possibly the stub's caller return address currently in rfp).  we
2551   // can compute n by looking at the number of args passed into the
  // stub. we assert that nargs is <= 8.
2553   //
2554   // so the offset we need to add to the pc (in 32-bit words) is
2555   //   3 +        <-- load 48-bit constant return pc
2556   //   1 +        <-- write anchor pc
2557   //   1 +        <-- copy sp
2558   //   1 +        <-- write anchor sp
2559   //   nargs +    <-- java stub arg count
2560   //   1 +        <-- extra thread arg
2561   // [ 1 + ]      <-- optional ret address of stub caller
2562   //   3 +        <-- load 64 bit call target address
2563   //   1          <-- blr instruction
2564   //
2565   // i.e we need to add (nargs + 11) * 4 bytes or (nargs + 12) * 4 bytes
2566   //
2567 
  // Record the anticipated return pc of an upcoming runtime call in the
  // thread's JavaFrameAnchor (last_Java_pc).  call_offset is derived from
  // the instruction-count breakdown in the comment above; nargs is the
  // java-level stub argument count, plus one when $primary != 0 (stub
  // caller's return address passed in rfp).
  enc_class aarch64_enc_save_pc() %{
    Compile* C = ra_->C;
    int nargs = C->tf()->domain()->cnt() - TypeFunc::Parms;
    if ($primary) { nargs++; }
    assert(nargs <= 8, "opto runtime stub has more than 8 args!");
    MacroAssembler _masm(&cbuf);
    address pc = __ pc();
    int call_offset = (nargs + 11) * 4;  // bytes from here to the instruction after blr
    int field_offset = in_bytes(JavaThread::frame_anchor_offset()) +
                       in_bytes(JavaFrameAnchor::last_Java_pc_offset());
    __ lea(rscratch1, InternalAddress(pc + call_offset));
    __ str(rscratch1, Address(rthread, field_offset));
  %}
2581 
2582   // volatile loads and stores
2583 
  // Store-release byte; on CPUs flagged CPU_DMB_ATOMICS a trailing
  // dmb ish is emitted as an extra full barrier.
  enc_class aarch64_enc_stlrb(iRegI src, memory mem) %{
    MOV_VOLATILE(as_Register($src$$reg), $mem$$base, $mem$$index, $mem$$scale, $mem$$disp,
                 rscratch1, stlrb);
    if (VM_Version::cpu_cpuFeatures() & VM_Version::CPU_DMB_ATOMICS)
      __ dmb(__ ISH);
  %}

  // Store-release halfword (see stlrb above for the barrier rationale).
  enc_class aarch64_enc_stlrh(iRegI src, memory mem) %{
    MOV_VOLATILE(as_Register($src$$reg), $mem$$base, $mem$$index, $mem$$scale, $mem$$disp,
                 rscratch1, stlrh);
    if (VM_Version::cpu_cpuFeatures() & VM_Version::CPU_DMB_ATOMICS)
      __ dmb(__ ISH);
  %}

  // Store-release word (see stlrb above for the barrier rationale).
  enc_class aarch64_enc_stlrw(iRegI src, memory mem) %{
    MOV_VOLATILE(as_Register($src$$reg), $mem$$base, $mem$$index, $mem$$scale, $mem$$disp,
                 rscratch1, stlrw);
    if (VM_Version::cpu_cpuFeatures() & VM_Version::CPU_DMB_ATOMICS)
      __ dmb(__ ISH);
  %}
2604 
2605 
  // Load-acquire encodings: MOV_VOLATILE composes the address (using
  // rscratch1 where base+index/disp must be materialized) and issues the
  // ldar* variant named by its last argument.

  // load-acquire byte, then sign-extend into a 32-bit register
  enc_class aarch64_enc_ldarsbw(iRegI dst, memory mem) %{
    Register dst_reg = as_Register($dst$$reg);
    MOV_VOLATILE(dst_reg, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp,
             rscratch1, ldarb);
    __ sxtbw(dst_reg, dst_reg);
  %}

  // load-acquire byte, then sign-extend into a 64-bit register
  enc_class aarch64_enc_ldarsb(iRegL dst, memory mem) %{
    Register dst_reg = as_Register($dst$$reg);
    MOV_VOLATILE(dst_reg, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp,
             rscratch1, ldarb);
    __ sxtb(dst_reg, dst_reg);
  %}

  // load-acquire byte, zero-extended
  enc_class aarch64_enc_ldarbw(iRegI dst, memory mem) %{
    MOV_VOLATILE(as_Register($dst$$reg), $mem$$base, $mem$$index, $mem$$scale, $mem$$disp,
             rscratch1, ldarb);
  %}

  // load-acquire byte, zero-extended, long destination
  enc_class aarch64_enc_ldarb(iRegL dst, memory mem) %{
    MOV_VOLATILE(as_Register($dst$$reg), $mem$$base, $mem$$index, $mem$$scale, $mem$$disp,
             rscratch1, ldarb);
  %}

  // load-acquire halfword, then sign-extend into a 32-bit register
  enc_class aarch64_enc_ldarshw(iRegI dst, memory mem) %{
    Register dst_reg = as_Register($dst$$reg);
    MOV_VOLATILE(dst_reg, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp,
             rscratch1, ldarh);
    __ sxthw(dst_reg, dst_reg);
  %}

  // load-acquire halfword, then sign-extend into a 64-bit register
  enc_class aarch64_enc_ldarsh(iRegL dst, memory mem) %{
    Register dst_reg = as_Register($dst$$reg);
    MOV_VOLATILE(dst_reg, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp,
             rscratch1, ldarh);
    __ sxth(dst_reg, dst_reg);
  %}

  // load-acquire halfword, zero-extended
  enc_class aarch64_enc_ldarhw(iRegI dst, memory mem) %{
    MOV_VOLATILE(as_Register($dst$$reg), $mem$$base, $mem$$index, $mem$$scale, $mem$$disp,
             rscratch1, ldarh);
  %}

  // load-acquire halfword, zero-extended, long destination
  enc_class aarch64_enc_ldarh(iRegL dst, memory mem) %{
    MOV_VOLATILE(as_Register($dst$$reg), $mem$$base, $mem$$index, $mem$$scale, $mem$$disp,
             rscratch1, ldarh);
  %}

  // load-acquire word
  enc_class aarch64_enc_ldarw(iRegI dst, memory mem) %{
    MOV_VOLATILE(as_Register($dst$$reg), $mem$$base, $mem$$index, $mem$$scale, $mem$$disp,
             rscratch1, ldarw);
  %}

  // NOTE(review): this enc_class reuses the name aarch64_enc_ldarw from the
  // iRegI variant directly above (only the dst operand type differs) --
  // confirm ADLC resolves the duplicate name as intended.
  enc_class aarch64_enc_ldarw(iRegL dst, memory mem) %{
    MOV_VOLATILE(as_Register($dst$$reg), $mem$$base, $mem$$index, $mem$$scale, $mem$$disp,
             rscratch1, ldarw);
  %}

  // load-acquire doubleword
  enc_class aarch64_enc_ldar(iRegL dst, memory mem) %{
    MOV_VOLATILE(as_Register($dst$$reg), $mem$$base, $mem$$index, $mem$$scale, $mem$$disp,
             rscratch1, ldar);
  %}
2668 
  // Volatile float load: load-acquire the 32-bit value into the integer
  // scratch register, then fmov it across to the FP destination (there is
  // no ldar form that targets FP registers directly).
  enc_class aarch64_enc_fldars(vRegF dst, memory mem) %{
    MOV_VOLATILE(rscratch1, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp,
             rscratch1, ldarw);
    __ fmovs(as_FloatRegister($dst$$reg), rscratch1);
  %}

  // Volatile double load: 64-bit load-acquire via rscratch1, then fmov.
  enc_class aarch64_enc_fldard(vRegD dst, memory mem) %{
    MOV_VOLATILE(rscratch1, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp,
             rscratch1, ldar);
    __ fmovd(as_FloatRegister($dst$$reg), rscratch1);
  %}
2680 
  // Store-release doubleword.  stlr cannot take sp as a data operand, so a
  // request to store the stack pointer (only expected for writes into the
  // current thread) first copies sp into rscratch2.
  enc_class aarch64_enc_stlr(iRegL src, memory mem) %{
    Register src_reg = as_Register($src$$reg);
    // we sometimes get asked to store the stack pointer into the
    // current thread -- we cannot do that directly on AArch64
    if (src_reg == r31_sp) {
        MacroAssembler _masm(&cbuf);
      assert(as_Register($mem$$base) == rthread, "unexpected store for sp");
      __ mov(rscratch2, sp);
      src_reg = rscratch2;
    }
    MOV_VOLATILE(src_reg, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp,
                 rscratch1, stlr);
    if (VM_Version::cpu_cpuFeatures() & VM_Version::CPU_DMB_ATOMICS)
      __ dmb(__ ISH);
  %}
2696 
  // Volatile float store: fmov the FP value into rscratch2, then
  // store-release it as a word (stlr has no FP-register form).  The inner
  // scope limits the first _masm so MOV_VOLATILE can declare its own.
  enc_class aarch64_enc_fstlrs(vRegF src, memory mem) %{
    {
      MacroAssembler _masm(&cbuf);
      FloatRegister src_reg = as_FloatRegister($src$$reg);
      __ fmovs(rscratch2, src_reg);
    }
    MOV_VOLATILE(rscratch2, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp,
                 rscratch1, stlrw);
    if (VM_Version::cpu_cpuFeatures() & VM_Version::CPU_DMB_ATOMICS)
      __ dmb(__ ISH);
  %}

  // Volatile double store: as fstlrs above but 64-bit fmov + stlr.
  enc_class aarch64_enc_fstlrd(vRegD src, memory mem) %{
    {
      MacroAssembler _masm(&cbuf);
      FloatRegister src_reg = as_FloatRegister($src$$reg);
      __ fmovd(rscratch2, src_reg);
    }
    MOV_VOLATILE(rscratch2, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp,
                 rscratch1, stlr);
    if (VM_Version::cpu_cpuFeatures() & VM_Version::CPU_DMB_ATOMICS)
      __ dmb(__ ISH);
  %}
2720 
2721   // synchronized read/update encodings
2722 
  // Load-acquire-exclusive doubleword.  ldaxr only accepts a plain base
  // register, so any index/displacement is first folded into rscratch1
  // with lea.
  enc_class aarch64_enc_ldaxr(iRegL dst, memory mem) %{
    MacroAssembler _masm(&cbuf);
    Register dst_reg = as_Register($dst$$reg);
    Register base = as_Register($mem$$base);
    int index = $mem$$index;
    int scale = $mem$$scale;
    int disp = $mem$$disp;
    if (index == -1) {  // no index register
       if (disp != 0) {      
        __ lea(rscratch1, Address(base, disp));
        __ ldaxr(dst_reg, rscratch1);
      } else {
        // TODO
        // should we ever get anything other than this case?
        __ ldaxr(dst_reg, base);
      }
    } else {
      Register index_reg = as_Register(index);
      if (disp == 0) {
        __ lea(rscratch1, Address(base, index_reg, Address::lsl(scale)));
        __ ldaxr(dst_reg, rscratch1);
      } else {
        // fold disp first, then the scaled index
        __ lea(rscratch1, Address(base, disp));
        __ lea(rscratch1, Address(rscratch1, index_reg, Address::lsl(scale)));
        __ ldaxr(dst_reg, rscratch1);
      }
    }
  %}
2751 
  // Store-release-exclusive doubleword.  As with ldaxr, the effective
  // address is composed into rscratch2 because stlxr takes a plain base
  // register.  The status result lands in rscratch1 (0 = success); the
  // final cmpw sets the flags for a following conditional branch.
  enc_class aarch64_enc_stlxr(iRegLNoSp src, memory mem) %{
    MacroAssembler _masm(&cbuf);
    Register src_reg = as_Register($src$$reg);
    Register base = as_Register($mem$$base);
    int index = $mem$$index;
    int scale = $mem$$scale;
    int disp = $mem$$disp;
    if (index == -1) {  // no index register
       if (disp != 0) {      
        __ lea(rscratch2, Address(base, disp));
        __ stlxr(rscratch1, src_reg, rscratch2);
      } else {
        // TODO
        // should we ever get anything other than this case?
        __ stlxr(rscratch1, src_reg, base);
      }
    } else {
      Register index_reg = as_Register(index);
      if (disp == 0) {
        __ lea(rscratch2, Address(base, index_reg, Address::lsl(scale)));
        __ stlxr(rscratch1, src_reg, rscratch2);
      } else {
        // fold disp first, then the scaled index
        __ lea(rscratch2, Address(base, disp));
        __ lea(rscratch2, Address(rscratch2, index_reg, Address::lsl(scale)));
        __ stlxr(rscratch1, src_reg, rscratch2);
      }
    }
    __ cmpw(rscratch1, zr);  // EQ <=> store-exclusive succeeded
  %}
2781 
  // 64-bit compare-and-swap with release semantics only (no acquire).
  // The memory operand must be a bare base register (matcher guarantees
  // no index and zero displacement).
  enc_class aarch64_enc_cmpxchg(memory mem, iRegLNoSp oldval, iRegLNoSp newval) %{
    MacroAssembler _masm(&cbuf);
    guarantee($mem$$index == -1 && $mem$$disp == 0, "impossible encoding");
    __ cmpxchg($mem$$base$$Register, $oldval$$Register, $newval$$Register,
               Assembler::xword, /*acquire*/ false, /*release*/ true);
  %}

  // 32-bit variant of aarch64_enc_cmpxchg.
  enc_class aarch64_enc_cmpxchgw(memory mem, iRegINoSp oldval, iRegINoSp newval) %{
    MacroAssembler _masm(&cbuf);
    guarantee($mem$$index == -1 && $mem$$disp == 0, "impossible encoding");
    __ cmpxchg($mem$$base$$Register, $oldval$$Register, $newval$$Register,
               Assembler::word, /*acquire*/ false, /*release*/ true);
  %}
2795 
2796 
  // Shenandoah GC oop compare-and-swap (release only).  oldval is copied
  // into tmp because the barrier-set CAS may clobber its expected-value
  // register; success/failure is written to res (is_cae == false).
  enc_class aarch64_enc_cmpxchg_oop_shenandoah(memory mem, iRegP oldval, iRegP newval, iRegPNoSp tmp, iRegINoSp res) %{
    MacroAssembler _masm(&cbuf);
    guarantee($mem$$index == -1 && $mem$$disp == 0, "impossible encoding");
    Register tmp = $tmp$$Register;
    __ mov(tmp, $oldval$$Register); // Must not clobber oldval.
    ShenandoahBarrierSetAssembler::bsasm()->cmpxchg_oop(&_masm, $mem$$Register, tmp, $newval$$Register,
                              /*acquire*/ false, /*release*/ true, /*weak*/ false, /*is_cae*/ false, $res$$Register);
  %}
2805 
  // The only difference between aarch64_enc_cmpxchg and
  // aarch64_enc_cmpxchg_acq is that we use load-acquire in the
  // CompareAndSwap sequence to serve as a barrier on acquiring a
  // lock.
  enc_class aarch64_enc_cmpxchg_acq(memory mem, iRegLNoSp oldval, iRegLNoSp newval) %{
    MacroAssembler _masm(&cbuf);
    guarantee($mem$$index == -1 && $mem$$disp == 0, "impossible encoding");
    __ cmpxchg($mem$$base$$Register, $oldval$$Register, $newval$$Register,
               Assembler::xword, /*acquire*/ true, /*release*/ true);
  %}

  // 32-bit acquiring variant.
  enc_class aarch64_enc_cmpxchgw_acq(memory mem, iRegINoSp oldval, iRegINoSp newval) %{
    MacroAssembler _masm(&cbuf);
    guarantee($mem$$index == -1 && $mem$$disp == 0, "impossible encoding");
    __ cmpxchg($mem$$base$$Register, $oldval$$Register, $newval$$Register,
               Assembler::word, /*acquire*/ true, /*release*/ true);
  %}

  // Acquiring variant of the Shenandoah oop CAS (see the non-acq encoding
  // above for the tmp-copy rationale).
  enc_class aarch64_enc_cmpxchg_acq_oop_shenandoah(memory mem, iRegP oldval, iRegP newval, iRegPNoSp tmp, iRegINoSp res) %{
    MacroAssembler _masm(&cbuf);
    guarantee($mem$$index == -1 && $mem$$disp == 0, "impossible encoding");
    Register tmp = $tmp$$Register;
    __ mov(tmp, $oldval$$Register); // Must not clobber oldval.
    ShenandoahBarrierSetAssembler::bsasm()->cmpxchg_oop(&_masm, $mem$$Register, tmp, $newval$$Register,
                              /*acquire*/ true, /*release*/ true, /*weak*/ false, /*is_cae*/ false,
                              $res$$Register);
  %}
2833 
  // auxiliary used for CompareAndSwapX to set result register:
  // res = 1 if the flags say EQ (CAS succeeded), else 0.
  enc_class aarch64_enc_cset_eq(iRegINoSp res) %{
    MacroAssembler _masm(&cbuf);
    Register res_reg = as_Register($res$$reg);
    __ cset(res_reg, Assembler::EQ);
  %}
2840 
  // prefetch encodings

  // Prefetch for read: PLDL1KEEP hint.  When both index and displacement
  // are present the displacement is folded into rscratch1 first, since
  // prfm cannot encode base+index+disp in one address.
  enc_class aarch64_enc_prefetchr(memory mem) %{
    MacroAssembler _masm(&cbuf);
    Register base = as_Register($mem$$base);
    int index = $mem$$index;
    int scale = $mem$$scale;
    int disp = $mem$$disp;
    if (index == -1) {
      __ prfm(Address(base, disp), PLDL1KEEP);
    } else {
      Register index_reg = as_Register(index);
      if (disp == 0) {
        __ prfm(Address(base, index_reg, Address::lsl(scale)), PLDL1KEEP);
      } else {
        __ lea(rscratch1, Address(base, disp));
        __ prfm(Address(rscratch1, index_reg, Address::lsl(scale)), PLDL1KEEP);
      }
    }
  %}

  // Prefetch for write: PSTL1KEEP hint (same addressing strategy as above).
  enc_class aarch64_enc_prefetchw(memory mem) %{
    MacroAssembler _masm(&cbuf);
    Register base = as_Register($mem$$base);
    int index = $mem$$index;
    int scale = $mem$$scale;
    int disp = $mem$$disp;
    if (index == -1) {
      __ prfm(Address(base, disp), PSTL1KEEP);
    } else {
      Register index_reg = as_Register(index);
      if (disp == 0) {
        __ prfm(Address(base, index_reg, Address::lsl(scale)), PSTL1KEEP);
      } else {
        __ lea(rscratch1, Address(base, disp));
        __ prfm(Address(rscratch1, index_reg, Address::lsl(scale)), PSTL1KEEP);
      }
    }
  %}

  // Non-temporal prefetch: PSTL1STRM (streaming) hint.
  enc_class aarch64_enc_prefetchnta(memory mem) %{
    MacroAssembler _masm(&cbuf);
    Register base = as_Register($mem$$base);
    int index = $mem$$index;
    int scale = $mem$$scale;
    int disp = $mem$$disp;
    if (index == -1) {
      __ prfm(Address(base, disp), PSTL1STRM);
    } else {
      Register index_reg = as_Register(index);
      if (disp == 0) {
        __ prfm(Address(base, index_reg, Address::lsl(scale)), PSTL1STRM);
        __ nop();  // NOTE(review): nop only on this path -- presumably to pad the encoding size; confirm intent
      } else {
        __ lea(rscratch1, Address(base, disp));
        __ prfm(Address(rscratch1, index_reg, Address::lsl(scale)), PSTL1STRM);
      }
    }
  %}
2900 
  // mov encodings
2902 
  // Move 32-bit immediate into register; zero is moved from zr rather than
  // materialized.
  enc_class aarch64_enc_movw_imm(iRegI dst, immI src) %{
    MacroAssembler _masm(&cbuf);
    u_int32_t con = (u_int32_t)$src$$constant;
    Register dst_reg = as_Register($dst$$reg);
    if (con == 0) {
      __ movw(dst_reg, zr);
    } else {
      __ movw(dst_reg, con);
    }
  %}

  // Move 64-bit immediate into register; zero is moved from zr.
  enc_class aarch64_enc_mov_imm(iRegL dst, immL src) %{
    MacroAssembler _masm(&cbuf);
    Register dst_reg = as_Register($dst$$reg);
    u_int64_t con = (u_int64_t)$src$$constant;
    if (con == 0) {
      __ mov(dst_reg, zr);
    } else {
      __ mov(dst_reg, con);
    }
  %}
2924 
  // Move pointer constant into register, dispatching on relocation type:
  // oops use movoop, metadata uses mov_metadata, and plain addresses are
  // either moved directly (when below the first page, i.e. a small raw
  // value) or built page-relative with adrp + add.
  enc_class aarch64_enc_mov_p(iRegP dst, immP src) %{
    MacroAssembler _masm(&cbuf);
    Register dst_reg = as_Register($dst$$reg);
    address con = (address)$src$$constant;
    if (con == NULL || con == (address)1) {
      // NULL and the special value 1 are handled by dedicated encodings below
      ShouldNotReachHere();
    } else {
      relocInfo::relocType rtype = $src->constant_reloc();
      if (rtype == relocInfo::oop_type) {
        __ movoop(dst_reg, (jobject)con, /*immediate*/true);
      } else if (rtype == relocInfo::metadata_type) {
        __ mov_metadata(dst_reg, (Metadata*)con);
      } else {
        assert(rtype == relocInfo::none, "unexpected reloc type");
        if (con < (address)(uintptr_t)os::vm_page_size()) {
          __ mov(dst_reg, con);
        } else {
          unsigned long offset;
          __ adrp(dst_reg, con, offset);
          __ add(dst_reg, dst_reg, offset);
        }
      }
    }
  %}
2949 
  // Move NULL pointer constant (0) into register.
  enc_class aarch64_enc_mov_p0(iRegP dst, immP0 src) %{
    MacroAssembler _masm(&cbuf);
    Register dst_reg = as_Register($dst$$reg);
    __ mov(dst_reg, zr);
  %}

  // Move the special pointer constant 1 into register.
  enc_class aarch64_enc_mov_p1(iRegP dst, immP_1 src) %{
    MacroAssembler _masm(&cbuf);
    Register dst_reg = as_Register($dst$$reg);
    __ mov(dst_reg, (u_int64_t)1);
  %}
2961 
  // Materialize the safepoint polling page address with a poll-type
  // relocation; the page is assumed page-aligned so adrp alone suffices.
  enc_class aarch64_enc_mov_poll_page(iRegP dst, immPollPage src) %{
    MacroAssembler _masm(&cbuf);
    address page = (address)$src$$constant;
    Register dst_reg = as_Register($dst$$reg);
    unsigned long off;
    __ adrp(dst_reg, Address(page, relocInfo::poll_type), off);
    assert(off == 0, "assumed offset == 0");
  %}

  // Load the card-table byte-map base into dst.
  enc_class aarch64_enc_mov_byte_map_base(iRegP dst, immByteMapBase src) %{
    MacroAssembler _masm(&cbuf);
    __ load_byte_map_base($dst$$Register);
  %}
2975 
  // Move narrow (compressed) oop constant into register.
  enc_class aarch64_enc_mov_n(iRegN dst, immN src) %{
    MacroAssembler _masm(&cbuf);
    Register dst_reg = as_Register($dst$$reg);
    address con = (address)$src$$constant;
    if (con == NULL) {
      // narrow-oop NULL has its own encoding below
      ShouldNotReachHere();
    } else {
      relocInfo::relocType rtype = $src->constant_reloc();
      assert(rtype == relocInfo::oop_type, "unexpected reloc type");
      __ set_narrow_oop(dst_reg, (jobject)con);
    }
  %}

  // Move narrow-oop NULL (0) into register.
  enc_class aarch64_enc_mov_n0(iRegN dst, immN0 src) %{
    MacroAssembler _masm(&cbuf);
    Register dst_reg = as_Register($dst$$reg);
    __ mov(dst_reg, zr);
  %}

  // Move narrow (compressed) klass constant into register.
  enc_class aarch64_enc_mov_nk(iRegN dst, immNKlass src) %{
    MacroAssembler _masm(&cbuf);
    Register dst_reg = as_Register($dst$$reg);
    address con = (address)$src$$constant;
    if (con == NULL) {
      ShouldNotReachHere();
    } else {
      relocInfo::relocType rtype = $src->constant_reloc();
      assert(rtype == relocInfo::metadata_type, "unexpected reloc type");
      __ set_narrow_klass(dst_reg, (Klass *)con);
    }
  %}
3007 
  // arithmetic encodings

  // 32-bit add/subtract of an immediate.  $primary selects subtract, which
  // is implemented by negating the constant; a negative effective constant
  // is then emitted as the opposite operation on its magnitude so the
  // immediate always fits the unsigned add/sub encoding.  (The immIAddSub
  // operand limits the magnitude, so negation cannot overflow.)
  enc_class aarch64_enc_addsubw_imm(iRegI dst, iRegI src1, immIAddSub src2) %{
    MacroAssembler _masm(&cbuf);
    Register dst_reg = as_Register($dst$$reg);
    Register src_reg = as_Register($src1$$reg);
    int32_t con = (int32_t)$src2$$constant;
    // add has primary == 0, subtract has primary == 1
    if ($primary) { con = -con; }
    if (con < 0) {
      __ subw(dst_reg, src_reg, -con);
    } else {
      __ addw(dst_reg, src_reg, con);
    }
  %}

  // 64-bit counterpart of aarch64_enc_addsubw_imm.
  enc_class aarch64_enc_addsub_imm(iRegL dst, iRegL src1, immLAddSub src2) %{
    MacroAssembler _masm(&cbuf);
    Register dst_reg = as_Register($dst$$reg);
    Register src_reg = as_Register($src1$$reg);
    int32_t con = (int32_t)$src2$$constant;
    // add has primary == 0, subtract has primary == 1
    if ($primary) { con = -con; }
    if (con < 0) {
      __ sub(dst_reg, src_reg, -con);
    } else {
      __ add(dst_reg, src_reg, con);
    }
  %}
3037 
  // 32-bit signed division with Java semantics (corrected_idivl handles
  // the MIN_VALUE / -1 case); final flag 'false' selects quotient.
  enc_class aarch64_enc_divw(iRegI dst, iRegI src1, iRegI src2) %{
    MacroAssembler _masm(&cbuf);
   Register dst_reg = as_Register($dst$$reg);
   Register src1_reg = as_Register($src1$$reg);
   Register src2_reg = as_Register($src2$$reg);
    __ corrected_idivl(dst_reg, src1_reg, src2_reg, false, rscratch1);
  %}

  // 64-bit signed division (quotient).
  // NOTE(review): signature says iRegI but corrected_idivq is the 64-bit
  // divide -- presumably the enc_class operand types are informational;
  // confirm against the instruct definitions that use this encoding.
  enc_class aarch64_enc_div(iRegI dst, iRegI src1, iRegI src2) %{
    MacroAssembler _masm(&cbuf);
   Register dst_reg = as_Register($dst$$reg);
   Register src1_reg = as_Register($src1$$reg);
   Register src2_reg = as_Register($src2$$reg);
    __ corrected_idivq(dst_reg, src1_reg, src2_reg, false, rscratch1);
  %}

  // 32-bit signed remainder (final flag 'true' selects remainder).
  enc_class aarch64_enc_modw(iRegI dst, iRegI src1, iRegI src2) %{
    MacroAssembler _masm(&cbuf);
   Register dst_reg = as_Register($dst$$reg);
   Register src1_reg = as_Register($src1$$reg);
   Register src2_reg = as_Register($src2$$reg);
    __ corrected_idivl(dst_reg, src1_reg, src2_reg, true, rscratch1);
  %}

  // 64-bit signed remainder.
  enc_class aarch64_enc_mod(iRegI dst, iRegI src1, iRegI src2) %{
    MacroAssembler _masm(&cbuf);
   Register dst_reg = as_Register($dst$$reg);
   Register src1_reg = as_Register($src1$$reg);
   Register src2_reg = as_Register($src2$$reg);
    __ corrected_idivq(dst_reg, src1_reg, src2_reg, true, rscratch1);
  %}
3069 
  // compare instruction encodings

  // 32-bit register-register compare.
  enc_class aarch64_enc_cmpw(iRegI src1, iRegI src2) %{
    MacroAssembler _masm(&cbuf);
    Register reg1 = as_Register($src1$$reg);
    Register reg2 = as_Register($src2$$reg);
    __ cmpw(reg1, reg2);
  %}

  // 32-bit compare against an add/sub-range immediate: emit subsw for a
  // non-negative constant, addsw on the magnitude otherwise, discarding
  // the result into zr (flags only).
  enc_class aarch64_enc_cmpw_imm_addsub(iRegI src1, immIAddSub src2) %{
    MacroAssembler _masm(&cbuf);
    Register reg = as_Register($src1$$reg);
    int32_t val = $src2$$constant;
    if (val >= 0) {
      __ subsw(zr, reg, val);
    } else {
      __ addsw(zr, reg, -val);
    }
  %}

  // 32-bit compare against an arbitrary immediate: materialize it in
  // rscratch1 first.
  enc_class aarch64_enc_cmpw_imm(iRegI src1, immI src2) %{
    MacroAssembler _masm(&cbuf);
    Register reg1 = as_Register($src1$$reg);
    u_int32_t val = (u_int32_t)$src2$$constant;
    __ movw(rscratch1, val);
    __ cmpw(reg1, rscratch1);
  %}
3097 
  // 64-bit register-register compare.
  enc_class aarch64_enc_cmp(iRegL src1, iRegL src2) %{
    MacroAssembler _masm(&cbuf);
    Register reg1 = as_Register($src1$$reg);
    Register reg2 = as_Register($src2$$reg);
    __ cmp(reg1, reg2);
  %}

  // 64-bit compare against a 12-bit add/sub immediate.  val != -val is
  // false only for 0 and Long.MIN_VALUE; since the val >= 0 branch already
  // took 0, the else-branch catches MIN_VALUE, whose negation would
  // overflow, and loads it into rscratch1 instead.
  enc_class aarch64_enc_cmp_imm_addsub(iRegL src1, immL12 src2) %{
    MacroAssembler _masm(&cbuf);
    Register reg = as_Register($src1$$reg);
    int64_t val = $src2$$constant;
    if (val >= 0) {
      __ subs(zr, reg, val);
    } else if (val != -val) {
      __ adds(zr, reg, -val);
    } else {
    // aargh, Long.MIN_VALUE is a special case
      __ orr(rscratch1, zr, (u_int64_t)val);
      __ subs(zr, reg, rscratch1);
    }
  %}

  // 64-bit compare against an arbitrary immediate via rscratch1.
  enc_class aarch64_enc_cmp_imm(iRegL src1, immL src2) %{
    MacroAssembler _masm(&cbuf);
    Register reg1 = as_Register($src1$$reg);
    u_int64_t val = (u_int64_t)$src2$$constant;
    __ mov(rscratch1, val);
    __ cmp(reg1, rscratch1);
  %}
3127 
  // Pointer compare (64-bit).
  enc_class aarch64_enc_cmpp(iRegP src1, iRegP src2) %{
    MacroAssembler _masm(&cbuf);
    Register reg1 = as_Register($src1$$reg);
    Register reg2 = as_Register($src2$$reg);
    __ cmp(reg1, reg2);
  %}

  // Narrow-oop compare (32-bit).
  enc_class aarch64_enc_cmpn(iRegN src1, iRegN src2) %{
    MacroAssembler _masm(&cbuf);
    Register reg1 = as_Register($src1$$reg);
    Register reg2 = as_Register($src2$$reg);
    __ cmpw(reg1, reg2);
  %}

  // Pointer test against NULL (compare with zr).
  enc_class aarch64_enc_testp(iRegP src) %{
    MacroAssembler _masm(&cbuf);
    Register reg = as_Register($src$$reg);
    __ cmp(reg, zr);
  %}

  // Narrow-oop test against NULL (32-bit compare with zr).
  enc_class aarch64_enc_testn(iRegN src) %{
    MacroAssembler _masm(&cbuf);
    Register reg = as_Register($src$$reg);
    __ cmpw(reg, zr);
  %}
3153 
  // Unconditional branch to label.
  enc_class aarch64_enc_b(label lbl) %{
    MacroAssembler _masm(&cbuf);
    Label *L = $lbl$$label;
    __ b(*L);
  %}

  // Conditional branch; condition code comes from the cmpOp operand.
  enc_class aarch64_enc_br_con(cmpOp cmp, label lbl) %{
    MacroAssembler _masm(&cbuf);
    Label *L = $lbl$$label;
    __ br ((Assembler::Condition)$cmp$$cmpcode, *L);
  %}

  // Unsigned-condition branch; identical emission, but the cmpOpU operand
  // supplies unsigned condition codes.
  enc_class aarch64_enc_br_conU(cmpOpU cmp, label lbl) %{
    MacroAssembler _masm(&cbuf);
    Label *L = $lbl$$label;
    __ br ((Assembler::Condition)$cmp$$cmpcode, *L);
  %}
3171 
  // Slow-path subtype check (scan of the secondary supers array).  On a
  // miss, control falls to the bound 'miss' label with condition codes
  // set; the $primary variant additionally zeroes result on the hit path.
  enc_class aarch64_enc_partial_subtype_check(iRegP sub, iRegP super, iRegP temp, iRegP result)
  %{
     Register sub_reg = as_Register($sub$$reg);
     Register super_reg = as_Register($super$$reg);
     Register temp_reg = as_Register($temp$$reg);
     Register result_reg = as_Register($result$$reg);

     Label miss;
     MacroAssembler _masm(&cbuf);
     __ check_klass_subtype_slow_path(sub_reg, super_reg, temp_reg, result_reg,
                                     NULL, &miss,
                                     /*set_cond_codes:*/ true);
     if ($primary) {
       __ mov(result_reg, zr);  // hit: result == 0 signals success
     }
     __ bind(miss);
  %}
3189 
  // Static Java call.  Chooses the relocation type from the call kind
  // (runtime wrapper, optimized virtual, or plain static), emits a
  // trampoline call, and for real methods also emits the to-interpreter
  // stub.  Bails out recording "CodeCache is full" if either emission
  // fails.
  enc_class aarch64_enc_java_static_call(method meth) %{
    MacroAssembler _masm(&cbuf);

    address mark = __ pc();
    address addr = (address)$meth$$method;
    address call;
    if (!_method) {
      // A call to a runtime wrapper, e.g. new, new_typeArray_Java, uncommon_trap.
      call = __ trampoline_call(Address(addr, relocInfo::runtime_call_type), &cbuf);
    } else if (_optimized_virtual) {
      call = __ trampoline_call(Address(addr, relocInfo::opt_virtual_call_type), &cbuf);
    } else {
      call = __ trampoline_call(Address(addr, relocInfo::static_call_type), &cbuf);
    }
    if (call == NULL) {
      ciEnv::current()->record_failure("CodeCache is full"); 
      return;
    }

    if (_method) {
      // Emit stub for static call
      address stub = CompiledStaticCall::emit_to_interp_stub(cbuf, mark);
      if (stub == NULL) {
        ciEnv::current()->record_failure("CodeCache is full"); 
        return;
      }
    }
  %}
3218 
3219   enc_class aarch64_enc_java_handle_call(method meth) %{
3220     MacroAssembler _masm(&cbuf);
3221     relocInfo::relocType reloc;
3222 
3223     // RFP is preserved across all calls, even compiled calls.
3224     // Use it to preserve SP.
3225     __ mov(rfp, sp);
3226 
3227     address mark = __ pc();
3228     address addr = (address)$meth$$method;
3229     address call;
3230     if (!_method) {
3231       // A call to a runtime wrapper, e.g. new, new_typeArray_Java, uncommon_trap.
3232       call = __ trampoline_call(Address(addr, relocInfo::runtime_call_type), &cbuf);
3233     } else if (_optimized_virtual) {
3234       call = __ trampoline_call(Address(addr, relocInfo::opt_virtual_call_type), &cbuf);
3235     } else {
3236       call = __ trampoline_call(Address(addr, relocInfo::static_call_type), &cbuf);
3237     }
3238     if (call == NULL) {
3239       ciEnv::current()->record_failure("CodeCache is full"); 
3240       return;
3241     }
3242 
3243     if (_method) {
3244       // Emit stub for static call
3245       address stub = CompiledStaticCall::emit_to_interp_stub(cbuf, mark);
3246       if (stub == NULL) {
3247         ciEnv::current()->record_failure("CodeCache is full"); 
3248         return;
3249       }
3250     }
3251 
3252     // now restore sp
3253     __ mov(sp, rfp);
3254   %}
3255 
  // Dynamic (inline-cache) Java call; bails out if the code cache is full.
  enc_class aarch64_enc_java_dynamic_call(method meth) %{
    MacroAssembler _masm(&cbuf);
    address call = __ ic_call((address)$meth$$method);
    if (call == NULL) {
      ciEnv::current()->record_failure("CodeCache is full"); 
      return;
    }
  %}

  // Post-call epilog; stack-depth verification is unimplemented on AArch64.
  enc_class aarch64_enc_call_epilog() %{
    MacroAssembler _masm(&cbuf);
    if (VerifyStackAtCalls) {
      // Check that stack depth is unchanged: find majik cookie on stack
      __ call_Unimplemented();
    }
  %}
3272 
  // Call from compiled Java code to the runtime.
  enc_class aarch64_enc_java_to_runtime(method meth) %{
    MacroAssembler _masm(&cbuf);

    // some calls to generated routines (arraycopy code) are scheduled
    // by C2 as runtime calls. if so we can call them using a br (they
    // will be in a reachable segment) otherwise we have to use a blr
    // which loads the absolute address into a register.
    address entry = (address)$meth$$method;
    CodeBlob *cb = CodeCache::find_blob(entry);
    if (cb) {
      // target is inside the code cache: a trampoline call reaches it
      address call = __ trampoline_call(Address(entry, relocInfo::runtime_call_type));
      if (call == NULL) {
        ciEnv::current()->record_failure("CodeCache is full"); 
        return;
      }
    } else {
      Label retaddr;
      __ adr(rscratch2, retaddr);
      __ lea(rscratch1, RuntimeAddress(entry));
      // Leave a breadcrumb for JavaFrameAnchor::capture_last_Java_pc()
      __ stp(zr, rscratch2, Address(__ pre(sp, -2 * wordSize)));
      __ blr(rscratch1);
      __ bind(retaddr);
      __ add(sp, sp, 2 * wordSize);  // pop the breadcrumb pair
    }
  %}
3299 
  // Jump to the exception-rethrow stub.
  enc_class aarch64_enc_rethrow() %{
    MacroAssembler _masm(&cbuf);
    __ far_jump(RuntimeAddress(OptoRuntime::rethrow_stub()));
  %}

  // Method return.
  enc_class aarch64_enc_ret() %{
    MacroAssembler _masm(&cbuf);
    __ ret(lr);
  %}

  // Tail call: indirect jump through the target register.
  enc_class aarch64_enc_tail_call(iRegP jump_target) %{
    MacroAssembler _masm(&cbuf);
    Register target_reg = as_Register($jump_target$$reg);
    __ br(target_reg);
  %}

  // Tail jump for exception forwarding.
  enc_class aarch64_enc_tail_jmp(iRegP jump_target) %{
    MacroAssembler _masm(&cbuf);
    Register target_reg = as_Register($jump_target$$reg);
    // exception oop should be in r0
    // ret addr has been popped into lr
    // callee expects it in r3
    __ mov(r3, lr);
    __ br(target_reg);
  %}
3325 
  // Fast-path monitor enter for C2 locking.  Emits the inline
  // stack-lock / recursive-lock / inflated-monitor CAS sequence.
  // The sequence itself never calls the runtime; it leaves the
  // condition flags set for the matching instruct to branch on:
  //   flag == EQ  => lock acquired
  //   flag == NE  => caller must take the slow path
  // 'box' is the on-stack BasicLock; 'tmp'/'tmp2' are scratch.
  enc_class aarch64_enc_fast_lock(iRegP object, iRegP box, iRegP tmp, iRegP tmp2) %{
    MacroAssembler _masm(&cbuf);
    Register oop = as_Register($object$$reg);
    Register box = as_Register($box$$reg);
    Register disp_hdr = as_Register($tmp$$reg);
    Register tmp = as_Register($tmp2$$reg);
    Label cont;
    Label object_has_monitor;
    Label cas_failed;

    assert_different_registers(oop, box, tmp, disp_hdr);

    // Load markOop from object into displaced_header.
    __ ldr(disp_hdr, Address(oop, oopDesc::mark_offset_in_bytes()));

    // Always do locking in runtime.
    if (EmitSync & 0x01) {
      // oop is known non-null, so comparing with zr sets NE ("failure"),
      // forcing every lock through the runtime slow path.
      __ cmp(oop, zr);
      return;
    }

    if (UseBiasedLocking && !UseOptoBiasInlining) {
      __ biased_locking_enter(box, oop, disp_hdr, tmp, true, cont);
    }

    // Handle existing monitor
    if ((EmitSync & 0x02) == 0) {
      // Monitor bit set in the mark word => object is already inflated.
      __ tbnz(disp_hdr, exact_log2(markOopDesc::monitor_value), object_has_monitor);
    }

    // Set tmp to be (markOop of object | UNLOCK_VALUE).
    __ orr(tmp, disp_hdr, markOopDesc::unlocked_value);

    // Load Compare Value application register.

    // Initialize the box. (Must happen before we update the object mark!)
    __ str(tmp, Address(box, BasicLock::displaced_header_offset_in_bytes()));

    // Compare object markOop with an unlocked value (tmp) and if
    // equal exchange the stack address of our box with object markOop.
    // On failure disp_hdr contains the possibly locked markOop.
    if (UseLSE) {
      __ mov(disp_hdr, tmp);
      __ casal(Assembler::xword, disp_hdr, box, oop);  // Updates disp_hdr
      __ cmp(tmp, disp_hdr);          // EQ iff the CAS installed our box
      __ br(Assembler::EQ, cont);
    } else {
      Label retry_load;
      if ((VM_Version::cpu_cpuFeatures() & VM_Version::CPU_STXR_PREFETCH))
        __ prfm(Address(oop), PSTL1STRM);
      __ bind(retry_load);
      __ ldaxr(disp_hdr, oop);
      __ cmp(tmp, disp_hdr);
      __ br(Assembler::NE, cas_failed);
      // use stlxr to ensure update is immediately visible
      __ stlxr(disp_hdr, box, oop);
      // stlxr writes 0 on success; flags are still EQ from the cmp above.
      __ cbzw(disp_hdr, cont);
      __ b(retry_load);
    }

    assert(oopDesc::mark_offset_in_bytes() == 0, "offset of _mark is not 0");

    // If the compare-and-exchange succeeded, then we found an unlocked
    // object, will have now locked it will continue at label cont

    __ bind(cas_failed);
    // We did not see an unlocked object so try the fast recursive case.

    // Check if the owner is self by comparing the value in the
    // markOop of object (disp_hdr) with the stack pointer.
    __ mov(rscratch1, sp);
    __ sub(disp_hdr, disp_hdr, rscratch1);
    __ mov(tmp, (address) (~(os::vm_page_size()-1) | (uintptr_t)markOopDesc::lock_mask_in_place));
    // If condition is true we are cont and hence we can store 0 as the
    // displaced header in the box, which indicates that it is a recursive lock.
    // ands sets EQ (success) iff the mark is a stack address within our page.
    __ ands(tmp/*==0?*/, disp_hdr, tmp);
    __ str(tmp/*==0, perhaps*/, Address(box, BasicLock::displaced_header_offset_in_bytes()));

    // Handle existing monitor.
    if ((EmitSync & 0x02) == 0) {
      __ b(cont);

      __ bind(object_has_monitor);
      // The object's monitor m is unlocked iff m->owner == NULL,
      // otherwise m->owner may contain a thread or a stack address.
      //
      // Try to CAS m->owner from NULL to current thread.
      __ add(tmp, disp_hdr, (ObjectMonitor::owner_offset_in_bytes()-markOopDesc::monitor_value));
      __ mov(disp_hdr, zr);

      if (UseLSE) {
        __ mov(rscratch1, disp_hdr);
        __ casal(Assembler::xword, rscratch1, rthread, tmp);
        __ cmp(rscratch1, disp_hdr);  // EQ iff owner was NULL (we now own it)
      } else {
        Label retry_load, fail;
        if ((VM_Version::cpu_cpuFeatures() & VM_Version::CPU_STXR_PREFETCH))
          __ prfm(Address(tmp), PSTL1STRM);
        __ bind(retry_load);
        __ ldaxr(rscratch1, tmp);
        __ cmp(disp_hdr, rscratch1);
        __ br(Assembler::NE, fail);
        // use stlxr to ensure update is immediately visible
        __ stlxr(rscratch1, rthread, tmp);
        __ cbnzw(rscratch1, retry_load);
        __ bind(fail);
      }

      // Store a non-null value into the box to avoid looking like a re-entrant
      // lock. The fast-path monitor unlock code checks for
      // markOopDesc::monitor_value so use markOopDesc::unused_mark which has the
      // relevant bit set, and also matches ObjectSynchronizer::slow_enter.
      __ mov(tmp, (address)markOopDesc::unused_mark());
      __ str(tmp, Address(box, BasicLock::displaced_header_offset_in_bytes()));
    }

    __ bind(cont);
    // flag == EQ indicates success
    // flag == NE indicates failure
  %}
3446 
  // Fast-path monitor exit for C2 locking: undoes a stack lock,
  // recursive lock, or uncontended inflated monitor.  Like fast_lock,
  // it only sets the condition flags (EQ = unlocked, NE = slow path).
  enc_class aarch64_enc_fast_unlock(iRegP object, iRegP box, iRegP tmp, iRegP tmp2) %{
    MacroAssembler _masm(&cbuf);
    Register oop = as_Register($object$$reg);
    Register box = as_Register($box$$reg);
    Register disp_hdr = as_Register($tmp$$reg);
    Register tmp = as_Register($tmp2$$reg);
    Label cont;
    Label object_has_monitor;

    assert_different_registers(oop, box, tmp, disp_hdr);

    // Always do locking in runtime.
    if (EmitSync & 0x01) {
      __ cmp(oop, zr); // Oop can't be 0 here => always false.
      return;
    }

    if (UseBiasedLocking && !UseOptoBiasInlining) {
      __ biased_locking_exit(oop, tmp, cont);
    }

    // Find the lock address and load the displaced header from the stack.
    __ ldr(disp_hdr, Address(box, BasicLock::displaced_header_offset_in_bytes()));

    // If the displaced header is 0, we have a recursive unlock.
    // (cmp sets EQ, so the recursive case reports success at cont.)
    __ cmp(disp_hdr, zr);
    __ br(Assembler::EQ, cont);

    // Handle existing monitor.
    if ((EmitSync & 0x02) == 0) {
      // tmp <- mark word (used below as the monitor pointer); the
      // displaced header in the box is unused_mark for an inflated lock,
      // which has the monitor bit set, hence the tbnz on disp_hdr.
      __ ldr(tmp, Address(oop, oopDesc::mark_offset_in_bytes()));
      __ tbnz(disp_hdr, exact_log2(markOopDesc::monitor_value), object_has_monitor);
    }

    // Check if it is still a light weight lock, this is true if we
    // see the stack address of the basicLock in the markOop of the
    // object.

    if (UseLSE) {
      __ mov(tmp, box);
      __ casl(Assembler::xword, tmp, disp_hdr, oop);
      __ cmp(tmp, box);               // EQ iff our box was still installed
      __ b(cont);
    } else {
      Label retry_load;
      if ((VM_Version::cpu_cpuFeatures() & VM_Version::CPU_STXR_PREFETCH))
        __ prfm(Address(oop), PSTL1STRM);
      __ bind(retry_load);
      __ ldxr(tmp, oop);
      __ cmp(box, tmp);
      __ br(Assembler::NE, cont);
      // use stlxr to ensure update is immediately visible
      __ stlxr(tmp, disp_hdr, oop);
      // success: flags remain EQ from the cmp above
      __ cbzw(tmp, cont);
      __ b(retry_load);
    }

    assert(oopDesc::mark_offset_in_bytes() == 0, "offset of _mark is not 0");

    // Handle existing monitor.
    if ((EmitSync & 0x02) == 0) {
      __ bind(object_has_monitor);
      __ add(tmp, tmp, -markOopDesc::monitor_value); // monitor
      __ ldr(rscratch1, Address(tmp, ObjectMonitor::owner_offset_in_bytes()));
      __ ldr(disp_hdr, Address(tmp, ObjectMonitor::recursions_offset_in_bytes()));
      __ eor(rscratch1, rscratch1, rthread); // Will be 0 if we are the owner.
      __ orr(rscratch1, rscratch1, disp_hdr); // Will be 0 if there are 0 recursions
      __ cmp(rscratch1, zr);
      __ br(Assembler::NE, cont);

      // Only release when no thread is queued on the monitor.
      __ ldr(rscratch1, Address(tmp, ObjectMonitor::EntryList_offset_in_bytes()));
      __ ldr(disp_hdr, Address(tmp, ObjectMonitor::cxq_offset_in_bytes()));
      __ orr(rscratch1, rscratch1, disp_hdr); // Will be 0 if both are 0.
      __ cmp(rscratch1, zr);
      __ br(Assembler::NE, cont);
      // need a release store here
      __ lea(tmp, Address(tmp, ObjectMonitor::owner_offset_in_bytes()));
      __ stlr(zr, tmp); // set unowned
    }

    __ bind(cont);
    // flag == EQ indicates success
    // flag == NE indicates failure
  %}
3531 
3532 %}
3533 
3534 //----------FRAME--------------------------------------------------------------
3535 // Definition of frame structure and management information.
3536 //
3537 //  S T A C K   L A Y O U T    Allocators stack-slot number
3538 //                             |   (to get allocators register number
3539 //  G  Owned by    |        |  v    add OptoReg::stack0())
3540 //  r   CALLER     |        |
3541 //  o     |        +--------+      pad to even-align allocators stack-slot
3542 //  w     V        |  pad0  |        numbers; owned by CALLER
3543 //  t   -----------+--------+----> Matcher::_in_arg_limit, unaligned
3544 //  h     ^        |   in   |  5
3545 //        |        |  args  |  4   Holes in incoming args owned by SELF
3546 //  |     |        |        |  3
3547 //  |     |        +--------+
3548 //  V     |        | old out|      Empty on Intel, window on Sparc
3549 //        |    old |preserve|      Must be even aligned.
3550 //        |     SP-+--------+----> Matcher::_old_SP, even aligned
3551 //        |        |   in   |  3   area for Intel ret address
3552 //     Owned by    |preserve|      Empty on Sparc.
3553 //       SELF      +--------+
3554 //        |        |  pad2  |  2   pad to align old SP
3555 //        |        +--------+  1
3556 //        |        | locks  |  0
3557 //        |        +--------+----> OptoReg::stack0(), even aligned
3558 //        |        |  pad1  | 11   pad to align new SP
3559 //        |        +--------+
3560 //        |        |        | 10
3561 //        |        | spills |  9   spills
3562 //        V        |        |  8   (pad0 slot for callee)
3563 //      -----------+--------+----> Matcher::_out_arg_limit, unaligned
3564 //        ^        |  out   |  7
3565 //        |        |  args  |  6   Holes in outgoing args owned by CALLEE
3566 //     Owned by    +--------+
3567 //      CALLEE     | new out|  6   Empty on Intel, window on Sparc
3568 //        |    new |preserve|      Must be even-aligned.
3569 //        |     SP-+--------+----> Matcher::_new_SP, even aligned
3570 //        |        |        |
3571 //
3572 // Note 1: Only region 8-11 is determined by the allocator.  Region 0-5 is
3573 //         known from SELF's arguments and the Java calling convention.
3574 //         Region 6-7 is determined per call site.
3575 // Note 2: If the calling convention leaves holes in the incoming argument
3576 //         area, those holes are owned by SELF.  Holes in the outgoing area
//         are owned by the CALLEE.  Holes should not be necessary in the
3578 //         incoming area, as the Java calling convention is completely under
3579 //         the control of the AD file.  Doubles can be sorted and packed to
//         avoid holes.  Holes in the outgoing arguments may be necessary for
3581 //         varargs C calling conventions.
3582 // Note 3: Region 0-3 is even aligned, with pad2 as needed.  Region 3-5 is
3583 //         even aligned with pad0 as needed.
3584 //         Region 6 is even aligned.  Region 6-7 is NOT even aligned;
3585 //           (the latter is true on Intel but is it false on AArch64?)
3586 //         region 6-11 is even aligned; it may be padded out more so that
3587 //         the region from SP to FP meets the minimum stack alignment.
3588 // Note 4: For I2C adapters, the incoming FP may not meet the minimum stack
3589 //         alignment.  Region 11, pad1, may be dynamically extended so that
3590 //         SP meets the minimum alignment.
3591 
frame %{
  // What direction does stack grow in (assumed to be same for C & Java)
  stack_direction(TOWARDS_LOW);

  // These three registers define part of the calling convention
  // between compiled code and the interpreter.

  // Inline Cache Register or methodOop for I2C.
  inline_cache_reg(R12);

  // Method Oop Register when calling interpreter.
  interpreter_method_oop_reg(R12);

  // Number of stack slots consumed by locking an object
  sync_stack_slots(2);

  // Compiled code's Frame Pointer
  frame_pointer(R31);

  // Interpreter stores its frame pointer in a register which is
  // stored to the stack by I2CAdaptors.
  // I2CAdaptors convert from interpreted java to compiled java.
  interpreter_frame_pointer(R29);

  // Stack alignment requirement
  stack_alignment(StackAlignmentInBytes); // Alignment size in bytes (128-bit -> 16 bytes)

  // Number of stack slots between incoming argument block and the start of
  // a new frame.  The PROLOG must add this many slots to the stack.  The
  // EPILOG must remove this many slots. aarch64 needs two slots for
  // return address and fp.
  // TODO think this is correct but check
  in_preserve_stack_slots(4);

  // Number of outgoing stack slots killed above the out_preserve_stack_slots
  // for calls to C.  Supports the var-args backing area for register parms.
  varargs_C_out_slots_killed(frame::arg_reg_save_area_bytes/BytesPerInt);

  // The after-PROLOG location of the return address.  Location of
  // return address specifies a type (REG or STACK) and a number
  // representing the register number (i.e. - use a register name) or
  // stack slot.
  // Ret Addr is on stack in slot 0 if no locks or verification or alignment.
  // Otherwise, it is above the locks and verification slot and alignment word
  // TODO this may well be correct but need to check why that - 2 is there
  // ppc port uses 0 but we definitely need to allow for fixed_slots
  // which folds in the space used for monitors
  return_addr(STACK - 2 +
              round_to((Compile::current()->in_preserve_stack_slots() +
                        Compile::current()->fixed_slots()),
                       stack_alignment_in_slots()));

  // Body of function which returns an integer array locating
  // arguments either in registers or in stack slots.  Passed an array
  // of ideal registers called "sig" and a "length" count.  Stack-slot
  // offsets are based on outgoing arguments, i.e. a CALLER setting up
  // arguments for a CALLEE.  Incoming stack arguments are
  // automatically biased by the preserve_stack_slots field above.

  calling_convention
  %{
    // No difference between ingoing/outgoing just pass false
    SharedRuntime::java_calling_convention(sig_bt, regs, length, false);
  %}

  c_calling_convention
  %{
    // This is obviously always outgoing
    (void) SharedRuntime::c_calling_convention(sig_bt, regs, NULL, length);
  %}

  // Location of compiled Java return values.  Same as C for now.
  return_value
  %{
    // TODO do we allow ideal_reg == Op_RegN???
    assert(ideal_reg >= Op_RegI && ideal_reg <= Op_RegL,
           "only return normal values");

    // lo/hi tables are indexed by ideal_reg: integer/pointer results
    // come back in r0 (r0:r0_H for 64-bit), FP results in v0.
    static const int lo[Op_RegL + 1] = { // enum name
      0,                                 // Op_Node
      0,                                 // Op_Set
      R0_num,                            // Op_RegN
      R0_num,                            // Op_RegI
      R0_num,                            // Op_RegP
      V0_num,                            // Op_RegF
      V0_num,                            // Op_RegD
      R0_num                             // Op_RegL
    };

    static const int hi[Op_RegL + 1] = { // enum name
      0,                                 // Op_Node
      0,                                 // Op_Set
      OptoReg::Bad,                      // Op_RegN
      OptoReg::Bad,                      // Op_RegI
      R0_H_num,                          // Op_RegP
      OptoReg::Bad,                      // Op_RegF
      V0_H_num,                          // Op_RegD
      R0_H_num                           // Op_RegL
    };

    return OptoRegPair(hi[ideal_reg], lo[ideal_reg]);
  %}
%}
3695 
3696 //----------ATTRIBUTES---------------------------------------------------------
3697 //----------Operand Attributes-------------------------------------------------
op_attrib op_cost(1);        // Required cost attribute

//----------Instruction Attributes---------------------------------------------
// INSN_COST is the per-instruction base cost constant defined earlier
// in this file (outside this chunk).
ins_attrib ins_cost(INSN_COST); // Required cost attribute
ins_attrib ins_size(32);        // Required size attribute (in bits)
ins_attrib ins_short_branch(0); // Required flag: is this instruction
                                // a non-matching short branch variant
                                // of some long branch?
ins_attrib ins_alignment(4);    // Required alignment attribute (must
                                // be a power of 2) specifies the
                                // alignment that some part of the
                                // instruction (not necessarily the
                                // start) requires.  If > 1, a
                                // compute_padding() function must be
                                // provided for the instruction
3713 
3714 //----------OPERANDS-----------------------------------------------------------
3715 // Operand definitions must precede instruction definitions for correct parsing
3716 // in the ADLC because operands constitute user defined types which are used in
3717 // instruction definitions.
3718 
3719 //----------Simple Operands----------------------------------------------------
3720 
// Integer operands 32 bit
// 32 bit immediate -- any int constant
operand immI()
%{
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// 32 bit zero
operand immI0()
%{
  predicate(n->get_int() == 0);
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// 32 bit unit increment
operand immI_1()
%{
  predicate(n->get_int() == 1);
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// 32 bit unit decrement
operand immI_M1()
%{
  predicate(n->get_int() == -1);
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// int constant no greater than 4
operand immI_le_4()
%{
  predicate(n->get_int() <= 4);
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// The exact-value operands below (31, 8, 16, ..., 64) exist so that
// match rules elsewhere in the file can key on specific shift amounts
// and bit widths.
operand immI_31()
%{
  predicate(n->get_int() == 31);
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

operand immI_8()
%{
  predicate(n->get_int() == 8);
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

operand immI_16()
%{
  predicate(n->get_int() == 16);
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

operand immI_24()
%{
  predicate(n->get_int() == 24);
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

operand immI_32()
%{
  predicate(n->get_int() == 32);
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

operand immI_48()
%{
  predicate(n->get_int() == 48);
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

operand immI_56()
%{
  predicate(n->get_int() == 56);
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

operand immI_64()
%{
  predicate(n->get_int() == 64);
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// byte mask (0xFF)
operand immI_255()
%{
  predicate(n->get_int() == 255);
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// half-word mask (0xFFFF)
operand immI_65535()
%{
  predicate(n->get_int() == 65535);
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}
3874 
// NOTE(review): despite the immL_ prefix, immL_63 and immL_255 below
// match 32-bit ConI nodes via get_int() -- shift counts and some masks
// for long operations are int constants in the ideal graph.  Confirm
// this naming is intentional (it mirrors the immI operands above).
operand immL_63()
%{
  predicate(n->get_int() == 63);
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

operand immL_255()
%{
  predicate(n->get_int() == 255);
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// long half-word mask (0xFFFF)
operand immL_65535()
%{
  predicate(n->get_long() == 65535L);
  match(ConL);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// long word mask (0xFFFFFFFF)
operand immL_4294967295()
%{
  predicate(n->get_long() == 4294967295L);
  match(ConL);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// non-zero contiguous low-bit mask (value+1 is a power of two) with
// the top two bits clear
operand immL_bitmask()
%{
  predicate((n->get_long() != 0)
            && ((n->get_long() & 0xc000000000000000l) == 0)
            && is_power_of_2(n->get_long() + 1));
  match(ConL);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// 32-bit variant of immL_bitmask
operand immI_bitmask()
%{
  predicate((n->get_int() != 0)
            && ((n->get_int() & 0xc0000000) == 0)
            && is_power_of_2(n->get_int() + 1));
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}
3938 
// Scale values for scaled offset addressing modes (up to long but not quad)
operand immIScale()
%{
  predicate(0 <= n->get_int() && (n->get_int() <= 3));
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// 26 bit signed offset -- for pc-relative branches
operand immI26()
%{
  predicate(((-(1 << 25)) <= n->get_int()) && (n->get_int() < (1 << 25)));
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// 19 bit signed offset -- for pc-relative loads
operand immI19()
%{
  predicate(((-(1 << 18)) <= n->get_int()) && (n->get_int() < (1 << 18)));
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// 12 bit unsigned offset -- for base plus immediate loads
operand immIU12()
%{
  predicate((0 <= n->get_int()) && (n->get_int() < (1 << 12)));
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// long variant of immIU12
operand immLU12()
%{
  predicate((0 <= n->get_long()) && (n->get_long() < (1 << 12)));
  match(ConL);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Offset for scaled or unscaled immediate loads and stores.
// The 4/8/16 variants pass the access-size log2 (2, 3, 4) so the
// offset is checked against the scaled-immediate encoding for that
// access width.
operand immIOffset()
%{
  predicate(Address::offset_ok_for_immed(n->get_int()));
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

operand immIOffset4()
%{
  predicate(Address::offset_ok_for_immed(n->get_int(), 2));
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

operand immIOffset8()
%{
  predicate(Address::offset_ok_for_immed(n->get_int(), 3));
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

operand immIOffset16()
%{
  predicate(Address::offset_ok_for_immed(n->get_int(), 4));
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// long variants of the immIOffset family above
operand immLoffset()
%{
  predicate(Address::offset_ok_for_immed(n->get_long()));
  match(ConL);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

operand immLoffset4()
%{
  predicate(Address::offset_ok_for_immed(n->get_long(), 2));
  match(ConL);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

operand immLoffset8()
%{
  predicate(Address::offset_ok_for_immed(n->get_long(), 3));
  match(ConL);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

operand immLoffset16()
%{
  predicate(Address::offset_ok_for_immed(n->get_long(), 4));
  match(ConL);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}
4073 
// 32 bit integer valid for add sub immediate
operand immIAddSub()
%{
  predicate(Assembler::operand_valid_for_add_sub_immediate((long)n->get_int()));
  match(ConI);
  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// 32 bit unsigned integer valid for logical immediate
// TODO -- check this is right when e.g the mask is 0x80000000
operand immILog()
%{
  predicate(Assembler::operand_valid_for_logical_immediate(/*is32*/true, (unsigned long)n->get_int()));
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Integer operands 64 bit
// 64 bit immediate -- any long constant
operand immL()
%{
  match(ConL);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// 64 bit zero
operand immL0()
%{
  predicate(n->get_long() == 0);
  match(ConL);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// 64 bit unit increment
operand immL_1()
%{
  predicate(n->get_long() == 1);
  match(ConL);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// 64 bit unit decrement
operand immL_M1()
%{
  predicate(n->get_long() == -1);
  match(ConL);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// 32 bit offset of pc in thread anchor

operand immL_pc_off()
%{
  // matches exactly the byte offset of last_Java_pc within JavaThread
  predicate(n->get_long() == in_bytes(JavaThread::frame_anchor_offset()) +
                             in_bytes(JavaFrameAnchor::last_Java_pc_offset()));
  match(ConL);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// 64 bit integer valid for add sub immediate
operand immLAddSub()
%{
  predicate(Assembler::operand_valid_for_add_sub_immediate(n->get_long()));
  match(ConL);
  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// 64 bit integer valid for logical immediate
operand immLLog()
%{
  predicate(Assembler::operand_valid_for_logical_immediate(/*is32*/false, (unsigned long)n->get_long()));
  match(ConL);
  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Long Immediate: low 32-bit mask
operand immL_32bits()
%{
  predicate(n->get_long() == 0xFFFFFFFFL);
  match(ConL);
  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}
4182 
// Pointer operands
// Pointer Immediate -- any pointer constant
operand immP()
%{
  match(ConP);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// NULL Pointer Immediate
operand immP0()
%{
  predicate(n->get_ptr() == 0);
  match(ConP);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Pointer Immediate One
// this is used in object initialization (initial object header)
operand immP_1()
%{
  predicate(n->get_ptr() == 1);
  match(ConP);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Polling Page Pointer Immediate
operand immPollPage()
%{
  predicate((address)n->get_ptr() == os::get_polling_page());
  match(ConP);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Card Table Byte Map Base
operand immByteMapBase()
%{
  // Get base of card map
  predicate(!UseShenandoahGC && // TODO: Should really check for BS::is_a, see JDK-8193193
    (jbyte*)n->get_ptr() == ((CardTableModRefBS*)(Universe::heap()->barrier_set()))->byte_map_base);
  match(ConP);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Pointer Immediate Minus One
// this is used when we want to write the current PC to the thread anchor
operand immP_M1()
%{
  predicate(n->get_ptr() == -1);
  match(ConP);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Pointer Immediate Minus Two
// this is used when we want to write the current PC to the thread anchor
operand immP_M2()
%{
  predicate(n->get_ptr() == -2);
  match(ConP);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}
4264 
// Float and Double operands
// Double Immediate -- any double constant
operand immD()
%{
  match(ConD);
  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// constant 'double +0.0'.
operand immD0()
%{
  // fpclassify/signbit distinguish +0.0 from -0.0 (bitwise they differ)
  predicate((n->getd() == 0) &&
            (fpclassify(n->getd()) == FP_ZERO) && (signbit(n->getd()) == 0));
  match(ConD);
  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// double constant valid for a floating-point immediate move
// (accepted by Assembler::operand_valid_for_float_immediate)
operand immDPacked()
%{
  predicate(Assembler::operand_valid_for_float_immediate(n->getd()));
  match(ConD);
  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Float Immediate -- any float constant
operand immF()
%{
  match(ConF);
  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// constant 'float +0.0'.
operand immF0()
%{
  predicate((n->getf() == 0) &&
            (fpclassify(n->getf()) == FP_ZERO) && (signbit(n->getf()) == 0));
  match(ConF);
  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// float constant valid for a floating-point immediate move
operand immFPacked()
%{
  predicate(Assembler::operand_valid_for_float_immediate((double)n->getf()));
  match(ConF);
  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}
4325 
// Narrow pointer operands
// Narrow Pointer Immediate (compressed oop constant)
operand immN()
%{
  match(ConN);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Narrow NULL Pointer Immediate
operand immN0()
%{
  predicate(n->get_narrowcon() == 0);
  match(ConN);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Narrow (compressed) klass pointer constant
operand immNKlass()
%{
  match(ConNKlass);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}
4356 
// Integer 32 bit Register Operands
// Integer 32 bit Register (excludes SP)
operand iRegI()
%{
  constraint(ALLOC_IN_RC(any_reg32));
  match(RegI);
  match(iRegINoSp);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}

// Integer 32 bit Register not Special
operand iRegINoSp()
%{
  constraint(ALLOC_IN_RC(no_special_reg32));
  match(RegI);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}
4378 
// Integer 64 bit Register Operands
// Integer 64 bit Register (includes SP)
operand iRegL()
%{
  constraint(ALLOC_IN_RC(any_reg));
  match(RegL);
  match(iRegLNoSp);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}

// Integer 64 bit Register not Special
operand iRegLNoSp()
%{
  constraint(ALLOC_IN_RC(no_special_reg));
  match(RegL);
  format %{ %}
  interface(REG_INTER);
%}
4399 
// Pointer Register Operands
// Pointer Register
operand iRegP()
%{
  constraint(ALLOC_IN_RC(ptr_reg));
  match(RegP);
  match(iRegPNoSp);
  match(iRegP_R0);
  //match(iRegP_R2);
  //match(iRegP_R4);
  //match(iRegP_R5);
  match(thread_RegP);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}

// Pointer 64 bit Register not Special
operand iRegPNoSp()
%{
  constraint(ALLOC_IN_RC(no_special_ptr_reg));
  match(RegP);
  // match(iRegP);
  // match(iRegP_R0);
  // match(iRegP_R2);
  // match(iRegP_R4);
  // match(iRegP_R5);
  // match(thread_RegP);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}
4432 
// Single-register pointer operands. These pin an operand to one
// specific general-purpose register for rules with fixed register
// requirements (e.g. calling conventions, runtime stubs).

// Pointer 64 bit Register R0 only
operand iRegP_R0()
%{
  constraint(ALLOC_IN_RC(r0_reg));
  match(RegP);
  // match(iRegP);
  match(iRegPNoSp);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}

// Pointer 64 bit Register R1 only
operand iRegP_R1()
%{
  constraint(ALLOC_IN_RC(r1_reg));
  match(RegP);
  // match(iRegP);
  match(iRegPNoSp);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}

// Pointer 64 bit Register R2 only
operand iRegP_R2()
%{
  constraint(ALLOC_IN_RC(r2_reg));
  match(RegP);
  // match(iRegP);
  match(iRegPNoSp);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}

// Pointer 64 bit Register R3 only
operand iRegP_R3()
%{
  constraint(ALLOC_IN_RC(r3_reg));
  match(RegP);
  // match(iRegP);
  match(iRegPNoSp);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}

// Pointer 64 bit Register R4 only
operand iRegP_R4()
%{
  constraint(ALLOC_IN_RC(r4_reg));
  match(RegP);
  // match(iRegP);
  match(iRegPNoSp);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}

// Pointer 64 bit Register R5 only
operand iRegP_R5()
%{
  constraint(ALLOC_IN_RC(r5_reg));
  match(RegP);
  // match(iRegP);
  match(iRegPNoSp);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}

// Pointer 64 bit Register R10 only
operand iRegP_R10()
%{
  constraint(ALLOC_IN_RC(r10_reg));
  match(RegP);
  // match(iRegP);
  match(iRegPNoSp);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}
4516 
// Long 64 bit Register R11 only
operand iRegL_R11()
%{
  constraint(ALLOC_IN_RC(r11_reg));
  match(RegL);
  match(iRegLNoSp);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}

// Pointer 64 bit Register FP only
operand iRegP_FP()
%{
  constraint(ALLOC_IN_RC(fp_reg));
  match(RegP);
  // match(iRegP);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}
4538 
// Register R0 only
operand iRegI_R0()
%{
  constraint(ALLOC_IN_RC(int_r0_reg));
  match(RegI);
  match(iRegINoSp);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}

// Register R2 only
operand iRegI_R2()
%{
  constraint(ALLOC_IN_RC(int_r2_reg));
  match(RegI);
  match(iRegINoSp);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}

// Register R3 only
operand iRegI_R3()
%{
  constraint(ALLOC_IN_RC(int_r3_reg));
  match(RegI);
  match(iRegINoSp);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}


// Register R4 only
operand iRegI_R4()
%{
  constraint(ALLOC_IN_RC(int_r4_reg));
  match(RegI);
  match(iRegINoSp);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}
4583 
4584 
// Pointer Register Operands
// Narrow Pointer Register
operand iRegN()
%{
  constraint(ALLOC_IN_RC(any_reg32));
  match(RegN);
  match(iRegNNoSp);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}

// Narrow Pointer Register not Special
operand iRegNNoSp()
%{
  constraint(ALLOC_IN_RC(no_special_reg32));
  match(RegN);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}

// heap base register -- used for encoding immN0

operand iRegIHeapbase()
%{
  constraint(ALLOC_IN_RC(heapbase_reg));
  match(RegI);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}
4617 
// Float Register
// Float register operands
operand vRegF()
%{
  constraint(ALLOC_IN_RC(float_reg));
  match(RegF);

  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}

// Double Register
// Double register operands
operand vRegD()
%{
  constraint(ALLOC_IN_RC(double_reg));
  match(RegD);

  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}

// 64-bit (D-sized) vector register operand
operand vecD()
%{
  constraint(ALLOC_IN_RC(vectord_reg));
  match(VecD);

  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}

// 128-bit (Q/X-sized) vector register operand
operand vecX()
%{
  constraint(ALLOC_IN_RC(vectorx_reg));
  match(VecX);

  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}

// Double operands pinned to specific FP/SIMD registers, used by rules
// with fixed register requirements.
operand vRegD_V0()
%{
  constraint(ALLOC_IN_RC(v0_reg));
  match(RegD);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}

operand vRegD_V1()
%{
  constraint(ALLOC_IN_RC(v1_reg));
  match(RegD);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}

operand vRegD_V2()
%{
  constraint(ALLOC_IN_RC(v2_reg));
  match(RegD);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}

operand vRegD_V3()
%{
  constraint(ALLOC_IN_RC(v3_reg));
  match(RegD);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}
4697 
// Flags register, used as output of signed compare instructions

// note that on AArch64 we also use this register as the output for
// floating point compare instructions (CmpF CmpD). this ensures
// that ordered inequality tests use GT, GE, LT or LE none of which
// pass through cases where the result is unordered i.e. one or both
// inputs to the compare is a NaN. this means that the ideal code can
// replace e.g. a GT with an LE and not end up capturing the NaN case
// (where the comparison should always fail). EQ and NE tests are
// always generated in ideal code so that unordered folds into the NE
// case, matching the behaviour of AArch64 NE.
//
// This differs from x86 where the outputs of FP compares use a
// special FP flags registers and where compares based on this
// register are distinguished into ordered inequalities (cmpOpUCF) and
// EQ/NEQ tests (cmpOpUCF2). x86 has to special case the latter tests
// to explicitly handle the unordered case in branches. x86 also has
// to include extra CMoveX rules to accept a cmpOpUCF input.

operand rFlagsReg()
%{
  constraint(ALLOC_IN_RC(int_flags));
  match(RegFlags);

  op_cost(0);
  format %{ "RFLAGS" %}
  interface(REG_INTER);
%}

// Flags register, used as output of unsigned compare instructions
operand rFlagsRegU()
%{
  constraint(ALLOC_IN_RC(int_flags));
  match(RegFlags);

  op_cost(0);
  format %{ "RFLAGSU" %}
  interface(REG_INTER);
%}
4737 
// Special Registers

// Method Register
operand inline_cache_RegP(iRegP reg)
%{
  constraint(ALLOC_IN_RC(method_reg)); // inline_cache_reg
  match(reg);
  match(iRegPNoSp);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}

operand interpreter_method_oop_RegP(iRegP reg)
%{
  constraint(ALLOC_IN_RC(method_reg)); // interpreter_method_oop_reg
  match(reg);
  match(iRegPNoSp);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}

// Thread Register
operand thread_RegP(iRegP reg)
%{
  constraint(ALLOC_IN_RC(thread_reg)); // thread_reg
  match(reg);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}

// Link Register
operand lr_RegP(iRegP reg)
%{
  constraint(ALLOC_IN_RC(lr_reg)); // link_reg
  match(reg);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}
4779 
//----------Memory Operands----------------------------------------------------
// In the MEMORY_INTER blocks below, index(0xffffffff) means "no index
// register" and disp/scale 0x0 mean "no displacement/shift".

// Register-indirect addressing: [reg]
operand indirect(iRegP reg)
%{
  constraint(ALLOC_IN_RC(ptr_reg));
  match(reg);
  op_cost(0);
  format %{ "[$reg]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index(0xffffffff);
    scale(0x0);
    disp(0x0);
  %}
%}

// base + (long index << scale) + unsigned 12-bit int offset
operand indIndexScaledOffsetI(iRegP reg, iRegL lreg, immIScale scale, immIU12 off)
%{
  predicate(size_fits_all_mem_uses(n->as_AddP(),
                                   n->in(AddPNode::Address)->in(AddPNode::Offset)->in(2)->get_int()));
  constraint(ALLOC_IN_RC(ptr_reg));
  match(AddP (AddP reg (LShiftL lreg scale)) off);
  op_cost(INSN_COST);
  format %{ "$reg, $lreg lsl($scale), $off" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index($lreg);
    scale($scale);
    disp($off);
  %}
%}

// base + (long index << scale) + unsigned 12-bit long offset
operand indIndexScaledOffsetL(iRegP reg, iRegL lreg, immIScale scale, immLU12 off)
%{
  predicate(size_fits_all_mem_uses(n->as_AddP(),
                                   n->in(AddPNode::Address)->in(AddPNode::Offset)->in(2)->get_int()));
  constraint(ALLOC_IN_RC(ptr_reg));
  match(AddP (AddP reg (LShiftL lreg scale)) off);
  op_cost(INSN_COST);
  format %{ "$reg, $lreg lsl($scale), $off" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index($lreg);
    scale($scale);
    disp($off);
  %}
%}

// base + sign-extended int index + unsigned 12-bit long offset
operand indIndexOffsetI2L(iRegP reg, iRegI ireg, immLU12 off)
%{
  constraint(ALLOC_IN_RC(ptr_reg));
  match(AddP (AddP reg (ConvI2L ireg)) off);
  op_cost(INSN_COST);
  format %{ "$reg, $ireg, $off I2L" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index($ireg);
    scale(0x0);
    disp($off);
  %}
%}

// base + (sign-extended int index << scale) + unsigned 12-bit long offset
operand indIndexScaledOffsetI2L(iRegP reg, iRegI ireg, immIScale scale, immLU12 off)
%{
  predicate(size_fits_all_mem_uses(n->as_AddP(),
                                   n->in(AddPNode::Address)->in(AddPNode::Offset)->in(2)->get_int()));
  constraint(ALLOC_IN_RC(ptr_reg));
  match(AddP (AddP reg (LShiftL (ConvI2L ireg) scale)) off);
  op_cost(INSN_COST);
  format %{ "$reg, $ireg sxtw($scale), $off I2L" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index($ireg);
    scale($scale);
    disp($off);
  %}
%}

// base + (sign-extended int index << scale), no offset
operand indIndexScaledI2L(iRegP reg, iRegI ireg, immIScale scale)
%{
  predicate(size_fits_all_mem_uses(n->as_AddP(),
                                   n->in(AddPNode::Offset)->in(2)->get_int()));
  constraint(ALLOC_IN_RC(ptr_reg));
  match(AddP reg (LShiftL (ConvI2L ireg) scale));
  op_cost(0);
  format %{ "$reg, $ireg sxtw($scale), 0, I2L" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index($ireg);
    scale($scale);
    disp(0x0);
  %}
%}

// base + (long index << scale), no offset
operand indIndexScaled(iRegP reg, iRegL lreg, immIScale scale)
%{
  predicate(size_fits_all_mem_uses(n->as_AddP(),
                                   n->in(AddPNode::Offset)->in(2)->get_int()));
  constraint(ALLOC_IN_RC(ptr_reg));
  match(AddP reg (LShiftL lreg scale));
  op_cost(0);
  format %{ "$reg, $lreg lsl($scale)" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index($lreg);
    scale($scale);
    disp(0x0);
  %}
%}

// base + long index, no shift, no offset
operand indIndex(iRegP reg, iRegL lreg)
%{
  constraint(ALLOC_IN_RC(ptr_reg));
  match(AddP reg lreg);
  op_cost(0);
  format %{ "$reg, $lreg" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index($lreg);
    scale(0x0);
    disp(0x0);
  %}
%}
4903 
// Base-plus-immediate-offset addressing. The numbered variants
// (indOffI4/8/16, indOffL4/8/16) restrict the offset to values valid
// for the corresponding access size via their immediate operand type.

// base + int offset
operand indOffI(iRegP reg, immIOffset off)
%{
  constraint(ALLOC_IN_RC(ptr_reg));
  match(AddP reg off);
  op_cost(0);
  format %{ "[$reg, $off]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index(0xffffffff);
    scale(0x0);
    disp($off);
  %}
%}

operand indOffI4(iRegP reg, immIOffset4 off)
%{
  constraint(ALLOC_IN_RC(ptr_reg));
  match(AddP reg off);
  op_cost(0);
  format %{ "[$reg, $off]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index(0xffffffff);
    scale(0x0);
    disp($off);
  %}
%}

operand indOffI8(iRegP reg, immIOffset8 off)
%{
  constraint(ALLOC_IN_RC(ptr_reg));
  match(AddP reg off);
  op_cost(0);
  format %{ "[$reg, $off]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index(0xffffffff);
    scale(0x0);
    disp($off);
  %}
%}

operand indOffI16(iRegP reg, immIOffset16 off)
%{
  constraint(ALLOC_IN_RC(ptr_reg));
  match(AddP reg off);
  op_cost(0);
  format %{ "[$reg, $off]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index(0xffffffff);
    scale(0x0);
    disp($off);
  %}
%}

// base + long offset
operand indOffL(iRegP reg, immLoffset off)
%{
  constraint(ALLOC_IN_RC(ptr_reg));
  match(AddP reg off);
  op_cost(0);
  format %{ "[$reg, $off]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index(0xffffffff);
    scale(0x0);
    disp($off);
  %}
%}

operand indOffL4(iRegP reg, immLoffset4 off)
%{
  constraint(ALLOC_IN_RC(ptr_reg));
  match(AddP reg off);
  op_cost(0);
  format %{ "[$reg, $off]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index(0xffffffff);
    scale(0x0);
    disp($off);
  %}
%}

operand indOffL8(iRegP reg, immLoffset8 off)
%{
  constraint(ALLOC_IN_RC(ptr_reg));
  match(AddP reg off);
  op_cost(0);
  format %{ "[$reg, $off]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index(0xffffffff);
    scale(0x0);
    disp($off);
  %}
%}

operand indOffL16(iRegP reg, immLoffset16 off)
%{
  constraint(ALLOC_IN_RC(ptr_reg));
  match(AddP reg off);
  op_cost(0);
  format %{ "[$reg, $off]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index(0xffffffff);
    scale(0x0);
    disp($off);
  %}
%}
5015 
// Narrow-oop variants of the memory operands above. These apply only
// when narrow_oop_shift() == 0, i.e. when a compressed oop can be used
// directly as an address base without decoding.

operand indirectN(iRegN reg)
%{
  predicate(Universe::narrow_oop_shift() == 0);
  constraint(ALLOC_IN_RC(ptr_reg));
  match(DecodeN reg);
  op_cost(0);
  format %{ "[$reg]\t# narrow" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index(0xffffffff);
    scale(0x0);
    disp(0x0);
  %}
%}

operand indIndexScaledOffsetIN(iRegN reg, iRegL lreg, immIScale scale, immIU12 off)
%{
  predicate(Universe::narrow_oop_shift() == 0 &&
            size_fits_all_mem_uses(n->as_AddP(),
                                   n->in(AddPNode::Address)->in(AddPNode::Offset)->in(2)->get_int()));
  constraint(ALLOC_IN_RC(ptr_reg));
  match(AddP (AddP (DecodeN reg) (LShiftL lreg scale)) off);
  op_cost(0);
  format %{ "$reg, $lreg lsl($scale), $off\t# narrow" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index($lreg);
    scale($scale);
    disp($off);
  %}
%}

operand indIndexScaledOffsetLN(iRegN reg, iRegL lreg, immIScale scale, immLU12 off)
%{
  predicate(Universe::narrow_oop_shift() == 0 &&
            size_fits_all_mem_uses(n->as_AddP(),
                                   n->in(AddPNode::Address)->in(AddPNode::Offset)->in(2)->get_int()));
  constraint(ALLOC_IN_RC(ptr_reg));
  match(AddP (AddP (DecodeN reg) (LShiftL lreg scale)) off);
  op_cost(INSN_COST);
  format %{ "$reg, $lreg lsl($scale), $off\t# narrow" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index($lreg);
    scale($scale);
    disp($off);
  %}
%}

operand indIndexOffsetI2LN(iRegN reg, iRegI ireg, immLU12 off)
%{
  predicate(Universe::narrow_oop_shift() == 0);
  constraint(ALLOC_IN_RC(ptr_reg));
  match(AddP (AddP (DecodeN reg) (ConvI2L ireg)) off);
  op_cost(INSN_COST);
  format %{ "$reg, $ireg, $off I2L\t# narrow" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index($ireg);
    scale(0x0);
    disp($off);
  %}
%}

operand indIndexScaledOffsetI2LN(iRegN reg, iRegI ireg, immIScale scale, immLU12 off)
%{
  predicate(Universe::narrow_oop_shift() == 0 &&
            size_fits_all_mem_uses(n->as_AddP(),
                                   n->in(AddPNode::Address)->in(AddPNode::Offset)->in(2)->get_int()));
  constraint(ALLOC_IN_RC(ptr_reg));
  match(AddP (AddP (DecodeN reg) (LShiftL (ConvI2L ireg) scale)) off);
  op_cost(INSN_COST);
  format %{ "$reg, $ireg sxtw($scale), $off I2L\t# narrow" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index($ireg);
    scale($scale);
    disp($off);
  %}
%}

operand indIndexScaledI2LN(iRegN reg, iRegI ireg, immIScale scale)
%{
  predicate(Universe::narrow_oop_shift() == 0 &&
            size_fits_all_mem_uses(n->as_AddP(),
                                   n->in(AddPNode::Offset)->in(2)->get_int()));
  constraint(ALLOC_IN_RC(ptr_reg));
  match(AddP (DecodeN reg) (LShiftL (ConvI2L ireg) scale));
  op_cost(0);
  format %{ "$reg, $ireg sxtw($scale), 0, I2L\t# narrow" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index($ireg);
    scale($scale);
    disp(0x0);
  %}
%}

operand indIndexScaledN(iRegN reg, iRegL lreg, immIScale scale)
%{
  predicate(Universe::narrow_oop_shift() == 0 &&
            size_fits_all_mem_uses(n->as_AddP(),
                                   n->in(AddPNode::Offset)->in(2)->get_int()));
  constraint(ALLOC_IN_RC(ptr_reg));
  match(AddP (DecodeN reg) (LShiftL lreg scale));
  op_cost(0);
  format %{ "$reg, $lreg lsl($scale)\t# narrow" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index($lreg);
    scale($scale);
    disp(0x0);
  %}
%}

operand indIndexN(iRegN reg, iRegL lreg)
%{
  predicate(Universe::narrow_oop_shift() == 0);
  constraint(ALLOC_IN_RC(ptr_reg));
  match(AddP (DecodeN reg) lreg);
  op_cost(0);
  format %{ "$reg, $lreg\t# narrow" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index($lreg);
    scale(0x0);
    disp(0x0);
  %}
%}

operand indOffIN(iRegN reg, immIOffset off)
%{
  predicate(Universe::narrow_oop_shift() == 0);
  constraint(ALLOC_IN_RC(ptr_reg));
  match(AddP (DecodeN reg) off);
  op_cost(0);
  format %{ "[$reg, $off]\t# narrow" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index(0xffffffff);
    scale(0x0);
    disp($off);
  %}
%}

operand indOffLN(iRegN reg, immLoffset off)
%{
  predicate(Universe::narrow_oop_shift() == 0);
  constraint(ALLOC_IN_RC(ptr_reg));
  match(AddP (DecodeN reg) off);
  op_cost(0);
  format %{ "[$reg, $off]\t# narrow" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index(0xffffffff);
    scale(0x0);
    disp($off);
  %}
%}
5175 
5176 
5177 
// AArch64 opto stubs need to write to the pc slot in the thread anchor
operand thread_anchor_pc(thread_RegP reg, immL_pc_off off)
%{
  constraint(ALLOC_IN_RC(ptr_reg));
  match(AddP reg off);
  op_cost(0);
  format %{ "[$reg, $off]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index(0xffffffff);
    scale(0x0);
    disp($off);
  %}
%}
5192 
//----------Special Memory Operands--------------------------------------------
// Stack Slot Operand - This operand is used for loading and storing temporary
//                      values on the stack where a match requires a value to
//                      flow through memory.
operand stackSlotP(sRegP reg)
%{
  constraint(ALLOC_IN_RC(stack_slots));
  op_cost(100);
  // No match rule because this operand is only generated in matching
  // match(RegP);
  format %{ "[$reg]" %}
  interface(MEMORY_INTER) %{
    base(0x1e);  // SP (stack pointer)
    index(0x0);  // No Index
    scale(0x0);  // No Scale
    disp($reg);  // Stack Offset
  %}
%}

operand stackSlotI(sRegI reg)
%{
  constraint(ALLOC_IN_RC(stack_slots));
  // No match rule because this operand is only generated in matching
  // match(RegI);
  format %{ "[$reg]" %}
  interface(MEMORY_INTER) %{
    base(0x1e);  // SP (stack pointer)
    index(0x0);  // No Index
    scale(0x0);  // No Scale
    disp($reg);  // Stack Offset
  %}
%}

operand stackSlotF(sRegF reg)
%{
  constraint(ALLOC_IN_RC(stack_slots));
  // No match rule because this operand is only generated in matching
  // match(RegF);
  format %{ "[$reg]" %}
  interface(MEMORY_INTER) %{
    base(0x1e);  // SP (stack pointer)
    index(0x0);  // No Index
    scale(0x0);  // No Scale
    disp($reg);  // Stack Offset
  %}
%}

operand stackSlotD(sRegD reg)
%{
  constraint(ALLOC_IN_RC(stack_slots));
  // No match rule because this operand is only generated in matching
  // match(RegD);
  format %{ "[$reg]" %}
  interface(MEMORY_INTER) %{
    base(0x1e);  // SP (stack pointer)
    index(0x0);  // No Index
    scale(0x0);  // No Scale
    disp($reg);  // Stack Offset
  %}
%}

operand stackSlotL(sRegL reg)
%{
  constraint(ALLOC_IN_RC(stack_slots));
  // No match rule because this operand is only generated in matching
  // match(RegL);
  format %{ "[$reg]" %}
  interface(MEMORY_INTER) %{
    base(0x1e);  // SP (stack pointer)
    index(0x0);  // No Index
    scale(0x0);  // No Scale
    disp($reg);  // Stack Offset
  %}
%}
5267 
// Operands for expressing Control Flow
// NOTE: Label is a predefined operand which should not be redefined in
//       the AD file. It is generically handled within the ADLC.

//----------Conditional Branch Operands----------------------------------------
// Comparison Op  - This is the operation of the comparison, and is limited to
//                  the following set of codes:
//                  L (<), LE (<=), G (>), GE (>=), E (==), NE (!=)
//
// Other attributes of the comparison, such as unsignedness, are specified
// by the comparison instruction that sets a condition code flags register.
// That result is represented by a flags operand whose subtype is appropriate
// to the unsignedness (etc.) of the comparison.
//
// Later, the instruction which matches both the Comparison Op (a Bool) and
// the flags (produced by the Cmp) specifies the coding of the comparison op
// by matching a specific subtype of Bool operand below, such as cmpOpU.

// used for signed integral comparisons and fp comparisons
// (the hex values are AArch64 condition-code encodings)

operand cmpOp()
%{
  match(Bool);

  format %{ "" %}
  interface(COND_INTER) %{
    equal(0x0, "eq");
    not_equal(0x1, "ne");
    less(0xb, "lt");
    greater_equal(0xa, "ge");
    less_equal(0xd, "le");
    greater(0xc, "gt");
    overflow(0x6, "vs");
    no_overflow(0x7, "vc");
  %}
%}

// used for unsigned integral comparisons
// (lo/hs/ls/hi are the AArch64 unsigned condition codes)

operand cmpOpU()
%{
  match(Bool);

  format %{ "" %}
  interface(COND_INTER) %{
    equal(0x0, "eq");
    not_equal(0x1, "ne");
    less(0x3, "lo");
    greater_equal(0x2, "hs");
    less_equal(0x9, "ls");
    greater(0x8, "hi");
    overflow(0x6, "vs");
    no_overflow(0x7, "vc");
  %}
%}
5323 
// Special operand allowing long args to int ops to be truncated for free

operand iRegL2I(iRegL reg) %{

  op_cost(0);

  match(ConvL2I reg);

  format %{ "l2i($reg)" %}

  interface(REG_INTER)
%}

// Memory operand classes restricted to offsets valid for vector
// accesses of 4, 8 and 16 bytes respectively.
opclass vmem4(indirect, indIndex, indOffI4, indOffL4);
opclass vmem8(indirect, indIndex, indOffI8, indOffL8);
opclass vmem16(indirect, indIndex, indOffI16, indOffL16);
5340 
//----------OPERAND CLASSES----------------------------------------------------
// Operand Classes are groups of operands that are used as to simplify
// instruction definitions by not requiring the AD writer to specify
// separate instructions for every form of operand when the
// instruction accepts multiple operand types with the same basic
// encoding and format. The classic case of this is memory operands.

// memory is used to define read/write location for load/store
// instruction defs. we can turn a memory op into an Address

opclass memory(indirect, indIndexScaledOffsetI, indIndexScaledOffsetL, indIndexOffsetI2L, indIndexScaledOffsetI2L, indIndexScaled, indIndexScaledI2L, indIndex, indOffI, indOffL,
               indirectN, indIndexScaledOffsetIN, indIndexScaledOffsetLN, indIndexOffsetI2LN, indIndexScaledOffsetI2LN, indIndexScaledN, indIndexScaledI2LN, indIndexN, indOffIN, indOffLN);
// iRegIorL2I is used for src inputs in rules for 32 bit int (I)
// operations. it allows the src to be either an iRegI or a (ConvL2I
// iRegL). in the latter case the l2i normally planted for a ConvL2I
// can be elided because the 32-bit instruction will just employ the
// lower 32 bits anyway.
//
// n.b. this does not elide all L2I conversions. if the truncated
// value is consumed by more than one operation then the ConvL2I
// cannot be bundled into the consuming nodes so an l2i gets planted
// (actually a movw $dst $src) and the downstream instructions consume
// the result of the l2i as an iRegI input. That's a shame since the
// movw is actually redundant but it's not too costly.

opclass iRegIorL2I(iRegI, iRegL2I);
5371 
//----------PIPELINE-----------------------------------------------------------
// Rules which define the behavior of the target architecture's pipeline.

// For specific pipelines, eg A53, define the stages of that pipeline
// as aliases of the generic S0..S5 stages declared in pipe_desc below.
//pipe_desc(ISS, EX1, EX2, WR);
#define ISS S0
#define EX1 S1
#define EX2 S2
#define WR  S3
5381 
5382 // Integer ALU reg operation
5383 pipeline %{
5384 
attributes %{
  // ARM instructions are of fixed length
  fixed_size_instructions;        // Fixed size instructions
  max_instructions_per_bundle = 2;   // A53 = 2, A57 = 4
  // ARM instructions come in 32-bit word units
  instruction_unit_size = 4;         // An instruction is 4 bytes long
  instruction_fetch_unit_size = 64;  // The processor fetches one line
  instruction_fetch_units = 1;       // of 64 bytes

  // List of nop instructions
  nops( MachNop );
%}
5397 
// We don't use an actual pipeline model so don't care about resources
// or description. we do use pipeline classes to introduce fixed
// latencies

//----------RESOURCES----------------------------------------------------------
// Resources are the functional units available to the machine

// INS01 / ALU combine their two units so a class can claim "either one"
resources( INS0, INS1, INS01 = INS0 | INS1,
           ALU0, ALU1, ALU = ALU0 | ALU1,
           MAC,
           DIV,
           BRANCH,
           LDST,
           NEON_FP);
5412 
//----------PIPELINE DESCRIPTION-----------------------------------------------
// Pipeline Description specifies the stages in the machine's pipeline

// Define the pipeline as a generic 6 stage pipeline
pipe_desc(S0, S1, S2, S3, S4, S5);
5418 
//----------PIPELINE CLASSES---------------------------------------------------
// Pipeline Classes describe the stages in which input and output are
// referenced by the hardware pipeline.

// FP two-source (dyadic) operation, single precision
pipe_class fp_dop_reg_reg_s(vRegF dst, vRegF src1, vRegF src2)
%{
  single_instruction;
  src1   : S1(read);
  src2   : S2(read);
  dst    : S5(write);
  INS01  : ISS;
  NEON_FP : S5;
%}

// FP two-source (dyadic) operation, double precision
pipe_class fp_dop_reg_reg_d(vRegD dst, vRegD src1, vRegD src2)
%{
  single_instruction;
  src1   : S1(read);
  src2   : S2(read);
  dst    : S5(write);
  INS01  : ISS;
  NEON_FP : S5;
%}

// FP one-source (unary) operation, single precision
pipe_class fp_uop_s(vRegF dst, vRegF src)
%{
  single_instruction;
  src    : S1(read);
  dst    : S5(write);
  INS01  : ISS;
  NEON_FP : S5;
%}

// FP one-source (unary) operation, double precision
pipe_class fp_uop_d(vRegD dst, vRegD src)
%{
  single_instruction;
  src    : S1(read);
  dst    : S5(write);
  INS01  : ISS;
  NEON_FP : S5;
%}
5460 
// double -> float conversion
pipe_class fp_d2f(vRegF dst, vRegD src)
%{
  single_instruction;
  src    : S1(read);
  dst    : S5(write);
  INS01  : ISS;
  NEON_FP : S5;
%}

// float -> double conversion
pipe_class fp_f2d(vRegD dst, vRegF src)
%{
  single_instruction;
  src    : S1(read);
  dst    : S5(write);
  INS01  : ISS;
  NEON_FP : S5;
%}

// float -> int conversion
pipe_class fp_f2i(iRegINoSp dst, vRegF src)
%{
  single_instruction;
  src    : S1(read);
  dst    : S5(write);
  INS01  : ISS;
  NEON_FP : S5;
%}

// float -> long conversion
pipe_class fp_f2l(iRegLNoSp dst, vRegF src)
%{
  single_instruction;
  src    : S1(read);
  dst    : S5(write);
  INS01  : ISS;
  NEON_FP : S5;
%}
5496 
5497 pipe_class fp_i2f(vRegF dst, iRegIorL2I src)
5498 %{
5499   single_instruction;
5500   src    : S1(read);
5501   dst    : S5(write);
5502   INS01  : ISS;
5503   NEON_FP : S5;
5504 %}
5505 
5506 pipe_class fp_l2f(vRegF dst, iRegL src)
5507 %{
5508   single_instruction;
5509   src    : S1(read);
5510   dst    : S5(write);
5511   INS01  : ISS;
5512   NEON_FP : S5;
5513 %}
5514 
5515 pipe_class fp_d2i(iRegINoSp dst, vRegD src)
5516 %{
5517   single_instruction;
5518   src    : S1(read);
5519   dst    : S5(write);
5520   INS01  : ISS;
5521   NEON_FP : S5;
5522 %}
5523 
5524 pipe_class fp_d2l(iRegLNoSp dst, vRegD src)
5525 %{
5526   single_instruction;
5527   src    : S1(read);
5528   dst    : S5(write);
5529   INS01  : ISS;
5530   NEON_FP : S5;
5531 %}
5532 
5533 pipe_class fp_i2d(vRegD dst, iRegIorL2I src)
5534 %{
5535   single_instruction;
5536   src    : S1(read);
5537   dst    : S5(write);
5538   INS01  : ISS;
5539   NEON_FP : S5;
5540 %}
5541 
5542 pipe_class fp_l2d(vRegD dst, iRegIorL2I src)
5543 %{
5544   single_instruction;
5545   src    : S1(read);
5546   dst    : S5(write);
5547   INS01  : ISS;
5548   NEON_FP : S5;
5549 %}
5550 
5551 pipe_class fp_div_s(vRegF dst, vRegF src1, vRegF src2)
5552 %{
5553   single_instruction;
5554   src1   : S1(read);
5555   src2   : S2(read);
5556   dst    : S5(write);
5557   INS0   : ISS;
5558   NEON_FP : S5;
5559 %}
5560 
5561 pipe_class fp_div_d(vRegD dst, vRegD src1, vRegD src2)
5562 %{
5563   single_instruction;
5564   src1   : S1(read);
5565   src2   : S2(read);
5566   dst    : S5(write);
5567   INS0   : ISS;
5568   NEON_FP : S5;
5569 %}
5570 
5571 pipe_class fp_cond_reg_reg_s(vRegF dst, vRegF src1, vRegF src2, rFlagsReg cr)
5572 %{
5573   single_instruction;
5574   cr     : S1(read);
5575   src1   : S1(read);
5576   src2   : S1(read);
5577   dst    : S3(write);
5578   INS01  : ISS;
5579   NEON_FP : S3;
5580 %}
5581 
5582 pipe_class fp_cond_reg_reg_d(vRegD dst, vRegD src1, vRegD src2, rFlagsReg cr)
5583 %{
5584   single_instruction;
5585   cr     : S1(read);
5586   src1   : S1(read);
5587   src2   : S1(read);
5588   dst    : S3(write);
5589   INS01  : ISS;
5590   NEON_FP : S3;
5591 %}
5592 
5593 pipe_class fp_imm_s(vRegF dst)
5594 %{
5595   single_instruction;
5596   dst    : S3(write);
5597   INS01  : ISS;
5598   NEON_FP : S3;
5599 %}
5600 
5601 pipe_class fp_imm_d(vRegD dst)
5602 %{
5603   single_instruction;
5604   dst    : S3(write);
5605   INS01  : ISS;
5606   NEON_FP : S3;
5607 %}
5608 
5609 pipe_class fp_load_constant_s(vRegF dst)
5610 %{
5611   single_instruction;
5612   dst    : S4(write);
5613   INS01  : ISS;
5614   NEON_FP : S4;
5615 %}
5616 
5617 pipe_class fp_load_constant_d(vRegD dst)
5618 %{
5619   single_instruction;
5620   dst    : S4(write);
5621   INS01  : ISS;
5622   NEON_FP : S4;
5623 %}
5624 
// Vector multiply, 64-bit (D register): sources read in S1,
// result in S5; dual issue in either slot.
pipe_class vmul64(vecD dst, vecD src1, vecD src2)
%{
  single_instruction;
  dst    : S5(write);
  src1   : S1(read);
  src2   : S1(read);
  INS01  : ISS;
  NEON_FP : S5;
%}

// Vector multiply, 128-bit (Q register): slot 0 issue only (INS0).
pipe_class vmul128(vecX dst, vecX src1, vecX src2)
%{
  single_instruction;
  dst    : S5(write);
  src1   : S1(read);
  src2   : S1(read);
  INS0   : ISS;
  NEON_FP : S5;
%}

// Vector multiply-accumulate, 64-bit: note dst is both written (S5)
// and read (S1) — it is the accumulator input.
pipe_class vmla64(vecD dst, vecD src1, vecD src2)
%{
  single_instruction;
  dst    : S5(write);
  src1   : S1(read);
  src2   : S1(read);
  dst    : S1(read);
  INS01  : ISS;
  NEON_FP : S5;
%}

// Vector multiply-accumulate, 128-bit; accumulator read as above,
// slot 0 issue only.
pipe_class vmla128(vecX dst, vecX src1, vecX src2)
%{
  single_instruction;
  dst    : S5(write);
  src1   : S1(read);
  src2   : S1(read);
  dst    : S1(read);
  INS0   : ISS;
  NEON_FP : S5;
%}

// Vector integer two-operand op, 64-bit: read S2, write S4.
pipe_class vdop64(vecD dst, vecD src1, vecD src2)
%{
  single_instruction;
  dst    : S4(write);
  src1   : S2(read);
  src2   : S2(read);
  INS01  : ISS;
  NEON_FP : S4;
%}

// Vector integer two-operand op, 128-bit; slot 0 issue only.
pipe_class vdop128(vecX dst, vecX src1, vecX src2)
%{
  single_instruction;
  dst    : S4(write);
  src1   : S2(read);
  src2   : S2(read);
  INS0   : ISS;
  NEON_FP : S4;
%}

// Vector logical op, 64-bit: read S2, write S3 (shorter latency than
// the arithmetic classes).
pipe_class vlogical64(vecD dst, vecD src1, vecD src2)
%{
  single_instruction;
  dst    : S3(write);
  src1   : S2(read);
  src2   : S2(read);
  INS01  : ISS;
  NEON_FP : S3;
%}

// Vector logical op, 128-bit; slot 0 issue only.
pipe_class vlogical128(vecX dst, vecX src1, vecX src2)
%{
  single_instruction;
  dst    : S3(write);
  src1   : S2(read);
  src2   : S2(read);
  INS0   : ISS;
  NEON_FP : S3;
%}

// Vector shift by register amount, 64-bit (shift amount lives in a
// vector register, hence the vecX shift operand).
pipe_class vshift64(vecD dst, vecD src, vecX shift)
%{
  single_instruction;
  dst    : S3(write);
  src    : S1(read);
  shift  : S1(read);
  INS01  : ISS;
  NEON_FP : S3;
%}

// Vector shift by register amount, 128-bit; slot 0 issue only.
pipe_class vshift128(vecX dst, vecX src, vecX shift)
%{
  single_instruction;
  dst    : S3(write);
  src    : S1(read);
  shift  : S1(read);
  INS0   : ISS;
  NEON_FP : S3;
%}

// Vector shift by immediate, 64-bit (immediate needs no pipeline read).
pipe_class vshift64_imm(vecD dst, vecD src, immI shift)
%{
  single_instruction;
  dst    : S3(write);
  src    : S1(read);
  INS01  : ISS;
  NEON_FP : S3;
%}

// Vector shift by immediate, 128-bit; slot 0 issue only.
pipe_class vshift128_imm(vecX dst, vecX src, immI shift)
%{
  single_instruction;
  dst    : S3(write);
  src    : S1(read);
  INS0   : ISS;
  NEON_FP : S3;
%}

// Vector FP two-operand op, 64-bit: read S1, write S5.
pipe_class vdop_fp64(vecD dst, vecD src1, vecD src2)
%{
  single_instruction;
  dst    : S5(write);
  src1   : S1(read);
  src2   : S1(read);
  INS01  : ISS;
  NEON_FP : S5;
%}

// Vector FP two-operand op, 128-bit; slot 0 issue only.
pipe_class vdop_fp128(vecX dst, vecX src1, vecX src2)
%{
  single_instruction;
  dst    : S5(write);
  src1   : S1(read);
  src2   : S1(read);
  INS0   : ISS;
  NEON_FP : S5;
%}

// Vector FP multiply/divide, 64-bit; slot 0 issue only.
pipe_class vmuldiv_fp64(vecD dst, vecD src1, vecD src2)
%{
  single_instruction;
  dst    : S5(write);
  src1   : S1(read);
  src2   : S1(read);
  INS0   : ISS;
  NEON_FP : S5;
%}

// Vector FP multiply/divide, 128-bit; slot 0 issue only.
pipe_class vmuldiv_fp128(vecX dst, vecX src1, vecX src2)
%{
  single_instruction;
  dst    : S5(write);
  src1   : S1(read);
  src2   : S1(read);
  INS0   : ISS;
  NEON_FP : S5;
%}

// Vector FP square root, 128-bit; slot 0 issue only.
pipe_class vsqrt_fp128(vecX dst, vecX src)
%{
  single_instruction;
  dst    : S5(write);
  src    : S1(read);
  INS0   : ISS;
  NEON_FP : S5;
%}

// Vector FP one-operand op, 64-bit.
pipe_class vunop_fp64(vecD dst, vecD src)
%{
  single_instruction;
  dst    : S5(write);
  src    : S1(read);
  INS01  : ISS;
  NEON_FP : S5;
%}

// Vector FP one-operand op, 128-bit; slot 0 issue only.
pipe_class vunop_fp128(vecX dst, vecX src)
%{
  single_instruction;
  dst    : S5(write);
  src    : S1(read);
  INS0   : ISS;
  NEON_FP : S5;
%}

// Duplicate general register into all lanes, 64-bit destination.
pipe_class vdup_reg_reg64(vecD dst, iRegI src)
%{
  single_instruction;
  dst    : S3(write);
  src    : S1(read);
  INS01  : ISS;
  NEON_FP : S3;
%}

// Duplicate general register into all lanes, 128-bit destination.
pipe_class vdup_reg_reg128(vecX dst, iRegI src)
%{
  single_instruction;
  dst    : S3(write);
  src    : S1(read);
  INS01  : ISS;
  NEON_FP : S3;
%}

// Duplicate float register into all lanes, 64-bit destination.
pipe_class vdup_reg_freg64(vecD dst, vRegF src)
%{
  single_instruction;
  dst    : S3(write);
  src    : S1(read);
  INS01  : ISS;
  NEON_FP : S3;
%}

// Duplicate float register into all lanes, 128-bit destination.
pipe_class vdup_reg_freg128(vecX dst, vRegF src)
%{
  single_instruction;
  dst    : S3(write);
  src    : S1(read);
  INS01  : ISS;
  NEON_FP : S3;
%}

// Duplicate double register into all lanes, 128-bit destination.
pipe_class vdup_reg_dreg128(vecX dst, vRegD src)
%{
  single_instruction;
  dst    : S3(write);
  src    : S1(read);
  INS01  : ISS;
  NEON_FP : S3;
%}

// Vector move immediate, 64-bit destination.
pipe_class vmovi_reg_imm64(vecD dst)
%{
  single_instruction;
  dst    : S3(write);
  INS01  : ISS;
  NEON_FP : S3;
%}

// Vector move immediate, 128-bit destination; slot 0 issue only.
pipe_class vmovi_reg_imm128(vecX dst)
%{
  single_instruction;
  dst    : S3(write);
  INS0   : ISS;
  NEON_FP : S3;
%}

// Vector load, 64 bits: address consumed at issue (ISS), data
// available in S5.
pipe_class vload_reg_mem64(vecD dst, vmem8 mem)
%{
  single_instruction;
  dst    : S5(write);
  mem    : ISS(read);
  INS01  : ISS;
  NEON_FP : S3;
%}

// Vector load, 128 bits.
pipe_class vload_reg_mem128(vecX dst, vmem16 mem)
%{
  single_instruction;
  dst    : S5(write);
  mem    : ISS(read);
  INS01  : ISS;
  NEON_FP : S3;
%}

// Vector store, 64 bits: address at issue, data read in S2.
pipe_class vstore_reg_mem64(vecD src, vmem8 mem)
%{
  single_instruction;
  mem    : ISS(read);
  src    : S2(read);
  INS01  : ISS;
  NEON_FP : S3;
%}

// Vector store, 128 bits.
// NOTE(review): src is declared vecD although this is the 128-bit
// variant (vload_reg_mem128 above uses vecX) — confirm whether vecX
// was intended; pipeline operand classes are timing-only, so this is
// benign at runtime but inconsistent.
pipe_class vstore_reg_mem128(vecD src, vmem16 mem)
%{
  single_instruction;
  mem    : ISS(read);
  src    : S2(read);
  INS01  : ISS;
  NEON_FP : S3;
%}
5908 
//------- Integer ALU operations --------------------------

// Integer ALU reg-reg operation
// Operands needed in EX1, result generated in EX2
// Eg.  ADD     x0, x1, x2
pipe_class ialu_reg_reg(iRegI dst, iRegI src1, iRegI src2)
%{
  single_instruction;
  dst    : EX2(write);
  src1   : EX1(read);
  src2   : EX1(read);
  INS01  : ISS; // Dual issue as instruction 0 or 1
  ALU    : EX2;
%}

// Integer ALU reg-reg operation with constant shift
// Shifted register must be available in LATE_ISS instead of EX1
// Eg.  ADD     x0, x1, x2, LSL #2
pipe_class ialu_reg_reg_shift(iRegI dst, iRegI src1, iRegI src2, immI shift)
%{
  single_instruction;
  dst    : EX2(write);
  src1   : EX1(read);
  src2   : ISS(read);
  INS01  : ISS;
  ALU    : EX2;
%}

// Integer ALU reg operation with constant shift
// Eg.  LSL     x0, x1, #shift
pipe_class ialu_reg_shift(iRegI dst, iRegI src1)
%{
  single_instruction;
  dst    : EX2(write);
  src1   : ISS(read);
  INS01  : ISS;
  ALU    : EX2;
%}

// Integer ALU reg-reg operation with variable shift
// Both operands must be available in LATE_ISS instead of EX1
// Result is available in EX1 instead of EX2
// Eg.  LSLV    x0, x1, x2
pipe_class ialu_reg_reg_vshift(iRegI dst, iRegI src1, iRegI src2)
%{
  single_instruction;
  dst    : EX1(write);
  src1   : ISS(read);
  src2   : ISS(read);
  INS01  : ISS;
  ALU    : EX1;
%}

// Integer ALU reg-reg operation with extract
// As for _vshift above, but result generated in EX2
// Eg.  EXTR    x0, x1, x2, #N
// NOTE(review): dst is written in EX2 but the ALU resource is booked
// at EX1 — the other EX2-result classes book ALU : EX2; confirm
// whether EX1 here is intentional.
pipe_class ialu_reg_reg_extr(iRegI dst, iRegI src1, iRegI src2)
%{
  single_instruction;
  dst    : EX2(write);
  src1   : ISS(read);
  src2   : ISS(read);
  INS1   : ISS; // Can only dual issue as Instruction 1
  ALU    : EX1;
%}

// Integer ALU reg operation
// Eg.  NEG     x0, x1
pipe_class ialu_reg(iRegI dst, iRegI src)
%{
  single_instruction;
  dst    : EX2(write);
  src    : EX1(read);
  INS01  : ISS;
  ALU    : EX2;
%}

// Integer ALU reg-immediate operation
// Eg.  ADD     x0, x1, #N
pipe_class ialu_reg_imm(iRegI dst, iRegI src1)
%{
  single_instruction;
  dst    : EX2(write);
  src1   : EX1(read);
  INS01  : ISS;
  ALU    : EX2;
%}

// Integer ALU immediate operation (no source operands)
// Eg.  MOV     x0, #N
pipe_class ialu_imm(iRegI dst)
%{
  single_instruction;
  dst    : EX1(write);
  INS01  : ISS;
  ALU    : EX1;
%}
6006 
//------- Compare operation -------------------------------

// Compare reg-reg
// Eg.  CMP     x0, x1
pipe_class icmp_reg_reg(rFlagsReg cr, iRegI op1, iRegI op2)
%{
  single_instruction;
//  fixed_latency(16);
  cr     : EX2(write);
  op1    : EX1(read);
  op2    : EX1(read);
  INS01  : ISS;
  ALU    : EX2;
%}

// Compare reg-immediate
// Eg.  CMP     x0, #N
pipe_class icmp_reg_imm(rFlagsReg cr, iRegI op1)
%{
  single_instruction;
//  fixed_latency(16);
  cr     : EX2(write);
  op1    : EX1(read);
  INS01  : ISS;
  ALU    : EX2;
%}
6033 
//------- Conditional instructions ------------------------

// Conditional no operands
// Eg.  CSINC   x0, zr, zr, <cond>
pipe_class icond_none(iRegI dst, rFlagsReg cr)
%{
  single_instruction;
  cr     : EX1(read);
  dst    : EX2(write);
  INS01  : ISS;
  ALU    : EX2;
%}

// Conditional 2 operand
// EG.  CSEL    X0, X1, X2, <cond>
pipe_class icond_reg_reg(iRegI dst, iRegI src1, iRegI src2, rFlagsReg cr)
%{
  single_instruction;
  cr     : EX1(read);
  src1   : EX1(read);
  src2   : EX1(read);
  dst    : EX2(write);
  INS01  : ISS;
  ALU    : EX2;
%}

// Conditional 1 operand
// EG.  CSEL    X0, X1, zr, <cond>
pipe_class icond_reg(iRegI dst, iRegI src, rFlagsReg cr)
%{
  single_instruction;
  cr     : EX1(read);
  src    : EX1(read);
  dst    : EX2(write);
  INS01  : ISS;
  ALU    : EX2;
%}
6071 
//------- Multiply pipeline operations --------------------

// Multiply reg-reg
// Eg.  MUL     w0, w1, w2
pipe_class imul_reg_reg(iRegI dst, iRegI src1, iRegI src2)
%{
  single_instruction;
  dst    : WR(write);
  src1   : ISS(read);
  src2   : ISS(read);
  INS01  : ISS;
  MAC    : WR;
%}

// Multiply accumulate
// Eg.  MADD    w0, w1, w2, w3
pipe_class imac_reg_reg(iRegI dst, iRegI src1, iRegI src2, iRegI src3)
%{
  single_instruction;
  dst    : WR(write);
  src1   : ISS(read);
  src2   : ISS(read);
  src3   : ISS(read);
  INS01  : ISS;
  MAC    : WR;
%}

// Long (64 bit) multiply reg-reg
// Eg.  MUL     x0, x1, x2
pipe_class lmul_reg_reg(iRegI dst, iRegI src1, iRegI src2)
%{
  single_instruction;
  fixed_latency(3); // Maximum latency for 64 bit mul
  dst    : WR(write);
  src1   : ISS(read);
  src2   : ISS(read);
  INS01  : ISS;
  MAC    : WR;
%}

// Long (64 bit) multiply accumulate
// Eg.  MADD    x0, x1, x2, x3
pipe_class lmac_reg_reg(iRegI dst, iRegI src1, iRegI src2, iRegI src3)
%{
  single_instruction;
  fixed_latency(3); // Maximum latency for 64 bit mul
  dst    : WR(write);
  src1   : ISS(read);
  src2   : ISS(read);
  src3   : ISS(read);
  INS01  : ISS;
  MAC    : WR;
%}
6124 
//------- Divide pipeline operations --------------------

// 32 bit divide
// Eg.  SDIV    w0, w1, w2
pipe_class idiv_reg_reg(iRegI dst, iRegI src1, iRegI src2)
%{
  single_instruction;
  fixed_latency(8); // Maximum latency for 32 bit divide
  dst    : WR(write);
  src1   : ISS(read);
  src2   : ISS(read);
  INS0   : ISS; // Can only dual issue as instruction 0
  DIV    : WR;
%}

// 64 bit divide
// Eg.  SDIV    x0, x1, x2
pipe_class ldiv_reg_reg(iRegI dst, iRegI src1, iRegI src2)
%{
  single_instruction;
  fixed_latency(16); // Maximum latency for 64 bit divide
  dst    : WR(write);
  src1   : ISS(read);
  src2   : ISS(read);
  INS0   : ISS; // Can only dual issue as instruction 0
  DIV    : WR;
%}
6150 
//------- Load pipeline operations ------------------------

// Load - prefetch
// Eg.  PFRM    <mem>
pipe_class iload_prefetch(memory mem)
%{
  single_instruction;
  mem    : ISS(read);
  INS01  : ISS;
  LDST   : WR;
%}

// Load - reg, mem
// Eg.  LDR     x0, <mem>
pipe_class iload_reg_mem(iRegI dst, memory mem)
%{
  single_instruction;
  dst    : WR(write);
  mem    : ISS(read);
  INS01  : ISS;
  LDST   : WR;
%}

// Load - reg, reg
// Eg.  LDR     x0, [sp, x1]
pipe_class iload_reg_reg(iRegI dst, iRegI src)
%{
  single_instruction;
  dst    : WR(write);
  src    : ISS(read);
  INS01  : ISS;
  LDST   : WR;
%}
6184 
//------- Store pipeline operations -----------------------

// Store - zr, mem
// Eg.  STR     zr, <mem>
pipe_class istore_mem(memory mem)
%{
  single_instruction;
  mem    : ISS(read);
  INS01  : ISS;
  LDST   : WR;
%}

// Store - reg, mem
// Eg.  STR     x0, <mem>
pipe_class istore_reg_mem(iRegI src, memory mem)
%{
  single_instruction;
  mem    : ISS(read);
  src    : EX2(read);
  INS01  : ISS;
  LDST   : WR;
%}

// Store - reg, reg
// Eg. STR      x0, [sp, x1]
// Note: both operands are reads here — dst is the index register of
// the address, not a written destination.
pipe_class istore_reg_reg(iRegI dst, iRegI src)
%{
  single_instruction;
  dst    : ISS(read);
  src    : EX2(read);
  INS01  : ISS;
  LDST   : WR;
%}
6218 
//------- Branch pipeline operations ----------------------

// Branch
pipe_class pipe_branch()
%{
  single_instruction;
  INS01  : ISS;
  BRANCH : EX1;
%}

// Conditional branch
pipe_class pipe_branch_cond(rFlagsReg cr)
%{
  single_instruction;
  cr     : EX1(read);
  INS01  : ISS;
  BRANCH : EX1;
%}

// Compare & Branch
// EG.  CBZ/CBNZ
pipe_class pipe_cmp_branch(iRegI op1)
%{
  single_instruction;
  op1    : EX1(read);
  INS01  : ISS;
  BRANCH : EX1;
%}
6247 
//------- Synchronisation operations ----------------------

// Any operation requiring serialization.
// EG.  DMB/Atomic Ops/Load Acquire/Str Release
pipe_class pipe_serial()
%{
  single_instruction;
  force_serialization;
  fixed_latency(16);
  INS01  : ISS(2); // Cannot dual issue with any other instruction
  LDST   : WR;
%}

// Generic big/slow expanded idiom - also serialized
pipe_class pipe_slow()
%{
  instruction_count(10);
  multiple_bundles;
  force_serialization;
  fixed_latency(16);
  INS01  : ISS(2); // Cannot dual issue with any other instruction
  LDST   : WR;
%}

// Empty pipeline class
pipe_class pipe_class_empty()
%{
  single_instruction;
  fixed_latency(0);
%}

// Default pipeline class.
pipe_class pipe_class_default()
%{
  single_instruction;
  fixed_latency(2);
%}

// Pipeline class for compares.
pipe_class pipe_class_compare()
%{
  single_instruction;
  fixed_latency(16);
%}

// Pipeline class for memory operations.
pipe_class pipe_class_memory()
%{
  single_instruction;
  fixed_latency(16);
%}

// Pipeline class for call.
pipe_class pipe_class_call()
%{
  single_instruction;
  fixed_latency(100);
%}

// Define the class for the Nop node.
define %{
   MachNop = pipe_class_empty;
%}
6311 
6312 %}
6313 //----------INSTRUCTIONS-------------------------------------------------------
6314 //
6315 // match      -- States which machine-independent subtree may be replaced
6316 //               by this instruction.
6317 // ins_cost   -- The estimated cost of this instruction is used by instruction
6318 //               selection to identify a minimum cost tree of machine
6319 //               instructions that matches a tree of machine-independent
6320 //               instructions.
6321 // format     -- A string providing the disassembly for this instruction.
6322 //               The value of an instruction's operand may be inserted
6323 //               by referring to it with a '$' prefix.
6324 // opcode     -- Three instruction opcodes may be provided.  These are referred
6325 //               to within an encode class as $primary, $secondary, and $tertiary
//               respectively.  The primary opcode is commonly used to
6327 //               indicate the type of machine instruction, while secondary
6328 //               and tertiary are often used for prefix options or addressing
6329 //               modes.
6330 // ins_encode -- A list of encode classes with parameters. The encode class
6331 //               name must have been defined in an 'enc_class' specification
6332 //               in the encode section of the architecture description.
6333 
6334 // ============================================================================
6335 // Memory (Load/Store) Instructions
6336 
6337 // Load Instructions
6338 
// Load Byte (8 bit signed)
// The needs_acquiring_load predicate excludes loads that require
// acquire semantics; those are presumably matched by separate
// acquiring rules elsewhere in this file — TODO confirm.
instruct loadB(iRegINoSp dst, memory mem)
%{
  match(Set dst (LoadB mem));
  predicate(!needs_acquiring_load(n));

  ins_cost(4 * INSN_COST);
  format %{ "ldrsbw  $dst, $mem\t# byte" %}

  ins_encode(aarch64_enc_ldrsbw(dst, mem));

  ins_pipe(iload_reg_mem);
%}

// Load Byte (8 bit signed) into long
// Matches the ConvI2L(LoadB) subtree so the sign-extending 64-bit
// load replaces both nodes; predicate applies to the inner LoadB.
instruct loadB2L(iRegLNoSp dst, memory mem)
%{
  match(Set dst (ConvI2L (LoadB mem)));
  predicate(!needs_acquiring_load(n->in(1)));

  ins_cost(4 * INSN_COST);
  format %{ "ldrsb  $dst, $mem\t# byte" %}

  ins_encode(aarch64_enc_ldrsb(dst, mem));

  ins_pipe(iload_reg_mem);
%}

// Load Byte (8 bit unsigned)
instruct loadUB(iRegINoSp dst, memory mem)
%{
  match(Set dst (LoadUB mem));
  predicate(!needs_acquiring_load(n));

  ins_cost(4 * INSN_COST);
  format %{ "ldrbw  $dst, $mem\t# byte" %}

  ins_encode(aarch64_enc_ldrb(dst, mem));

  ins_pipe(iload_reg_mem);
%}

// Load Byte (8 bit unsigned) into long
instruct loadUB2L(iRegLNoSp dst, memory mem)
%{
  match(Set dst (ConvI2L (LoadUB mem)));
  predicate(!needs_acquiring_load(n->in(1)));

  ins_cost(4 * INSN_COST);
  format %{ "ldrb  $dst, $mem\t# byte" %}

  ins_encode(aarch64_enc_ldrb(dst, mem));

  ins_pipe(iload_reg_mem);
%}

// Load Short (16 bit signed)
instruct loadS(iRegINoSp dst, memory mem)
%{
  match(Set dst (LoadS mem));
  predicate(!needs_acquiring_load(n));

  ins_cost(4 * INSN_COST);
  format %{ "ldrshw  $dst, $mem\t# short" %}

  ins_encode(aarch64_enc_ldrshw(dst, mem));

  ins_pipe(iload_reg_mem);
%}

// Load Short (16 bit signed) into long
instruct loadS2L(iRegLNoSp dst, memory mem)
%{
  match(Set dst (ConvI2L (LoadS mem)));
  predicate(!needs_acquiring_load(n->in(1)));

  ins_cost(4 * INSN_COST);
  format %{ "ldrsh  $dst, $mem\t# short" %}

  ins_encode(aarch64_enc_ldrsh(dst, mem));

  ins_pipe(iload_reg_mem);
%}

// Load Char (16 bit unsigned)
instruct loadUS(iRegINoSp dst, memory mem)
%{
  match(Set dst (LoadUS mem));
  predicate(!needs_acquiring_load(n));

  ins_cost(4 * INSN_COST);
  format %{ "ldrh  $dst, $mem\t# short" %}

  ins_encode(aarch64_enc_ldrh(dst, mem));

  ins_pipe(iload_reg_mem);
%}

// Load Short/Char (16 bit unsigned) into long
instruct loadUS2L(iRegLNoSp dst, memory mem)
%{
  match(Set dst (ConvI2L (LoadUS mem)));
  predicate(!needs_acquiring_load(n->in(1)));

  ins_cost(4 * INSN_COST);
  format %{ "ldrh  $dst, $mem\t# short" %}

  ins_encode(aarch64_enc_ldrh(dst, mem));

  ins_pipe(iload_reg_mem);
%}

// Load Integer (32 bit signed)
instruct loadI(iRegINoSp dst, memory mem)
%{
  match(Set dst (LoadI mem));
  predicate(!needs_acquiring_load(n));

  ins_cost(4 * INSN_COST);
  format %{ "ldrw  $dst, $mem\t# int" %}

  ins_encode(aarch64_enc_ldrw(dst, mem));

  ins_pipe(iload_reg_mem);
%}

// Load Integer (32 bit signed) into long
instruct loadI2L(iRegLNoSp dst, memory mem)
%{
  match(Set dst (ConvI2L (LoadI mem)));
  predicate(!needs_acquiring_load(n->in(1)));

  ins_cost(4 * INSN_COST);
  format %{ "ldrsw  $dst, $mem\t# int" %}

  ins_encode(aarch64_enc_ldrsw(dst, mem));

  ins_pipe(iload_reg_mem);
%}

// Load Integer (32 bit unsigned) into long
// The AndL with the 32-bit mask is folded away: ldrw zero-extends,
// which is exactly the masked result.
instruct loadUI2L(iRegLNoSp dst, memory mem, immL_32bits mask)
%{
  match(Set dst (AndL (ConvI2L (LoadI mem)) mask));
  predicate(!needs_acquiring_load(n->in(1)->in(1)->as_Load()));

  ins_cost(4 * INSN_COST);
  format %{ "ldrw  $dst, $mem\t# int" %}

  ins_encode(aarch64_enc_ldrw(dst, mem));

  ins_pipe(iload_reg_mem);
%}
6492 
// Load Long (64 bit signed)
// Fix: the format (PrintOptoAssembly) comment said "# int" — a
// copy-paste from loadI; this is a 64-bit long load.
instruct loadL(iRegLNoSp dst, memory mem)
%{
  match(Set dst (LoadL mem));
  predicate(!needs_acquiring_load(n));

  ins_cost(4 * INSN_COST);
  format %{ "ldr  $dst, $mem\t# long" %}

  ins_encode(aarch64_enc_ldr(dst, mem));

  ins_pipe(iload_reg_mem);
%}
6506 
// Load Range
// No needs_acquiring_load predicate: array-length loads never need
// acquire semantics.
instruct loadRange(iRegINoSp dst, memory mem)
%{
  match(Set dst (LoadRange mem));

  ins_cost(4 * INSN_COST);
  format %{ "ldrw  $dst, $mem\t# range" %}

  ins_encode(aarch64_enc_ldrw(dst, mem));

  ins_pipe(iload_reg_mem);
%}

// Load Pointer
instruct loadP(iRegPNoSp dst, memory mem)
%{
  match(Set dst (LoadP mem));
  predicate(!needs_acquiring_load(n));

  ins_cost(4 * INSN_COST);
  format %{ "ldr  $dst, $mem\t# ptr" %}

  ins_encode(aarch64_enc_ldr(dst, mem));

  ins_pipe(iload_reg_mem);
%}

// Load Compressed Pointer
instruct loadN(iRegNNoSp dst, memory mem)
%{
  match(Set dst (LoadN mem));
  predicate(!needs_acquiring_load(n));

  ins_cost(4 * INSN_COST);
  format %{ "ldrw  $dst, $mem\t# compressed ptr" %}

  ins_encode(aarch64_enc_ldrw(dst, mem));

  ins_pipe(iload_reg_mem);
%}

// Load Klass Pointer
instruct loadKlass(iRegPNoSp dst, memory mem)
%{
  match(Set dst (LoadKlass mem));
  predicate(!needs_acquiring_load(n));

  ins_cost(4 * INSN_COST);
  format %{ "ldr  $dst, $mem\t# class" %}

  ins_encode(aarch64_enc_ldr(dst, mem));

  ins_pipe(iload_reg_mem);
%}

// Load Narrow Klass Pointer
instruct loadNKlass(iRegNNoSp dst, memory mem)
%{
  match(Set dst (LoadNKlass mem));
  predicate(!needs_acquiring_load(n));

  ins_cost(4 * INSN_COST);
  format %{ "ldrw  $dst, $mem\t# compressed class ptr" %}

  ins_encode(aarch64_enc_ldrw(dst, mem));

  ins_pipe(iload_reg_mem);
%}

// Load Float
// Note: FP loads use pipe_class_memory rather than iload_reg_mem
// (which is typed for integer destinations).
instruct loadF(vRegF dst, memory mem)
%{
  match(Set dst (LoadF mem));
  predicate(!needs_acquiring_load(n));

  ins_cost(4 * INSN_COST);
  format %{ "ldrs  $dst, $mem\t# float" %}

  ins_encode( aarch64_enc_ldrs(dst, mem) );

  ins_pipe(pipe_class_memory);
%}

// Load Double
instruct loadD(vRegD dst, memory mem)
%{
  match(Set dst (LoadD mem));
  predicate(!needs_acquiring_load(n));

  ins_cost(4 * INSN_COST);
  format %{ "ldrd  $dst, $mem\t# double" %}

  ins_encode( aarch64_enc_ldrd(dst, mem) );

  ins_pipe(pipe_class_memory);
%}
6603 
6604 
// Load Int Constant
instruct loadConI(iRegINoSp dst, immI src)
%{
  match(Set dst src);

  ins_cost(INSN_COST);
  format %{ "mov $dst, $src\t# int" %}

  ins_encode( aarch64_enc_movw_imm(dst, src) );

  ins_pipe(ialu_imm);
%}

// Load Long Constant
instruct loadConL(iRegLNoSp dst, immL src)
%{
  match(Set dst src);

  ins_cost(INSN_COST);
  format %{ "mov $dst, $src\t# long" %}

  ins_encode( aarch64_enc_mov_imm(dst, src) );

  ins_pipe(ialu_imm);
%}

// Load Pointer Constant
// Costed at 4 instructions: a general pointer constant may need a
// multi-instruction materialization sequence.
instruct loadConP(iRegPNoSp dst, immP con)
%{
  match(Set dst con);

  ins_cost(INSN_COST * 4);
  format %{
    "mov  $dst, $con\t# ptr\n\t"
  %}

  ins_encode(aarch64_enc_mov_p(dst, con));

  ins_pipe(ialu_imm);
%}

// Load Null Pointer Constant
instruct loadConP0(iRegPNoSp dst, immP0 con)
%{
  match(Set dst con);

  ins_cost(INSN_COST);
  format %{ "mov  $dst, $con\t# NULL ptr" %}

  ins_encode(aarch64_enc_mov_p0(dst, con));

  ins_pipe(ialu_imm);
%}
6660 
// Load Pointer Constant One
// Fix: the format (PrintOptoAssembly) comment said "# NULL ptr" — a
// copy-paste from loadConP0 above; this rule loads the pointer
// constant one (immP_1), not null.
instruct loadConP1(iRegPNoSp dst, immP_1 con)
%{
  match(Set dst con);

  ins_cost(INSN_COST);
  format %{ "mov  $dst, $con\t# ptr 1" %}

  ins_encode(aarch64_enc_mov_p1(dst, con));

  ins_pipe(ialu_imm);
%}
6674 
// Load Poll Page Constant
// Uses adr (PC-relative) rather than mov — the poll page address is
// materialized relative to the code.
instruct loadConPollPage(iRegPNoSp dst, immPollPage con)
%{
  match(Set dst con);

  ins_cost(INSN_COST);
  format %{ "adr  $dst, $con\t# Poll Page Ptr" %}

  ins_encode(aarch64_enc_mov_poll_page(dst, con));

  ins_pipe(ialu_imm);
%}

// Load Byte Map Base Constant
instruct loadByteMapBase(iRegPNoSp dst, immByteMapBase con)
%{
  match(Set dst con);

  ins_cost(INSN_COST);
  format %{ "adr  $dst, $con\t# Byte Map Base" %}

  ins_encode(aarch64_enc_mov_byte_map_base(dst, con));

  ins_pipe(ialu_imm);
%}

// Load Narrow Pointer Constant
instruct loadConN(iRegNNoSp dst, immN con)
%{
  match(Set dst con);

  ins_cost(INSN_COST * 4);
  format %{ "mov  $dst, $con\t# compressed ptr" %}

  ins_encode(aarch64_enc_mov_n(dst, con));

  ins_pipe(ialu_imm);
%}

// Load Narrow Null Pointer Constant
instruct loadConN0(iRegNNoSp dst, immN0 con)
%{
  match(Set dst con);

  ins_cost(INSN_COST);
  format %{ "mov  $dst, $con\t# compressed NULL ptr" %}

  ins_encode(aarch64_enc_mov_n0(dst, con));

  ins_pipe(ialu_imm);
%}

// Load Narrow Klass Constant
instruct loadConNKlass(iRegNNoSp dst, immNKlass con)
%{
  match(Set dst con);

  ins_cost(INSN_COST);
  format %{ "mov  $dst, $con\t# compressed klass ptr" %}

  ins_encode(aarch64_enc_mov_nk(dst, con));

  ins_pipe(ialu_imm);
%}
6744 
// Load Packed Float Constant
// immFPacked: a float encodable as an FMOV immediate, so no constant
// table access is needed. fmovs takes the value as a double, hence
// the cast.
instruct loadConF_packed(vRegF dst, immFPacked con) %{
  match(Set dst con);
  ins_cost(INSN_COST * 4);
  format %{ "fmovs  $dst, $con"%}
  ins_encode %{
    __ fmovs(as_FloatRegister($dst$$reg), (double)$con$$constant);
  %}

  ins_pipe(fp_imm_s);
%}

// Load Float Constant
// Fallback for floats not encodable as an FMOV immediate: load from
// the constant table.
instruct loadConF(vRegF dst, immF con) %{
  match(Set dst con);

  ins_cost(INSN_COST * 4);

  format %{
    "ldrs $dst, [$constantaddress]\t# load from constant table: float=$con\n\t"
  %}

  ins_encode %{
    __ ldrs(as_FloatRegister($dst$$reg), $constantaddress($con));
  %}

  ins_pipe(fp_load_constant_s);
%}

// Load Packed Double Constant
// NOTE(review): costed at INSN_COST while loadConF_packed above uses
// INSN_COST * 4 — confirm the asymmetry is intentional.
instruct loadConD_packed(vRegD dst, immDPacked con) %{
  match(Set dst con);
  ins_cost(INSN_COST);
  format %{ "fmovd  $dst, $con"%}
  ins_encode %{
    __ fmovd(as_FloatRegister($dst$$reg), $con$$constant);
  %}

  ins_pipe(fp_imm_d);
%}
6788 
// Load Double Constant
//
// General case for doubles not encodable as an fmov immediate (see
// loadConD_packed above): the value is placed in the constant table and
// loaded with a pc-relative ldrd.
// Fix: the format previously said "float=$con"; this instruction loads a
// double, so the debug format now says "double=$con". (Format strings
// affect only disassembly/debug output, not generated code.)

instruct loadConD(vRegD dst, immD con) %{
  match(Set dst con);

  ins_cost(INSN_COST * 5);
  format %{
    "ldrd $dst, [$constantaddress]\t# load from constant table: double=$con\n\t"
  %}

  ins_encode %{
    __ ldrd(as_FloatRegister($dst$$reg), $constantaddress($con));
  %}

  ins_pipe(fp_load_constant_d);
%}
6805 
// Store Instructions
//
// Plain (non-volatile) stores. Each is guarded by
// !needs_releasing_store(n): when a releasing (stlr*) store is required
// the volatile variants further down match instead.

// Store CMS card-mark Immediate
//
// Card-table mark: writes a zero byte. unnecessary_storestore(n) proves
// no StoreStore barrier is needed before the card write.
instruct storeimmCM0(immI0 zero, memory mem)
%{
  match(Set mem (StoreCM mem zero));
  predicate(unnecessary_storestore(n));

  ins_cost(INSN_COST);
  format %{ "strb zr, $mem\t# byte" %}

  ins_encode(aarch64_enc_strb0(mem));

  ins_pipe(istore_mem);
%}

// Store CMS card-mark Immediate with intervening StoreStore
// needed when using CMS with no conditional card marking
//
// Fallback (no predicate) when the barrier cannot be elided: emits
// dmb ishst before the card write.
instruct storeimmCM0_ordered(immI0 zero, memory mem)
%{
  match(Set mem (StoreCM mem zero));

  ins_cost(INSN_COST * 2);
  format %{ "dmb ishst"
      "\n\tstrb zr, $mem\t# byte" %}

  ins_encode(aarch64_enc_strb0_ordered(mem));

  ins_pipe(istore_mem);
%}

// Store Byte
instruct storeB(iRegIorL2I src, memory mem)
%{
  match(Set mem (StoreB mem src));
  predicate(!needs_releasing_store(n));

  ins_cost(INSN_COST);
  format %{ "strb  $src, $mem\t# byte" %}

  ins_encode(aarch64_enc_strb(src, mem));

  ins_pipe(istore_reg_mem);
%}


// Store zero byte directly from the zero register (no temp needed).
instruct storeimmB0(immI0 zero, memory mem)
%{
  match(Set mem (StoreB mem zero));
  predicate(!needs_releasing_store(n));

  ins_cost(INSN_COST);
  format %{ "strb zr, $mem\t# byte" %}

  ins_encode(aarch64_enc_strb0(mem));

  ins_pipe(istore_mem);
%}

// Store Char/Short
instruct storeC(iRegIorL2I src, memory mem)
%{
  match(Set mem (StoreC mem src));
  predicate(!needs_releasing_store(n));

  ins_cost(INSN_COST);
  format %{ "strh  $src, $mem\t# short" %}

  ins_encode(aarch64_enc_strh(src, mem));

  ins_pipe(istore_reg_mem);
%}

instruct storeimmC0(immI0 zero, memory mem)
%{
  match(Set mem (StoreC mem zero));
  predicate(!needs_releasing_store(n));

  ins_cost(INSN_COST);
  format %{ "strh  zr, $mem\t# short" %}

  ins_encode(aarch64_enc_strh0(mem));

  ins_pipe(istore_mem);
%}

// Store Integer

instruct storeI(iRegIorL2I src, memory mem)
%{
  match(Set mem(StoreI mem src));
  predicate(!needs_releasing_store(n));

  ins_cost(INSN_COST);
  format %{ "strw  $src, $mem\t# int" %}

  ins_encode(aarch64_enc_strw(src, mem));

  ins_pipe(istore_reg_mem);
%}

instruct storeimmI0(immI0 zero, memory mem)
%{
  match(Set mem(StoreI mem zero));
  predicate(!needs_releasing_store(n));

  ins_cost(INSN_COST);
  format %{ "strw  zr, $mem\t# int" %}

  ins_encode(aarch64_enc_strw0(mem));

  ins_pipe(istore_mem);
%}

// Store Long (64 bit signed)
// NOTE(review): the format comment says "# int" although this stores a
// long; cosmetic debug-output oddity only.
instruct storeL(iRegL src, memory mem)
%{
  match(Set mem (StoreL mem src));
  predicate(!needs_releasing_store(n));

  ins_cost(INSN_COST);
  format %{ "str  $src, $mem\t# int" %}

  ins_encode(aarch64_enc_str(src, mem));

  ins_pipe(istore_reg_mem);
%}

// Store Long (64 bit signed)
instruct storeimmL0(immL0 zero, memory mem)
%{
  match(Set mem (StoreL mem zero));
  predicate(!needs_releasing_store(n));

  ins_cost(INSN_COST);
  format %{ "str  zr, $mem\t# int" %}

  ins_encode(aarch64_enc_str0(mem));

  ins_pipe(istore_mem);
%}

// Store Pointer
//
// Open-coded encoding (rather than an encoding class) because of two
// special cases handled inline:
//  1. storing the stack pointer itself (sp cannot be the data operand
//     of str on AArch64, so it is first copied to rscratch2);
//  2. index operands that originate from an int (I2L addressing modes)
//     must use sxtw extension rather than lsl.
instruct storeP(iRegP src, memory mem)
%{
  match(Set mem (StoreP mem src));
  predicate(!needs_releasing_store(n));

  ins_cost(INSN_COST);
  format %{ "str  $src, $mem\t# ptr" %}

  ins_encode %{
    int opcode = $mem->opcode();
    Register base = as_Register($mem$$base);
    int index = $mem$$index;
    int size = $mem$$scale;
    int disp = $mem$$disp;
    Register reg = as_Register($src$$reg);

    // we sometimes get asked to store the stack pointer into the
    // current thread -- we cannot do that directly on AArch64
    if (reg == r31_sp) {
      MacroAssembler _masm(&cbuf);
      assert(as_Register($mem$$base) == rthread, "unexpected store for sp");
      __ mov(rscratch2, sp);
      reg = rscratch2;
    }
    Address::extend scale;

    // Hooboy, this is fugly.  We need a way to communicate to the
    // encoder that the index needs to be sign extended, so we have to
    // enumerate all the cases.
    switch (opcode) {
    case INDINDEXSCALEDOFFSETI2L:
    case INDINDEXSCALEDI2L:
    case INDINDEXSCALEDOFFSETI2LN:
    case INDINDEXSCALEDI2LN:
    case INDINDEXOFFSETI2L:
    case INDINDEXOFFSETI2LN:
      scale = Address::sxtw(size);
      break;
    default:
      scale = Address::lsl(size);
    }
    Address adr;
    if (index == -1) {
      // base + displacement only
      adr = Address(base, disp);
    } else {
      if (disp == 0) {
        adr = Address(base, as_Register(index), scale);
      } else {
        // base + index + displacement is not a single addressing mode:
        // fold base+disp into rscratch1 first.
        __ lea(rscratch1, Address(base, disp));
        adr = Address(rscratch1, as_Register(index), scale);
      }
    }

    __ str(reg, adr);
  %}

  ins_pipe(istore_reg_mem);
%}
7007 
// Store Pointer
// Store of a NULL pointer: write the zero register directly.
instruct storeimmP0(immP0 zero, memory mem)
%{
  match(Set mem (StoreP mem zero));
  predicate(!needs_releasing_store(n));

  ins_cost(INSN_COST);
  format %{ "str zr, $mem\t# ptr" %}

  ins_encode(aarch64_enc_str0(mem));

  ins_pipe(istore_mem);
%}

// Store Compressed Pointer
instruct storeN(iRegN src, memory mem)
%{
  match(Set mem (StoreN mem src));
  predicate(!needs_releasing_store(n));

  ins_cost(INSN_COST);
  format %{ "strw  $src, $mem\t# compressed ptr" %}

  ins_encode(aarch64_enc_strw(src, mem));

  ins_pipe(istore_reg_mem);
%}

// Store of narrow NULL: when both the oop and klass encoding bases are
// NULL, rheapbase is known to hold zero, so it can be stored directly
// instead of materializing a zero in a scratch register.
instruct storeImmN0(iRegIHeapbase heapbase, immN0 zero, memory mem)
%{
  match(Set mem (StoreN mem zero));
  predicate(Universe::narrow_oop_base() == NULL &&
            Universe::narrow_klass_base() == NULL  &&
            (!needs_releasing_store(n)));

  ins_cost(INSN_COST);
  format %{ "strw  rheapbase, $mem\t# compressed ptr (rheapbase==0)" %}

  ins_encode(aarch64_enc_strw(heapbase, mem));

  ins_pipe(istore_reg_mem);
%}

// Store Float
instruct storeF(vRegF src, memory mem)
%{
  match(Set mem (StoreF mem src));
  predicate(!needs_releasing_store(n));

  ins_cost(INSN_COST);
  format %{ "strs  $src, $mem\t# float" %}

  ins_encode( aarch64_enc_strs(src, mem) );

  ins_pipe(pipe_class_memory);
%}

// TODO
// implement storeImmF0 and storeFImmPacked

// Store Double
instruct storeD(vRegD src, memory mem)
%{
  match(Set mem (StoreD mem src));
  predicate(!needs_releasing_store(n));

  ins_cost(INSN_COST);
  format %{ "strd  $src, $mem\t# double" %}

  ins_encode( aarch64_enc_strd(src, mem) );

  ins_pipe(pipe_class_memory);
%}

// Store Compressed Klass Pointer
instruct storeNKlass(iRegN src, memory mem)
%{
  predicate(!needs_releasing_store(n));
  match(Set mem (StoreNKlass mem src));

  ins_cost(INSN_COST);
  format %{ "strw  $src, $mem\t# compressed klass ptr" %}

  ins_encode(aarch64_enc_strw(src, mem));

  ins_pipe(istore_reg_mem);
%}

// TODO
// implement storeImmD0 and storeDImmPacked
7098 
// prefetch instructions
// Must be safe to execute with invalid address (cannot fault).
// prfm is a hint instruction: it never raises a fault, satisfying the
// requirement above.

instruct prefetchr( memory mem ) %{
  match(PrefetchRead mem);

  ins_cost(INSN_COST);
  format %{ "prfm $mem, PLDL1KEEP\t# Prefetch into level 1 cache read keep" %}

  ins_encode( aarch64_enc_prefetchr(mem) );

  ins_pipe(iload_prefetch);
%}

instruct prefetchw( memory mem ) %{
  match(PrefetchAllocation mem);

  ins_cost(INSN_COST);
  format %{ "prfm $mem, PSTL1KEEP\t# Prefetch into level 1 cache write keep" %}

  ins_encode( aarch64_enc_prefetchw(mem) );

  ins_pipe(iload_prefetch);
%}

// n.b. PrefetchWrite maps to the streaming (non-temporal) hint here,
// while PrefetchAllocation above uses the keep variant.
instruct prefetchnta( memory mem ) %{
  match(PrefetchWrite mem);

  ins_cost(INSN_COST);
  format %{ "prfm $mem, PSTL1STRM\t# Prefetch into level 1 cache write streaming" %}

  ins_encode( aarch64_enc_prefetchnta(mem) );

  ins_pipe(iload_prefetch);
%}
7134 
//  ---------------- volatile loads and stores ----------------
//
// These variants use load-acquire (ldar*) instructions and take an
// indirect (register-only) memory operand, since ldar supports no
// offset/index addressing. They pair with the needs_releasing_store /
// needs_acquiring_load predicates on the plain forms.

// Load Byte (8 bit signed)
instruct loadB_volatile(iRegINoSp dst, /* sync_memory*/indirect mem)
%{
  match(Set dst (LoadB mem));

  ins_cost(VOLATILE_REF_COST);
  format %{ "ldarsb  $dst, $mem\t# byte" %}

  ins_encode(aarch64_enc_ldarsb(dst, mem));

  ins_pipe(pipe_serial);
%}

// Load Byte (8 bit signed) into long
instruct loadB2L_volatile(iRegLNoSp dst, /* sync_memory*/indirect mem)
%{
  match(Set dst (ConvI2L (LoadB mem)));

  ins_cost(VOLATILE_REF_COST);
  format %{ "ldarsb  $dst, $mem\t# byte" %}

  ins_encode(aarch64_enc_ldarsb(dst, mem));

  ins_pipe(pipe_serial);
%}

// Load Byte (8 bit unsigned)
instruct loadUB_volatile(iRegINoSp dst, /* sync_memory*/indirect mem)
%{
  match(Set dst (LoadUB mem));

  ins_cost(VOLATILE_REF_COST);
  format %{ "ldarb  $dst, $mem\t# byte" %}

  ins_encode(aarch64_enc_ldarb(dst, mem));

  ins_pipe(pipe_serial);
%}

// Load Byte (8 bit unsigned) into long
instruct loadUB2L_volatile(iRegLNoSp dst, /* sync_memory*/indirect mem)
%{
  match(Set dst (ConvI2L (LoadUB mem)));

  ins_cost(VOLATILE_REF_COST);
  format %{ "ldarb  $dst, $mem\t# byte" %}

  ins_encode(aarch64_enc_ldarb(dst, mem));

  ins_pipe(pipe_serial);
%}

// Load Short (16 bit signed)
instruct loadS_volatile(iRegINoSp dst, /* sync_memory*/indirect mem)
%{
  match(Set dst (LoadS mem));

  ins_cost(VOLATILE_REF_COST);
  format %{ "ldarshw  $dst, $mem\t# short" %}

  ins_encode(aarch64_enc_ldarshw(dst, mem));

  ins_pipe(pipe_serial);
%}

instruct loadUS_volatile(iRegINoSp dst, /* sync_memory*/indirect mem)
%{
  match(Set dst (LoadUS mem));

  ins_cost(VOLATILE_REF_COST);
  format %{ "ldarhw  $dst, $mem\t# short" %}

  ins_encode(aarch64_enc_ldarhw(dst, mem));

  ins_pipe(pipe_serial);
%}

// Load Short/Char (16 bit unsigned) into long
instruct loadUS2L_volatile(iRegLNoSp dst, /* sync_memory*/indirect mem)
%{
  match(Set dst (ConvI2L (LoadUS mem)));

  ins_cost(VOLATILE_REF_COST);
  format %{ "ldarh  $dst, $mem\t# short" %}

  ins_encode(aarch64_enc_ldarh(dst, mem));

  ins_pipe(pipe_serial);
%}
7226 
// Load Short (16 bit signed) into long
//
// Fix: the format previously said "ldarh" (the zero-extending acquire
// load) although the encoding class is aarch64_enc_ldarsh, which emits
// the sign-extending ldarsh required for a signed short. The format is
// debug output only; generated code is unchanged.
instruct loadS2L_volatile(iRegLNoSp dst, /* sync_memory*/indirect mem)
%{
  match(Set dst (ConvI2L (LoadS mem)));

  ins_cost(VOLATILE_REF_COST);
  format %{ "ldarsh  $dst, $mem\t# short" %}

  ins_encode(aarch64_enc_ldarsh(dst, mem));

  ins_pipe(pipe_serial);
%}
7239 
// Load Integer (32 bit signed)
instruct loadI_volatile(iRegINoSp dst, /* sync_memory*/indirect mem)
%{
  match(Set dst (LoadI mem));

  ins_cost(VOLATILE_REF_COST);
  format %{ "ldarw  $dst, $mem\t# int" %}

  ins_encode(aarch64_enc_ldarw(dst, mem));

  ins_pipe(pipe_serial);
%}

// Load Integer (32 bit unsigned) into long
// The AndL-with-0xFFFFFFFF pattern is matched by a plain ldarw, which
// zero-extends into the 64-bit register, making the mask redundant.
instruct loadUI2L_volatile(iRegLNoSp dst, /* sync_memory*/indirect mem, immL_32bits mask)
%{
  match(Set dst (AndL (ConvI2L (LoadI mem)) mask));

  ins_cost(VOLATILE_REF_COST);
  format %{ "ldarw  $dst, $mem\t# int" %}

  ins_encode(aarch64_enc_ldarw(dst, mem));

  ins_pipe(pipe_serial);
%}

// Load Long (64 bit signed)
instruct loadL_volatile(iRegLNoSp dst, /* sync_memory*/indirect mem)
%{
  match(Set dst (LoadL mem));

  ins_cost(VOLATILE_REF_COST);
  format %{ "ldar  $dst, $mem\t# int" %}

  ins_encode(aarch64_enc_ldar(dst, mem));

  ins_pipe(pipe_serial);
%}

// Load Pointer
instruct loadP_volatile(iRegPNoSp dst, /* sync_memory*/indirect mem)
%{
  match(Set dst (LoadP mem));

  ins_cost(VOLATILE_REF_COST);
  format %{ "ldar  $dst, $mem\t# ptr" %}

  ins_encode(aarch64_enc_ldar(dst, mem));

  ins_pipe(pipe_serial);
%}

// Load Compressed Pointer
instruct loadN_volatile(iRegNNoSp dst, /* sync_memory*/indirect mem)
%{
  match(Set dst (LoadN mem));

  ins_cost(VOLATILE_REF_COST);
  format %{ "ldarw  $dst, $mem\t# compressed ptr" %}

  ins_encode(aarch64_enc_ldarw(dst, mem));

  ins_pipe(pipe_serial);
%}

// Load Float
// ldar has no FP-register form; the encoding class handles the
// acquire load into an FP register.
instruct loadF_volatile(vRegF dst, /* sync_memory*/indirect mem)
%{
  match(Set dst (LoadF mem));

  ins_cost(VOLATILE_REF_COST);
  format %{ "ldars  $dst, $mem\t# float" %}

  ins_encode( aarch64_enc_fldars(dst, mem) );

  ins_pipe(pipe_serial);
%}

// Load Double
instruct loadD_volatile(vRegD dst, /* sync_memory*/indirect mem)
%{
  match(Set dst (LoadD mem));

  ins_cost(VOLATILE_REF_COST);
  format %{ "ldard  $dst, $mem\t# double" %}

  ins_encode( aarch64_enc_fldard(dst, mem) );

  ins_pipe(pipe_serial);
%}
7330 
// Store Byte
// Volatile stores use store-release (stlr*) instructions; like ldar,
// stlr supports only register-indirect addressing, hence the indirect
// memory operand.
instruct storeB_volatile(iRegIorL2I src, /* sync_memory*/indirect mem)
%{
  match(Set mem (StoreB mem src));

  ins_cost(VOLATILE_REF_COST);
  format %{ "stlrb  $src, $mem\t# byte" %}

  ins_encode(aarch64_enc_stlrb(src, mem));

  ins_pipe(pipe_class_memory);
%}

// Store Char/Short
instruct storeC_volatile(iRegIorL2I src, /* sync_memory*/indirect mem)
%{
  match(Set mem (StoreC mem src));

  ins_cost(VOLATILE_REF_COST);
  format %{ "stlrh  $src, $mem\t# short" %}

  ins_encode(aarch64_enc_stlrh(src, mem));

  ins_pipe(pipe_class_memory);
%}

// Store Integer

instruct storeI_volatile(iRegIorL2I src, /* sync_memory*/indirect mem)
%{
  match(Set mem(StoreI mem src));

  ins_cost(VOLATILE_REF_COST);
  format %{ "stlrw  $src, $mem\t# int" %}

  ins_encode(aarch64_enc_stlrw(src, mem));

  ins_pipe(pipe_class_memory);
%}

// Store Long (64 bit signed)
instruct storeL_volatile(iRegL src, /* sync_memory*/indirect mem)
%{
  match(Set mem (StoreL mem src));

  ins_cost(VOLATILE_REF_COST);
  format %{ "stlr  $src, $mem\t# int" %}

  ins_encode(aarch64_enc_stlr(src, mem));

  ins_pipe(pipe_class_memory);
%}

// Store Pointer
instruct storeP_volatile(iRegP src, /* sync_memory*/indirect mem)
%{
  match(Set mem (StoreP mem src));

  ins_cost(VOLATILE_REF_COST);
  format %{ "stlr  $src, $mem\t# ptr" %}

  ins_encode(aarch64_enc_stlr(src, mem));

  ins_pipe(pipe_class_memory);
%}

// Store Compressed Pointer
instruct storeN_volatile(iRegN src, /* sync_memory*/indirect mem)
%{
  match(Set mem (StoreN mem src));

  ins_cost(VOLATILE_REF_COST);
  format %{ "stlrw  $src, $mem\t# compressed ptr" %}

  ins_encode(aarch64_enc_stlrw(src, mem));

  ins_pipe(pipe_class_memory);
%}

// Store Float
// stlr has no FP-register form; the encoding class handles the
// release store from an FP register.
instruct storeF_volatile(vRegF src, /* sync_memory*/indirect mem)
%{
  match(Set mem (StoreF mem src));

  ins_cost(VOLATILE_REF_COST);
  format %{ "stlrs  $src, $mem\t# float" %}

  ins_encode( aarch64_enc_fstlrs(src, mem) );

  ins_pipe(pipe_class_memory);
%}

// TODO
// implement storeImmF0 and storeFImmPacked

// Store Double
instruct storeD_volatile(vRegD src, /* sync_memory*/indirect mem)
%{
  match(Set mem (StoreD mem src));

  ins_cost(VOLATILE_REF_COST);
  format %{ "stlrd  $src, $mem\t# double" %}

  ins_encode( aarch64_enc_fstlrd(src, mem) );

  ins_pipe(pipe_class_memory);
%}

//  ---------------- end of volatile loads and stores ----------------
7440 
7441 // ============================================================================
7442 // BSWAP Instructions
7443 
7444 instruct bytes_reverse_int(iRegINoSp dst, iRegIorL2I src) %{
7445   match(Set dst (ReverseBytesI src));
7446 
7447   ins_cost(INSN_COST);
7448   format %{ "revw  $dst, $src" %}
7449 
7450   ins_encode %{
7451     __ revw(as_Register($dst$$reg), as_Register($src$$reg));
7452   %}
7453 
7454   ins_pipe(ialu_reg);
7455 %}
7456 
7457 instruct bytes_reverse_long(iRegLNoSp dst, iRegL src) %{
7458   match(Set dst (ReverseBytesL src));
7459 
7460   ins_cost(INSN_COST);
7461   format %{ "rev  $dst, $src" %}
7462 
7463   ins_encode %{
7464     __ rev(as_Register($dst$$reg), as_Register($src$$reg));
7465   %}
7466 
7467   ins_pipe(ialu_reg);
7468 %}
7469 
7470 instruct bytes_reverse_unsigned_short(iRegINoSp dst, iRegIorL2I src) %{
7471   match(Set dst (ReverseBytesUS src));
7472 
7473   ins_cost(INSN_COST);
7474   format %{ "rev16w  $dst, $src" %}
7475 
7476   ins_encode %{
7477     __ rev16w(as_Register($dst$$reg), as_Register($src$$reg));
7478   %}
7479 
7480   ins_pipe(ialu_reg);
7481 %}
7482 
7483 instruct bytes_reverse_short(iRegINoSp dst, iRegIorL2I src) %{
7484   match(Set dst (ReverseBytesS src));
7485 
7486   ins_cost(INSN_COST);
7487   format %{ "rev16w  $dst, $src\n\t"
7488             "sbfmw $dst, $dst, #0, #15" %}
7489 
7490   ins_encode %{
7491     __ rev16w(as_Register($dst$$reg), as_Register($src$$reg));
7492     __ sbfmw(as_Register($dst$$reg), as_Register($dst$$reg), 0U, 15U);
7493   %}
7494 
7495   ins_pipe(ialu_reg);
7496 %}
7497 
7498 // ============================================================================
7499 // Zero Count Instructions
7500 
7501 instruct countLeadingZerosI(iRegINoSp dst, iRegIorL2I src) %{
7502   match(Set dst (CountLeadingZerosI src));
7503 
7504   ins_cost(INSN_COST);
7505   format %{ "clzw  $dst, $src" %}
7506   ins_encode %{
7507     __ clzw(as_Register($dst$$reg), as_Register($src$$reg));
7508   %}
7509 
7510   ins_pipe(ialu_reg);
7511 %}
7512 
7513 instruct countLeadingZerosL(iRegINoSp dst, iRegL src) %{
7514   match(Set dst (CountLeadingZerosL src));
7515 
7516   ins_cost(INSN_COST);
7517   format %{ "clz   $dst, $src" %}
7518   ins_encode %{
7519     __ clz(as_Register($dst$$reg), as_Register($src$$reg));
7520   %}
7521 
7522   ins_pipe(ialu_reg);
7523 %}
7524 
7525 instruct countTrailingZerosI(iRegINoSp dst, iRegIorL2I src) %{
7526   match(Set dst (CountTrailingZerosI src));
7527 
7528   ins_cost(INSN_COST * 2);
7529   format %{ "rbitw  $dst, $src\n\t"
7530             "clzw   $dst, $dst" %}
7531   ins_encode %{
7532     __ rbitw(as_Register($dst$$reg), as_Register($src$$reg));
7533     __ clzw(as_Register($dst$$reg), as_Register($dst$$reg));
7534   %}
7535 
7536   ins_pipe(ialu_reg);
7537 %}
7538 
7539 instruct countTrailingZerosL(iRegINoSp dst, iRegL src) %{
7540   match(Set dst (CountTrailingZerosL src));
7541 
7542   ins_cost(INSN_COST * 2);
7543   format %{ "rbit   $dst, $src\n\t"
7544             "clz    $dst, $dst" %}
7545   ins_encode %{
7546     __ rbit(as_Register($dst$$reg), as_Register($src$$reg));
7547     __ clz(as_Register($dst$$reg), as_Register($dst$$reg));
7548   %}
7549 
7550   ins_pipe(ialu_reg);
7551 %}
7552 
//---------- Population Count Instructions -------------------------------------
//
// AArch64 has no general-purpose popcount; these route the value through
// an FP/SIMD register and use the vector cnt (per-byte popcount) plus
// addv (horizontal add) instructions.

instruct popCountI(iRegINoSp dst, iRegIorL2I src, vRegF tmp) %{
  predicate(UsePopCountInstruction);
  match(Set dst (PopCountI src));
  effect(TEMP tmp);
  ins_cost(INSN_COST * 13);

  format %{ "movw   $src, $src\n\t"
            "mov    $tmp, $src\t# vector (1D)\n\t"
            "cnt    $tmp, $tmp\t# vector (8B)\n\t"
            "addv   $tmp, $tmp\t# vector (8B)\n\t"
            "mov    $dst, $tmp\t# vector (1D)" %}
  ins_encode %{
    // NOTE(review): rewrites $src with its own low 32 bits (movw
    // zero-extends) so that the 64-bit vector move below counts only
    // the int's bits; the 32-bit value itself is unchanged.
    __ movw($src$$Register, $src$$Register); // ensure top 32 bits 0
    __ mov($tmp$$FloatRegister, __ T1D, 0, $src$$Register);
    __ cnt($tmp$$FloatRegister, __ T8B, $tmp$$FloatRegister);
    __ addv($tmp$$FloatRegister, __ T8B, $tmp$$FloatRegister);
    __ mov($dst$$Register, $tmp$$FloatRegister, __ T1D, 0);
  %}

  ins_pipe(pipe_class_default);
%}

instruct popCountI_mem(iRegINoSp dst, memory mem, vRegF tmp) %{
  predicate(UsePopCountInstruction);
  match(Set dst (PopCountI (LoadI mem)));
  effect(TEMP tmp);
  ins_cost(INSN_COST * 13);

  format %{ "ldrs   $tmp, $mem\n\t"
            "cnt    $tmp, $tmp\t# vector (8B)\n\t"
            "addv   $tmp, $tmp\t# vector (8B)\n\t"
            "mov    $dst, $tmp\t# vector (1D)" %}
  ins_encode %{
    // Load the int straight into an FP register (ldrs zeroes the upper
    // bits), avoiding the GPR round-trip of popCountI.
    FloatRegister tmp_reg = as_FloatRegister($tmp$$reg);
    loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldrs, tmp_reg, $mem->opcode(),
               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
    __ cnt($tmp$$FloatRegister, __ T8B, $tmp$$FloatRegister);
    __ addv($tmp$$FloatRegister, __ T8B, $tmp$$FloatRegister);
    __ mov($dst$$Register, $tmp$$FloatRegister, __ T1D, 0);
  %}

  ins_pipe(pipe_class_default);
%}

// Note: Long.bitCount(long) returns an int.
instruct popCountL(iRegINoSp dst, iRegL src, vRegD tmp) %{
  predicate(UsePopCountInstruction);
  match(Set dst (PopCountL src));
  effect(TEMP tmp);
  ins_cost(INSN_COST * 13);

  format %{ "mov    $tmp, $src\t# vector (1D)\n\t"
            "cnt    $tmp, $tmp\t# vector (8B)\n\t"
            "addv   $tmp, $tmp\t# vector (8B)\n\t"
            "mov    $dst, $tmp\t# vector (1D)" %}
  ins_encode %{
    __ mov($tmp$$FloatRegister, __ T1D, 0, $src$$Register);
    __ cnt($tmp$$FloatRegister, __ T8B, $tmp$$FloatRegister);
    __ addv($tmp$$FloatRegister, __ T8B, $tmp$$FloatRegister);
    __ mov($dst$$Register, $tmp$$FloatRegister, __ T1D, 0);
  %}

  ins_pipe(pipe_class_default);
%}

instruct popCountL_mem(iRegINoSp dst, memory mem, vRegD tmp) %{
  predicate(UsePopCountInstruction);
  match(Set dst (PopCountL (LoadL mem)));
  effect(TEMP tmp);
  ins_cost(INSN_COST * 13);

  format %{ "ldrd   $tmp, $mem\n\t"
            "cnt    $tmp, $tmp\t# vector (8B)\n\t"
            "addv   $tmp, $tmp\t# vector (8B)\n\t"
            "mov    $dst, $tmp\t# vector (1D)" %}
  ins_encode %{
    FloatRegister tmp_reg = as_FloatRegister($tmp$$reg);
    loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldrd, tmp_reg, $mem->opcode(),
               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
    __ cnt($tmp$$FloatRegister, __ T8B, $tmp$$FloatRegister);
    __ addv($tmp$$FloatRegister, __ T8B, $tmp$$FloatRegister);
    __ mov($dst$$Register, $tmp$$FloatRegister, __ T1D, 0);
  %}

  ins_pipe(pipe_class_default);
%}
7642 
7643 // ============================================================================
7644 // MemBar Instruction
7645 
7646 instruct load_fence() %{
7647   match(LoadFence);
7648   ins_cost(VOLATILE_REF_COST);
7649 
7650   format %{ "load_fence" %}
7651 
7652   ins_encode %{
7653     __ membar(Assembler::LoadLoad|Assembler::LoadStore);
7654   %}
7655   ins_pipe(pipe_serial);
7656 %}
7657 
7658 instruct unnecessary_membar_acquire() %{
7659   predicate(unnecessary_acquire(n));
7660   match(MemBarAcquire);
7661   ins_cost(0);
7662 
7663   format %{ "membar_acquire (elided)" %}
7664 
7665   ins_encode %{
7666     __ block_comment("membar_acquire (elided)");
7667   %}
7668 
7669   ins_pipe(pipe_class_empty);
7670 %}
7671 
7672 instruct membar_acquire() %{
7673   match(MemBarAcquire);
7674   ins_cost(VOLATILE_REF_COST);
7675 
7676   format %{ "membar_acquire" %}
7677 
7678   ins_encode %{
7679     __ block_comment("membar_acquire");
7680     __ membar(Assembler::LoadLoad|Assembler::LoadStore);
7681   %}
7682 
7683   ins_pipe(pipe_serial);
7684 %}
7685 
7686 
7687 instruct membar_acquire_lock() %{
7688   match(MemBarAcquireLock);
7689   ins_cost(VOLATILE_REF_COST);
7690 
7691   format %{ "membar_acquire_lock (elided)" %}
7692 
7693   ins_encode %{
7694     __ block_comment("membar_acquire_lock (elided)");
7695   %}
7696 
7697   ins_pipe(pipe_serial);
7698 %}
7699 
7700 instruct store_fence() %{
7701   match(StoreFence);
7702   ins_cost(VOLATILE_REF_COST);
7703 
7704   format %{ "store_fence" %}
7705 
7706   ins_encode %{
7707     __ membar(Assembler::LoadStore|Assembler::StoreStore);
7708   %}
7709   ins_pipe(pipe_serial);
7710 %}
7711 
7712 instruct unnecessary_membar_release() %{
7713   predicate(unnecessary_release(n));
7714   match(MemBarRelease);
7715   ins_cost(0);
7716 
7717   format %{ "membar_release (elided)" %}
7718 
7719   ins_encode %{
7720     __ block_comment("membar_release (elided)");
7721   %}
7722   ins_pipe(pipe_serial);
7723 %}
7724 
7725 instruct membar_release() %{
7726   match(MemBarRelease);
7727   ins_cost(VOLATILE_REF_COST);
7728 
7729   format %{ "membar_release" %}
7730 
7731   ins_encode %{
7732     __ block_comment("membar_release");
7733     __ membar(Assembler::LoadStore|Assembler::StoreStore);
7734   %}
7735   ins_pipe(pipe_serial);
7736 %}
7737 
7738 instruct membar_storestore() %{
7739   match(MemBarStoreStore);
7740   ins_cost(VOLATILE_REF_COST);
7741 
7742   format %{ "MEMBAR-store-store" %}
7743 
7744   ins_encode %{
7745     __ membar(Assembler::StoreStore);
7746   %}
7747   ins_pipe(pipe_serial);
7748 %}
7749 
7750 instruct membar_release_lock() %{
7751   match(MemBarReleaseLock);
7752   ins_cost(VOLATILE_REF_COST);
7753 
7754   format %{ "membar_release_lock (elided)" %}
7755 
7756   ins_encode %{
7757     __ block_comment("membar_release_lock (elided)");
7758   %}
7759 
7760   ins_pipe(pipe_serial);
7761 %}
7762 
7763 instruct unnecessary_membar_volatile() %{
7764   predicate(unnecessary_volatile(n));
7765   match(MemBarVolatile);
7766   ins_cost(0);
7767 
7768   format %{ "membar_volatile (elided)" %}
7769 
7770   ins_encode %{
7771     __ block_comment("membar_volatile (elided)");
7772   %}
7773 
7774   ins_pipe(pipe_serial);
7775 %}
7776 
7777 instruct membar_volatile() %{
7778   match(MemBarVolatile);
7779   ins_cost(VOLATILE_REF_COST*100);
7780 
7781   format %{ "membar_volatile" %}
7782 
7783   ins_encode %{
7784     __ block_comment("membar_volatile");
7785     __ membar(Assembler::StoreLoad);
7786     %}
7787 
7788   ins_pipe(pipe_serial);
7789 %}
7790 
7791 // ============================================================================
7792 // Cast/Convert Instructions
7793 
7794 instruct castX2P(iRegPNoSp dst, iRegL src) %{
7795   match(Set dst (CastX2P src));
7796 
7797   ins_cost(INSN_COST);
7798   format %{ "mov $dst, $src\t# long -> ptr" %}
7799 
7800   ins_encode %{
7801     if ($dst$$reg != $src$$reg) {
7802       __ mov(as_Register($dst$$reg), as_Register($src$$reg));
7803     }
7804   %}
7805 
7806   ins_pipe(ialu_reg);
7807 %}
7808 
7809 instruct castP2X(iRegLNoSp dst, iRegP src) %{
7810   match(Set dst (CastP2X src));
7811 
7812   ins_cost(INSN_COST);
7813   format %{ "mov $dst, $src\t# ptr -> long" %}
7814 
7815   ins_encode %{
7816     if ($dst$$reg != $src$$reg) {
7817       __ mov(as_Register($dst$$reg), as_Register($src$$reg));
7818     }
7819   %}
7820 
7821   ins_pipe(ialu_reg);
7822 %}
7823 
7824 // Convert oop into int for vectors alignment masking
7825 instruct convP2I(iRegINoSp dst, iRegP src) %{
7826   match(Set dst (ConvL2I (CastP2X src)));
7827 
7828   ins_cost(INSN_COST);
7829   format %{ "movw $dst, $src\t# ptr -> int" %}
7830   ins_encode %{
7831     __ movw($dst$$Register, $src$$Register);
7832   %}
7833 
7834   ins_pipe(ialu_reg);
7835 %}
7836 
7837 // Convert compressed oop into int for vectors alignment masking
7838 // in case of 32bit oops (heap < 4Gb).
7839 instruct convN2I(iRegINoSp dst, iRegN src)
7840 %{
7841   predicate(Universe::narrow_oop_shift() == 0);
7842   match(Set dst (ConvL2I (CastP2X (DecodeN src))));
7843 
7844   ins_cost(INSN_COST);
7845   format %{ "mov dst, $src\t# compressed ptr -> int" %}
7846   ins_encode %{
7847     __ movw($dst$$Register, $src$$Register);
7848   %}
7849 
7850   ins_pipe(ialu_reg);
7851 %}
7852 
7853 
7854 // Convert oop pointer into compressed form
instruct encodeHeapOop(iRegNNoSp dst, iRegP src, rFlagsReg cr) %{
  // General case: the oop is not statically known to be non-NULL.
  // The encode helper clobbers the flags here (hence KILL cr).
  predicate(n->bottom_type()->make_ptr()->ptr() != TypePtr::NotNull);
  match(Set dst (EncodeP src));
  effect(KILL cr);
  ins_cost(INSN_COST * 3);
  format %{ "encode_heap_oop $dst, $src" %}
  ins_encode %{
    Register s = $src$$Register;
    Register d = $dst$$Register;
    __ encode_heap_oop(d, s);
  %}
  ins_pipe(ialu_reg);
%}

// Variant for an oop statically known to be non-NULL; uses the
// cheaper not-null encode helper.
instruct encodeHeapOop_not_null(iRegNNoSp dst, iRegP src, rFlagsReg cr) %{
  predicate(n->bottom_type()->make_ptr()->ptr() == TypePtr::NotNull);
  match(Set dst (EncodeP src));
  ins_cost(INSN_COST * 3);
  format %{ "encode_heap_oop_not_null $dst, $src" %}
  ins_encode %{
    __ encode_heap_oop_not_null($dst$$Register, $src$$Register);
  %}
  ins_pipe(ialu_reg);
%}

// Convert a compressed oop back into a full pointer. General case:
// the value may be NULL and is not a constant.
instruct decodeHeapOop(iRegPNoSp dst, iRegN src, rFlagsReg cr) %{
  predicate(n->bottom_type()->is_ptr()->ptr() != TypePtr::NotNull &&
            n->bottom_type()->is_ptr()->ptr() != TypePtr::Constant);
  match(Set dst (DecodeN src));
  ins_cost(INSN_COST * 3);
  format %{ "decode_heap_oop $dst, $src" %}
  ins_encode %{
    Register s = $src$$Register;
    Register d = $dst$$Register;
    __ decode_heap_oop(d, s);
  %}
  ins_pipe(ialu_reg);
%}

// Variant for a compressed oop statically known to be non-NULL (or a
// constant); uses the cheaper not-null decode helper.
instruct decodeHeapOop_not_null(iRegPNoSp dst, iRegN src, rFlagsReg cr) %{
  predicate(n->bottom_type()->is_ptr()->ptr() == TypePtr::NotNull ||
            n->bottom_type()->is_ptr()->ptr() == TypePtr::Constant);
  match(Set dst (DecodeN src));
  ins_cost(INSN_COST * 3);
  format %{ "decode_heap_oop_not_null $dst, $src" %}
  ins_encode %{
    Register s = $src$$Register;
    Register d = $dst$$Register;
    __ decode_heap_oop_not_null(d, s);
  %}
  ins_pipe(ialu_reg);
%}
7907 
7908 // n.b. AArch64 implementations of encode_klass_not_null and
7909 // decode_klass_not_null do not modify the flags register so, unlike
7910 // Intel, we don't kill CR as a side effect here
7911 
// Compress a klass pointer. Per the note above, the AArch64 helper
// does not touch the flags, so no KILL cr effect is declared.
instruct encodeKlass_not_null(iRegNNoSp dst, iRegP src) %{
  match(Set dst (EncodePKlass src));

  ins_cost(INSN_COST * 3);
  format %{ "encode_klass_not_null $dst,$src" %}

  ins_encode %{
    Register src_reg = as_Register($src$$reg);
    Register dst_reg = as_Register($dst$$reg);
    __ encode_klass_not_null(dst_reg, src_reg);
  %}

   ins_pipe(ialu_reg);
%}

// Expand a narrow klass pointer back to a full pointer. Flags are
// likewise preserved (see note above).
instruct decodeKlass_not_null(iRegPNoSp dst, iRegN src) %{
  match(Set dst (DecodeNKlass src));

  ins_cost(INSN_COST * 3);
  format %{ "decode_klass_not_null $dst,$src" %}

  ins_encode %{
    Register src_reg = as_Register($src$$reg);
    Register dst_reg = as_Register($dst$$reg);
    // The one-register form is used for the in-place case; the
    // two-register form when src and dst differ.
    if (dst_reg != src_reg) {
      __ decode_klass_not_null(dst_reg, src_reg);
    } else {
      __ decode_klass_not_null(dst_reg);
    }
  %}

   ins_pipe(ialu_reg);
%}
7945 
// CheckCastPP only narrows the compile-time type; it emits no code
// (size 0, empty encoding).
instruct checkCastPP(iRegPNoSp dst)
%{
  match(Set dst (CheckCastPP dst));

  size(0);
  format %{ "# checkcastPP of $dst" %}
  ins_encode(/* empty encoding */);
  ins_pipe(pipe_class_empty);
%}

// CastPP is likewise a type-only node: no code is generated.
instruct castPP(iRegPNoSp dst)
%{
  match(Set dst (CastPP dst));

  size(0);
  format %{ "# castPP of $dst" %}
  ins_encode(/* empty encoding */);
  ins_pipe(pipe_class_empty);
%}

// CastII is a type-only node as well: zero size and zero cost.
instruct castII(iRegI dst)
%{
  match(Set dst (CastII dst));

  size(0);
  format %{ "# castII of $dst" %}
  ins_encode(/* empty encoding */);
  ins_cost(0);
  ins_pipe(pipe_class_empty);
%}
7976 
7977 // ============================================================================
7978 // Atomic operation instructions
7979 //
7980 // Intel and SPARC both implement Ideal Node LoadPLocked and
7981 // Store{PIL}Conditional instructions using a normal load for the
7982 // LoadPLocked and a CAS for the Store{PIL}Conditional.
7983 //
7984 // The ideal code appears only to use LoadPLocked/StorePLocked as a
7985 // pair to lock object allocations from Eden space when not using
7986 // TLABs.
7987 //
7988 // There does not appear to be a Load{IL}Locked Ideal Node and the
7989 // Ideal code appears to use Store{IL}Conditional as an alias for CAS
7990 // and to use StoreIConditional only for 32-bit and StoreLConditional
7991 // only for 64-bit.
7992 //
7993 // We implement LoadPLocked and StorePLocked instructions using,
7994 // respectively the AArch64 hw load-exclusive and store-conditional
7995 // instructions. Whereas we must implement each of
7996 // Store{IL}Conditional using a CAS which employs a pair of
7997 // instructions comprising a load-exclusive followed by a
7998 // store-conditional.
7999 
8000 
8001 // Locked-load (linked load) of the current heap-top
8002 // used when updating the eden heap top
8003 // implemented using ldaxr on AArch64
8004 
instruct loadPLocked(iRegPNoSp dst, indirect mem)
%{
  match(Set dst (LoadPLocked mem));

  ins_cost(VOLATILE_REF_COST);

  // ldaxr = load-exclusive with acquire; pairs with the stlxr in
  // storePConditional below (see block comment above).
  format %{ "ldaxr $dst, $mem\t# ptr linked acquire" %}

  ins_encode(aarch64_enc_ldaxr(dst, mem));

  ins_pipe(pipe_serial);
%}
8017 
8018 // Conditional-store of the updated heap-top.
8019 // Used during allocation of the shared heap.
8020 // Sets flag (EQ) on success.
8021 // implemented using stlxr on AArch64.
8022 
instruct storePConditional(memory heap_top_ptr, iRegP oldval, iRegP newval, rFlagsReg cr) 
%{
  match(Set cr (StorePConditional heap_top_ptr (Binary oldval newval)));

  ins_cost(VOLATILE_REF_COST);

 // TODO
 // do we need to do a store-conditional release or can we just use a
 // plain store-conditional?

  // Note: oldval is implicit in the exclusive-monitor pairing with
  // loadPLocked; only newval is stored. The cmpw sets EQ on success.
  format %{
    "stlxr rscratch1, $newval, $heap_top_ptr\t# ptr cond release"
    "cmpw rscratch1, zr\t# EQ on successful write"
  %}

  ins_encode(aarch64_enc_stlxr(newval, heap_top_ptr));

  ins_pipe(pipe_serial);
%}
8042 
8043 
8044 // storeLConditional is used by PhaseMacroExpand::expand_lock_node
8045 // when attempting to rebias a lock towards the current thread.  We
8046 // must use the acquire form of cmpxchg in order to guarantee acquire
8047 // semantics in this case.
instruct storeLConditional(indirect mem, iRegLNoSp oldval, iRegLNoSp newval, rFlagsReg cr) 
%{
  match(Set cr (StoreLConditional mem (Binary oldval newval)));

  ins_cost(VOLATILE_REF_COST);

  // Result is communicated via the flags: EQ iff the CAS succeeded.
  format %{
    "cmpxchg rscratch1, $mem, $oldval, $newval, $mem\t# if $mem == $oldval then $mem <-- $newval"
    "cmpw rscratch1, zr\t# EQ on successful write"
  %}

  // Acquire form required; see the comment above this instruct.
  ins_encode(aarch64_enc_cmpxchg_acq(mem, oldval, newval));

  ins_pipe(pipe_slow);
%}
8063 
8064 // storeIConditional also has acquire semantics, for no better reason
8065 // than matching storeLConditional.  At the time of writing this
8066 // comment storeIConditional was not used anywhere by AArch64.
instruct storeIConditional(indirect mem, iRegINoSp oldval, iRegINoSp newval, rFlagsReg cr) 
%{
  match(Set cr (StoreIConditional mem (Binary oldval newval)));

  ins_cost(VOLATILE_REF_COST);

  // 32-bit flavour of storeLConditional; EQ iff the CAS succeeded.
  format %{
    "cmpxchgw rscratch1, $mem, $oldval, $newval, $mem\t# if $mem == $oldval then $mem <-- $newval"
    "cmpw rscratch1, zr\t# EQ on successful write"
  %}

  ins_encode(aarch64_enc_cmpxchgw_acq(mem, oldval, newval));

  ins_pipe(pipe_slow);
%}
8082 
8083 // XXX No flag versions for CompareAndSwap{I,L,P,N} because matcher
8084 // can't match them
8085 
8086 // standard CompareAndSwapX when we are using barriers
8087 // these have higher priority than the rules selected by a predicate
8088 
// CAS of a 32-bit int; res receives 1 on success, 0 on failure
// (materialized from the flags by the cset encoding).
instruct compareAndSwapI(iRegINoSp res, indirect mem, iRegINoSp oldval, iRegINoSp newval, rFlagsReg cr) %{

  match(Set res (CompareAndSwapI mem (Binary oldval newval)));
  ins_cost(2 * VOLATILE_REF_COST);

  effect(KILL cr);

 format %{
    "cmpxchgw $mem, $oldval, $newval\t# (int) if $mem == $oldval then $mem <-- $newval"
    "cset $res, EQ\t# $res <-- (EQ ? 1 : 0)"
 %}

 ins_encode(aarch64_enc_cmpxchgw(mem, oldval, newval),
            aarch64_enc_cset_eq(res));

  ins_pipe(pipe_slow);
%}
8106 
// CAS of a 64-bit long; res receives 1 on success, 0 on failure.
instruct compareAndSwapL(iRegINoSp res, indirect mem, iRegLNoSp oldval, iRegLNoSp newval, rFlagsReg cr) %{

  match(Set res (CompareAndSwapL mem (Binary oldval newval)));
  ins_cost(2 * VOLATILE_REF_COST);

  effect(KILL cr);

 format %{
    "cmpxchg $mem, $oldval, $newval\t# (long) if $mem == $oldval then $mem <-- $newval"
    "cset $res, EQ\t# $res <-- (EQ ? 1 : 0)"
 %}

 ins_encode(aarch64_enc_cmpxchg(mem, oldval, newval),
            aarch64_enc_cset_eq(res));

  ins_pipe(pipe_slow);
%}
8124 
// CAS of an oop pointer. Selected when no Shenandoah CAS barrier is
// needed (barrier off, or the expected value is statically NULL).
instruct compareAndSwapP(iRegINoSp res, indirect mem, iRegP oldval, iRegP newval, rFlagsReg cr) %{

  predicate(!UseShenandoahGC || !ShenandoahCASBarrier || n->in(3)->in(1)->bottom_type() == TypePtr::NULL_PTR);
  match(Set res (CompareAndSwapP mem (Binary oldval newval)));
  ins_cost(2 * VOLATILE_REF_COST);

  effect(KILL cr);

 format %{
    "cmpxchg $mem, $oldval, $newval\t# (ptr) if $mem == $oldval then $mem <-- $newval"
    "cset $res, EQ\t# $res <-- (EQ ? 1 : 0)"
 %}

 ins_encode(aarch64_enc_cmpxchg(mem, oldval, newval),
            aarch64_enc_cset_eq(res));

  ins_pipe(pipe_slow);
%}

// Shenandoah variant: routes through the GC's CAS barrier, which
// needs an extra temp register. The result flag handling is done
// inside the encoding.
instruct compareAndSwapP_shenandoah(iRegINoSp res, indirect mem, iRegP oldval, iRegP newval, iRegPNoSp tmp, rFlagsReg cr) %{

  predicate(UseShenandoahGC && ShenandoahCASBarrier && n->in(3)->in(1)->bottom_type() != TypePtr::NULL_PTR);
  match(Set res (CompareAndSwapP mem (Binary oldval newval)));
  ins_cost(2 * VOLATILE_REF_COST);

  effect(TEMP tmp, KILL cr);

  format %{
    "cmpxchg_shenandoah_oop $mem, $oldval, $newval\t# (ptr) if $mem == $oldval then $mem <-- $newval with temp $tmp"
  %}

  ins_encode(aarch64_enc_cmpxchg_oop_shenandoah(mem, oldval, newval, tmp, res));

  ins_pipe(pipe_slow);
%}
8160 
// CAS of a narrow (compressed) oop. Selected when no Shenandoah CAS
// barrier is needed.
instruct compareAndSwapN(iRegINoSp res, indirect mem, iRegNNoSp oldval, iRegNNoSp newval, rFlagsReg cr) %{

  predicate(!UseShenandoahGC || !ShenandoahCASBarrier || n->in(3)->in(1)->bottom_type() == TypeNarrowOop::NULL_PTR);
  match(Set res (CompareAndSwapN mem (Binary oldval newval)));
  ins_cost(2 * VOLATILE_REF_COST);

  effect(KILL cr);

 format %{
    "cmpxchgw $mem, $oldval, $newval\t# (narrow oop) if $mem == $oldval then $mem <-- $newval"
    "cset $res, EQ\t# $res <-- (EQ ? 1 : 0)"
 %}

 ins_encode(aarch64_enc_cmpxchgw(mem, oldval, newval),
            aarch64_enc_cset_eq(res));

  ins_pipe(pipe_slow);
%}

// Shenandoah variant for narrow oops: calls the barrier-set
// assembler's CAS directly. oldval is copied into tmp first because
// the barrier code mutates its expected-value register.
instruct compareAndSwapN_shenandoah(iRegINoSp res, indirect mem, iRegN oldval, iRegN newval, iRegNNoSp tmp, rFlagsReg cr) %{

  predicate(UseShenandoahGC && ShenandoahCASBarrier && n->in(3)->in(1)->bottom_type() != TypeNarrowOop::NULL_PTR);
  match(Set res (CompareAndSwapN mem (Binary oldval newval)));
  ins_cost(2 * VOLATILE_REF_COST);

  effect(TEMP tmp, KILL cr);

  format %{
    "cmpxchgw_shenandoah_narrow_oop $mem, $oldval, $newval\t# (ptr) if $mem == $oldval then $mem <-- $newval with temp $tmp"
  %}

  ins_encode %{
    Register tmp = $tmp$$Register;
    __ mov(tmp, $oldval$$Register); // Must not clobber oldval.
    // acquire == false, release == true: plain CAS with release
    // semantics; is_cae == false means return a success flag in res.
    ShenandoahBarrierSetAssembler::bsasm()->cmpxchg_oop(&_masm, $mem$$Register, tmp, $newval$$Register, /*acquire*/ false, /*release*/ true, /*weak*/ false, /*is_cae*/ false, $res$$Register);
  %}

  ins_pipe(pipe_slow);
%}
8200 
8201 // alternative CompareAndSwapX when we are eliding barriers
8202 
// Acquiring form of compareAndSwapI: selected when the matcher has
// proved the CAS needs acquire semantics (see
// needs_acquiring_load_exclusive). Lower cost so it is preferred over
// the plain rule when the predicate holds.
instruct compareAndSwapIAcq(iRegINoSp res, indirect mem, iRegINoSp oldval, iRegINoSp newval, rFlagsReg cr) %{

  predicate(needs_acquiring_load_exclusive(n));
  match(Set res (CompareAndSwapI mem (Binary oldval newval)));
  ins_cost(VOLATILE_REF_COST);

  effect(KILL cr);

 format %{
    "cmpxchgw_acq $mem, $oldval, $newval\t# (int) if $mem == $oldval then $mem <-- $newval"
    "cset $res, EQ\t# $res <-- (EQ ? 1 : 0)"
 %}

 ins_encode(aarch64_enc_cmpxchgw_acq(mem, oldval, newval),
            aarch64_enc_cset_eq(res));

  ins_pipe(pipe_slow);
%}

// Acquiring form of compareAndSwapL.
instruct compareAndSwapLAcq(iRegINoSp res, indirect mem, iRegLNoSp oldval, iRegLNoSp newval, rFlagsReg cr) %{

  predicate(needs_acquiring_load_exclusive(n));
  match(Set res (CompareAndSwapL mem (Binary oldval newval)));
  ins_cost(VOLATILE_REF_COST);

  effect(KILL cr);

 format %{
    "cmpxchg_acq $mem, $oldval, $newval\t# (long) if $mem == $oldval then $mem <-- $newval"
    "cset $res, EQ\t# $res <-- (EQ ? 1 : 0)"
 %}

 ins_encode(aarch64_enc_cmpxchg_acq(mem, oldval, newval),
            aarch64_enc_cset_eq(res));

  ins_pipe(pipe_slow);
%}
8240 
// Acquiring form of compareAndSwapP (no Shenandoah CAS barrier).
instruct compareAndSwapPAcq(iRegINoSp res, indirect mem, iRegP oldval, iRegP newval, rFlagsReg cr) %{

  predicate(needs_acquiring_load_exclusive(n) && (!UseShenandoahGC || !ShenandoahCASBarrier || n->in(3)->in(1)->bottom_type() == TypePtr::NULL_PTR));
  match(Set res (CompareAndSwapP mem (Binary oldval newval)));
  ins_cost(VOLATILE_REF_COST);

  effect(KILL cr);

 format %{
    "cmpxchg_acq $mem, $oldval, $newval\t# (ptr) if $mem == $oldval then $mem <-- $newval"
    "cset $res, EQ\t# $res <-- (EQ ? 1 : 0)"
 %}

 ins_encode(aarch64_enc_cmpxchg_acq(mem, oldval, newval),
            aarch64_enc_cset_eq(res));

  ins_pipe(pipe_slow);
%}

// Acquiring Shenandoah variant: acquire-form CAS routed through the
// GC barrier encoding, with an extra temp register.
instruct compareAndSwapPAcq_shenandoah(iRegINoSp res, indirect mem, iRegP oldval, iRegP newval, iRegPNoSp tmp, rFlagsReg cr) %{

  predicate(needs_acquiring_load_exclusive(n) && UseShenandoahGC && ShenandoahCASBarrier && n->in(3)->in(1)->bottom_type() != TypePtr::NULL_PTR);
  match(Set res (CompareAndSwapP mem (Binary oldval newval)));
  ins_cost(VOLATILE_REF_COST);

  effect(TEMP tmp, KILL cr);

  format %{
    "cmpxchg_acq_shenandoah_oop $mem, $oldval, $newval\t# (ptr) if $mem == $oldval then $mem <-- $newval with temp $tmp"
  %}

  ins_encode(aarch64_enc_cmpxchg_acq_oop_shenandoah(mem, oldval, newval, tmp, res));

  ins_pipe(pipe_slow);
%}
8276 
// Acquiring form of compareAndSwapN (no Shenandoah CAS barrier).
instruct compareAndSwapNAcq(iRegINoSp res, indirect mem, iRegNNoSp oldval, iRegNNoSp newval, rFlagsReg cr) %{

  predicate(needs_acquiring_load_exclusive(n) && (!UseShenandoahGC || !ShenandoahCASBarrier|| n->in(3)->in(1)->bottom_type() == TypeNarrowOop::NULL_PTR));
  match(Set res (CompareAndSwapN mem (Binary oldval newval)));
  ins_cost(VOLATILE_REF_COST);

  effect(KILL cr);

 format %{
    "cmpxchgw_acq $mem, $oldval, $newval\t# (narrow oop) if $mem == $oldval then $mem <-- $newval"
    "cset $res, EQ\t# $res <-- (EQ ? 1 : 0)"
 %}

 ins_encode(aarch64_enc_cmpxchgw_acq(mem, oldval, newval),
            aarch64_enc_cset_eq(res));

  ins_pipe(pipe_slow);
%}

// Acquiring Shenandoah variant for narrow oops: same as the plain
// Shenandoah rule but with acquire == true on the barrier CAS.
instruct compareAndSwapNAcq_shenandoah(iRegINoSp res, indirect mem, iRegN oldval, iRegN newval, iRegNNoSp tmp, rFlagsReg cr) %{

  predicate(needs_acquiring_load_exclusive(n) && UseShenandoahGC && ShenandoahCASBarrier && n->in(3)->in(1)->bottom_type() != TypeNarrowOop::NULL_PTR);
  match(Set res (CompareAndSwapN mem (Binary oldval newval)));
  ins_cost(VOLATILE_REF_COST);

  effect(TEMP tmp, KILL cr);

 format %{
    "cmpxchgw_acq_shenandoah_narrow_oop $mem, $oldval, $newval\t# (ptr) if $mem == $oldval then $mem <-- $newval with temp $tmp"
 %}

  ins_encode %{
    Register tmp = $tmp$$Register;
    __ mov(tmp, $oldval$$Register); // Must not clobber oldval.
    ShenandoahBarrierSetAssembler::bsasm()->cmpxchg_oop(&_masm, $mem$$Register, tmp, $newval$$Register, /*acquire*/ true, /*release*/ true, /*weak*/ false, /*is_cae*/ false, $res$$Register);
  %}

  ins_pipe(pipe_slow);
%}
8316 
// Atomic exchange (GetAndSet) rules. prev receives the previous
// memory contents; newv is stored. The w-suffixed helpers operate on
// 32-bit values (int, narrow oop), the plain ones on 64-bit.

// Atomic 32-bit exchange.
instruct get_and_setI(indirect mem, iRegI newv, iRegINoSp prev) %{
  match(Set prev (GetAndSetI mem newv));
  ins_cost(2 * VOLATILE_REF_COST);
  format %{ "atomic_xchgw  $prev, $newv, [$mem]" %}
  ins_encode %{
    __ atomic_xchgw($prev$$Register, $newv$$Register, as_Register($mem$$base));
  %}
  ins_pipe(pipe_serial);
%}

// Atomic 64-bit exchange.
instruct get_and_setL(indirect mem, iRegL newv, iRegLNoSp prev) %{
  match(Set prev (GetAndSetL mem newv));
  ins_cost(2 * VOLATILE_REF_COST);
  format %{ "atomic_xchg  $prev, $newv, [$mem]" %}
  ins_encode %{
    __ atomic_xchg($prev$$Register, $newv$$Register, as_Register($mem$$base));
  %}
  ins_pipe(pipe_serial);
%}

// Atomic exchange of a narrow oop (32-bit).
instruct get_and_setN(indirect mem, iRegN newv, iRegINoSp prev) %{
  match(Set prev (GetAndSetN mem newv));
  ins_cost(2 * VOLATILE_REF_COST);
  format %{ "atomic_xchgw $prev, $newv, [$mem]" %}
  ins_encode %{
    __ atomic_xchgw($prev$$Register, $newv$$Register, as_Register($mem$$base));
  %}
  ins_pipe(pipe_serial);
%}

// Atomic exchange of a full pointer (64-bit).
instruct get_and_setP(indirect mem, iRegP newv, iRegPNoSp prev) %{
  match(Set prev (GetAndSetP mem newv));
  ins_cost(2 * VOLATILE_REF_COST);
  format %{ "atomic_xchg  $prev, $newv, [$mem]" %}
  ins_encode %{
    __ atomic_xchg($prev$$Register, $newv$$Register, as_Register($mem$$base));
  %}
  ins_pipe(pipe_serial);
%}
8356 
// Acquiring GetAndSet variants, selected when the node needs acquire
// semantics; they use the *al (acquire+release) exchange helpers and
// carry a lower cost so they win over the plain rules.

instruct get_and_setIAcq(indirect mem, iRegI newv, iRegINoSp prev) %{
  predicate(needs_acquiring_load_exclusive(n));
  match(Set prev (GetAndSetI mem newv));
  ins_cost(VOLATILE_REF_COST);
  format %{ "atomic_xchgw_acq  $prev, $newv, [$mem]" %}
  ins_encode %{
    __ atomic_xchgalw($prev$$Register, $newv$$Register, as_Register($mem$$base));
  %}
  ins_pipe(pipe_serial);
%}

instruct get_and_setLAcq(indirect mem, iRegL newv, iRegLNoSp prev) %{
  predicate(needs_acquiring_load_exclusive(n));
  match(Set prev (GetAndSetL mem newv));
  ins_cost(VOLATILE_REF_COST);
  format %{ "atomic_xchg_acq  $prev, $newv, [$mem]" %}
  ins_encode %{
    __ atomic_xchgal($prev$$Register, $newv$$Register, as_Register($mem$$base));
  %}
  ins_pipe(pipe_serial);
%}

instruct get_and_setNAcq(indirect mem, iRegN newv, iRegINoSp prev) %{
  predicate(needs_acquiring_load_exclusive(n));
  match(Set prev (GetAndSetN mem newv));
  ins_cost(VOLATILE_REF_COST);
  format %{ "atomic_xchgw_acq $prev, $newv, [$mem]" %}
  ins_encode %{
    __ atomic_xchgalw($prev$$Register, $newv$$Register, as_Register($mem$$base));
  %}
  ins_pipe(pipe_serial);
%}

instruct get_and_setPAcq(indirect mem, iRegP newv, iRegPNoSp prev) %{
  predicate(needs_acquiring_load_exclusive(n));
  match(Set prev (GetAndSetP mem newv));
  ins_cost(VOLATILE_REF_COST);
  format %{ "atomic_xchg_acq  $prev, $newv, [$mem]" %}
  ins_encode %{
    __ atomic_xchgal($prev$$Register, $newv$$Register, as_Register($mem$$base));
  %}
  ins_pipe(pipe_serial);
%}
8400 
8401 
// GetAndAdd rules. Four axes: long vs int width, register vs
// immediate increment, and whether the old value is used. When the
// result is unused (result_not_used predicate) noreg is passed so no
// destination is written; those rules cost 1 less so they are
// preferred when applicable.

// long += register, old value returned in newval.
instruct get_and_addL(indirect mem, iRegLNoSp newval, iRegL incr) %{
  match(Set newval (GetAndAddL mem incr));
  ins_cost(2 * VOLATILE_REF_COST + 1);
  format %{ "get_and_addL $newval, [$mem], $incr" %}
  ins_encode %{
    __ atomic_add($newval$$Register, $incr$$Register, as_Register($mem$$base));
  %}
  ins_pipe(pipe_serial);
%}

// long += register, result discarded.
instruct get_and_addL_no_res(indirect mem, Universe dummy, iRegL incr) %{
  predicate(n->as_LoadStore()->result_not_used());
  match(Set dummy (GetAndAddL mem incr));
  ins_cost(2 * VOLATILE_REF_COST);
  format %{ "get_and_addL [$mem], $incr" %}
  ins_encode %{
    __ atomic_add(noreg, $incr$$Register, as_Register($mem$$base));
  %}
  ins_pipe(pipe_serial);
%}

// long += immediate, old value returned in newval.
instruct get_and_addLi(indirect mem, iRegLNoSp newval, immLAddSub incr) %{
  match(Set newval (GetAndAddL mem incr));
  ins_cost(2 * VOLATILE_REF_COST + 1);
  format %{ "get_and_addL $newval, [$mem], $incr" %}
  ins_encode %{
    __ atomic_add($newval$$Register, $incr$$constant, as_Register($mem$$base));
  %}
  ins_pipe(pipe_serial);
%}

// long += immediate, result discarded.
instruct get_and_addLi_no_res(indirect mem, Universe dummy, immLAddSub incr) %{
  predicate(n->as_LoadStore()->result_not_used());
  match(Set dummy (GetAndAddL mem incr));
  ins_cost(2 * VOLATILE_REF_COST);
  format %{ "get_and_addL [$mem], $incr" %}
  ins_encode %{
    __ atomic_add(noreg, $incr$$constant, as_Register($mem$$base));
  %}
  ins_pipe(pipe_serial);
%}

// int += register, old value returned in newval.
instruct get_and_addI(indirect mem, iRegINoSp newval, iRegIorL2I incr) %{
  match(Set newval (GetAndAddI mem incr));
  ins_cost(2 * VOLATILE_REF_COST + 1);
  format %{ "get_and_addI $newval, [$mem], $incr" %}
  ins_encode %{
    __ atomic_addw($newval$$Register, $incr$$Register, as_Register($mem$$base));
  %}
  ins_pipe(pipe_serial);
%}

// int += register, result discarded.
instruct get_and_addI_no_res(indirect mem, Universe dummy, iRegIorL2I incr) %{
  predicate(n->as_LoadStore()->result_not_used());
  match(Set dummy (GetAndAddI mem incr));
  ins_cost(2 * VOLATILE_REF_COST);
  format %{ "get_and_addI [$mem], $incr" %}
  ins_encode %{
    __ atomic_addw(noreg, $incr$$Register, as_Register($mem$$base));
  %}
  ins_pipe(pipe_serial);
%}

// int += immediate, old value returned in newval.
instruct get_and_addIi(indirect mem, iRegINoSp newval, immIAddSub incr) %{
  match(Set newval (GetAndAddI mem incr));
  ins_cost(2 * VOLATILE_REF_COST + 1);
  format %{ "get_and_addI $newval, [$mem], $incr" %}
  ins_encode %{
    __ atomic_addw($newval$$Register, $incr$$constant, as_Register($mem$$base));
  %}
  ins_pipe(pipe_serial);
%}

// int += immediate, result discarded.
instruct get_and_addIi_no_res(indirect mem, Universe dummy, immIAddSub incr) %{
  predicate(n->as_LoadStore()->result_not_used());
  match(Set dummy (GetAndAddI mem incr));
  ins_cost(2 * VOLATILE_REF_COST);
  format %{ "get_and_addI [$mem], $incr" %}
  ins_encode %{
    __ atomic_addw(noreg, $incr$$constant, as_Register($mem$$base));
  %}
  ins_pipe(pipe_serial);
%}
8485 
// Acquiring GetAndAdd variants: same matrix as above but using the
// *al (acquire+release) add helpers and gated on
// needs_acquiring_load_exclusive; lower cost makes them preferred
// when the predicate holds.

// long += register, acquiring, old value returned.
instruct get_and_addLAcq(indirect mem, iRegLNoSp newval, iRegL incr) %{
  predicate(needs_acquiring_load_exclusive(n));
  match(Set newval (GetAndAddL mem incr));
  ins_cost(VOLATILE_REF_COST + 1);
  format %{ "get_and_addL_acq $newval, [$mem], $incr" %}
  ins_encode %{
    __ atomic_addal($newval$$Register, $incr$$Register, as_Register($mem$$base));
  %}
  ins_pipe(pipe_serial);
%}

// long += register, acquiring, result discarded.
instruct get_and_addL_no_resAcq(indirect mem, Universe dummy, iRegL incr) %{
  predicate(n->as_LoadStore()->result_not_used() && needs_acquiring_load_exclusive(n));
  match(Set dummy (GetAndAddL mem incr));
  ins_cost(VOLATILE_REF_COST);
  format %{ "get_and_addL_acq [$mem], $incr" %}
  ins_encode %{
    __ atomic_addal(noreg, $incr$$Register, as_Register($mem$$base));
  %}
  ins_pipe(pipe_serial);
%}

// long += immediate, acquiring, old value returned.
instruct get_and_addLiAcq(indirect mem, iRegLNoSp newval, immLAddSub incr) %{
  predicate(needs_acquiring_load_exclusive(n));
  match(Set newval (GetAndAddL mem incr));
  ins_cost(VOLATILE_REF_COST + 1);
  format %{ "get_and_addL_acq $newval, [$mem], $incr" %}
  ins_encode %{
    __ atomic_addal($newval$$Register, $incr$$constant, as_Register($mem$$base));
  %}
  ins_pipe(pipe_serial);
%}

// long += immediate, acquiring, result discarded.
instruct get_and_addLi_no_resAcq(indirect mem, Universe dummy, immLAddSub incr) %{
  predicate(n->as_LoadStore()->result_not_used() && needs_acquiring_load_exclusive(n));
  match(Set dummy (GetAndAddL mem incr));
  ins_cost(VOLATILE_REF_COST);
  format %{ "get_and_addL_acq [$mem], $incr" %}
  ins_encode %{
    __ atomic_addal(noreg, $incr$$constant, as_Register($mem$$base));
  %}
  ins_pipe(pipe_serial);
%}

// int += register, acquiring, old value returned.
instruct get_and_addIAcq(indirect mem, iRegINoSp newval, iRegIorL2I incr) %{
  predicate(needs_acquiring_load_exclusive(n));
  match(Set newval (GetAndAddI mem incr));
  ins_cost(VOLATILE_REF_COST + 1);
  format %{ "get_and_addI_acq $newval, [$mem], $incr" %}
  ins_encode %{
    __ atomic_addalw($newval$$Register, $incr$$Register, as_Register($mem$$base));
  %}
  ins_pipe(pipe_serial);
%}

// int += register, acquiring, result discarded.
instruct get_and_addI_no_resAcq(indirect mem, Universe dummy, iRegIorL2I incr) %{
  predicate(n->as_LoadStore()->result_not_used() && needs_acquiring_load_exclusive(n));
  match(Set dummy (GetAndAddI mem incr));
  ins_cost(VOLATILE_REF_COST);
  format %{ "get_and_addI_acq [$mem], $incr" %}
  ins_encode %{
    __ atomic_addalw(noreg, $incr$$Register, as_Register($mem$$base));
  %}
  ins_pipe(pipe_serial);
%}

// int += immediate, acquiring, old value returned.
instruct get_and_addIiAcq(indirect mem, iRegINoSp newval, immIAddSub incr) %{
  predicate(needs_acquiring_load_exclusive(n));
  match(Set newval (GetAndAddI mem incr));
  ins_cost(VOLATILE_REF_COST + 1);
  format %{ "get_and_addI_acq $newval, [$mem], $incr" %}
  ins_encode %{
    __ atomic_addalw($newval$$Register, $incr$$constant, as_Register($mem$$base));
  %}
  ins_pipe(pipe_serial);
%}

// int += immediate, acquiring, result discarded.
instruct get_and_addIi_no_resAcq(indirect mem, Universe dummy, immIAddSub incr) %{
  predicate(n->as_LoadStore()->result_not_used() && needs_acquiring_load_exclusive(n));
  match(Set dummy (GetAndAddI mem incr));
  ins_cost(VOLATILE_REF_COST);
  format %{ "get_and_addI_acq [$mem], $incr" %}
  ins_encode %{
    __ atomic_addalw(noreg, $incr$$constant, as_Register($mem$$base));
  %}
  ins_pipe(pipe_serial);
%}
8573 
8574 // ============================================================================
8575 // Conditional Move Instructions
8576 
8577 // n.b. we have identical rules for both a signed compare op (cmpOp)
8578 // and an unsigned compare op (cmpOpU). it would be nice if we could
8579 // define an op class which merged both inputs and use it to type the
// argument to a single rule. unfortunately this fails because the
// opclass does not live up to the COND_INTER interface of its
// component operands. When the generic code tries to negate the
// operand it ends up running the generic MachOper::negate method
// which throws a ShouldNotHappen. So, we have to provide two flavours
8585 // of each rule, one for a cmpOp and a second for a cmpOpU (sigh).
8586 
// Conditional move, int, both sources in registers, signed compare.
// Note the operand order: csel selects src2 when the condition holds,
// src1 otherwise, matching CMoveI's (true-val false-val) ordering.
instruct cmovI_reg_reg(cmpOp cmp, rFlagsReg cr, iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2) %{
  match(Set dst (CMoveI (Binary cmp cr) (Binary src1 src2)));

  ins_cost(INSN_COST * 2);
  format %{ "cselw $dst, $src2, $src1 $cmp\t# signed, int"  %}

  ins_encode %{
    __ cselw(as_Register($dst$$reg),
             as_Register($src2$$reg),
             as_Register($src1$$reg),
             (Assembler::Condition)$cmp$$cmpcode);
  %}

  ins_pipe(icond_reg_reg);
%}

// Unsigned-compare twin of the rule above (see the block comment on
// why both flavours are needed).
instruct cmovUI_reg_reg(cmpOpU cmp, rFlagsRegU cr, iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2) %{
  match(Set dst (CMoveI (Binary cmp cr) (Binary src1 src2)));

  ins_cost(INSN_COST * 2);
  format %{ "cselw $dst, $src2, $src1 $cmp\t# unsigned, int"  %}

  ins_encode %{
    __ cselw(as_Register($dst$$reg),
             as_Register($src2$$reg),
             as_Register($src1$$reg),
             (Assembler::Condition)$cmp$$cmpcode);
  %}

  ins_pipe(icond_reg_reg);
%}
8618 
8619 // special cases where one arg is zero
8620 
8621 // n.b. this is selected in preference to the rule above because it
8622 // avoids loading constant 0 into a source register
8623 
8624 // TODO
8625 // we ought only to be able to cull one of these variants as the ideal
8626 // transforms ought always to order the zero consistently (to left/right?)
8627 
// CMoveI with zero as the true-value: use zr directly instead of
// materializing 0 in a register. Signed compare.
instruct cmovI_zero_reg(cmpOp cmp, rFlagsReg cr, iRegINoSp dst, immI0 zero, iRegIorL2I src) %{
  match(Set dst (CMoveI (Binary cmp cr) (Binary zero src)));

  ins_cost(INSN_COST * 2);
  format %{ "cselw $dst, $src, zr $cmp\t# signed, int"  %}

  ins_encode %{
    __ cselw(as_Register($dst$$reg),
             as_Register($src$$reg),
             zr,
             (Assembler::Condition)$cmp$$cmpcode);
  %}

  ins_pipe(icond_reg);
%}

// Same, unsigned compare.
instruct cmovUI_zero_reg(cmpOpU cmp, rFlagsRegU cr, iRegINoSp dst, immI0 zero, iRegIorL2I src) %{
  match(Set dst (CMoveI (Binary cmp cr) (Binary zero src)));

  ins_cost(INSN_COST * 2);
  format %{ "cselw $dst, $src, zr $cmp\t# unsigned, int"  %}

  ins_encode %{
    __ cselw(as_Register($dst$$reg),
             as_Register($src$$reg),
             zr,
             (Assembler::Condition)$cmp$$cmpcode);
  %}

  ins_pipe(icond_reg);
%}

// CMoveI with zero as the false-value; zr goes in the selected-when-
// true slot. Signed compare.
instruct cmovI_reg_zero(cmpOp cmp, rFlagsReg cr, iRegINoSp dst, iRegIorL2I src, immI0 zero) %{
  match(Set dst (CMoveI (Binary cmp cr) (Binary src zero)));

  ins_cost(INSN_COST * 2);
  format %{ "cselw $dst, zr, $src $cmp\t# signed, int"  %}

  ins_encode %{
    __ cselw(as_Register($dst$$reg),
             zr,
             as_Register($src$$reg),
             (Assembler::Condition)$cmp$$cmpcode);
  %}

  ins_pipe(icond_reg);
%}

// Same, unsigned compare.
instruct cmovUI_reg_zero(cmpOpU cmp, rFlagsRegU cr, iRegINoSp dst, iRegIorL2I src, immI0 zero) %{
  match(Set dst (CMoveI (Binary cmp cr) (Binary src zero)));

  ins_cost(INSN_COST * 2);
  format %{ "cselw $dst, zr, $src $cmp\t# unsigned, int"  %}

  ins_encode %{
    __ cselw(as_Register($dst$$reg),
             zr,
             as_Register($src$$reg),
             (Assembler::Condition)$cmp$$cmpcode);
  %}

  ins_pipe(icond_reg);
%}
8691 
8692 // special case for creating a boolean 0 or 1
8693 
8694 // n.b. this is selected in preference to the rule above because it
8695 // avoids loading constants 0 and 1 into a source register
8696 
// Materialize a boolean: csincw zr, zr yields 0 if the condition
// holds and 0+1 otherwise, so dst becomes 1 exactly when the CMoveI
// selects "one". Signed compare.
instruct cmovI_reg_zero_one(cmpOp cmp, rFlagsReg cr, iRegINoSp dst, immI0 zero, immI_1 one) %{
  match(Set dst (CMoveI (Binary cmp cr) (Binary one zero)));

  ins_cost(INSN_COST * 2);
  format %{ "csincw $dst, zr, zr $cmp\t# signed, int"  %}

  ins_encode %{
    // equivalently
    // cset(as_Register($dst$$reg),
    //      negate_condition((Assembler::Condition)$cmp$$cmpcode));
    __ csincw(as_Register($dst$$reg),
             zr,
             zr,
             (Assembler::Condition)$cmp$$cmpcode);
  %}

  ins_pipe(icond_none);
%}

// Same boolean materialization, unsigned compare.
instruct cmovUI_reg_zero_one(cmpOpU cmp, rFlagsRegU cr, iRegINoSp dst, immI0 zero, immI_1 one) %{
  match(Set dst (CMoveI (Binary cmp cr) (Binary one zero)));

  ins_cost(INSN_COST * 2);
  format %{ "csincw $dst, zr, zr $cmp\t# unsigned, int"  %}

  ins_encode %{
    // equivalently
    // cset(as_Register($dst$$reg),
    //      negate_condition((Assembler::Condition)$cmp$$cmpcode));
    __ csincw(as_Register($dst$$reg),
             zr,
             zr,
             (Assembler::Condition)$cmp$$cmpcode);
  %}

  ins_pipe(icond_none);
%}
8734 
// Conditional move, long, both sources in registers, signed compare.
// 64-bit csel; operand ordering mirrors cmovI_reg_reg.
instruct cmovL_reg_reg(cmpOp cmp, rFlagsReg cr, iRegLNoSp dst, iRegL src1, iRegL src2) %{
  match(Set dst (CMoveL (Binary cmp cr) (Binary src1 src2)));

  ins_cost(INSN_COST * 2);
  format %{ "csel $dst, $src2, $src1 $cmp\t# signed, long"  %}

  ins_encode %{
    __ csel(as_Register($dst$$reg),
            as_Register($src2$$reg),
            as_Register($src1$$reg),
            (Assembler::Condition)$cmp$$cmpcode);
  %}

  ins_pipe(icond_reg_reg);
%}

// Unsigned-compare twin of the rule above.
instruct cmovUL_reg_reg(cmpOpU cmp, rFlagsRegU cr, iRegLNoSp dst, iRegL src1, iRegL src2) %{
  match(Set dst (CMoveL (Binary cmp cr) (Binary src1 src2)));

  ins_cost(INSN_COST * 2);
  format %{ "csel $dst, $src2, $src1 $cmp\t# unsigned, long"  %}

  ins_encode %{
    __ csel(as_Register($dst$$reg),
            as_Register($src2$$reg),
            as_Register($src1$$reg),
            (Assembler::Condition)$cmp$$cmpcode);
  %}

  ins_pipe(icond_reg_reg);
%}
8766 
// special cases where one arg is zero

// Long cmove where the false arm is the constant 0: use zr, saving a
// constant load.
instruct cmovL_reg_zero(cmpOp cmp, rFlagsReg cr, iRegLNoSp dst, iRegL src, immL0 zero) %{
  match(Set dst (CMoveL (Binary cmp cr) (Binary src zero)));

  ins_cost(INSN_COST * 2);
  format %{ "csel $dst, zr, $src $cmp\t# signed, long"  %}

  ins_encode %{
    __ csel(as_Register($dst$$reg),
            zr,
            as_Register($src$$reg),
            (Assembler::Condition)$cmp$$cmpcode);
  %}

  ins_pipe(icond_reg);
%}

// As above, unsigned compare variant.
instruct cmovUL_reg_zero(cmpOpU cmp, rFlagsRegU cr, iRegLNoSp dst, iRegL src, immL0 zero) %{
  match(Set dst (CMoveL (Binary cmp cr) (Binary src zero)));

  ins_cost(INSN_COST * 2);
  format %{ "csel $dst, zr, $src $cmp\t# unsigned, long"  %}

  ins_encode %{
    __ csel(as_Register($dst$$reg),
            zr,
            as_Register($src$$reg),
            (Assembler::Condition)$cmp$$cmpcode);
  %}

  ins_pipe(icond_reg);
%}

// Long cmove where the true arm is the constant 0 (operands swapped).
instruct cmovL_zero_reg(cmpOp cmp, rFlagsReg cr, iRegLNoSp dst, immL0 zero, iRegL src) %{
  match(Set dst (CMoveL (Binary cmp cr) (Binary zero src)));

  ins_cost(INSN_COST * 2);
  format %{ "csel $dst, $src, zr $cmp\t# signed, long"  %}

  ins_encode %{
    __ csel(as_Register($dst$$reg),
            as_Register($src$$reg),
            zr,
            (Assembler::Condition)$cmp$$cmpcode);
  %}

  ins_pipe(icond_reg);
%}

// As above, unsigned compare variant.
instruct cmovUL_zero_reg(cmpOpU cmp, rFlagsRegU cr, iRegLNoSp dst, immL0 zero, iRegL src) %{
  match(Set dst (CMoveL (Binary cmp cr) (Binary zero src)));

  ins_cost(INSN_COST * 2);
  format %{ "csel $dst, $src, zr $cmp\t# unsigned, long"  %}

  ins_encode %{
    __ csel(as_Register($dst$$reg),
            as_Register($src$$reg),
            zr,
            (Assembler::Condition)$cmp$$cmpcode);
  %}

  ins_pipe(icond_reg);
%}
8832 
// Pointer conditional move: csel selects $src2 when $cmp holds, else $src1.
instruct cmovP_reg_reg(cmpOp cmp, rFlagsReg cr, iRegPNoSp dst, iRegP src1, iRegP src2) %{
  match(Set dst (CMoveP (Binary cmp cr) (Binary src1 src2)));

  ins_cost(INSN_COST * 2);
  format %{ "csel $dst, $src2, $src1 $cmp\t# signed, ptr"  %}

  ins_encode %{
    __ csel(as_Register($dst$$reg),
            as_Register($src2$$reg),
            as_Register($src1$$reg),
            (Assembler::Condition)$cmp$$cmpcode);
  %}

  ins_pipe(icond_reg_reg);
%}

// As above, for an unsigned compare (cmpOpU/rFlagsRegU).
instruct cmovUP_reg_reg(cmpOpU cmp, rFlagsRegU cr, iRegPNoSp dst, iRegP src1, iRegP src2) %{
  match(Set dst (CMoveP (Binary cmp cr) (Binary src1 src2)));

  ins_cost(INSN_COST * 2);
  format %{ "csel $dst, $src2, $src1 $cmp\t# unsigned, ptr"  %}

  ins_encode %{
    __ csel(as_Register($dst$$reg),
            as_Register($src2$$reg),
            as_Register($src1$$reg),
            (Assembler::Condition)$cmp$$cmpcode);
  %}

  ins_pipe(icond_reg_reg);
%}
8864 
// special cases where one arg is zero

// Pointer cmove where the false arm is null (immP0): use zr directly.
instruct cmovP_reg_zero(cmpOp cmp, rFlagsReg cr, iRegPNoSp dst, iRegP src, immP0 zero) %{
  match(Set dst (CMoveP (Binary cmp cr) (Binary src zero)));

  ins_cost(INSN_COST * 2);
  format %{ "csel $dst, zr, $src $cmp\t# signed, ptr"  %}

  ins_encode %{
    __ csel(as_Register($dst$$reg),
            zr,
            as_Register($src$$reg),
            (Assembler::Condition)$cmp$$cmpcode);
  %}

  ins_pipe(icond_reg);
%}

// As above, unsigned compare variant.
instruct cmovUP_reg_zero(cmpOpU cmp, rFlagsRegU cr, iRegPNoSp dst, iRegP src, immP0 zero) %{
  match(Set dst (CMoveP (Binary cmp cr) (Binary src zero)));

  ins_cost(INSN_COST * 2);
  format %{ "csel $dst, zr, $src $cmp\t# unsigned, ptr"  %}

  ins_encode %{
    __ csel(as_Register($dst$$reg),
            zr,
            as_Register($src$$reg),
            (Assembler::Condition)$cmp$$cmpcode);
  %}

  ins_pipe(icond_reg);
%}

// Pointer cmove where the true arm is null (operands swapped).
instruct cmovP_zero_reg(cmpOp cmp, rFlagsReg cr, iRegPNoSp dst, immP0 zero, iRegP src) %{
  match(Set dst (CMoveP (Binary cmp cr) (Binary zero src)));

  ins_cost(INSN_COST * 2);
  format %{ "csel $dst, $src, zr $cmp\t# signed, ptr"  %}

  ins_encode %{
    __ csel(as_Register($dst$$reg),
            as_Register($src$$reg),
            zr,
            (Assembler::Condition)$cmp$$cmpcode);
  %}

  ins_pipe(icond_reg);
%}

// As above, unsigned compare variant.
instruct cmovUP_zero_reg(cmpOpU cmp, rFlagsRegU cr, iRegPNoSp dst, immP0 zero, iRegP src) %{
  match(Set dst (CMoveP (Binary cmp cr) (Binary zero src)));

  ins_cost(INSN_COST * 2);
  format %{ "csel $dst, $src, zr $cmp\t# unsigned, ptr"  %}

  ins_encode %{
    __ csel(as_Register($dst$$reg),
            as_Register($src$$reg),
            zr,
            (Assembler::Condition)$cmp$$cmpcode);
  %}

  ins_pipe(icond_reg);
%}
8930 
// Compressed-pointer (narrow oop) cmove: 32-bit cselw selects $src2 when
// $cmp holds, else $src1.
instruct cmovN_reg_reg(cmpOp cmp, rFlagsReg cr, iRegNNoSp dst, iRegN src1, iRegN src2) %{
  match(Set dst (CMoveN (Binary cmp cr) (Binary src1 src2)));

  ins_cost(INSN_COST * 2);
  format %{ "cselw $dst, $src2, $src1 $cmp\t# signed, compressed ptr"  %}

  ins_encode %{
    __ cselw(as_Register($dst$$reg),
             as_Register($src2$$reg),
             as_Register($src1$$reg),
             (Assembler::Condition)$cmp$$cmpcode);
  %}

  ins_pipe(icond_reg_reg);
%}
8946 
// Compressed-pointer (narrow oop) cmove for an unsigned compare
// (cmpOpU/rFlagsRegU): cselw selects $src2 when $cmp holds, else $src1.
// Fix: the format string previously said "# signed" although this is the
// unsigned variant; corrected to match every other cmovU* pattern.
instruct cmovUN_reg_reg(cmpOpU cmp, rFlagsRegU cr, iRegNNoSp dst, iRegN src1, iRegN src2) %{
  match(Set dst (CMoveN (Binary cmp cr) (Binary src1 src2)));

  ins_cost(INSN_COST * 2);
  format %{ "cselw $dst, $src2, $src1 $cmp\t# unsigned, compressed ptr"  %}

  ins_encode %{
    __ cselw(as_Register($dst$$reg),
             as_Register($src2$$reg),
             as_Register($src1$$reg),
             (Assembler::Condition)$cmp$$cmpcode);
  %}

  ins_pipe(icond_reg_reg);
%}
8962 
// special cases where one arg is zero

// Narrow-oop cmove where the false arm is the null narrow oop (immN0).
instruct cmovN_reg_zero(cmpOp cmp, rFlagsReg cr, iRegNNoSp dst, iRegN src, immN0 zero) %{
  match(Set dst (CMoveN (Binary cmp cr) (Binary src zero)));

  ins_cost(INSN_COST * 2);
  format %{ "cselw $dst, zr, $src $cmp\t# signed, compressed ptr"  %}

  ins_encode %{
    __ cselw(as_Register($dst$$reg),
             zr,
             as_Register($src$$reg),
             (Assembler::Condition)$cmp$$cmpcode);
  %}

  ins_pipe(icond_reg);
%}

// As above, unsigned compare variant.
instruct cmovUN_reg_zero(cmpOpU cmp, rFlagsRegU cr, iRegNNoSp dst, iRegN src, immN0 zero) %{
  match(Set dst (CMoveN (Binary cmp cr) (Binary src zero)));

  ins_cost(INSN_COST * 2);
  format %{ "cselw $dst, zr, $src $cmp\t# unsigned, compressed ptr"  %}

  ins_encode %{
    __ cselw(as_Register($dst$$reg),
             zr,
             as_Register($src$$reg),
             (Assembler::Condition)$cmp$$cmpcode);
  %}

  ins_pipe(icond_reg);
%}

// Narrow-oop cmove where the true arm is the null narrow oop.
instruct cmovN_zero_reg(cmpOp cmp, rFlagsReg cr, iRegNNoSp dst, immN0 zero, iRegN src) %{
  match(Set dst (CMoveN (Binary cmp cr) (Binary zero src)));

  ins_cost(INSN_COST * 2);
  format %{ "cselw $dst, $src, zr $cmp\t# signed, compressed ptr"  %}

  ins_encode %{
    __ cselw(as_Register($dst$$reg),
             as_Register($src$$reg),
             zr,
             (Assembler::Condition)$cmp$$cmpcode);
  %}

  ins_pipe(icond_reg);
%}

// As above, unsigned compare variant.
instruct cmovUN_zero_reg(cmpOpU cmp, rFlagsRegU cr, iRegNNoSp dst, immN0 zero, iRegN src) %{
  match(Set dst (CMoveN (Binary cmp cr) (Binary zero src)));

  ins_cost(INSN_COST * 2);
  format %{ "cselw $dst, $src, zr $cmp\t# unsigned, compressed ptr"  %}

  ins_encode %{
    __ cselw(as_Register($dst$$reg),
             as_Register($src$$reg),
             zr,
             (Assembler::Condition)$cmp$$cmpcode);
  %}

  ins_pipe(icond_reg);
%}
9028 
// Float conditional move: fcsels selects $src2 when $cmp holds, else $src1.
instruct cmovF_reg(cmpOp cmp, rFlagsReg cr, vRegF dst, vRegF src1,  vRegF src2)
%{
  match(Set dst (CMoveF (Binary cmp cr) (Binary src1 src2)));

  ins_cost(INSN_COST * 3);

  format %{ "fcsels $dst, $src1, $src2, $cmp\t# signed cmove float\n\t" %}
  ins_encode %{
    Assembler::Condition cond = (Assembler::Condition)$cmp$$cmpcode;
    __ fcsels(as_FloatRegister($dst$$reg),
              as_FloatRegister($src2$$reg),
              as_FloatRegister($src1$$reg),
              cond);
  %}

  ins_pipe(fp_cond_reg_reg_s);
%}

// As above, for an unsigned compare (cmpOpU/rFlagsRegU).
instruct cmovUF_reg(cmpOpU cmp, rFlagsRegU cr, vRegF dst, vRegF src1,  vRegF src2)
%{
  match(Set dst (CMoveF (Binary cmp cr) (Binary src1 src2)));

  ins_cost(INSN_COST * 3);

  format %{ "fcsels $dst, $src1, $src2, $cmp\t# unsigned cmove float\n\t" %}
  ins_encode %{
    Assembler::Condition cond = (Assembler::Condition)$cmp$$cmpcode;
    __ fcsels(as_FloatRegister($dst$$reg),
              as_FloatRegister($src2$$reg),
              as_FloatRegister($src1$$reg),
              cond);
  %}

  ins_pipe(fp_cond_reg_reg_s);
%}
9064 
// Double conditional move: fcseld selects $src2 when $cmp holds, else $src1.
// Fix: the format comment previously said "cmove float" for this double
// pattern; corrected to "cmove double".
instruct cmovD_reg(cmpOp cmp, rFlagsReg cr, vRegD dst, vRegD src1,  vRegD src2)
%{
  match(Set dst (CMoveD (Binary cmp cr) (Binary src1 src2)));

  ins_cost(INSN_COST * 3);

  format %{ "fcseld $dst, $src1, $src2, $cmp\t# signed cmove double\n\t" %}
  ins_encode %{
    Assembler::Condition cond = (Assembler::Condition)$cmp$$cmpcode;
    __ fcseld(as_FloatRegister($dst$$reg),
              as_FloatRegister($src2$$reg),
              as_FloatRegister($src1$$reg),
              cond);
  %}

  ins_pipe(fp_cond_reg_reg_d);
%}
9082 
// Double conditional move for an unsigned compare (cmpOpU/rFlagsRegU).
// Fix: the format comment previously said "cmove float" for this double
// pattern; corrected to "cmove double".
instruct cmovUD_reg(cmpOpU cmp, rFlagsRegU cr, vRegD dst, vRegD src1,  vRegD src2)
%{
  match(Set dst (CMoveD (Binary cmp cr) (Binary src1 src2)));

  ins_cost(INSN_COST * 3);

  format %{ "fcseld $dst, $src1, $src2, $cmp\t# unsigned cmove double\n\t" %}
  ins_encode %{
    Assembler::Condition cond = (Assembler::Condition)$cmp$$cmpcode;
    __ fcseld(as_FloatRegister($dst$$reg),
              as_FloatRegister($src2$$reg),
              as_FloatRegister($src1$$reg),
              cond);
  %}

  ins_pipe(fp_cond_reg_reg_d);
%}
9100 
9101 // ============================================================================
9102 // Arithmetic Instructions
9103 //
9104 
9105 // Integer Addition
9106 
9107 // TODO
9108 // these currently employ operations which do not set CR and hence are
9109 // not flagged as killing CR but we would like to isolate the cases
9110 // where we want to set flags from those where we don't. need to work
9111 // out how to do that.
9112 
9113 instruct addI_reg_reg(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2) %{
9114   match(Set dst (AddI src1 src2));
9115 
9116   ins_cost(INSN_COST);
9117   format %{ "addw  $dst, $src1, $src2" %}
9118 
9119   ins_encode %{
9120     __ addw(as_Register($dst$$reg),
9121             as_Register($src1$$reg),
9122             as_Register($src2$$reg));
9123   %}
9124 
9125   ins_pipe(ialu_reg_reg);
9126 %}
9127 
9128 instruct addI_reg_imm(iRegINoSp dst, iRegIorL2I src1, immIAddSub src2) %{
9129   match(Set dst (AddI src1 src2));
9130 
9131   ins_cost(INSN_COST);
9132   format %{ "addw $dst, $src1, $src2" %}
9133 
9134   // use opcode to indicate that this is an add not a sub
9135   opcode(0x0);
9136 
9137   ins_encode(aarch64_enc_addsubw_imm(dst, src1, src2));
9138 
9139   ins_pipe(ialu_reg_imm);
9140 %}
9141 
9142 instruct addI_reg_imm_i2l(iRegINoSp dst, iRegL src1, immIAddSub src2) %{
9143   match(Set dst (AddI (ConvL2I src1) src2));
9144 
9145   ins_cost(INSN_COST);
9146   format %{ "addw $dst, $src1, $src2" %}
9147 
9148   // use opcode to indicate that this is an add not a sub
9149   opcode(0x0);
9150 
9151   ins_encode(aarch64_enc_addsubw_imm(dst, src1, src2));
9152 
9153   ins_pipe(ialu_reg_imm);
9154 %}
9155 
// Pointer Addition
// 64-bit pointer + long offset.
instruct addP_reg_reg(iRegPNoSp dst, iRegP src1, iRegL src2) %{
  match(Set dst (AddP src1 src2));

  ins_cost(INSN_COST);
  format %{ "add $dst, $src1, $src2\t# ptr" %}

  ins_encode %{
    __ add(as_Register($dst$$reg),
           as_Register($src1$$reg),
           as_Register($src2$$reg));
  %}

  ins_pipe(ialu_reg_reg);
%}

// Pointer + sign-extended int offset, folding the ConvI2L into the add's
// sxtw extend operand.
instruct addP_reg_reg_ext(iRegPNoSp dst, iRegP src1, iRegIorL2I src2) %{
  match(Set dst (AddP src1 (ConvI2L src2)));

  ins_cost(1.9 * INSN_COST);
  format %{ "add $dst, $src1, $src2, sxtw\t# ptr" %}

  ins_encode %{
    __ add(as_Register($dst$$reg),
           as_Register($src1$$reg),
           as_Register($src2$$reg), ext::sxtw);
  %}

  ins_pipe(ialu_reg_reg);
%}

// Pointer + (long << scale), folded into a single scaled lea.
instruct addP_reg_reg_lsl(iRegPNoSp dst, iRegP src1, iRegL src2, immIScale scale) %{
  match(Set dst (AddP src1 (LShiftL src2 scale)));

  ins_cost(1.9 * INSN_COST);
  format %{ "add $dst, $src1, $src2, LShiftL $scale\t# ptr" %}

  ins_encode %{
    __ lea(as_Register($dst$$reg),
           Address(as_Register($src1$$reg), as_Register($src2$$reg),
                   Address::lsl($scale$$constant)));
  %}

  ins_pipe(ialu_reg_reg_shift);
%}

// Pointer + (sign-extended int << scale), folded into a scaled sxtw lea.
instruct addP_reg_reg_ext_shift(iRegPNoSp dst, iRegP src1, iRegIorL2I src2, immIScale scale) %{
  match(Set dst (AddP src1 (LShiftL (ConvI2L src2) scale)));

  ins_cost(1.9 * INSN_COST);
  format %{ "add $dst, $src1, $src2, I2L $scale\t# ptr" %}

  ins_encode %{
    __ lea(as_Register($dst$$reg),
           Address(as_Register($src1$$reg), as_Register($src2$$reg),
                   Address::sxtw($scale$$constant)));
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
9216 
// (ConvI2L src) << scale implemented as a single sbfiz (sign-extend the
// int, then shift into position).
// NOTE(review): the format omits the MIN(32, ...) clamp applied to the
// field width in the encoding below, so the printed width can differ from
// the encoded one — confirm whether the format should show it.
instruct lshift_ext(iRegLNoSp dst, iRegIorL2I src, immI scale, rFlagsReg cr) %{
  match(Set dst (LShiftL (ConvI2L src) scale));

  ins_cost(INSN_COST);
  format %{ "sbfiz $dst, $src, $scale & 63, -$scale & 63\t" %}

  ins_encode %{
    __ sbfiz(as_Register($dst$$reg),
          as_Register($src$$reg),
          $scale$$constant & 63, MIN(32, (-$scale$$constant) & 63));
  %}

  ins_pipe(ialu_reg_shift);
%}
9231 
// Pointer Immediate Addition
// n.b. this needs to be more expensive than using an indirect memory
// operand
instruct addP_reg_imm(iRegPNoSp dst, iRegP src1, immLAddSub src2) %{
  match(Set dst (AddP src1 src2));

  ins_cost(INSN_COST);
  format %{ "add $dst, $src1, $src2\t# ptr" %}

  // use opcode to indicate that this is an add not a sub
  opcode(0x0);

  ins_encode( aarch64_enc_addsub_imm(dst, src1, src2) );

  ins_pipe(ialu_reg_imm);
%}

// Long Addition
// 64-bit register-register add.
instruct addL_reg_reg(iRegLNoSp dst, iRegL src1, iRegL src2) %{

  match(Set dst (AddL src1 src2));

  ins_cost(INSN_COST);
  format %{ "add  $dst, $src1, $src2" %}

  ins_encode %{
    __ add(as_Register($dst$$reg),
           as_Register($src1$$reg),
           as_Register($src2$$reg));
  %}

  ins_pipe(ialu_reg_reg);
%}

// Long Immediate Addition. No constant pool entries required.
instruct addL_reg_imm(iRegLNoSp dst, iRegL src1, immLAddSub src2) %{
  match(Set dst (AddL src1 src2));

  ins_cost(INSN_COST);
  format %{ "add $dst, $src1, $src2" %}

  // use opcode to indicate that this is an add not a sub
  opcode(0x0);

  ins_encode( aarch64_enc_addsub_imm(dst, src1, src2) );

  ins_pipe(ialu_reg_imm);
%}
9280 
// Integer Subtraction
// 32-bit register-register subtract.
instruct subI_reg_reg(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2) %{
  match(Set dst (SubI src1 src2));

  ins_cost(INSN_COST);
  format %{ "subw  $dst, $src1, $src2" %}

  ins_encode %{
    __ subw(as_Register($dst$$reg),
            as_Register($src1$$reg),
            as_Register($src2$$reg));
  %}

  ins_pipe(ialu_reg_reg);
%}

// Immediate Subtraction
instruct subI_reg_imm(iRegINoSp dst, iRegIorL2I src1, immIAddSub src2) %{
  match(Set dst (SubI src1 src2));

  ins_cost(INSN_COST);
  format %{ "subw $dst, $src1, $src2" %}

  // use opcode to indicate that this is a sub not an add
  opcode(0x1);

  ins_encode(aarch64_enc_addsubw_imm(dst, src1, src2));

  ins_pipe(ialu_reg_imm);
%}

// Long Subtraction
// 64-bit register-register subtract.
instruct subL_reg_reg(iRegLNoSp dst, iRegL src1, iRegL src2) %{

  match(Set dst (SubL src1 src2));

  ins_cost(INSN_COST);
  format %{ "sub  $dst, $src1, $src2" %}

  ins_encode %{
    __ sub(as_Register($dst$$reg),
           as_Register($src1$$reg),
           as_Register($src2$$reg));
  %}

  ins_pipe(ialu_reg_reg);
%}
9328 
// Long Immediate Subtraction. No constant pool entries required.
// 64-bit subtract of an add/sub-encodable immediate.
// Fix: the format string was "sub$dst, ..." (missing separator after the
// mnemonic); corrected so the disassembly annotation prints properly.
instruct subL_reg_imm(iRegLNoSp dst, iRegL src1, immLAddSub src2) %{
  match(Set dst (SubL src1 src2));

  ins_cost(INSN_COST);
  format %{ "sub $dst, $src1, $src2" %}

  // use opcode to indicate that this is a sub not an add
  opcode(0x1);

  ins_encode( aarch64_enc_addsub_imm(dst, src1, src2) );

  ins_pipe(ialu_reg_imm);
%}
9343 
// Integer Negation (special case for sub)

// 32-bit negate: matches (SubI 0 src).
instruct negI_reg(iRegINoSp dst, iRegIorL2I src, immI0 zero, rFlagsReg cr) %{
  match(Set dst (SubI zero src));

  ins_cost(INSN_COST);
  format %{ "negw $dst, $src\t# int" %}

  ins_encode %{
    __ negw(as_Register($dst$$reg),
            as_Register($src$$reg));
  %}

  ins_pipe(ialu_reg);
%}
9359 
9360 // Long Negation
9361 
// 64-bit negate: matches (SubL 0 src).
// Fix: the source operand was declared iRegIorL2I (an int register or a
// narrowed long), but this pattern operates on a full 64-bit long and the
// encoder emits a 64-bit neg — an L2I-matched source would have undefined
// upper bits. Declare it iRegL, mirroring the other long patterns.
instruct negL_reg(iRegLNoSp dst, iRegL src, immL0 zero, rFlagsReg cr) %{
  match(Set dst (SubL zero src));

  ins_cost(INSN_COST);
  format %{ "neg $dst, $src\t# long" %}

  ins_encode %{
    __ neg(as_Register($dst$$reg),
           as_Register($src$$reg));
  %}

  ins_pipe(ialu_reg);
%}
9375 
// Integer Multiply

// 32-bit multiply.
instruct mulI(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2) %{
  match(Set dst (MulI src1 src2));

  ins_cost(INSN_COST * 3);
  format %{ "mulw  $dst, $src1, $src2" %}

  ins_encode %{
    __ mulw(as_Register($dst$$reg),
            as_Register($src1$$reg),
            as_Register($src2$$reg));
  %}

  ins_pipe(imul_reg_reg);
%}

// Widening 32x32->64 signed multiply: (long)src1 * (long)src2 in one smull.
instruct smulI(iRegLNoSp dst, iRegIorL2I src1, iRegIorL2I src2) %{
  match(Set dst (MulL (ConvI2L src1) (ConvI2L src2)));

  ins_cost(INSN_COST * 3);
  format %{ "smull  $dst, $src1, $src2" %}

  ins_encode %{
    __ smull(as_Register($dst$$reg),
             as_Register($src1$$reg),
             as_Register($src2$$reg));
  %}

  ins_pipe(imul_reg_reg);
%}

// Long Multiply

// 64-bit multiply (low 64 bits of the product).
instruct mulL(iRegLNoSp dst, iRegL src1, iRegL src2) %{
  match(Set dst (MulL src1 src2));

  ins_cost(INSN_COST * 5);
  format %{ "mul  $dst, $src1, $src2" %}

  ins_encode %{
    __ mul(as_Register($dst$$reg),
           as_Register($src1$$reg),
           as_Register($src2$$reg));
  %}

  ins_pipe(lmul_reg_reg);
%}
9424 
// High 64 bits of a signed 64x64 multiply (MulHiL) via smulh.
// Fix: the format string had a stray trailing comma after $src2.
instruct mulHiL_rReg(iRegLNoSp dst, iRegL src1, iRegL src2, rFlagsReg cr)
%{
  match(Set dst (MulHiL src1 src2));

  ins_cost(INSN_COST * 7);
  format %{ "smulh   $dst, $src1, $src2\t# mulhi" %}

  ins_encode %{
    __ smulh(as_Register($dst$$reg),
             as_Register($src1$$reg),
             as_Register($src2$$reg));
  %}

  ins_pipe(lmul_reg_reg);
%}
9440 
9441 // Combined Integer Multiply & Add/Sub
9442 
// Fused 32-bit multiply-add: dst = src3 + src1 * src2.
// Fix: the format printed "madd" although the encoder emits the 32-bit
// form maddw; format now matches the emitted instruction.
instruct maddI(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2, iRegIorL2I src3) %{
  match(Set dst (AddI src3 (MulI src1 src2)));

  ins_cost(INSN_COST * 3);
  format %{ "maddw  $dst, $src1, $src2, $src3" %}

  ins_encode %{
    __ maddw(as_Register($dst$$reg),
             as_Register($src1$$reg),
             as_Register($src2$$reg),
             as_Register($src3$$reg));
  %}

  ins_pipe(imac_reg_reg);
%}
9458 
// Fused 32-bit multiply-subtract: dst = src3 - src1 * src2.
// Fix: the format printed "msub" although the encoder emits the 32-bit
// form msubw; format now matches the emitted instruction.
instruct msubI(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2, iRegIorL2I src3) %{
  match(Set dst (SubI src3 (MulI src1 src2)));

  ins_cost(INSN_COST * 3);
  format %{ "msubw  $dst, $src1, $src2, $src3" %}

  ins_encode %{
    __ msubw(as_Register($dst$$reg),
             as_Register($src1$$reg),
             as_Register($src2$$reg),
             as_Register($src3$$reg));
  %}

  ins_pipe(imac_reg_reg);
%}
9474 
// Combined Long Multiply & Add/Sub

// Fused 64-bit multiply-add: dst = src3 + src1 * src2.
instruct maddL(iRegLNoSp dst, iRegL src1, iRegL src2, iRegL src3) %{
  match(Set dst (AddL src3 (MulL src1 src2)));

  ins_cost(INSN_COST * 5);
  format %{ "madd  $dst, $src1, $src2, $src3" %}

  ins_encode %{
    __ madd(as_Register($dst$$reg),
            as_Register($src1$$reg),
            as_Register($src2$$reg),
            as_Register($src3$$reg));
  %}

  ins_pipe(lmac_reg_reg);
%}

// Fused 64-bit multiply-subtract: dst = src3 - src1 * src2.
instruct msubL(iRegLNoSp dst, iRegL src1, iRegL src2, iRegL src3) %{
  match(Set dst (SubL src3 (MulL src1 src2)));

  ins_cost(INSN_COST * 5);
  format %{ "msub  $dst, $src1, $src2, $src3" %}

  ins_encode %{
    __ msub(as_Register($dst$$reg),
            as_Register($src1$$reg),
            as_Register($src2$$reg),
            as_Register($src3$$reg));
  %}

  ins_pipe(lmac_reg_reg);
%}
9508 
// Integer Divide

instruct divI(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2) %{
  match(Set dst (DivI src1 src2));

  ins_cost(INSN_COST * 19);
  format %{ "sdivw  $dst, $src1, $src2" %}

  ins_encode(aarch64_enc_divw(dst, src1, src2));
  ins_pipe(idiv_reg_reg);
%}

// (x >> 31) >>> 31 extracts the sign bit and is equivalent to x >>> 31,
// so a single logical shift suffices.
instruct signExtract(iRegINoSp dst, iRegIorL2I src1, immI_31 div1, immI_31 div2) %{
  match(Set dst (URShiftI (RShiftI src1 div1) div2));
  ins_cost(INSN_COST);
  format %{ "lsrw $dst, $src1, $div1" %}
  ins_encode %{
    __ lsrw(as_Register($dst$$reg), as_Register($src1$$reg), 31);
  %}
  ins_pipe(ialu_reg_shift);
%}

// Rounding step of a divide-by-power-of-2: src + (src >>> 31), folded into
// one add with a shifted operand.
// NOTE(review): pipe class is ialu_reg although a shifted-register add is
// emitted — confirm whether ialu_reg_shift was intended.
instruct div2Round(iRegINoSp dst, iRegIorL2I src, immI_31 div1, immI_31 div2) %{
  match(Set dst (AddI src (URShiftI (RShiftI src div1) div2)));
  ins_cost(INSN_COST);
  format %{ "addw $dst, $src, LSR $div1" %}

  ins_encode %{
    __ addw(as_Register($dst$$reg),
              as_Register($src$$reg),
              as_Register($src$$reg),
              Assembler::LSR, 31);
  %}
  ins_pipe(ialu_reg);
%}
9544 
// Long Divide

instruct divL(iRegLNoSp dst, iRegL src1, iRegL src2) %{
  match(Set dst (DivL src1 src2));

  ins_cost(INSN_COST * 35);
  format %{ "sdiv   $dst, $src1, $src2" %}

  ins_encode(aarch64_enc_div(dst, src1, src2));
  ins_pipe(ldiv_reg_reg);
%}

// (x >> 63) >>> 63 extracts the sign bit and is equivalent to x >>> 63,
// so a single logical shift suffices.
instruct signExtractL(iRegLNoSp dst, iRegL src1, immL_63 div1, immL_63 div2) %{
  match(Set dst (URShiftL (RShiftL src1 div1) div2));
  ins_cost(INSN_COST);
  format %{ "lsr $dst, $src1, $div1" %}
  ins_encode %{
    __ lsr(as_Register($dst$$reg), as_Register($src1$$reg), 63);
  %}
  ins_pipe(ialu_reg_shift);
%}
9566 
// Rounding step of a long divide-by-power-of-2: src + (src >>> 63), folded
// into one add with a shifted operand.
// Fix: the format omitted the shift annotation ("$div1" only); it now
// prints "LSR $div1", consistent with the int variant div2Round.
instruct div2RoundL(iRegLNoSp dst, iRegL src, immL_63 div1, immL_63 div2) %{
  match(Set dst (AddL src (URShiftL (RShiftL src div1) div2)));
  ins_cost(INSN_COST);
  format %{ "add $dst, $src, LSR $div1" %}

  ins_encode %{
    __ add(as_Register($dst$$reg),
              as_Register($src$$reg),
              as_Register($src$$reg),
              Assembler::LSR, 63);
  %}
  ins_pipe(ialu_reg);
%}
9580 
9581 // Integer Remainder
9582 
// 32-bit remainder: sdivw into rscratch1 then msubw reconstructs
// src1 - (src1 / src2) * src2.
// Fix: the second format line was malformed ("msubw($dst, ..." with an
// unbalanced parenthesis); rewritten as normal assembly syntax.
instruct modI(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2) %{
  match(Set dst (ModI src1 src2));

  ins_cost(INSN_COST * 22);
  format %{ "sdivw  rscratch1, $src1, $src2\n\t"
            "msubw  $dst, rscratch1, $src2, $src1" %}

  ins_encode(aarch64_enc_modw(dst, src1, src2));
  ins_pipe(idiv_reg_reg);
%}
9593 
9594 // Long Remainder
9595 
// 64-bit remainder: sdiv into rscratch1 then msub reconstructs
// src1 - (src1 / src2) * src2.
// Fix: the second format line was malformed ("msub($dst, ..." with an
// unbalanced parenthesis) and the first line used "\n" instead of "\n\t";
// rewritten to match the corrected modI format.
instruct modL(iRegLNoSp dst, iRegL src1, iRegL src2) %{
  match(Set dst (ModL src1 src2));

  ins_cost(INSN_COST * 38);
  format %{ "sdiv   rscratch1, $src1, $src2\n\t"
            "msub   $dst, rscratch1, $src2, $src1" %}

  ins_encode(aarch64_enc_mod(dst, src1, src2));
  ins_pipe(ldiv_reg_reg);
%}
9606 
// Integer Shifts

// Shift Left Register
instruct lShiftI_reg_reg(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2) %{
  match(Set dst (LShiftI src1 src2));

  ins_cost(INSN_COST * 2);
  format %{ "lslvw  $dst, $src1, $src2" %}

  ins_encode %{
    __ lslvw(as_Register($dst$$reg),
             as_Register($src1$$reg),
             as_Register($src2$$reg));
  %}

  ins_pipe(ialu_reg_reg_vshift);
%}

// Shift Left Immediate
// Shift amount is masked to 0..31, matching Java's int-shift semantics.
instruct lShiftI_reg_imm(iRegINoSp dst, iRegIorL2I src1, immI src2) %{
  match(Set dst (LShiftI src1 src2));

  ins_cost(INSN_COST);
  format %{ "lslw $dst, $src1, ($src2 & 0x1f)" %}

  ins_encode %{
    __ lslw(as_Register($dst$$reg),
            as_Register($src1$$reg),
            $src2$$constant & 0x1f);
  %}

  ins_pipe(ialu_reg_shift);
%}

// Shift Right Logical Register
instruct urShiftI_reg_reg(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2) %{
  match(Set dst (URShiftI src1 src2));

  ins_cost(INSN_COST * 2);
  format %{ "lsrvw  $dst, $src1, $src2" %}

  ins_encode %{
    __ lsrvw(as_Register($dst$$reg),
             as_Register($src1$$reg),
             as_Register($src2$$reg));
  %}

  ins_pipe(ialu_reg_reg_vshift);
%}

// Shift Right Logical Immediate
// Shift amount is masked to 0..31, matching Java's int-shift semantics.
instruct urShiftI_reg_imm(iRegINoSp dst, iRegIorL2I src1, immI src2) %{
  match(Set dst (URShiftI src1 src2));

  ins_cost(INSN_COST);
  format %{ "lsrw $dst, $src1, ($src2 & 0x1f)" %}

  ins_encode %{
    __ lsrw(as_Register($dst$$reg),
            as_Register($src1$$reg),
            $src2$$constant & 0x1f);
  %}

  ins_pipe(ialu_reg_shift);
%}

// Shift Right Arithmetic Register
instruct rShiftI_reg_reg(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2) %{
  match(Set dst (RShiftI src1 src2));

  ins_cost(INSN_COST * 2);
  format %{ "asrvw  $dst, $src1, $src2" %}

  ins_encode %{
    __ asrvw(as_Register($dst$$reg),
             as_Register($src1$$reg),
             as_Register($src2$$reg));
  %}

  ins_pipe(ialu_reg_reg_vshift);
%}

// Shift Right Arithmetic Immediate
// Shift amount is masked to 0..31, matching Java's int-shift semantics.
instruct rShiftI_reg_imm(iRegINoSp dst, iRegIorL2I src1, immI src2) %{
  match(Set dst (RShiftI src1 src2));

  ins_cost(INSN_COST);
  format %{ "asrw $dst, $src1, ($src2 & 0x1f)" %}

  ins_encode %{
    __ asrw(as_Register($dst$$reg),
            as_Register($src1$$reg),
            $src2$$constant & 0x1f);
  %}

  ins_pipe(ialu_reg_shift);
%}

// Combined Int Mask and Right Shift (using UBFM)
// TODO
9707 
9708 // Long Shifts
9709 
9710 // Shift Left Register
9711 instruct lShiftL_reg_reg(iRegLNoSp dst, iRegL src1, iRegIorL2I src2) %{
9712   match(Set dst (LShiftL src1 src2));
9713 
9714   ins_cost(INSN_COST * 2);
9715   format %{ "lslv  $dst, $src1, $src2" %}
9716 
9717   ins_encode %{
9718     __ lslv(as_Register($dst$$reg),
9719             as_Register($src1$$reg),
9720             as_Register($src2$$reg));
9721   %}
9722 
9723   ins_pipe(ialu_reg_reg_vshift);
9724 %}
9725 
9726 // Shift Left Immediate
9727 instruct lShiftL_reg_imm(iRegLNoSp dst, iRegL src1, immI src2) %{
9728   match(Set dst (LShiftL src1 src2));
9729 
9730   ins_cost(INSN_COST);
9731   format %{ "lsl $dst, $src1, ($src2 & 0x3f)" %}
9732 
9733   ins_encode %{
9734     __ lsl(as_Register($dst$$reg),
9735             as_Register($src1$$reg),
9736             $src2$$constant & 0x3f);
9737   %}
9738 
9739   ins_pipe(ialu_reg_shift);
9740 %}
9741 
9742 // Shift Right Logical Register
9743 instruct urShiftL_reg_reg(iRegLNoSp dst, iRegL src1, iRegIorL2I src2) %{
9744   match(Set dst (URShiftL src1 src2));
9745 
9746   ins_cost(INSN_COST * 2);
9747   format %{ "lsrv  $dst, $src1, $src2" %}
9748 
9749   ins_encode %{
9750     __ lsrv(as_Register($dst$$reg),
9751             as_Register($src1$$reg),
9752             as_Register($src2$$reg));
9753   %}
9754 
9755   ins_pipe(ialu_reg_reg_vshift);
9756 %}
9757 
9758 // Shift Right Logical Immediate
9759 instruct urShiftL_reg_imm(iRegLNoSp dst, iRegL src1, immI src2) %{
9760   match(Set dst (URShiftL src1 src2));
9761 
9762   ins_cost(INSN_COST);
9763   format %{ "lsr $dst, $src1, ($src2 & 0x3f)" %}
9764 
9765   ins_encode %{
9766     __ lsr(as_Register($dst$$reg),
9767            as_Register($src1$$reg),
9768            $src2$$constant & 0x3f);
9769   %}
9770 
9771   ins_pipe(ialu_reg_shift);
9772 %}
9773 
9774 // A special-case pattern for card table stores.
9775 instruct urShiftP_reg_imm(iRegLNoSp dst, iRegP src1, immI src2) %{
9776   match(Set dst (URShiftL (CastP2X src1) src2));
9777 
9778   ins_cost(INSN_COST);
9779   format %{ "lsr $dst, p2x($src1), ($src2 & 0x3f)" %}
9780 
9781   ins_encode %{
9782     __ lsr(as_Register($dst$$reg),
9783            as_Register($src1$$reg),
9784            $src2$$constant & 0x3f);
9785   %}
9786 
9787   ins_pipe(ialu_reg_shift);
9788 %}
9789 
9790 // Shift Right Arithmetic Register
// Long arithmetic (sign-propagating) shift right by a variable amount:
// dst = src1 >> src2, emitted as ASRV with the count in a register.
instruct rShiftL_reg_reg(iRegLNoSp dst, iRegL src1, iRegIorL2I src2) %{
  match(Set dst (RShiftL src1 src2));

  ins_cost(INSN_COST * 2);
  format %{ "asrv  $dst, $src1, $src2" %}

  ins_encode %{
    __ asrv(as_Register($dst$$reg),
            as_Register($src1$$reg),
            as_Register($src2$$reg));
  %}

  ins_pipe(ialu_reg_reg_vshift);
%}
9805 
9806 // Shift Right Arithmetic Immediate
// Long arithmetic shift right by a constant: dst = src1 >> (src2 & 0x3f).
// The immediate is masked to 0..63 per Java's 64-bit shift-count rule.
instruct rShiftL_reg_imm(iRegLNoSp dst, iRegL src1, immI src2) %{
  match(Set dst (RShiftL src1 src2));

  ins_cost(INSN_COST);
  format %{ "asr $dst, $src1, ($src2 & 0x3f)" %}

  ins_encode %{
    __ asr(as_Register($dst$$reg),
           as_Register($src1$$reg),
           $src2$$constant & 0x3f);
  %}

  ins_pipe(ialu_reg_shift);
%}
9821 
9822 // BEGIN This section of the file is automatically generated. Do not edit --------------
9823 
// Long bitwise NOT. The ideal graph has no Not node, so ~x appears as
// (XorL x -1); the m1 operand exists only to anchor that -1 constant in
// the match pattern. Encoded as EON with zr: dst = ~(src1 ^ 0) = ~src1.
instruct regL_not_reg(iRegLNoSp dst,
                         iRegL src1, immL_M1 m1,
                         rFlagsReg cr) %{
  match(Set dst (XorL src1 m1));
  ins_cost(INSN_COST);
  format %{ "eon  $dst, $src1, zr" %}

  ins_encode %{
    __ eon(as_Register($dst$$reg),
              as_Register($src1$$reg),
              zr,
              Assembler::LSL, 0);
  %}

  ins_pipe(ialu_reg);
%}
// Int bitwise NOT: ~x is matched as (XorI x -1) and encoded as EONW with
// zr (32-bit variant of the long rule above this one in the file).
instruct regI_not_reg(iRegINoSp dst,
                         iRegIorL2I src1, immI_M1 m1,
                         rFlagsReg cr) %{
  match(Set dst (XorI src1 m1));
  ins_cost(INSN_COST);
  format %{ "eonw  $dst, $src1, zr" %}

  ins_encode %{
    __ eonw(as_Register($dst$$reg),
              as_Register($src1$$reg),
              zr,
              Assembler::LSL, 0);
  %}

  ins_pipe(ialu_reg);
%}
9856 
// dst = src1 & ~src2 (int). The NOT is matched as (XorI src2 -1) and the
// whole expression folds into a single BICW instead of EONW + ANDW.
instruct AndI_reg_not_reg(iRegINoSp dst,
                         iRegIorL2I src1, iRegIorL2I src2, immI_M1 m1,
                         rFlagsReg cr) %{
  match(Set dst (AndI src1 (XorI src2 m1)));
  ins_cost(INSN_COST);
  format %{ "bicw  $dst, $src1, $src2" %}

  ins_encode %{
    __ bicw(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSL, 0);
  %}

  ins_pipe(ialu_reg_reg);
%}
9873 
// dst = src1 & ~src2 (long): (XorL src2 -1) folded into a single BIC.
instruct AndL_reg_not_reg(iRegLNoSp dst,
                         iRegL src1, iRegL src2, immL_M1 m1,
                         rFlagsReg cr) %{
  match(Set dst (AndL src1 (XorL src2 m1)));
  ins_cost(INSN_COST);
  format %{ "bic  $dst, $src1, $src2" %}

  ins_encode %{
    __ bic(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSL, 0);
  %}

  ins_pipe(ialu_reg_reg);
%}
9890 
// dst = src1 | ~src2 (int): (XorI src2 -1) folded into a single ORNW.
instruct OrI_reg_not_reg(iRegINoSp dst,
                         iRegIorL2I src1, iRegIorL2I src2, immI_M1 m1,
                         rFlagsReg cr) %{
  match(Set dst (OrI src1 (XorI src2 m1)));
  ins_cost(INSN_COST);
  format %{ "ornw  $dst, $src1, $src2" %}

  ins_encode %{
    __ ornw(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSL, 0);
  %}

  ins_pipe(ialu_reg_reg);
%}
9907 
// dst = src1 | ~src2 (long): (XorL src2 -1) folded into a single ORN.
instruct OrL_reg_not_reg(iRegLNoSp dst,
                         iRegL src1, iRegL src2, immL_M1 m1,
                         rFlagsReg cr) %{
  match(Set dst (OrL src1 (XorL src2 m1)));
  ins_cost(INSN_COST);
  format %{ "orn  $dst, $src1, $src2" %}

  ins_encode %{
    __ orn(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSL, 0);
  %}

  ins_pipe(ialu_reg_reg);
%}
9924 
// dst = ~(src1 ^ src2) (int): matched as (XorI -1 (XorI src2 src1)) and
// folded into a single EONW.
instruct XorI_reg_not_reg(iRegINoSp dst,
                         iRegIorL2I src1, iRegIorL2I src2, immI_M1 m1,
                         rFlagsReg cr) %{
  match(Set dst (XorI m1 (XorI src2 src1)));
  ins_cost(INSN_COST);
  format %{ "eonw  $dst, $src1, $src2" %}

  ins_encode %{
    __ eonw(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSL, 0);
  %}

  ins_pipe(ialu_reg_reg);
%}
9941 
// dst = ~(src1 ^ src2) (long): matched as (XorL -1 (XorL src2 src1)) and
// folded into a single EON.
instruct XorL_reg_not_reg(iRegLNoSp dst,
                         iRegL src1, iRegL src2, immL_M1 m1,
                         rFlagsReg cr) %{
  match(Set dst (XorL m1 (XorL src2 src1)));
  ins_cost(INSN_COST);
  format %{ "eon  $dst, $src1, $src2" %}

  ins_encode %{
    __ eon(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSL, 0);
  %}

  ins_pipe(ialu_reg_reg);
%}
9958 
// dst = src1 & ~(src2 >>> src3) (int): BICW with an LSR-shifted second
// operand; shift amount masked to 0..31 (32-bit shift-count rule).
instruct AndI_reg_URShift_not_reg(iRegINoSp dst,
                         iRegIorL2I src1, iRegIorL2I src2,
                         immI src3, immI_M1 src4, rFlagsReg cr) %{
  match(Set dst (AndI src1 (XorI(URShiftI src2 src3) src4)));
  ins_cost(1.9 * INSN_COST);
  format %{ "bicw  $dst, $src1, $src2, LSR $src3" %}

  ins_encode %{
    __ bicw(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSR,
              $src3$$constant & 0x1f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
9976 
// dst = src1 & ~(src2 >>> src3) (long): BIC with LSR-shifted operand,
// shift amount masked to 0..63.
instruct AndL_reg_URShift_not_reg(iRegLNoSp dst,
                         iRegL src1, iRegL src2,
                         immI src3, immL_M1 src4, rFlagsReg cr) %{
  match(Set dst (AndL src1 (XorL(URShiftL src2 src3) src4)));
  ins_cost(1.9 * INSN_COST);
  format %{ "bic  $dst, $src1, $src2, LSR $src3" %}

  ins_encode %{
    __ bic(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSR,
              $src3$$constant & 0x3f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
9994 
// dst = src1 & ~(src2 >> src3) (int): BICW with ASR-shifted operand.
instruct AndI_reg_RShift_not_reg(iRegINoSp dst,
                         iRegIorL2I src1, iRegIorL2I src2,
                         immI src3, immI_M1 src4, rFlagsReg cr) %{
  match(Set dst (AndI src1 (XorI(RShiftI src2 src3) src4)));
  ins_cost(1.9 * INSN_COST);
  format %{ "bicw  $dst, $src1, $src2, ASR $src3" %}

  ins_encode %{
    __ bicw(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::ASR,
              $src3$$constant & 0x1f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
10012 
// dst = src1 & ~(src2 >> src3) (long): BIC with ASR-shifted operand.
instruct AndL_reg_RShift_not_reg(iRegLNoSp dst,
                         iRegL src1, iRegL src2,
                         immI src3, immL_M1 src4, rFlagsReg cr) %{
  match(Set dst (AndL src1 (XorL(RShiftL src2 src3) src4)));
  ins_cost(1.9 * INSN_COST);
  format %{ "bic  $dst, $src1, $src2, ASR $src3" %}

  ins_encode %{
    __ bic(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::ASR,
              $src3$$constant & 0x3f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
10030 
// dst = src1 & ~(src2 << src3) (int): BICW with LSL-shifted operand.
instruct AndI_reg_LShift_not_reg(iRegINoSp dst,
                         iRegIorL2I src1, iRegIorL2I src2,
                         immI src3, immI_M1 src4, rFlagsReg cr) %{
  match(Set dst (AndI src1 (XorI(LShiftI src2 src3) src4)));
  ins_cost(1.9 * INSN_COST);
  format %{ "bicw  $dst, $src1, $src2, LSL $src3" %}

  ins_encode %{
    __ bicw(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSL,
              $src3$$constant & 0x1f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
10048 
// dst = src1 & ~(src2 << src3) (long): BIC with LSL-shifted operand.
instruct AndL_reg_LShift_not_reg(iRegLNoSp dst,
                         iRegL src1, iRegL src2,
                         immI src3, immL_M1 src4, rFlagsReg cr) %{
  match(Set dst (AndL src1 (XorL(LShiftL src2 src3) src4)));
  ins_cost(1.9 * INSN_COST);
  format %{ "bic  $dst, $src1, $src2, LSL $src3" %}

  ins_encode %{
    __ bic(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSL,
              $src3$$constant & 0x3f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
10066 
// dst = ~((src2 >>> src3) ^ src1) (int): the outer XOR with -1 (src4) and
// the inner XOR fold into a single EONW with an LSR-shifted operand.
instruct XorI_reg_URShift_not_reg(iRegINoSp dst,
                         iRegIorL2I src1, iRegIorL2I src2,
                         immI src3, immI_M1 src4, rFlagsReg cr) %{
  match(Set dst (XorI src4 (XorI(URShiftI src2 src3) src1)));
  ins_cost(1.9 * INSN_COST);
  format %{ "eonw  $dst, $src1, $src2, LSR $src3" %}

  ins_encode %{
    __ eonw(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSR,
              $src3$$constant & 0x1f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
10084 
// dst = ~((src2 >>> src3) ^ src1) (long): folded into EON with LSR-shifted
// operand; shift amount masked to 0..63.
instruct XorL_reg_URShift_not_reg(iRegLNoSp dst,
                         iRegL src1, iRegL src2,
                         immI src3, immL_M1 src4, rFlagsReg cr) %{
  match(Set dst (XorL src4 (XorL(URShiftL src2 src3) src1)));
  ins_cost(1.9 * INSN_COST);
  format %{ "eon  $dst, $src1, $src2, LSR $src3" %}

  ins_encode %{
    __ eon(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSR,
              $src3$$constant & 0x3f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
10102 
// dst = ~((src2 >> src3) ^ src1) (int): EONW with ASR-shifted operand.
instruct XorI_reg_RShift_not_reg(iRegINoSp dst,
                         iRegIorL2I src1, iRegIorL2I src2,
                         immI src3, immI_M1 src4, rFlagsReg cr) %{
  match(Set dst (XorI src4 (XorI(RShiftI src2 src3) src1)));
  ins_cost(1.9 * INSN_COST);
  format %{ "eonw  $dst, $src1, $src2, ASR $src3" %}

  ins_encode %{
    __ eonw(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::ASR,
              $src3$$constant & 0x1f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
10120 
// dst = ~((src2 >> src3) ^ src1) (long): EON with ASR-shifted operand.
instruct XorL_reg_RShift_not_reg(iRegLNoSp dst,
                         iRegL src1, iRegL src2,
                         immI src3, immL_M1 src4, rFlagsReg cr) %{
  match(Set dst (XorL src4 (XorL(RShiftL src2 src3) src1)));
  ins_cost(1.9 * INSN_COST);
  format %{ "eon  $dst, $src1, $src2, ASR $src3" %}

  ins_encode %{
    __ eon(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::ASR,
              $src3$$constant & 0x3f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
10138 
// dst = ~((src2 << src3) ^ src1) (int): EONW with LSL-shifted operand.
instruct XorI_reg_LShift_not_reg(iRegINoSp dst,
                         iRegIorL2I src1, iRegIorL2I src2,
                         immI src3, immI_M1 src4, rFlagsReg cr) %{
  match(Set dst (XorI src4 (XorI(LShiftI src2 src3) src1)));
  ins_cost(1.9 * INSN_COST);
  format %{ "eonw  $dst, $src1, $src2, LSL $src3" %}

  ins_encode %{
    __ eonw(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSL,
              $src3$$constant & 0x1f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
10156 
// dst = ~((src2 << src3) ^ src1) (long): EON with LSL-shifted operand.
instruct XorL_reg_LShift_not_reg(iRegLNoSp dst,
                         iRegL src1, iRegL src2,
                         immI src3, immL_M1 src4, rFlagsReg cr) %{
  match(Set dst (XorL src4 (XorL(LShiftL src2 src3) src1)));
  ins_cost(1.9 * INSN_COST);
  format %{ "eon  $dst, $src1, $src2, LSL $src3" %}

  ins_encode %{
    __ eon(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSL,
              $src3$$constant & 0x3f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
10174 
// dst = src1 | ~(src2 >>> src3) (int): ORNW with LSR-shifted operand.
instruct OrI_reg_URShift_not_reg(iRegINoSp dst,
                         iRegIorL2I src1, iRegIorL2I src2,
                         immI src3, immI_M1 src4, rFlagsReg cr) %{
  match(Set dst (OrI src1 (XorI(URShiftI src2 src3) src4)));
  ins_cost(1.9 * INSN_COST);
  format %{ "ornw  $dst, $src1, $src2, LSR $src3" %}

  ins_encode %{
    __ ornw(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSR,
              $src3$$constant & 0x1f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
10192 
// dst = src1 | ~(src2 >>> src3) (long): ORN with LSR-shifted operand.
instruct OrL_reg_URShift_not_reg(iRegLNoSp dst,
                         iRegL src1, iRegL src2,
                         immI src3, immL_M1 src4, rFlagsReg cr) %{
  match(Set dst (OrL src1 (XorL(URShiftL src2 src3) src4)));
  ins_cost(1.9 * INSN_COST);
  format %{ "orn  $dst, $src1, $src2, LSR $src3" %}

  ins_encode %{
    __ orn(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSR,
              $src3$$constant & 0x3f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
10210 
// dst = src1 | ~(src2 >> src3) (int): ORNW with ASR-shifted operand.
instruct OrI_reg_RShift_not_reg(iRegINoSp dst,
                         iRegIorL2I src1, iRegIorL2I src2,
                         immI src3, immI_M1 src4, rFlagsReg cr) %{
  match(Set dst (OrI src1 (XorI(RShiftI src2 src3) src4)));
  ins_cost(1.9 * INSN_COST);
  format %{ "ornw  $dst, $src1, $src2, ASR $src3" %}

  ins_encode %{
    __ ornw(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::ASR,
              $src3$$constant & 0x1f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
10228 
// dst = src1 | ~(src2 >> src3) (long): ORN with ASR-shifted operand.
instruct OrL_reg_RShift_not_reg(iRegLNoSp dst,
                         iRegL src1, iRegL src2,
                         immI src3, immL_M1 src4, rFlagsReg cr) %{
  match(Set dst (OrL src1 (XorL(RShiftL src2 src3) src4)));
  ins_cost(1.9 * INSN_COST);
  format %{ "orn  $dst, $src1, $src2, ASR $src3" %}

  ins_encode %{
    __ orn(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::ASR,
              $src3$$constant & 0x3f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
10246 
// dst = src1 | ~(src2 << src3) (int): ORNW with LSL-shifted operand.
instruct OrI_reg_LShift_not_reg(iRegINoSp dst,
                         iRegIorL2I src1, iRegIorL2I src2,
                         immI src3, immI_M1 src4, rFlagsReg cr) %{
  match(Set dst (OrI src1 (XorI(LShiftI src2 src3) src4)));
  ins_cost(1.9 * INSN_COST);
  format %{ "ornw  $dst, $src1, $src2, LSL $src3" %}

  ins_encode %{
    __ ornw(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSL,
              $src3$$constant & 0x1f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
10264 
// dst = src1 | ~(src2 << src3) (long): ORN with LSL-shifted operand.
instruct OrL_reg_LShift_not_reg(iRegLNoSp dst,
                         iRegL src1, iRegL src2,
                         immI src3, immL_M1 src4, rFlagsReg cr) %{
  match(Set dst (OrL src1 (XorL(LShiftL src2 src3) src4)));
  ins_cost(1.9 * INSN_COST);
  format %{ "orn  $dst, $src1, $src2, LSL $src3" %}

  ins_encode %{
    __ orn(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSL,
              $src3$$constant & 0x3f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
10282 
// dst = src1 & (src2 >>> src3) (int): the shift folds into ANDW's
// shifted-register operand form, saving a separate shift instruction.
instruct AndI_reg_URShift_reg(iRegINoSp dst,
                         iRegIorL2I src1, iRegIorL2I src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (AndI src1 (URShiftI src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "andw  $dst, $src1, $src2, LSR $src3" %}

  ins_encode %{
    __ andw(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSR,
              $src3$$constant & 0x1f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
10301 
// dst = src1 & (src2 >>> src3) (long). "andr" is the HotSpot assembler's
// name for AND (to avoid clashing with the C++ "and" keyword).
instruct AndL_reg_URShift_reg(iRegLNoSp dst,
                         iRegL src1, iRegL src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (AndL src1 (URShiftL src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "andr  $dst, $src1, $src2, LSR $src3" %}

  ins_encode %{
    __ andr(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSR,
              $src3$$constant & 0x3f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
10320 
// dst = src1 & (src2 >> src3) (int): ANDW with ASR-shifted operand.
instruct AndI_reg_RShift_reg(iRegINoSp dst,
                         iRegIorL2I src1, iRegIorL2I src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (AndI src1 (RShiftI src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "andw  $dst, $src1, $src2, ASR $src3" %}

  ins_encode %{
    __ andw(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::ASR,
              $src3$$constant & 0x1f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
10339 
// dst = src1 & (src2 >> src3) (long): AND with ASR-shifted operand.
instruct AndL_reg_RShift_reg(iRegLNoSp dst,
                         iRegL src1, iRegL src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (AndL src1 (RShiftL src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "andr  $dst, $src1, $src2, ASR $src3" %}

  ins_encode %{
    __ andr(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::ASR,
              $src3$$constant & 0x3f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
10358 
// dst = src1 & (src2 << src3) (int): ANDW with LSL-shifted operand.
instruct AndI_reg_LShift_reg(iRegINoSp dst,
                         iRegIorL2I src1, iRegIorL2I src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (AndI src1 (LShiftI src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "andw  $dst, $src1, $src2, LSL $src3" %}

  ins_encode %{
    __ andw(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSL,
              $src3$$constant & 0x1f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
10377 
// dst = src1 & (src2 << src3) (long): AND with LSL-shifted operand.
instruct AndL_reg_LShift_reg(iRegLNoSp dst,
                         iRegL src1, iRegL src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (AndL src1 (LShiftL src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "andr  $dst, $src1, $src2, LSL $src3" %}

  ins_encode %{
    __ andr(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSL,
              $src3$$constant & 0x3f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
10396 
// dst = src1 ^ (src2 >>> src3) (int): EORW with LSR-shifted operand.
instruct XorI_reg_URShift_reg(iRegINoSp dst,
                         iRegIorL2I src1, iRegIorL2I src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (XorI src1 (URShiftI src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "eorw  $dst, $src1, $src2, LSR $src3" %}

  ins_encode %{
    __ eorw(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSR,
              $src3$$constant & 0x1f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
10415 
// dst = src1 ^ (src2 >>> src3) (long): EOR with LSR-shifted operand.
instruct XorL_reg_URShift_reg(iRegLNoSp dst,
                         iRegL src1, iRegL src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (XorL src1 (URShiftL src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "eor  $dst, $src1, $src2, LSR $src3" %}

  ins_encode %{
    __ eor(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSR,
              $src3$$constant & 0x3f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
10434 
// dst = src1 ^ (src2 >> src3) (int): EORW with ASR-shifted operand.
instruct XorI_reg_RShift_reg(iRegINoSp dst,
                         iRegIorL2I src1, iRegIorL2I src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (XorI src1 (RShiftI src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "eorw  $dst, $src1, $src2, ASR $src3" %}

  ins_encode %{
    __ eorw(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::ASR,
              $src3$$constant & 0x1f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
10453 
// dst = src1 ^ (src2 >> src3) (long): EOR with ASR-shifted operand.
instruct XorL_reg_RShift_reg(iRegLNoSp dst,
                         iRegL src1, iRegL src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (XorL src1 (RShiftL src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "eor  $dst, $src1, $src2, ASR $src3" %}

  ins_encode %{
    __ eor(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::ASR,
              $src3$$constant & 0x3f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
10472 
// dst = src1 ^ (src2 << src3) (int): EORW with LSL-shifted operand.
instruct XorI_reg_LShift_reg(iRegINoSp dst,
                         iRegIorL2I src1, iRegIorL2I src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (XorI src1 (LShiftI src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "eorw  $dst, $src1, $src2, LSL $src3" %}

  ins_encode %{
    __ eorw(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSL,
              $src3$$constant & 0x1f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
10491 
// dst = src1 ^ (src2 << src3) (long): EOR with LSL-shifted operand.
instruct XorL_reg_LShift_reg(iRegLNoSp dst,
                         iRegL src1, iRegL src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (XorL src1 (LShiftL src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "eor  $dst, $src1, $src2, LSL $src3" %}

  ins_encode %{
    __ eor(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSL,
              $src3$$constant & 0x3f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
10510 
// dst = src1 | (src2 >>> src3) (int): ORRW with LSR-shifted operand.
instruct OrI_reg_URShift_reg(iRegINoSp dst,
                         iRegIorL2I src1, iRegIorL2I src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (OrI src1 (URShiftI src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "orrw  $dst, $src1, $src2, LSR $src3" %}

  ins_encode %{
    __ orrw(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSR,
              $src3$$constant & 0x1f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
10529 
// dst = src1 | (src2 >>> src3) (long): ORR with LSR-shifted operand.
instruct OrL_reg_URShift_reg(iRegLNoSp dst,
                         iRegL src1, iRegL src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (OrL src1 (URShiftL src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "orr  $dst, $src1, $src2, LSR $src3" %}

  ins_encode %{
    __ orr(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSR,
              $src3$$constant & 0x3f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
10548 
// dst = src1 | (src2 >> src3) (int): ORRW with ASR-shifted operand.
instruct OrI_reg_RShift_reg(iRegINoSp dst,
                         iRegIorL2I src1, iRegIorL2I src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (OrI src1 (RShiftI src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "orrw  $dst, $src1, $src2, ASR $src3" %}

  ins_encode %{
    __ orrw(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::ASR,
              $src3$$constant & 0x1f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
10567 
// dst = src1 | (src2 >> src3) (long): ORR with ASR-shifted operand.
instruct OrL_reg_RShift_reg(iRegLNoSp dst,
                         iRegL src1, iRegL src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (OrL src1 (RShiftL src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "orr  $dst, $src1, $src2, ASR $src3" %}

  ins_encode %{
    __ orr(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::ASR,
              $src3$$constant & 0x3f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
10586 
// dst = src1 | (src2 << src3) (int): ORRW with LSL-shifted operand.
instruct OrI_reg_LShift_reg(iRegINoSp dst,
                         iRegIorL2I src1, iRegIorL2I src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (OrI src1 (LShiftI src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "orrw  $dst, $src1, $src2, LSL $src3" %}

  ins_encode %{
    __ orrw(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSL,
              $src3$$constant & 0x1f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
10605 
// dst = src1 | (src2 << src3) (long): ORR with LSL-shifted operand.
instruct OrL_reg_LShift_reg(iRegLNoSp dst,
                         iRegL src1, iRegL src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (OrL src1 (LShiftL src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "orr  $dst, $src1, $src2, LSL $src3" %}

  ins_encode %{
    __ orr(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSL,
              $src3$$constant & 0x3f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
10624 
// dst = src1 + (src2 >>> src3) (int): ADDW with LSR-shifted operand.
instruct AddI_reg_URShift_reg(iRegINoSp dst,
                         iRegIorL2I src1, iRegIorL2I src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (AddI src1 (URShiftI src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "addw  $dst, $src1, $src2, LSR $src3" %}

  ins_encode %{
    __ addw(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSR,
              $src3$$constant & 0x1f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
10643 
// dst = src1 + (src2 >>> src3) (long): ADD with LSR-shifted operand.
instruct AddL_reg_URShift_reg(iRegLNoSp dst,
                         iRegL src1, iRegL src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (AddL src1 (URShiftL src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "add  $dst, $src1, $src2, LSR $src3" %}

  ins_encode %{
    __ add(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSR,
              $src3$$constant & 0x3f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
10662 
// dst = src1 + (src2 >> src3) (int): ADDW with ASR-shifted operand.
instruct AddI_reg_RShift_reg(iRegINoSp dst,
                         iRegIorL2I src1, iRegIorL2I src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (AddI src1 (RShiftI src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "addw  $dst, $src1, $src2, ASR $src3" %}

  ins_encode %{
    __ addw(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::ASR,
              $src3$$constant & 0x1f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
10681 
// dst = src1 + (src2 >> src3) (long): ADD with ASR-shifted operand.
instruct AddL_reg_RShift_reg(iRegLNoSp dst,
                         iRegL src1, iRegL src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (AddL src1 (RShiftL src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "add  $dst, $src1, $src2, ASR $src3" %}

  ins_encode %{
    __ add(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::ASR,
              $src3$$constant & 0x3f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
10700 
// dst = src1 + (src2 << src3) (int): ADDW with LSL-shifted operand.
instruct AddI_reg_LShift_reg(iRegINoSp dst,
                         iRegIorL2I src1, iRegIorL2I src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (AddI src1 (LShiftI src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "addw  $dst, $src1, $src2, LSL $src3" %}

  ins_encode %{
    __ addw(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSL,
              $src3$$constant & 0x1f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
10719 
// dst = src1 + (src2 << src3) (long): ADD with LSL-shifted operand.
instruct AddL_reg_LShift_reg(iRegLNoSp dst,
                         iRegL src1, iRegL src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (AddL src1 (LShiftL src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "add  $dst, $src1, $src2, LSL $src3" %}

  ins_encode %{
    __ add(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSL,
              $src3$$constant & 0x3f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
10738 
// dst = src1 - (src2 >>> src3) (int): SUBW with LSR-shifted operand.
// Only the second (shifted) operand may be folded; subtraction is not
// commutative, so src1 stays the plain register operand.
instruct SubI_reg_URShift_reg(iRegINoSp dst,
                         iRegIorL2I src1, iRegIorL2I src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (SubI src1 (URShiftI src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "subw  $dst, $src1, $src2, LSR $src3" %}

  ins_encode %{
    __ subw(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSR,
              $src3$$constant & 0x1f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
10757 
10758 instruct SubL_reg_URShift_reg(iRegLNoSp dst,
10759                          iRegL src1, iRegL src2,
10760                          immI src3, rFlagsReg cr) %{
10761   match(Set dst (SubL src1 (URShiftL src2 src3)));
10762 
10763   ins_cost(1.9 * INSN_COST);
10764   format %{ "sub  $dst, $src1, $src2, LSR $src3" %}
10765 
10766   ins_encode %{
10767     __ sub(as_Register($dst$$reg),
10768               as_Register($src1$$reg),
10769               as_Register($src2$$reg),
10770               Assembler::LSR,
10771               $src3$$constant & 0x3f);
10772   %}
10773 
10774   ins_pipe(ialu_reg_reg_shift);
10775 %}
10776 
10777 instruct SubI_reg_RShift_reg(iRegINoSp dst,
10778                          iRegIorL2I src1, iRegIorL2I src2,
10779                          immI src3, rFlagsReg cr) %{
10780   match(Set dst (SubI src1 (RShiftI src2 src3)));
10781 
10782   ins_cost(1.9 * INSN_COST);
10783   format %{ "subw  $dst, $src1, $src2, ASR $src3" %}
10784 
10785   ins_encode %{
10786     __ subw(as_Register($dst$$reg),
10787               as_Register($src1$$reg),
10788               as_Register($src2$$reg),
10789               Assembler::ASR,
10790               $src3$$constant & 0x1f);
10791   %}
10792 
10793   ins_pipe(ialu_reg_reg_shift);
10794 %}
10795 
10796 instruct SubL_reg_RShift_reg(iRegLNoSp dst,
10797                          iRegL src1, iRegL src2,
10798                          immI src3, rFlagsReg cr) %{
10799   match(Set dst (SubL src1 (RShiftL src2 src3)));
10800 
10801   ins_cost(1.9 * INSN_COST);
10802   format %{ "sub  $dst, $src1, $src2, ASR $src3" %}
10803 
10804   ins_encode %{
10805     __ sub(as_Register($dst$$reg),
10806               as_Register($src1$$reg),
10807               as_Register($src2$$reg),
10808               Assembler::ASR,
10809               $src3$$constant & 0x3f);
10810   %}
10811 
10812   ins_pipe(ialu_reg_reg_shift);
10813 %}
10814 
10815 instruct SubI_reg_LShift_reg(iRegINoSp dst,
10816                          iRegIorL2I src1, iRegIorL2I src2,
10817                          immI src3, rFlagsReg cr) %{
10818   match(Set dst (SubI src1 (LShiftI src2 src3)));
10819 
10820   ins_cost(1.9 * INSN_COST);
10821   format %{ "subw  $dst, $src1, $src2, LSL $src3" %}
10822 
10823   ins_encode %{
10824     __ subw(as_Register($dst$$reg),
10825               as_Register($src1$$reg),
10826               as_Register($src2$$reg),
10827               Assembler::LSL,
10828               $src3$$constant & 0x1f);
10829   %}
10830 
10831   ins_pipe(ialu_reg_reg_shift);
10832 %}
10833 
10834 instruct SubL_reg_LShift_reg(iRegLNoSp dst,
10835                          iRegL src1, iRegL src2,
10836                          immI src3, rFlagsReg cr) %{
10837   match(Set dst (SubL src1 (LShiftL src2 src3)));
10838 
10839   ins_cost(1.9 * INSN_COST);
10840   format %{ "sub  $dst, $src1, $src2, LSL $src3" %}
10841 
10842   ins_encode %{
10843     __ sub(as_Register($dst$$reg),
10844               as_Register($src1$$reg),
10845               as_Register($src2$$reg),
10846               Assembler::LSL,
10847               $src3$$constant & 0x3f);
10848   %}
10849 
10850   ins_pipe(ialu_reg_reg_shift);
10851 %}
10852 
10853 
10854 
// Shift Left followed by Shift Right.
// This idiom is used by the compiler for the i2b bytecode etc.
// (src << lshift) >> rshift is folded into a single signed bitfield move.
// The encode block computes the SBFM immediates from the two shift counts:
//   immr (rotate)   = (rshift - lshift) & 63
//   imms (sign bit) = 63 - lshift
instruct sbfmL(iRegLNoSp dst, iRegL src, immI lshift_count, immI rshift_count)
%{
  match(Set dst (RShiftL (LShiftL src lshift_count) rshift_count));
  // Make sure we are not going to exceed what sbfm can do.
  predicate((unsigned int)n->in(2)->get_int() <= 63
            && (unsigned int)n->in(1)->in(2)->get_int() <= 63);

  ins_cost(INSN_COST * 2);
  format %{ "sbfm  $dst, $src, $rshift_count - $lshift_count, #63 - $lshift_count" %}
  ins_encode %{
    int lshift = $lshift_count$$constant, rshift = $rshift_count$$constant;
    int s = 63 - lshift;
    int r = (rshift - lshift) & 63;
    __ sbfm(as_Register($dst$$reg),
            as_Register($src$$reg),
            r, s);
  %}

  ins_pipe(ialu_reg_shift);
%}

// Shift Left followed by Shift Right.
// This idiom is used by the compiler for the i2b bytecode etc.
// 32-bit variant of sbfmL: immr = (rshift - lshift) & 31, imms = 31 - lshift.
instruct sbfmwI(iRegINoSp dst, iRegIorL2I src, immI lshift_count, immI rshift_count)
%{
  match(Set dst (RShiftI (LShiftI src lshift_count) rshift_count));
  // Make sure we are not going to exceed what sbfmw can do.
  predicate((unsigned int)n->in(2)->get_int() <= 31
            && (unsigned int)n->in(1)->in(2)->get_int() <= 31);

  ins_cost(INSN_COST * 2);
  format %{ "sbfmw  $dst, $src, $rshift_count - $lshift_count, #31 - $lshift_count" %}
  ins_encode %{
    int lshift = $lshift_count$$constant, rshift = $rshift_count$$constant;
    int s = 31 - lshift;
    int r = (rshift - lshift) & 31;
    __ sbfmw(as_Register($dst$$reg),
            as_Register($src$$reg),
            r, s);
  %}

  ins_pipe(ialu_reg_shift);
%}

// Shift Left followed by Shift Right.
// This idiom is used by the compiler for the i2b bytecode etc.
// Unsigned counterpart: (src << lshift) >>> rshift becomes a single UBFM
// (zero- rather than sign-extending); immediates computed as for sbfmL.
instruct ubfmL(iRegLNoSp dst, iRegL src, immI lshift_count, immI rshift_count)
%{
  match(Set dst (URShiftL (LShiftL src lshift_count) rshift_count));
  // Make sure we are not going to exceed what ubfm can do.
  predicate((unsigned int)n->in(2)->get_int() <= 63
            && (unsigned int)n->in(1)->in(2)->get_int() <= 63);

  ins_cost(INSN_COST * 2);
  format %{ "ubfm  $dst, $src, $rshift_count - $lshift_count, #63 - $lshift_count" %}
  ins_encode %{
    int lshift = $lshift_count$$constant, rshift = $rshift_count$$constant;
    int s = 63 - lshift;
    int r = (rshift - lshift) & 63;
    __ ubfm(as_Register($dst$$reg),
            as_Register($src$$reg),
            r, s);
  %}

  ins_pipe(ialu_reg_shift);
%}

// Shift Left followed by Shift Right.
// This idiom is used by the compiler for the i2b bytecode etc.
// 32-bit variant of ubfmL.
instruct ubfmwI(iRegINoSp dst, iRegIorL2I src, immI lshift_count, immI rshift_count)
%{
  match(Set dst (URShiftI (LShiftI src lshift_count) rshift_count));
  // Make sure we are not going to exceed what ubfmw can do.
  predicate((unsigned int)n->in(2)->get_int() <= 31
            && (unsigned int)n->in(1)->in(2)->get_int() <= 31);

  ins_cost(INSN_COST * 2);
  format %{ "ubfmw  $dst, $src, $rshift_count - $lshift_count, #31 - $lshift_count" %}
  ins_encode %{
    int lshift = $lshift_count$$constant, rshift = $rshift_count$$constant;
    int s = 31 - lshift;
    int r = (rshift - lshift) & 31;
    __ ubfmw(as_Register($dst$$reg),
            as_Register($src$$reg),
            r, s);
  %}

  ins_pipe(ialu_reg_shift);
%}
10946 // Bitfield extract with shift & mask
10947 
// Unsigned bitfield extract, 32-bit: dst = (src >>> rshift) & mask, where
// mask is 2^width - 1 (guaranteed by immI_bitmask).  The predicate rejects
// combinations where rshift + width would run past bit 31.
instruct ubfxwI(iRegINoSp dst, iRegIorL2I src, immI rshift, immI_bitmask mask)
%{
  match(Set dst (AndI (URShiftI src rshift) mask));
  // Make sure we are not going to exceed what ubfxw can do.
  predicate((exact_log2(n->in(2)->get_int() + 1) + (n->in(1)->in(2)->get_int() & 31)) <= (31 + 1));

  ins_cost(INSN_COST);
  // Fixed: the debug format previously omitted $rshift even though the
  // encoded ubfxw takes both the shift amount and the field width.
  format %{ "ubfxw $dst, $src, $rshift, $mask" %}
  ins_encode %{
    int rshift = $rshift$$constant & 31;
    long mask = $mask$$constant;
    int width = exact_log2(mask+1);
    __ ubfxw(as_Register($dst$$reg),
            as_Register($src$$reg), rshift, width);
  %}
  ins_pipe(ialu_reg_shift);
%}
// Unsigned bitfield extract, 64-bit: dst = (src >>> rshift) & mask, where
// mask is 2^width - 1 (guaranteed by immL_bitmask).  The predicate rejects
// combinations where rshift + width would run past bit 63.
instruct ubfxL(iRegLNoSp dst, iRegL src, immI rshift, immL_bitmask mask)
%{
  match(Set dst (AndL (URShiftL src rshift) mask));
  // Make sure we are not going to exceed what ubfx can do.
  predicate((exact_log2_long(n->in(2)->get_long() + 1) + (n->in(1)->in(2)->get_int() & 63)) <= (63 + 1));

  ins_cost(INSN_COST);
  // Fixed: the debug format previously omitted $rshift even though the
  // encoded ubfx takes both the shift amount and the field width.
  format %{ "ubfx $dst, $src, $rshift, $mask" %}
  ins_encode %{
    int rshift = $rshift$$constant & 63;
    long mask = $mask$$constant;
    int width = exact_log2_long(mask+1);
    __ ubfx(as_Register($dst$$reg),
            as_Register($src$$reg), rshift, width);
  %}
  ins_pipe(ialu_reg_shift);
%}
10982 
10983 // We can use ubfx when extending an And with a mask when we know mask
10984 // is positive.  We know that because immI_bitmask guarantees it.
// We can use ubfx when extending an And with a mask when we know mask
// is positive.  We know that because immI_bitmask guarantees it.
// Folds (long)((src >>> rshift) & mask): the 64-bit ubfx zero-fills the
// upper bits, so no separate i2l conversion instruction is needed.
instruct ubfxIConvI2L(iRegLNoSp dst, iRegIorL2I src, immI rshift, immI_bitmask mask)
%{
  match(Set dst (ConvI2L (AndI (URShiftI src rshift) mask)));
  // Make sure we are not going to exceed what ubfxw can do.
  predicate((exact_log2(n->in(1)->in(2)->get_int() + 1) + (n->in(1)->in(1)->in(2)->get_int() & 31)) <= (31 + 1));

  ins_cost(INSN_COST * 2);
  // Fixed: the debug format previously omitted $rshift even though the
  // encoded ubfx takes both the shift amount and the field width.
  format %{ "ubfx $dst, $src, $rshift, $mask" %}
  ins_encode %{
    int rshift = $rshift$$constant & 31;
    long mask = $mask$$constant;
    int width = exact_log2(mask+1);
    __ ubfx(as_Register($dst$$reg),
            as_Register($src$$reg), rshift, width);
  %}
  ins_pipe(ialu_reg_shift);
%}
11002 
11003 // Rotations
11004 
// 64-bit rotate composed from Or of opposite shifts:
// dst = (src1 << lshift) | (src2 >>> rshift).  The predicate requires
// (lshift + rshift) % 64 == 0, so the pair forms an EXTR extraction.
instruct extrOrL(iRegLNoSp dst, iRegL src1, iRegL src2, immI lshift, immI rshift, rFlagsReg cr)
%{
  match(Set dst (OrL (LShiftL src1 lshift) (URShiftL src2 rshift)));
  predicate(0 == ((n->in(1)->in(2)->get_int() + n->in(2)->in(2)->get_int()) & 63));

  ins_cost(INSN_COST);
  format %{ "extr $dst, $src1, $src2, #$rshift" %}

  ins_encode %{
    __ extr(as_Register($dst$$reg), as_Register($src1$$reg), as_Register($src2$$reg),
            $rshift$$constant & 63);
  %}
  ins_pipe(ialu_reg_reg_extr);
%}
11019 
// 32-bit rotate composed from Or of opposite shifts:
// dst = (src1 << lshift) | (src2 >>> rshift), with
// (lshift + rshift) % 32 == 0 enforced by the predicate.
instruct extrOrI(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2, immI lshift, immI rshift, rFlagsReg cr)
%{
  match(Set dst (OrI (LShiftI src1 lshift) (URShiftI src2 rshift)));
  predicate(0 == ((n->in(1)->in(2)->get_int() + n->in(2)->in(2)->get_int()) & 31));

  ins_cost(INSN_COST);
  // Fixed: the debug format said "extr" but the encode block emits the
  // 32-bit form, extrw.
  format %{ "extrw $dst, $src1, $src2, #$rshift" %}

  ins_encode %{
    __ extrw(as_Register($dst$$reg), as_Register($src1$$reg), as_Register($src2$$reg),
            $rshift$$constant & 31);
  %}
  ins_pipe(ialu_reg_reg_extr);
%}
11034 
// Same as extrOrL but with Add combining the shift halves; when the shift
// counts sum to 64 the shifted parts cannot overlap, so Add == Or and the
// pattern is still a single EXTR.
instruct extrAddL(iRegLNoSp dst, iRegL src1, iRegL src2, immI lshift, immI rshift, rFlagsReg cr)
%{
  match(Set dst (AddL (LShiftL src1 lshift) (URShiftL src2 rshift)));
  predicate(0 == ((n->in(1)->in(2)->get_int() + n->in(2)->in(2)->get_int()) & 63));

  ins_cost(INSN_COST);
  format %{ "extr $dst, $src1, $src2, #$rshift" %}

  ins_encode %{
    __ extr(as_Register($dst$$reg), as_Register($src1$$reg), as_Register($src2$$reg),
            $rshift$$constant & 63);
  %}
  ins_pipe(ialu_reg_reg_extr);
%}
11049 
// Same as extrOrI but with Add combining the shift halves; non-overlapping
// parts make Add equivalent to Or here, so a single EXTRW suffices.
instruct extrAddI(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2, immI lshift, immI rshift, rFlagsReg cr)
%{
  match(Set dst (AddI (LShiftI src1 lshift) (URShiftI src2 rshift)));
  predicate(0 == ((n->in(1)->in(2)->get_int() + n->in(2)->in(2)->get_int()) & 31));

  ins_cost(INSN_COST);
  // Fixed: the debug format said "extr" but the encode block emits the
  // 32-bit form, extrw.
  format %{ "extrw $dst, $src1, $src2, #$rshift" %}

  ins_encode %{
    __ extrw(as_Register($dst$$reg), as_Register($src1$$reg), as_Register($src2$$reg),
            $rshift$$constant & 31);
  %}
  ins_pipe(ialu_reg_reg_extr);
%}
11064 
11065 
// rol expander
// Rotate-left by a variable amount.  AArch64 has no ROL instruction, so the
// expander negates the count (subw from zr) into rscratch1 and issues a
// variable rotate-right: rol(x, n) == ror(x, -n).

instruct rolL_rReg(iRegLNoSp dst, iRegL src, iRegI shift, rFlagsReg cr)
%{
  effect(DEF dst, USE src, USE shift);

  format %{ "rol    $dst, $src, $shift" %}
  ins_cost(INSN_COST * 3);
  ins_encode %{
    __ subw(rscratch1, zr, as_Register($shift$$reg));
    __ rorv(as_Register($dst$$reg), as_Register($src$$reg),
            rscratch1);
    %}
  ins_pipe(ialu_reg_reg_vshift);
%}

// rol expander
// 32-bit variant of rolL_rReg (rorvw instead of rorv).

instruct rolI_rReg(iRegINoSp dst, iRegI src, iRegI shift, rFlagsReg cr)
%{
  effect(DEF dst, USE src, USE shift);

  format %{ "rol    $dst, $src, $shift" %}
  ins_cost(INSN_COST * 3);
  ins_encode %{
    __ subw(rscratch1, zr, as_Register($shift$$reg));
    __ rorvw(as_Register($dst$$reg), as_Register($src$$reg),
            rscratch1);
    %}
  ins_pipe(ialu_reg_reg_vshift);
%}

// Matches the Java rotate-left idiom (x << s) | (x >>> (64 - s)) and
// expands to the rolL_rReg expander above.
instruct rolL_rReg_Var_C_64(iRegLNoSp dst, iRegL src, iRegI shift, immI_64 c_64, rFlagsReg cr)
%{
  match(Set dst (OrL (LShiftL src shift) (URShiftL src (SubI c_64 shift))));

  expand %{
    rolL_rReg(dst, src, shift, cr);
  %}
%}

// Same idiom with (0 - s) instead of (64 - s); congruent mod 64 because the
// variable rotate uses only the low bits of the count.
instruct rolL_rReg_Var_C0(iRegLNoSp dst, iRegL src, iRegI shift, immI0 c0, rFlagsReg cr)
%{
  match(Set dst (OrL (LShiftL src shift) (URShiftL src (SubI c0 shift))));

  expand %{
    rolL_rReg(dst, src, shift, cr);
  %}
%}

// 32-bit rotate-left idiom (x << s) | (x >>> (32 - s)).
instruct rolI_rReg_Var_C_32(iRegINoSp dst, iRegI src, iRegI shift, immI_32 c_32, rFlagsReg cr)
%{
  match(Set dst (OrI (LShiftI src shift) (URShiftI src (SubI c_32 shift))));

  expand %{
    rolI_rReg(dst, src, shift, cr);
  %}
%}

// 32-bit rotate-left idiom with (0 - s); congruent mod 32.
instruct rolI_rReg_Var_C0(iRegINoSp dst, iRegI src, iRegI shift, immI0 c0, rFlagsReg cr)
%{
  match(Set dst (OrI (LShiftI src shift) (URShiftI src (SubI c0 shift))));

  expand %{
    rolI_rReg(dst, src, shift, cr);
  %}
%}
11133 
// ror expander
// Rotate-right by a variable amount maps directly onto the RORV instruction,
// so no count negation is required (hence the lower cost than rol).

instruct rorL_rReg(iRegLNoSp dst, iRegL src, iRegI shift, rFlagsReg cr)
%{
  effect(DEF dst, USE src, USE shift);

  format %{ "ror    $dst, $src, $shift" %}
  ins_cost(INSN_COST);
  ins_encode %{
    __ rorv(as_Register($dst$$reg), as_Register($src$$reg),
            as_Register($shift$$reg));
    %}
  ins_pipe(ialu_reg_reg_vshift);
%}

// ror expander
// 32-bit variant (rorvw).

instruct rorI_rReg(iRegINoSp dst, iRegI src, iRegI shift, rFlagsReg cr)
%{
  effect(DEF dst, USE src, USE shift);

  format %{ "ror    $dst, $src, $shift" %}
  ins_cost(INSN_COST);
  ins_encode %{
    __ rorvw(as_Register($dst$$reg), as_Register($src$$reg),
            as_Register($shift$$reg));
    %}
  ins_pipe(ialu_reg_reg_vshift);
%}

// Matches the Java rotate-right idiom (x >>> s) | (x << (64 - s)) and
// expands to the rorL_rReg expander above.
instruct rorL_rReg_Var_C_64(iRegLNoSp dst, iRegL src, iRegI shift, immI_64 c_64, rFlagsReg cr)
%{
  match(Set dst (OrL (URShiftL src shift) (LShiftL src (SubI c_64 shift))));

  expand %{
    rorL_rReg(dst, src, shift, cr);
  %}
%}

// Same idiom with (0 - s) instead of (64 - s); congruent mod 64.
instruct rorL_rReg_Var_C0(iRegLNoSp dst, iRegL src, iRegI shift, immI0 c0, rFlagsReg cr)
%{
  match(Set dst (OrL (URShiftL src shift) (LShiftL src (SubI c0 shift))));

  expand %{
    rorL_rReg(dst, src, shift, cr);
  %}
%}

// 32-bit rotate-right idiom (x >>> s) | (x << (32 - s)).
instruct rorI_rReg_Var_C_32(iRegINoSp dst, iRegI src, iRegI shift, immI_32 c_32, rFlagsReg cr)
%{
  match(Set dst (OrI (URShiftI src shift) (LShiftI src (SubI c_32 shift))));

  expand %{
    rorI_rReg(dst, src, shift, cr);
  %}
%}

// 32-bit rotate-right idiom with (0 - s); congruent mod 32.
instruct rorI_rReg_Var_C0(iRegINoSp dst, iRegI src, iRegI shift, immI0 c0, rFlagsReg cr)
%{
  match(Set dst (OrI (URShiftI src shift) (LShiftI src (SubI c0 shift))));

  expand %{
    rorI_rReg(dst, src, shift, cr);
  %}
%}
11199 
// Add/subtract (extended)
// These fold a ConvI2L of the second operand into the add/sub itself using
// the AArch64 extended-register form with a sign-extend-word (sxtw) operand
// modifier, saving a separate sxtw instruction.

// dst = src1 + (long)src2
instruct AddExtI(iRegLNoSp dst, iRegL src1, iRegIorL2I src2, rFlagsReg cr)
%{
  match(Set dst (AddL src1 (ConvI2L src2)));
  ins_cost(INSN_COST);
  format %{ "add  $dst, $src1, sxtw $src2" %}

   ins_encode %{
     __ add(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::sxtw);
   %}
  ins_pipe(ialu_reg_reg);
%};
// NOTE(review): the trailing ';' after '%}' above (and below) appears
// redundant; it is tolerated by adlc but could be dropped when this
// generated section is next regenerated.

// dst = src1 - (long)src2
instruct SubExtI(iRegLNoSp dst, iRegL src1, iRegIorL2I src2, rFlagsReg cr)
%{
  match(Set dst (SubL src1 (ConvI2L src2)));
  ins_cost(INSN_COST);
  format %{ "sub  $dst, $src1, sxtw $src2" %}

   ins_encode %{
     __ sub(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::sxtw);
   %}
  ins_pipe(ialu_reg_reg);
%};
11227 
11228 
// ---- Add with extend recognized from a shift pair ----
// (x << k) >> k (arithmetic) is a sign-extension of the low (width - k)
// bits, and (x << k) >>> k is a zero-extension.  Each rule below folds such
// a pair on the second add operand into the extended-register form of ADD
// with the matching sxtb/sxth/sxtw/uxtb modifier.  The immI_16/24/32/48/56
// operand types pin k so the extracted field is exactly 8, 16 or 32 bits.

// dst = src1 + (short)src2  (32-bit add, sign-extend halfword)
instruct AddExtI_sxth(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2, immI_16 lshift, immI_16 rshift, rFlagsReg cr)
%{
  match(Set dst (AddI src1 (RShiftI (LShiftI src2 lshift) rshift)));
  ins_cost(INSN_COST);
  format %{ "add  $dst, $src1, sxth $src2" %}

   ins_encode %{
     __ add(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::sxth);
   %}
  ins_pipe(ialu_reg_reg);
%}

// dst = src1 + (byte)src2  (32-bit add, sign-extend byte)
instruct AddExtI_sxtb(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2, immI_24 lshift, immI_24 rshift, rFlagsReg cr)
%{
  match(Set dst (AddI src1 (RShiftI (LShiftI src2 lshift) rshift)));
  ins_cost(INSN_COST);
  format %{ "add  $dst, $src1, sxtb $src2" %}

   ins_encode %{
     __ add(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::sxtb);
   %}
  ins_pipe(ialu_reg_reg);
%}

// dst = src1 + (src2 & 0xff)  (32-bit add, zero-extend byte via shift pair)
instruct AddExtI_uxtb(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2, immI_24 lshift, immI_24 rshift, rFlagsReg cr)
%{
  match(Set dst (AddI src1 (URShiftI (LShiftI src2 lshift) rshift)));
  ins_cost(INSN_COST);
  format %{ "add  $dst, $src1, uxtb $src2" %}

   ins_encode %{
     __ add(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::uxtb);
   %}
  ins_pipe(ialu_reg_reg);
%}

// dst = src1 + (long)(short)src2  (64-bit add, sign-extend halfword)
instruct AddExtL_sxth(iRegLNoSp dst, iRegL src1, iRegL src2, immI_48 lshift, immI_48 rshift, rFlagsReg cr)
%{
  match(Set dst (AddL src1 (RShiftL (LShiftL src2 lshift) rshift)));
  ins_cost(INSN_COST);
  format %{ "add  $dst, $src1, sxth $src2" %}

   ins_encode %{
     __ add(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::sxth);
   %}
  ins_pipe(ialu_reg_reg);
%}

// dst = src1 + (long)(int)src2  (64-bit add, sign-extend word)
instruct AddExtL_sxtw(iRegLNoSp dst, iRegL src1, iRegL src2, immI_32 lshift, immI_32 rshift, rFlagsReg cr)
%{
  match(Set dst (AddL src1 (RShiftL (LShiftL src2 lshift) rshift)));
  ins_cost(INSN_COST);
  format %{ "add  $dst, $src1, sxtw $src2" %}

   ins_encode %{
     __ add(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::sxtw);
   %}
  ins_pipe(ialu_reg_reg);
%}

// dst = src1 + (long)(byte)src2  (64-bit add, sign-extend byte)
instruct AddExtL_sxtb(iRegLNoSp dst, iRegL src1, iRegL src2, immI_56 lshift, immI_56 rshift, rFlagsReg cr)
%{
  match(Set dst (AddL src1 (RShiftL (LShiftL src2 lshift) rshift)));
  ins_cost(INSN_COST);
  format %{ "add  $dst, $src1, sxtb $src2" %}

   ins_encode %{
     __ add(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::sxtb);
   %}
  ins_pipe(ialu_reg_reg);
%}

// dst = src1 + (src2 & 0xffL)  (64-bit add, zero-extend byte via shift pair)
instruct AddExtL_uxtb(iRegLNoSp dst, iRegL src1, iRegL src2, immI_56 lshift, immI_56 rshift, rFlagsReg cr)
%{
  match(Set dst (AddL src1 (URShiftL (LShiftL src2 lshift) rshift)));
  ins_cost(INSN_COST);
  format %{ "add  $dst, $src1, uxtb $src2" %}

   ins_encode %{
     __ add(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::uxtb);
   %}
  ins_pipe(ialu_reg_reg);
%}
11319 
11320 
// ---- Add/Sub with zero-extend recognized from an And mask ----
// x & 0xff / 0xffff / 0xffffffffL is a zero-extension of the low 8/16/32
// bits; each rule folds that And on the second operand into the extended-
// register add/sub with the matching uxtb/uxth/uxtw operand modifier.

// dst = src1 + (src2 & 0xff)  (32-bit)
instruct AddExtI_uxtb_and(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2, immI_255 mask, rFlagsReg cr)
%{
  match(Set dst (AddI src1 (AndI src2 mask)));
  ins_cost(INSN_COST);
  format %{ "addw  $dst, $src1, $src2, uxtb" %}

   ins_encode %{
     __ addw(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::uxtb);
   %}
  ins_pipe(ialu_reg_reg);
%}

// dst = src1 + (src2 & 0xffff)  (32-bit)
instruct AddExtI_uxth_and(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2, immI_65535 mask, rFlagsReg cr)
%{
  match(Set dst (AddI src1 (AndI src2 mask)));
  ins_cost(INSN_COST);
  format %{ "addw  $dst, $src1, $src2, uxth" %}

   ins_encode %{
     __ addw(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::uxth);
   %}
  ins_pipe(ialu_reg_reg);
%}

// dst = src1 + (src2 & 0xffL)  (64-bit)
instruct AddExtL_uxtb_and(iRegLNoSp dst, iRegL src1, iRegL src2, immL_255 mask, rFlagsReg cr)
%{
  match(Set dst (AddL src1 (AndL src2 mask)));
  ins_cost(INSN_COST);
  format %{ "add  $dst, $src1, $src2, uxtb" %}

   ins_encode %{
     __ add(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::uxtb);
   %}
  ins_pipe(ialu_reg_reg);
%}

// dst = src1 + (src2 & 0xffffL)  (64-bit)
instruct AddExtL_uxth_and(iRegLNoSp dst, iRegL src1, iRegL src2, immL_65535 mask, rFlagsReg cr)
%{
  match(Set dst (AddL src1 (AndL src2 mask)));
  ins_cost(INSN_COST);
  format %{ "add  $dst, $src1, $src2, uxth" %}

   ins_encode %{
     __ add(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::uxth);
   %}
  ins_pipe(ialu_reg_reg);
%}

// dst = src1 + (src2 & 0xffffffffL)  (64-bit)
instruct AddExtL_uxtw_and(iRegLNoSp dst, iRegL src1, iRegL src2, immL_4294967295 mask, rFlagsReg cr)
%{
  match(Set dst (AddL src1 (AndL src2 mask)));
  ins_cost(INSN_COST);
  format %{ "add  $dst, $src1, $src2, uxtw" %}

   ins_encode %{
     __ add(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::uxtw);
   %}
  ins_pipe(ialu_reg_reg);
%}

// dst = src1 - (src2 & 0xff)  (32-bit)
instruct SubExtI_uxtb_and(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2, immI_255 mask, rFlagsReg cr)
%{
  match(Set dst (SubI src1 (AndI src2 mask)));
  ins_cost(INSN_COST);
  format %{ "subw  $dst, $src1, $src2, uxtb" %}

   ins_encode %{
     __ subw(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::uxtb);
   %}
  ins_pipe(ialu_reg_reg);
%}

// dst = src1 - (src2 & 0xffff)  (32-bit)
instruct SubExtI_uxth_and(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2, immI_65535 mask, rFlagsReg cr)
%{
  match(Set dst (SubI src1 (AndI src2 mask)));
  ins_cost(INSN_COST);
  format %{ "subw  $dst, $src1, $src2, uxth" %}

   ins_encode %{
     __ subw(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::uxth);
   %}
  ins_pipe(ialu_reg_reg);
%}

// dst = src1 - (src2 & 0xffL)  (64-bit)
instruct SubExtL_uxtb_and(iRegLNoSp dst, iRegL src1, iRegL src2, immL_255 mask, rFlagsReg cr)
%{
  match(Set dst (SubL src1 (AndL src2 mask)));
  ins_cost(INSN_COST);
  format %{ "sub  $dst, $src1, $src2, uxtb" %}

   ins_encode %{
     __ sub(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::uxtb);
   %}
  ins_pipe(ialu_reg_reg);
%}

// dst = src1 - (src2 & 0xffffL)  (64-bit)
instruct SubExtL_uxth_and(iRegLNoSp dst, iRegL src1, iRegL src2, immL_65535 mask, rFlagsReg cr)
%{
  match(Set dst (SubL src1 (AndL src2 mask)));
  ins_cost(INSN_COST);
  format %{ "sub  $dst, $src1, $src2, uxth" %}

   ins_encode %{
     __ sub(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::uxth);
   %}
  ins_pipe(ialu_reg_reg);
%}

// dst = src1 - (src2 & 0xffffffffL)  (64-bit)
instruct SubExtL_uxtw_and(iRegLNoSp dst, iRegL src1, iRegL src2, immL_4294967295 mask, rFlagsReg cr)
%{
  match(Set dst (SubL src1 (AndL src2 mask)));
  ins_cost(INSN_COST);
  format %{ "sub  $dst, $src1, $src2, uxtw" %}

   ins_encode %{
     __ sub(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::uxtw);
   %}
  ins_pipe(ialu_reg_reg);
%}
11450 
11451 // END This section of the file is automatically generated. Do not edit --------------
11452 
11453 // ============================================================================
11454 // Floating Point Arithmetic Instructions
11455 
// Single-precision FP add: dst = src1 + src2 (fadds).
instruct addF_reg_reg(vRegF dst, vRegF src1, vRegF src2) %{
  match(Set dst (AddF src1 src2));

  ins_cost(INSN_COST * 5);
  format %{ "fadds   $dst, $src1, $src2" %}

  ins_encode %{
    __ fadds(as_FloatRegister($dst$$reg),
             as_FloatRegister($src1$$reg),
             as_FloatRegister($src2$$reg));
  %}

  ins_pipe(fp_dop_reg_reg_s);
%}

// Double-precision FP add: dst = src1 + src2 (faddd).
instruct addD_reg_reg(vRegD dst, vRegD src1, vRegD src2) %{
  match(Set dst (AddD src1 src2));

  ins_cost(INSN_COST * 5);
  format %{ "faddd   $dst, $src1, $src2" %}

  ins_encode %{
    __ faddd(as_FloatRegister($dst$$reg),
             as_FloatRegister($src1$$reg),
             as_FloatRegister($src2$$reg));
  %}

  ins_pipe(fp_dop_reg_reg_d);
%}

// Single-precision FP subtract: dst = src1 - src2 (fsubs).
instruct subF_reg_reg(vRegF dst, vRegF src1, vRegF src2) %{
  match(Set dst (SubF src1 src2));

  ins_cost(INSN_COST * 5);
  format %{ "fsubs   $dst, $src1, $src2" %}

  ins_encode %{
    __ fsubs(as_FloatRegister($dst$$reg),
             as_FloatRegister($src1$$reg),
             as_FloatRegister($src2$$reg));
  %}

  ins_pipe(fp_dop_reg_reg_s);
%}

// Double-precision FP subtract: dst = src1 - src2 (fsubd).
instruct subD_reg_reg(vRegD dst, vRegD src1, vRegD src2) %{
  match(Set dst (SubD src1 src2));

  ins_cost(INSN_COST * 5);
  format %{ "fsubd   $dst, $src1, $src2" %}

  ins_encode %{
    __ fsubd(as_FloatRegister($dst$$reg),
             as_FloatRegister($src1$$reg),
             as_FloatRegister($src2$$reg));
  %}

  ins_pipe(fp_dop_reg_reg_d);
%}

// Single-precision FP multiply: dst = src1 * src2 (fmuls).
// Slightly higher cost than add/sub, reflecting multiplier latency.
instruct mulF_reg_reg(vRegF dst, vRegF src1, vRegF src2) %{
  match(Set dst (MulF src1 src2));

  ins_cost(INSN_COST * 6);
  format %{ "fmuls   $dst, $src1, $src2" %}

  ins_encode %{
    __ fmuls(as_FloatRegister($dst$$reg),
             as_FloatRegister($src1$$reg),
             as_FloatRegister($src2$$reg));
  %}

  ins_pipe(fp_dop_reg_reg_s);
%}

// Double-precision FP multiply: dst = src1 * src2 (fmuld).
instruct mulD_reg_reg(vRegD dst, vRegD src1, vRegD src2) %{
  match(Set dst (MulD src1 src2));

  ins_cost(INSN_COST * 6);
  format %{ "fmuld   $dst, $src1, $src2" %}

  ins_encode %{
    __ fmuld(as_FloatRegister($dst$$reg),
             as_FloatRegister($src1$$reg),
             as_FloatRegister($src2$$reg));
  %}

  ins_pipe(fp_dop_reg_reg_d);
%}
11545 
// We cannot use these fused mul with add/sub ops because they don't
// produce the same result as the equivalent separated ops
// (essentially they don't round the intermediate result). That's a
// shame. Leaving them here in case we can identify cases where it is
// legitimate to use them.
11551 
11552 
11553 // instruct maddF_reg_reg(vRegF dst, vRegF src1, vRegF src2, vRegF src3) %{
11554 //   match(Set dst (AddF (MulF src1 src2) src3));
11555 
11556 //   format %{ "fmadds   $dst, $src1, $src2, $src3" %}
11557 
11558 //   ins_encode %{
11559 //     __ fmadds(as_FloatRegister($dst$$reg),
11560 //              as_FloatRegister($src1$$reg),
11561 //              as_FloatRegister($src2$$reg),
11562 //              as_FloatRegister($src3$$reg));
11563 //   %}
11564 
11565 //   ins_pipe(pipe_class_default);
11566 // %}
11567 
11568 // instruct maddD_reg_reg(vRegD dst, vRegD src1, vRegD src2, vRegD src3) %{
11569 //   match(Set dst (AddD (MulD src1 src2) src3));
11570 
11571 //   format %{ "fmaddd   $dst, $src1, $src2, $src3" %}
11572 
11573 //   ins_encode %{
11574 //     __ fmaddd(as_FloatRegister($dst$$reg),
11575 //              as_FloatRegister($src1$$reg),
11576 //              as_FloatRegister($src2$$reg),
11577 //              as_FloatRegister($src3$$reg));
11578 //   %}
11579 
11580 //   ins_pipe(pipe_class_default);
11581 // %}
11582 
11583 // instruct msubF_reg_reg(vRegF dst, vRegF src1, vRegF src2, vRegF src3) %{
11584 //   match(Set dst (AddF (MulF (NegF src1) src2) src3));
11585 //   match(Set dst (AddF (NegF (MulF src1 src2)) src3));
11586 
11587 //   format %{ "fmsubs   $dst, $src1, $src2, $src3" %}
11588 
11589 //   ins_encode %{
11590 //     __ fmsubs(as_FloatRegister($dst$$reg),
11591 //               as_FloatRegister($src1$$reg),
11592 //               as_FloatRegister($src2$$reg),
11593 //              as_FloatRegister($src3$$reg));
11594 //   %}
11595 
11596 //   ins_pipe(pipe_class_default);
11597 // %}
11598 
11599 // instruct msubD_reg_reg(vRegD dst, vRegD src1, vRegD src2, vRegD src3) %{
11600 //   match(Set dst (AddD (MulD (NegD src1) src2) src3));
11601 //   match(Set dst (AddD (NegD (MulD src1 src2)) src3));
11602 
11603 //   format %{ "fmsubd   $dst, $src1, $src2, $src3" %}
11604 
11605 //   ins_encode %{
11606 //     __ fmsubd(as_FloatRegister($dst$$reg),
11607 //               as_FloatRegister($src1$$reg),
11608 //               as_FloatRegister($src2$$reg),
11609 //               as_FloatRegister($src3$$reg));
11610 //   %}
11611 
11612 //   ins_pipe(pipe_class_default);
11613 // %}
11614 
11615 // instruct mnaddF_reg_reg(vRegF dst, vRegF src1, vRegF src2, vRegF src3) %{
11616 //   match(Set dst (SubF (MulF (NegF src1) src2) src3));
11617 //   match(Set dst (SubF (NegF (MulF src1 src2)) src3));
11618 
11619 //   format %{ "fnmadds  $dst, $src1, $src2, $src3" %}
11620 
11621 //   ins_encode %{
11622 //     __ fnmadds(as_FloatRegister($dst$$reg),
11623 //                as_FloatRegister($src1$$reg),
11624 //                as_FloatRegister($src2$$reg),
11625 //                as_FloatRegister($src3$$reg));
11626 //   %}
11627 
11628 //   ins_pipe(pipe_class_default);
11629 // %}
11630 
11631 // instruct mnaddD_reg_reg(vRegD dst, vRegD src1, vRegD src2, vRegD src3) %{
11632 //   match(Set dst (SubD (MulD (NegD src1) src2) src3));
11633 //   match(Set dst (SubD (NegD (MulD src1 src2)) src3));
11634 
11635 //   format %{ "fnmaddd   $dst, $src1, $src2, $src3" %}
11636 
11637 //   ins_encode %{
11638 //     __ fnmaddd(as_FloatRegister($dst$$reg),
11639 //                as_FloatRegister($src1$$reg),
11640 //                as_FloatRegister($src2$$reg),
11641 //                as_FloatRegister($src3$$reg));
11642 //   %}
11643 
11644 //   ins_pipe(pipe_class_default);
11645 // %}
11646 
11647 // instruct mnsubF_reg_reg(vRegF dst, vRegF src1, vRegF src2, vRegF src3, immF0 zero) %{
11648 //   match(Set dst (SubF (MulF src1 src2) src3));
11649 
11650 //   format %{ "fnmsubs  $dst, $src1, $src2, $src3" %}
11651 
11652 //   ins_encode %{
11653 //     __ fnmsubs(as_FloatRegister($dst$$reg),
11654 //                as_FloatRegister($src1$$reg),
11655 //                as_FloatRegister($src2$$reg),
11656 //                as_FloatRegister($src3$$reg));
11657 //   %}
11658 
11659 //   ins_pipe(pipe_class_default);
11660 // %}
11661 
11662 // instruct mnsubD_reg_reg(vRegD dst, vRegD src1, vRegD src2, vRegD src3, immD0 zero) %{
11663 //   match(Set dst (SubD (MulD src1 src2) src3));
11664 
11665 //   format %{ "fnmsubd   $dst, $src1, $src2, $src3" %}
11666 
11667 //   ins_encode %{
11668 //   // n.b. insn name should be fnmsubd
11669 //     __ fnmsub(as_FloatRegister($dst$$reg),
11670 //                as_FloatRegister($src1$$reg),
11671 //                as_FloatRegister($src2$$reg),
11672 //                as_FloatRegister($src3$$reg));
11673 //   %}
11674 
11675 //   ins_pipe(pipe_class_default);
11676 // %}
11677 
11678 
// Single-precision FP divide. High latency, reflected in the 18x cost.
instruct divF_reg_reg(vRegF dst, vRegF src1, vRegF src2) %{
  match(Set dst (DivF src1  src2));

  ins_cost(INSN_COST * 18);
  format %{ "fdivs   $dst, $src1, $src2" %}

  ins_encode %{
    __ fdivs(as_FloatRegister($dst$$reg),
             as_FloatRegister($src1$$reg),
             as_FloatRegister($src2$$reg));
  %}

  ins_pipe(fp_div_s);
%}

// Double-precision FP divide. Costed higher (32x) than the float form.
instruct divD_reg_reg(vRegD dst, vRegD src1, vRegD src2) %{
  match(Set dst (DivD src1  src2));

  ins_cost(INSN_COST * 32);
  format %{ "fdivd   $dst, $src1, $src2" %}

  ins_encode %{
    __ fdivd(as_FloatRegister($dst$$reg),
             as_FloatRegister($src1$$reg),
             as_FloatRegister($src2$$reg));
  %}

  ins_pipe(fp_div_d);
%}
11708 
// Single-precision FP negate.
instruct negF_reg_reg(vRegF dst, vRegF src) %{
  match(Set dst (NegF src));

  ins_cost(INSN_COST * 3);
  // Fixed: format previously printed "fneg"; the encoding emits the
  // single-precision fnegs, matching negD_reg_reg's "fnegd".
  format %{ "fnegs   $dst, $src" %}

  ins_encode %{
    __ fnegs(as_FloatRegister($dst$$reg),
             as_FloatRegister($src$$reg));
  %}

  ins_pipe(fp_uop_s);
%}
11722 
// Double-precision FP negate.
instruct negD_reg_reg(vRegD dst, vRegD src) %{
  match(Set dst (NegD src));

  ins_cost(INSN_COST * 3);
  format %{ "fnegd   $dst, $src" %}

  ins_encode %{
    __ fnegd(as_FloatRegister($dst$$reg),
             as_FloatRegister($src$$reg));
  %}

  ins_pipe(fp_uop_d);
%}
11736 
// Single-precision FP absolute value.
instruct absF_reg(vRegF dst, vRegF src) %{
  match(Set dst (AbsF src));

  ins_cost(INSN_COST * 3);
  format %{ "fabss   $dst, $src" %}
  ins_encode %{
    __ fabss(as_FloatRegister($dst$$reg),
             as_FloatRegister($src$$reg));
  %}

  ins_pipe(fp_uop_s);
%}

// Double-precision FP absolute value.
instruct absD_reg(vRegD dst, vRegD src) %{
  match(Set dst (AbsD src));

  ins_cost(INSN_COST * 3);
  format %{ "fabsd   $dst, $src" %}
  ins_encode %{
    __ fabsd(as_FloatRegister($dst$$reg),
             as_FloatRegister($src$$reg));
  %}

  ins_pipe(fp_uop_d);
%}
11762 
// Double-precision square root.
// NOTE(review): ins_pipe here is fp_div_s while sqrtF_reg below uses
// fp_div_d -- the pipe classes look swapped relative to the operand
// widths. Scheduling-only if so; confirm against the pipeline definitions
// before changing.
instruct sqrtD_reg(vRegD dst, vRegD src) %{
  match(Set dst (SqrtD src));

  ins_cost(INSN_COST * 50);
  format %{ "fsqrtd  $dst, $src" %}
  ins_encode %{
    __ fsqrtd(as_FloatRegister($dst$$reg),
             as_FloatRegister($src$$reg));
  %}

  ins_pipe(fp_div_s);
%}

// Single-precision square root. The ideal graph only has SqrtD, so the
// float form matches a ConvF2D/SqrtD/ConvD2F sandwich and collapses it
// to a single fsqrts.
instruct sqrtF_reg(vRegF dst, vRegF src) %{
  match(Set dst (ConvD2F (SqrtD (ConvF2D src))));

  ins_cost(INSN_COST * 50);
  format %{ "fsqrts  $dst, $src" %}
  ins_encode %{
    __ fsqrts(as_FloatRegister($dst$$reg),
             as_FloatRegister($src$$reg));
  %}

  ins_pipe(fp_div_d);
%}
11788 
11789 // ============================================================================
11790 // Logical Instructions
11791 
11792 // Integer Logical Instructions
11793 
11794 // And Instructions
11795 
11796 
// Int AND, register-register form.
instruct andI_reg_reg(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2, rFlagsReg cr) %{
  match(Set dst (AndI src1 src2));

  format %{ "andw  $dst, $src1, $src2\t# int" %}

  ins_cost(INSN_COST);
  ins_encode %{
    __ andw(as_Register($dst$$reg),
            as_Register($src1$$reg),
            as_Register($src2$$reg));
  %}

  ins_pipe(ialu_reg_reg);
%}
11811 
// Int AND with a logical immediate (immILog guarantees it is encodable).
instruct andI_reg_imm(iRegINoSp dst, iRegIorL2I src1, immILog src2, rFlagsReg cr) %{
  match(Set dst (AndI src1 src2));

  // Fixed: format previously printed "andsw" (the flag-setting form), but
  // the encoding emits the non-flag-setting andw.
  format %{ "andw  $dst, $src1, $src2\t# int" %}

  ins_cost(INSN_COST);
  ins_encode %{
    __ andw(as_Register($dst$$reg),
            as_Register($src1$$reg),
            (unsigned long)($src2$$constant));
  %}

  ins_pipe(ialu_reg_imm);
%}
11826 
11827 // Or Instructions
11828 
// Int OR, register-register form.
instruct orI_reg_reg(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2) %{
  match(Set dst (OrI src1 src2));

  format %{ "orrw  $dst, $src1, $src2\t# int" %}

  ins_cost(INSN_COST);
  ins_encode %{
    __ orrw(as_Register($dst$$reg),
            as_Register($src1$$reg),
            as_Register($src2$$reg));
  %}

  ins_pipe(ialu_reg_reg);
%}

// Int OR with a logical immediate.
instruct orI_reg_imm(iRegINoSp dst, iRegIorL2I src1, immILog src2) %{
  match(Set dst (OrI src1 src2));

  format %{ "orrw  $dst, $src1, $src2\t# int" %}

  ins_cost(INSN_COST);
  ins_encode %{
    __ orrw(as_Register($dst$$reg),
            as_Register($src1$$reg),
            (unsigned long)($src2$$constant));
  %}

  ins_pipe(ialu_reg_imm);
%}

// Xor Instructions

// Int XOR, register-register form.
instruct xorI_reg_reg(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2) %{
  match(Set dst (XorI src1 src2));

  format %{ "eorw  $dst, $src1, $src2\t# int" %}

  ins_cost(INSN_COST);
  ins_encode %{
    __ eorw(as_Register($dst$$reg),
            as_Register($src1$$reg),
            as_Register($src2$$reg));
  %}

  ins_pipe(ialu_reg_reg);
%}

// Int XOR with a logical immediate.
instruct xorI_reg_imm(iRegINoSp dst, iRegIorL2I src1, immILog src2) %{
  match(Set dst (XorI src1 src2));

  format %{ "eorw  $dst, $src1, $src2\t# int" %}

  ins_cost(INSN_COST);
  ins_encode %{
    __ eorw(as_Register($dst$$reg),
            as_Register($src1$$reg),
            (unsigned long)($src2$$constant));
  %}

  ins_pipe(ialu_reg_imm);
%}
11890 
11891 // Long Logical Instructions
11892 // TODO
11893 
// Long AND, register-register form.
// Fixed throughout this section: the format comments previously said
// "# int" for these 64-bit (long) operations; corrected to "# long".
instruct andL_reg_reg(iRegLNoSp dst, iRegL src1, iRegL src2, rFlagsReg cr) %{
  match(Set dst (AndL src1 src2));

  format %{ "and  $dst, $src1, $src2\t# long" %}

  ins_cost(INSN_COST);
  ins_encode %{
    __ andr(as_Register($dst$$reg),
            as_Register($src1$$reg),
            as_Register($src2$$reg));
  %}

  ins_pipe(ialu_reg_reg);
%}

// Long AND with a logical immediate (immLLog guarantees encodability).
instruct andL_reg_imm(iRegLNoSp dst, iRegL src1, immLLog src2, rFlagsReg cr) %{
  match(Set dst (AndL src1 src2));

  format %{ "and  $dst, $src1, $src2\t# long" %}

  ins_cost(INSN_COST);
  ins_encode %{
    __ andr(as_Register($dst$$reg),
            as_Register($src1$$reg),
            (unsigned long)($src2$$constant));
  %}

  ins_pipe(ialu_reg_imm);
%}

// Or Instructions

// Long OR, register-register form.
instruct orL_reg_reg(iRegLNoSp dst, iRegL src1, iRegL src2) %{
  match(Set dst (OrL src1 src2));

  format %{ "orr  $dst, $src1, $src2\t# long" %}

  ins_cost(INSN_COST);
  ins_encode %{
    __ orr(as_Register($dst$$reg),
           as_Register($src1$$reg),
           as_Register($src2$$reg));
  %}

  ins_pipe(ialu_reg_reg);
%}

// Long OR with a logical immediate.
instruct orL_reg_imm(iRegLNoSp dst, iRegL src1, immLLog src2) %{
  match(Set dst (OrL src1 src2));

  format %{ "orr  $dst, $src1, $src2\t# long" %}

  ins_cost(INSN_COST);
  ins_encode %{
    __ orr(as_Register($dst$$reg),
           as_Register($src1$$reg),
           (unsigned long)($src2$$constant));
  %}

  ins_pipe(ialu_reg_imm);
%}

// Xor Instructions

// Long XOR, register-register form.
instruct xorL_reg_reg(iRegLNoSp dst, iRegL src1, iRegL src2) %{
  match(Set dst (XorL src1 src2));

  format %{ "eor  $dst, $src1, $src2\t# long" %}

  ins_cost(INSN_COST);
  ins_encode %{
    __ eor(as_Register($dst$$reg),
           as_Register($src1$$reg),
           as_Register($src2$$reg));
  %}

  ins_pipe(ialu_reg_reg);
%}

// Long XOR with a logical immediate.
// (ins_cost/format ordering normalized to match the sibling patterns.)
instruct xorL_reg_imm(iRegLNoSp dst, iRegL src1, immLLog src2) %{
  match(Set dst (XorL src1 src2));

  format %{ "eor  $dst, $src1, $src2\t# long" %}

  ins_cost(INSN_COST);
  ins_encode %{
    __ eor(as_Register($dst$$reg),
           as_Register($src1$$reg),
           (unsigned long)($src2$$constant));
  %}

  ins_pipe(ialu_reg_imm);
%}
11987 
// Sign-extend int to long: sbfm with imms=31 is the canonical sxtw.
instruct convI2L_reg_reg(iRegLNoSp dst, iRegIorL2I src)
%{
  match(Set dst (ConvI2L src));

  ins_cost(INSN_COST);
  format %{ "sxtw  $dst, $src\t# i2l" %}
  ins_encode %{
    __ sbfm($dst$$Register, $src$$Register, 0, 31);
  %}
  ins_pipe(ialu_reg_shift);
%}

// this pattern occurs in bigmath arithmetic
// Zero-extend int to long: matches (ConvI2L src) & 0xFFFFFFFF and emits a
// single ubfm (uxtw) instead of a sign-extend plus mask.
instruct convUI2L_reg_reg(iRegLNoSp dst, iRegIorL2I src, immL_32bits mask)
%{
  match(Set dst (AndL (ConvI2L src) mask));

  ins_cost(INSN_COST);
  format %{ "ubfm  $dst, $src, 0, 31\t# ui2l" %}
  ins_encode %{
    __ ubfm($dst$$Register, $src$$Register, 0, 31);
  %}

  ins_pipe(ialu_reg_shift);
%}
12013 
// Truncate long to int: a 32-bit register move discards the upper bits.
instruct convL2I_reg(iRegINoSp dst, iRegL src) %{
  match(Set dst (ConvL2I src));

  ins_cost(INSN_COST);
  format %{ "movw  $dst, $src \t// l2i" %}

  ins_encode %{
    __ movw(as_Register($dst$$reg), as_Register($src$$reg));
  %}

  ins_pipe(ialu_reg);
%}

// Int to boolean (Conv2B): dst = (src != 0) ? 1 : 0 via compare + cset.
// Clobbers the flags, hence KILL cr.
instruct convI2B(iRegINoSp dst, iRegIorL2I src, rFlagsReg cr)
%{
  match(Set dst (Conv2B src));
  effect(KILL cr);

  format %{
    "cmpw $src, zr\n\t"
    "cset $dst, ne"
  %}

  ins_encode %{
    __ cmpw(as_Register($src$$reg), zr);
    __ cset(as_Register($dst$$reg), Assembler::NE);
  %}

  ins_pipe(ialu_reg);
%}

// Pointer to boolean: dst = (src != NULL) ? 1 : 0, 64-bit compare + cset.
instruct convP2B(iRegINoSp dst, iRegP src, rFlagsReg cr)
%{
  match(Set dst (Conv2B src));
  effect(KILL cr);

  format %{
    "cmp  $src, zr\n\t"
    "cset $dst, ne"
  %}

  ins_encode %{
    __ cmp(as_Register($src$$reg), zr);
    __ cset(as_Register($dst$$reg), Assembler::NE);
  %}

  ins_pipe(ialu_reg);
%}
12062 
// Double to float narrowing conversion.
instruct convD2F_reg(vRegF dst, vRegD src) %{
  match(Set dst (ConvD2F src));

  ins_cost(INSN_COST * 5);
  format %{ "fcvtd  $dst, $src \t// d2f" %}

  ins_encode %{
    __ fcvtd(as_FloatRegister($dst$$reg), as_FloatRegister($src$$reg));
  %}

  ins_pipe(fp_d2f);
%}

// Float to double widening conversion.
instruct convF2D_reg(vRegD dst, vRegF src) %{
  match(Set dst (ConvF2D src));

  ins_cost(INSN_COST * 5);
  format %{ "fcvts  $dst, $src \t// f2d" %}

  ins_encode %{
    __ fcvts(as_FloatRegister($dst$$reg), as_FloatRegister($src$$reg));
  %}

  ins_pipe(fp_f2d);
%}

// Float to int: fcvtzs (round toward zero), 32-bit destination.
instruct convF2I_reg_reg(iRegINoSp dst, vRegF src) %{
  match(Set dst (ConvF2I src));

  ins_cost(INSN_COST * 5);
  format %{ "fcvtzsw  $dst, $src \t// f2i" %}

  ins_encode %{
    __ fcvtzsw(as_Register($dst$$reg), as_FloatRegister($src$$reg));
  %}

  ins_pipe(fp_f2i);
%}

// Float to long: fcvtzs (round toward zero), 64-bit destination.
instruct convF2L_reg_reg(iRegLNoSp dst, vRegF src) %{
  match(Set dst (ConvF2L src));

  ins_cost(INSN_COST * 5);
  format %{ "fcvtzs  $dst, $src \t// f2l" %}

  ins_encode %{
    __ fcvtzs(as_Register($dst$$reg), as_FloatRegister($src$$reg));
  %}

  ins_pipe(fp_f2l);
%}

// Signed int to float.
instruct convI2F_reg_reg(vRegF dst, iRegIorL2I src) %{
  match(Set dst (ConvI2F src));

  ins_cost(INSN_COST * 5);
  format %{ "scvtfws  $dst, $src \t// i2f" %}

  ins_encode %{
    __ scvtfws(as_FloatRegister($dst$$reg), as_Register($src$$reg));
  %}

  ins_pipe(fp_i2f);
%}

// Signed long to float.
instruct convL2F_reg_reg(vRegF dst, iRegL src) %{
  match(Set dst (ConvL2F src));

  ins_cost(INSN_COST * 5);
  format %{ "scvtfs  $dst, $src \t// l2f" %}

  ins_encode %{
    __ scvtfs(as_FloatRegister($dst$$reg), as_Register($src$$reg));
  %}

  ins_pipe(fp_l2f);
%}

// Double to int: fcvtzs (round toward zero), 32-bit destination.
instruct convD2I_reg_reg(iRegINoSp dst, vRegD src) %{
  match(Set dst (ConvD2I src));

  ins_cost(INSN_COST * 5);
  format %{ "fcvtzdw  $dst, $src \t// d2i" %}

  ins_encode %{
    __ fcvtzdw(as_Register($dst$$reg), as_FloatRegister($src$$reg));
  %}

  ins_pipe(fp_d2i);
%}

// Double to long: fcvtzs (round toward zero), 64-bit destination.
instruct convD2L_reg_reg(iRegLNoSp dst, vRegD src) %{
  match(Set dst (ConvD2L src));

  ins_cost(INSN_COST * 5);
  format %{ "fcvtzd  $dst, $src \t// d2l" %}

  ins_encode %{
    __ fcvtzd(as_Register($dst$$reg), as_FloatRegister($src$$reg));
  %}

  ins_pipe(fp_d2l);
%}

// Signed int to double.
instruct convI2D_reg_reg(vRegD dst, iRegIorL2I src) %{
  match(Set dst (ConvI2D src));

  ins_cost(INSN_COST * 5);
  format %{ "scvtfwd  $dst, $src \t// i2d" %}

  ins_encode %{
    __ scvtfwd(as_FloatRegister($dst$$reg), as_Register($src$$reg));
  %}

  ins_pipe(fp_i2d);
%}

// Signed long to double.
instruct convL2D_reg_reg(vRegD dst, iRegL src) %{
  match(Set dst (ConvL2D src));

  ins_cost(INSN_COST * 5);
  format %{ "scvtfd  $dst, $src \t// l2d" %}

  ins_encode %{
    __ scvtfd(as_FloatRegister($dst$$reg), as_Register($src$$reg));
  %}

  ins_pipe(fp_l2d);
%}
12192 
12193 // stack <-> reg and reg <-> reg shuffles with no conversion
12194 
// Bit-preserving moves: reinterpret a float stack slot as an int register.
instruct MoveF2I_stack_reg(iRegINoSp dst, stackSlotF src) %{

  match(Set dst (MoveF2I src));

  effect(DEF dst, USE src);

  ins_cost(4 * INSN_COST);

  format %{ "ldrw $dst, $src\t# MoveF2I_stack_reg" %}

  ins_encode %{
    __ ldrw($dst$$Register, Address(sp, $src$$disp));
  %}

  ins_pipe(iload_reg_reg);

%}

// Reinterpret an int stack slot as a float register.
instruct MoveI2F_stack_reg(vRegF dst, stackSlotI src) %{

  match(Set dst (MoveI2F src));

  effect(DEF dst, USE src);

  ins_cost(4 * INSN_COST);

  format %{ "ldrs $dst, $src\t# MoveI2F_stack_reg" %}

  ins_encode %{
    __ ldrs(as_FloatRegister($dst$$reg), Address(sp, $src$$disp));
  %}

  ins_pipe(pipe_class_memory);

%}

// Reinterpret a double stack slot as a long register.
instruct MoveD2L_stack_reg(iRegLNoSp dst, stackSlotD src) %{

  match(Set dst (MoveD2L src));

  effect(DEF dst, USE src);

  ins_cost(4 * INSN_COST);

  format %{ "ldr $dst, $src\t# MoveD2L_stack_reg" %}

  ins_encode %{
    __ ldr($dst$$Register, Address(sp, $src$$disp));
  %}

  ins_pipe(iload_reg_reg);

%}

// Reinterpret a long stack slot as a double register.
instruct MoveL2D_stack_reg(vRegD dst, stackSlotL src) %{

  match(Set dst (MoveL2D src));

  effect(DEF dst, USE src);

  ins_cost(4 * INSN_COST);

  format %{ "ldrd $dst, $src\t# MoveL2D_stack_reg" %}

  ins_encode %{
    __ ldrd(as_FloatRegister($dst$$reg), Address(sp, $src$$disp));
  %}

  ins_pipe(pipe_class_memory);

%}

// Store a float register's bits to an int stack slot.
instruct MoveF2I_reg_stack(stackSlotI dst, vRegF src) %{

  match(Set dst (MoveF2I src));

  effect(DEF dst, USE src);

  ins_cost(INSN_COST);

  format %{ "strs $src, $dst\t# MoveF2I_reg_stack" %}

  ins_encode %{
    __ strs(as_FloatRegister($src$$reg), Address(sp, $dst$$disp));
  %}

  ins_pipe(pipe_class_memory);

%}

// Store an int register's bits to a float stack slot.
instruct MoveI2F_reg_stack(stackSlotF dst, iRegI src) %{

  match(Set dst (MoveI2F src));

  effect(DEF dst, USE src);

  ins_cost(INSN_COST);

  format %{ "strw $src, $dst\t# MoveI2F_reg_stack" %}

  ins_encode %{
    __ strw($src$$Register, Address(sp, $dst$$disp));
  %}

  ins_pipe(istore_reg_reg);

%}
12302 
// Store a double register's bits to a long stack slot.
instruct MoveD2L_reg_stack(stackSlotL dst, vRegD src) %{

  match(Set dst (MoveD2L src));

  effect(DEF dst, USE src);

  ins_cost(INSN_COST);

  // Fixed: the format previously printed "$dst, $src"; a store writes
  // $src into stack slot $dst, as in the sibling Move*_reg_stack patterns.
  format %{ "strd $src, $dst\t# MoveD2L_reg_stack" %}

  ins_encode %{
    __ strd(as_FloatRegister($src$$reg), Address(sp, $dst$$disp));
  %}

  ins_pipe(pipe_class_memory);

%}
12320 
// Store a long register's bits to a double stack slot.
instruct MoveL2D_reg_stack(stackSlotD dst, iRegL src) %{

  match(Set dst (MoveL2D src));

  effect(DEF dst, USE src);

  ins_cost(INSN_COST);

  format %{ "str $src, $dst\t# MoveL2D_reg_stack" %}

  ins_encode %{
    __ str($src$$Register, Address(sp, $dst$$disp));
  %}

  ins_pipe(istore_reg_reg);

%}

// Register-to-register bit moves via fmov (no memory round trip).
instruct MoveF2I_reg_reg(iRegINoSp dst, vRegF src) %{

  match(Set dst (MoveF2I src));

  effect(DEF dst, USE src);

  ins_cost(INSN_COST);

  format %{ "fmovs $dst, $src\t# MoveF2I_reg_reg" %}

  ins_encode %{
    __ fmovs($dst$$Register, as_FloatRegister($src$$reg));
  %}

  ins_pipe(fp_f2i);

%}

// Int bits to a float register via fmov.
instruct MoveI2F_reg_reg(vRegF dst, iRegI src) %{

  match(Set dst (MoveI2F src));

  effect(DEF dst, USE src);

  ins_cost(INSN_COST);

  format %{ "fmovs $dst, $src\t# MoveI2F_reg_reg" %}

  ins_encode %{
    __ fmovs(as_FloatRegister($dst$$reg), $src$$Register);
  %}

  ins_pipe(fp_i2f);

%}

// Double bits to a long register via fmov.
instruct MoveD2L_reg_reg(iRegLNoSp dst, vRegD src) %{

  match(Set dst (MoveD2L src));

  effect(DEF dst, USE src);

  ins_cost(INSN_COST);

  format %{ "fmovd $dst, $src\t# MoveD2L_reg_reg" %}

  ins_encode %{
    __ fmovd($dst$$Register, as_FloatRegister($src$$reg));
  %}

  ins_pipe(fp_d2l);

%}

// Long bits to a double register via fmov.
instruct MoveL2D_reg_reg(vRegD dst, iRegL src) %{

  match(Set dst (MoveL2D src));

  effect(DEF dst, USE src);

  ins_cost(INSN_COST);

  format %{ "fmovd $dst, $src\t# MoveL2D_reg_reg" %}

  ins_encode %{
    __ fmovd(as_FloatRegister($dst$$reg), $src$$Register);
  %}

  ins_pipe(fp_l2d);

%}
12410 
12411 // ============================================================================
12412 // clearing of an array
12413 
// Clear an array: count in r11, base in r10 (fixed registers so the
// zero_words stub call can be used). Both inputs are destroyed.
instruct clearArray_reg_reg(iRegL_R11 cnt, iRegP_R10 base, Universe dummy, rFlagsReg cr)
%{
  match(Set dummy (ClearArray cnt base));
  effect(USE_KILL cnt, USE_KILL base, KILL cr);

  ins_cost(4 * INSN_COST);
  format %{ "ClearArray $cnt, $base" %}

  ins_encode %{
    __ zero_words($base$$Register, $cnt$$Register);
  %}

  ins_pipe(pipe_class_memory);
%}

// Clear an array whose length is a compile-time constant; r11 is a temp.
instruct clearArray_imm_reg(immL cnt, iRegP_R10 base, iRegL_R11 tmp, Universe dummy, rFlagsReg cr)
%{
  match(Set dummy (ClearArray cnt base));
  effect(USE_KILL base, TEMP tmp, KILL cr);

  ins_cost(4 * INSN_COST);
  format %{ "ClearArray $cnt, $base" %}

  ins_encode %{
    __ zero_words($base$$Register, (u_int64_t)$cnt$$constant);
  %}

  ins_pipe(pipe_class_memory);
%}
12443 
12444 // ============================================================================
12445 // Overflow Math Instructions
12446 
// Int add overflow check: cmnw sets V on signed overflow of op1 + op2.
instruct overflowAddI_reg_reg(rFlagsReg cr, iRegIorL2I op1, iRegIorL2I op2)
%{
  match(Set cr (OverflowAddI op1 op2));

  format %{ "cmnw  $op1, $op2\t# overflow check int" %}
  ins_cost(INSN_COST);
  ins_encode %{
    __ cmnw($op1$$Register, $op2$$Register);
  %}

  ins_pipe(icmp_reg_reg);
%}

// Int add overflow check, immediate form.
instruct overflowAddI_reg_imm(rFlagsReg cr, iRegIorL2I op1, immIAddSub op2)
%{
  match(Set cr (OverflowAddI op1 op2));

  format %{ "cmnw  $op1, $op2\t# overflow check int" %}
  ins_cost(INSN_COST);
  ins_encode %{
    __ cmnw($op1$$Register, $op2$$constant);
  %}

  ins_pipe(icmp_reg_imm);
%}

// Long add overflow check.
instruct overflowAddL_reg_reg(rFlagsReg cr, iRegL op1, iRegL op2)
%{
  match(Set cr (OverflowAddL op1 op2));

  format %{ "cmn   $op1, $op2\t# overflow check long" %}
  ins_cost(INSN_COST);
  ins_encode %{
    __ cmn($op1$$Register, $op2$$Register);
  %}

  ins_pipe(icmp_reg_reg);
%}

// Long add overflow check, immediate form.
instruct overflowAddL_reg_imm(rFlagsReg cr, iRegL op1, immLAddSub op2)
%{
  match(Set cr (OverflowAddL op1 op2));

  format %{ "cmn   $op1, $op2\t# overflow check long" %}
  ins_cost(INSN_COST);
  ins_encode %{
    __ cmn($op1$$Register, $op2$$constant);
  %}

  ins_pipe(icmp_reg_imm);
%}

// Int subtract overflow check: cmpw sets V on signed overflow of op1 - op2.
instruct overflowSubI_reg_reg(rFlagsReg cr, iRegIorL2I op1, iRegIorL2I op2)
%{
  match(Set cr (OverflowSubI op1 op2));

  format %{ "cmpw  $op1, $op2\t# overflow check int" %}
  ins_cost(INSN_COST);
  ins_encode %{
    __ cmpw($op1$$Register, $op2$$Register);
  %}

  ins_pipe(icmp_reg_reg);
%}

// Int subtract overflow check, immediate form.
instruct overflowSubI_reg_imm(rFlagsReg cr, iRegIorL2I op1, immIAddSub op2)
%{
  match(Set cr (OverflowSubI op1 op2));

  format %{ "cmpw  $op1, $op2\t# overflow check int" %}
  ins_cost(INSN_COST);
  ins_encode %{
    __ cmpw($op1$$Register, $op2$$constant);
  %}

  ins_pipe(icmp_reg_imm);
%}

// Long subtract overflow check.
instruct overflowSubL_reg_reg(rFlagsReg cr, iRegL op1, iRegL op2)
%{
  match(Set cr (OverflowSubL op1 op2));

  format %{ "cmp   $op1, $op2\t# overflow check long" %}
  ins_cost(INSN_COST);
  ins_encode %{
    __ cmp($op1$$Register, $op2$$Register);
  %}

  ins_pipe(icmp_reg_reg);
%}

// Long subtract overflow check, immediate form.
instruct overflowSubL_reg_imm(rFlagsReg cr, iRegL op1, immLAddSub op2)
%{
  match(Set cr (OverflowSubL op1 op2));

  format %{ "cmp   $op1, $op2\t# overflow check long" %}
  ins_cost(INSN_COST);
  ins_encode %{
    __ cmp($op1$$Register, $op2$$constant);
  %}

  ins_pipe(icmp_reg_imm);
%}

// Int negate overflow check: 0 - op1, flagged via cmpw against zr.
instruct overflowNegI_reg(rFlagsReg cr, immI0 zero, iRegIorL2I op1)
%{
  match(Set cr (OverflowSubI zero op1));

  format %{ "cmpw  zr, $op1\t# overflow check int" %}
  ins_cost(INSN_COST);
  ins_encode %{
    __ cmpw(zr, $op1$$Register);
  %}

  ins_pipe(icmp_reg_imm);
%}

// Long negate overflow check.
instruct overflowNegL_reg(rFlagsReg cr, immI0 zero, iRegL op1)
%{
  match(Set cr (OverflowSubL zero op1));

  format %{ "cmp   zr, $op1\t# overflow check long" %}
  ins_cost(INSN_COST);
  ins_encode %{
    __ cmp(zr, $op1$$Register);
  %}

  ins_pipe(icmp_reg_imm);
%}

// Int multiply overflow check. Computes the full 64-bit product, then
// compares it with its own sign-extended low half (NE => the high bits
// are not pure sign extension, i.e. overflow) and finally manufactures
// the V flag that downstream cmpOp users expect.
instruct overflowMulI_reg(rFlagsReg cr, iRegIorL2I op1, iRegIorL2I op2)
%{
  match(Set cr (OverflowMulI op1 op2));

  format %{ "smull rscratch1, $op1, $op2\t# overflow check int\n\t"
            "cmp   rscratch1, rscratch1, sxtw\n\t"
            "movw  rscratch1, #0x80000000\n\t"
            "cselw rscratch1, rscratch1, zr, NE\n\t"
            "cmpw  rscratch1, #1" %}
  ins_cost(5 * INSN_COST);
  ins_encode %{
    __ smull(rscratch1, $op1$$Register, $op2$$Register);
    __ subs(zr, rscratch1, rscratch1, ext::sxtw);      // NE => overflow
    __ movw(rscratch1, 0x80000000);                    // Develop 0 (EQ),
    __ cselw(rscratch1, rscratch1, zr, Assembler::NE); // or 0x80000000 (NE)
    __ cmpw(rscratch1, 1);                             // 0x80000000 - 1 => VS
  %}

  ins_pipe(pipe_slow);
%}

// Fused int multiply-overflow + branch: skips the V-flag manufacturing by
// branching directly on the NE/EQ result of the sign-extension compare.
instruct overflowMulI_reg_branch(cmpOp cmp, iRegIorL2I op1, iRegIorL2I op2, label labl, rFlagsReg cr)
%{
  match(If cmp (OverflowMulI op1 op2));
  predicate(n->in(1)->as_Bool()->_test._test == BoolTest::overflow
            || n->in(1)->as_Bool()->_test._test == BoolTest::no_overflow);
  effect(USE labl, KILL cr);

  format %{ "smull rscratch1, $op1, $op2\t# overflow check int\n\t"
            "cmp   rscratch1, rscratch1, sxtw\n\t"
            "b$cmp   $labl" %}
  ins_cost(3 * INSN_COST); // Branch is rare so treat as INSN_COST
  ins_encode %{
    Label* L = $labl$$label;
    Assembler::Condition cond = (Assembler::Condition)$cmp$$cmpcode;
    __ smull(rscratch1, $op1$$Register, $op2$$Register);
    __ subs(zr, rscratch1, rscratch1, ext::sxtw);      // NE => overflow
    __ br(cond == Assembler::VS ? Assembler::NE : Assembler::EQ, *L);
  %}

  ins_pipe(pipe_serial);
%}

// Long multiply overflow check: low 64 bits via mul, high 64 via smulh;
// overflow iff the high half is not the sign extension of the low half.
instruct overflowMulL_reg(rFlagsReg cr, iRegL op1, iRegL op2)
%{
  match(Set cr (OverflowMulL op1 op2));

  format %{ "mul   rscratch1, $op1, $op2\t#overflow check long\n\t"
            "smulh rscratch2, $op1, $op2\n\t"
            "cmp   rscratch2, rscratch1, ASR #63\n\t"
            "movw  rscratch1, #0x80000000\n\t"
            "cselw rscratch1, rscratch1, zr, NE\n\t"
            "cmpw  rscratch1, #1" %}
  ins_cost(6 * INSN_COST);
  ins_encode %{
    __ mul(rscratch1, $op1$$Register, $op2$$Register);   // Result bits 0..63
    __ smulh(rscratch2, $op1$$Register, $op2$$Register); // Result bits 64..127
    __ cmp(rscratch2, rscratch1, Assembler::ASR, 63);    // Top is pure sign ext
    __ movw(rscratch1, 0x80000000);                    // Develop 0 (EQ),
    __ cselw(rscratch1, rscratch1, zr, Assembler::NE); // or 0x80000000 (NE)
    __ cmpw(rscratch1, 1);                             // 0x80000000 - 1 => VS
  %}

  ins_pipe(pipe_slow);
%}

// Fused long multiply-overflow + branch, analogous to the int version.
instruct overflowMulL_reg_branch(cmpOp cmp, iRegL op1, iRegL op2, label labl, rFlagsReg cr)
%{
  match(If cmp (OverflowMulL op1 op2));
  predicate(n->in(1)->as_Bool()->_test._test == BoolTest::overflow
            || n->in(1)->as_Bool()->_test._test == BoolTest::no_overflow);
  effect(USE labl, KILL cr);

  format %{ "mul   rscratch1, $op1, $op2\t#overflow check long\n\t"
            "smulh rscratch2, $op1, $op2\n\t"
            "cmp   rscratch2, rscratch1, ASR #63\n\t"
            "b$cmp $labl" %}
  ins_cost(4 * INSN_COST); // Branch is rare so treat as INSN_COST
  ins_encode %{
    Label* L = $labl$$label;
    Assembler::Condition cond = (Assembler::Condition)$cmp$$cmpcode;
    __ mul(rscratch1, $op1$$Register, $op2$$Register);   // Result bits 0..63
    __ smulh(rscratch2, $op1$$Register, $op2$$Register); // Result bits 64..127
    __ cmp(rscratch2, rscratch1, Assembler::ASR, 63);    // Top is pure sign ext
    __ br(cond == Assembler::VS ? Assembler::NE : Assembler::EQ, *L);
  %}

  ins_pipe(pipe_serial);
%}
12666 
12667 // ============================================================================
12668 // Compare Instructions
12669 
12670 instruct compI_reg_reg(rFlagsReg cr, iRegI op1, iRegI op2)
12671 %{
12672   match(Set cr (CmpI op1 op2));
12673 
12674   effect(DEF cr, USE op1, USE op2);
12675 
12676   ins_cost(INSN_COST);
12677   format %{ "cmpw  $op1, $op2" %}
12678 
12679   ins_encode(aarch64_enc_cmpw(op1, op2));
12680 
12681   ins_pipe(icmp_reg_reg);
12682 %}
12683 
12684 instruct compI_reg_immI0(rFlagsReg cr, iRegI op1, immI0 zero)
12685 %{
12686   match(Set cr (CmpI op1 zero));
12687 
12688   effect(DEF cr, USE op1);
12689 
12690   ins_cost(INSN_COST);
12691   format %{ "cmpw $op1, 0" %}
12692 
12693   ins_encode(aarch64_enc_cmpw_imm_addsub(op1, zero));
12694 
12695   ins_pipe(icmp_reg_imm);
12696 %}
12697 
12698 instruct compI_reg_immIAddSub(rFlagsReg cr, iRegI op1, immIAddSub op2)
12699 %{
12700   match(Set cr (CmpI op1 op2));
12701 
12702   effect(DEF cr, USE op1);
12703 
12704   ins_cost(INSN_COST);
12705   format %{ "cmpw  $op1, $op2" %}
12706 
12707   ins_encode(aarch64_enc_cmpw_imm_addsub(op1, op2));
12708 
12709   ins_pipe(icmp_reg_imm);
12710 %}
12711 
12712 instruct compI_reg_immI(rFlagsReg cr, iRegI op1, immI op2)
12713 %{
12714   match(Set cr (CmpI op1 op2));
12715 
12716   effect(DEF cr, USE op1);
12717 
12718   ins_cost(INSN_COST * 2);
12719   format %{ "cmpw  $op1, $op2" %}
12720 
12721   ins_encode(aarch64_enc_cmpw_imm(op1, op2));
12722 
12723   ins_pipe(icmp_reg_imm);
12724 %}
12725 
12726 // Unsigned compare Instructions; really, same as signed compare
12727 // except it should only be used to feed an If or a CMovI which takes a
12728 // cmpOpU.
12729 
// Unsigned int compare, register-register. Same cmpw encoding as the
// signed form; the consumer (cmpOpU) interprets the flags as unsigned.
instruct compU_reg_reg(rFlagsRegU cr, iRegI op1, iRegI op2)
%{
  match(Set cr (CmpU op1 op2));

  effect(DEF cr, USE op1, USE op2);

  ins_cost(INSN_COST);
  format %{ "cmpw  $op1, $op2\t# unsigned" %}

  ins_encode(aarch64_enc_cmpw(op1, op2));

  ins_pipe(icmp_reg_reg);
%}

// Unsigned int compare against zero.
instruct compU_reg_immI0(rFlagsRegU cr, iRegI op1, immI0 zero)
%{
  match(Set cr (CmpU op1 zero));

  effect(DEF cr, USE op1);

  ins_cost(INSN_COST);
  format %{ "cmpw $op1, #0\t# unsigned" %}

  ins_encode(aarch64_enc_cmpw_imm_addsub(op1, zero));

  ins_pipe(icmp_reg_imm);
%}

// Unsigned int compare against an add/sub-encodable immediate.
instruct compU_reg_immIAddSub(rFlagsRegU cr, iRegI op1, immIAddSub op2)
%{
  match(Set cr (CmpU op1 op2));

  effect(DEF cr, USE op1);

  ins_cost(INSN_COST);
  format %{ "cmpw  $op1, $op2\t# unsigned" %}

  ins_encode(aarch64_enc_cmpw_imm_addsub(op1, op2));

  ins_pipe(icmp_reg_imm);
%}

// Unsigned int compare against an arbitrary immediate (may need the
// constant materialized, hence 2x cost).
instruct compU_reg_immI(rFlagsRegU cr, iRegI op1, immI op2)
%{
  match(Set cr (CmpU op1 op2));

  effect(DEF cr, USE op1);

  ins_cost(INSN_COST * 2);
  format %{ "cmpw  $op1, $op2\t# unsigned" %}

  ins_encode(aarch64_enc_cmpw_imm(op1, op2));

  ins_pipe(icmp_reg_imm);
%}
12785 
// Long compare, register-register (64-bit cmp).
instruct compL_reg_reg(rFlagsReg cr, iRegL op1, iRegL op2)
%{
  match(Set cr (CmpL op1 op2));

  effect(DEF cr, USE op1, USE op2);

  ins_cost(INSN_COST);
  format %{ "cmp  $op1, $op2" %}

  ins_encode(aarch64_enc_cmp(op1, op2));

  ins_pipe(icmp_reg_reg);
%}

// Long compare against zero.
// NOTE(review): the format prints "tst" but the encoding used
// (aarch64_enc_cmp_imm_addsub with a zero operand) emits a
// compare-with-zero, not a logical test — confirm intended mnemonic.
instruct compL_reg_immL0(rFlagsReg cr, iRegL op1, immL0 zero)
%{
  match(Set cr (CmpL op1 zero));

  effect(DEF cr, USE op1);

  ins_cost(INSN_COST);
  format %{ "tst  $op1" %}

  ins_encode(aarch64_enc_cmp_imm_addsub(op1, zero));

  ins_pipe(icmp_reg_imm);
%}

// Long compare against an add/sub-encodable immediate.
instruct compL_reg_immLAddSub(rFlagsReg cr, iRegL op1, immLAddSub op2)
%{
  match(Set cr (CmpL op1 op2));

  effect(DEF cr, USE op1);

  ins_cost(INSN_COST);
  format %{ "cmp  $op1, $op2" %}

  ins_encode(aarch64_enc_cmp_imm_addsub(op1, op2));

  ins_pipe(icmp_reg_imm);
%}

// Long compare against an arbitrary immediate (may need the constant
// materialized, hence 2x cost).
instruct compL_reg_immL(rFlagsReg cr, iRegL op1, immL op2)
%{
  match(Set cr (CmpL op1 op2));

  effect(DEF cr, USE op1);

  ins_cost(INSN_COST * 2);
  format %{ "cmp  $op1, $op2" %}

  ins_encode(aarch64_enc_cmp_imm(op1, op2));

  ins_pipe(icmp_reg_imm);
%}
12841 
// Unsigned long compare, register-register. Same encoding as the
// signed long compare; the unsigned interpretation comes from the
// consumer's condition codes (rFlagsRegU / cmpOpU).
instruct compUL_reg_reg(rFlagsRegU cr, iRegL op1, iRegL op2)
%{
  match(Set cr (CmpUL op1 op2));

  effect(DEF cr, USE op1, USE op2);

  ins_cost(INSN_COST);
  format %{ "cmp  $op1, $op2" %}

  ins_encode(aarch64_enc_cmp(op1, op2));

  ins_pipe(icmp_reg_reg);
%}

// Unsigned long compare against zero.
// NOTE(review): format prints "tst" but the encoding emits a
// compare-with-zero — see the matching note on compL_reg_immL0.
instruct compUL_reg_immL0(rFlagsRegU cr, iRegL op1, immL0 zero)
%{
  match(Set cr (CmpUL op1 zero));

  effect(DEF cr, USE op1);

  ins_cost(INSN_COST);
  format %{ "tst  $op1" %}

  ins_encode(aarch64_enc_cmp_imm_addsub(op1, zero));

  ins_pipe(icmp_reg_imm);
%}

// Unsigned long compare against an add/sub-encodable immediate.
instruct compUL_reg_immLAddSub(rFlagsRegU cr, iRegL op1, immLAddSub op2)
%{
  match(Set cr (CmpUL op1 op2));

  effect(DEF cr, USE op1);

  ins_cost(INSN_COST);
  format %{ "cmp  $op1, $op2" %}

  ins_encode(aarch64_enc_cmp_imm_addsub(op1, op2));

  ins_pipe(icmp_reg_imm);
%}

// Unsigned long compare against an arbitrary immediate.
instruct compUL_reg_immL(rFlagsRegU cr, iRegL op1, immL op2)
%{
  match(Set cr (CmpUL op1 op2));

  effect(DEF cr, USE op1);

  ins_cost(INSN_COST * 2);
  format %{ "cmp  $op1, $op2" %}

  ins_encode(aarch64_enc_cmp_imm(op1, op2));

  ins_pipe(icmp_reg_imm);
%}
12897 
// Pointer compare, register-register (unsigned flags — pointers have
// no signed ordering).
instruct compP_reg_reg(rFlagsRegU cr, iRegP op1, iRegP op2)
%{
  match(Set cr (CmpP op1 op2));

  effect(DEF cr, USE op1, USE op2);

  ins_cost(INSN_COST);
  format %{ "cmp  $op1, $op2\t // ptr" %}

  ins_encode(aarch64_enc_cmpp(op1, op2));

  ins_pipe(icmp_reg_reg);
%}

// Compressed (narrow) pointer compare, register-register.
instruct compN_reg_reg(rFlagsRegU cr, iRegN op1, iRegN op2)
%{
  match(Set cr (CmpN op1 op2));

  effect(DEF cr, USE op1, USE op2);

  ins_cost(INSN_COST);
  format %{ "cmp  $op1, $op2\t // compressed ptr" %}

  ins_encode(aarch64_enc_cmpn(op1, op2));

  ins_pipe(icmp_reg_reg);
%}

// Pointer null test (compare against the null constant).
instruct testP_reg(rFlagsRegU cr, iRegP op1, immP0 zero)
%{
  match(Set cr (CmpP op1 zero));

  effect(DEF cr, USE op1, USE zero);

  ins_cost(INSN_COST);
  format %{ "cmp  $op1, 0\t // ptr" %}

  ins_encode(aarch64_enc_testp(op1));

  ins_pipe(icmp_reg_imm);
%}

// Compressed pointer null test.
instruct testN_reg(rFlagsRegU cr, iRegN op1, immN0 zero)
%{
  match(Set cr (CmpN op1 zero));

  effect(DEF cr, USE op1, USE zero);

  ins_cost(INSN_COST);
  format %{ "cmp  $op1, 0\t // compressed ptr" %}

  ins_encode(aarch64_enc_testn(op1));

  ins_pipe(icmp_reg_imm);
%}
12953 
12954 // FP comparisons
12955 //
12956 // n.b. CmpF/CmpD set a normal flags reg which then gets compared
12957 // using normal cmpOp. See declaration of rFlagsReg for details.
12958 
// Float compare, register-register (fcmps sets the normal flags reg).
instruct compF_reg_reg(rFlagsReg cr, vRegF src1, vRegF src2)
%{
  match(Set cr (CmpF src1 src2));

  ins_cost(3 * INSN_COST);
  format %{ "fcmps $src1, $src2" %}

  ins_encode %{
    __ fcmps(as_FloatRegister($src1$$reg), as_FloatRegister($src2$$reg));
  %}

  ins_pipe(pipe_class_compare);
%}

// Float compare against the constant 0.0 (fcmps immediate-zero form).
instruct compF_reg_zero(rFlagsReg cr, vRegF src1, immF0 src2)
%{
  match(Set cr (CmpF src1 src2));

  ins_cost(3 * INSN_COST);
  format %{ "fcmps $src1, 0.0" %}

  ins_encode %{
    __ fcmps(as_FloatRegister($src1$$reg), 0.0);
  %}

  ins_pipe(pipe_class_compare);
%}
// FROM HERE

// Double compare, register-register.
instruct compD_reg_reg(rFlagsReg cr, vRegD src1, vRegD src2)
%{
  match(Set cr (CmpD src1 src2));

  ins_cost(3 * INSN_COST);
  format %{ "fcmpd $src1, $src2" %}

  ins_encode %{
    __ fcmpd(as_FloatRegister($src1$$reg), as_FloatRegister($src2$$reg));
  %}

  ins_pipe(pipe_class_compare);
%}

// Double compare against the constant 0.0.
instruct compD_reg_zero(rFlagsReg cr, vRegD src1, immD0 src2)
%{
  match(Set cr (CmpD src1 src2));

  ins_cost(3 * INSN_COST);
  format %{ "fcmpd $src1, 0.0" %}

  ins_encode %{
    __ fcmpd(as_FloatRegister($src1$$reg), 0.0);
  %}

  ins_pipe(pipe_class_compare);
%}
13015 
// Manifest a CmpF3 result in an int register:
// -1 if src1 < src2 or unordered, 0 if equal, +1 if src1 > src2.
instruct compF3_reg_reg(iRegINoSp dst, vRegF src1, vRegF src2, rFlagsReg cr)
%{
  match(Set dst (CmpF3 src1 src2));
  effect(KILL cr);

  ins_cost(5 * INSN_COST);
  format %{ "fcmps $src1, $src2\n\t"
            "csinvw($dst, zr, zr, eq)\n\t"
            "csnegw($dst, $dst, $dst, lt)"
  %}

  ins_encode %{
    FloatRegister s1 = as_FloatRegister($src1$$reg);
    FloatRegister s2 = as_FloatRegister($src2$$reg);
    Register d = as_Register($dst$$reg);
    __ fcmps(s1, s2);
    // installs 0 if EQ else -1
    __ csinvw(d, zr, zr, Assembler::EQ);
    // keeps -1 if less or unordered else installs 1
    __ csnegw(d, d, d, Assembler::LT);
  %}

  ins_pipe(pipe_class_default);

%}
13043 
// Manifest a CmpD3 result in an int register:
// -1 if src1 < src2 or unordered, 0 if equal, +1 if src1 > src2.
instruct compD3_reg_reg(iRegINoSp dst, vRegD src1, vRegD src2, rFlagsReg cr)
%{
  match(Set dst (CmpD3 src1 src2));
  effect(KILL cr);

  ins_cost(5 * INSN_COST);
  format %{ "fcmpd $src1, $src2\n\t"
            "csinvw($dst, zr, zr, eq)\n\t"
            "csnegw($dst, $dst, $dst, lt)"
  %}

  ins_encode %{
    FloatRegister s1 = as_FloatRegister($src1$$reg);
    FloatRegister s2 = as_FloatRegister($src2$$reg);
    Register d = as_Register($dst$$reg);
    __ fcmpd(s1, s2);
    // installs 0 if EQ else -1
    __ csinvw(d, zr, zr, Assembler::EQ);
    // keeps -1 if less or unordered else installs 1
    __ csnegw(d, d, d, Assembler::LT);
  %}
  ins_pipe(pipe_class_default);

%}
13070 
// Manifest a CmpF3-against-0.0 result in an int register:
// -1 if src1 < 0.0 or unordered, 0 if equal, +1 if src1 > 0.0.
instruct compF3_reg_immF0(iRegINoSp dst, vRegF src1, immF0 zero, rFlagsReg cr)
%{
  match(Set dst (CmpF3 src1 zero));
  effect(KILL cr);

  ins_cost(5 * INSN_COST);
  format %{ "fcmps $src1, 0.0\n\t"
            "csinvw($dst, zr, zr, eq)\n\t"
            "csnegw($dst, $dst, $dst, lt)"
  %}

  ins_encode %{
    FloatRegister s1 = as_FloatRegister($src1$$reg);
    Register d = as_Register($dst$$reg);
    __ fcmps(s1, 0.0);
    // installs 0 if EQ else -1
    __ csinvw(d, zr, zr, Assembler::EQ);
    // keeps -1 if less or unordered else installs 1
    __ csnegw(d, d, d, Assembler::LT);
  %}

  ins_pipe(pipe_class_default);

%}
13097 
// Manifest a CmpD3-against-0.0 result in an int register:
// -1 if src1 < 0.0 or unordered, 0 if equal, +1 if src1 > 0.0.
instruct compD3_reg_immD0(iRegINoSp dst, vRegD src1, immD0 zero, rFlagsReg cr)
%{
  match(Set dst (CmpD3 src1 zero));
  effect(KILL cr);

  ins_cost(5 * INSN_COST);
  format %{ "fcmpd $src1, 0.0\n\t"
            "csinvw($dst, zr, zr, eq)\n\t"
            "csnegw($dst, $dst, $dst, lt)"
  %}

  ins_encode %{
    FloatRegister s1 = as_FloatRegister($src1$$reg);
    Register d = as_Register($dst$$reg);
    __ fcmpd(s1, 0.0);
    // installs 0 if EQ else -1
    __ csinvw(d, zr, zr, Assembler::EQ);
    // keeps -1 if less or unordered else installs 1
    __ csnegw(d, d, d, Assembler::LT);
  %}
  ins_pipe(pipe_class_default);

%}
13123 
13124 // Manifest a CmpL result in an integer register.
13125 // (src1 < src2) ? -1 : ((src1 > src2) ? 1 : 0)
instruct cmpL3_reg_reg(iRegINoSp dst, iRegL src1, iRegL src2, rFlagsReg flags)
%{
  match(Set dst (CmpL3 src1 src2));
  effect(KILL flags);

  ins_cost(INSN_COST * 6);
  format %{
      "cmp $src1, $src2\n\t"
      "csetw $dst, ne\n\t"
      "cnegw $dst, $dst, lt"
  %}
  ins_encode %{
    __ cmp($src1$$Register, $src2$$Register);
    // dst = (src1 != src2) ? 1 : 0
    __ csetw($dst$$Register, Assembler::NE);
    // negate to -1 when src1 < src2
    __ cnegw($dst$$Register, $dst$$Register, Assembler::LT);
  %}

  ins_pipe(ialu_reg_reg);
%}
13146 
// CmpLTMask: dst = (p < q) ? -1 : 0 (signed). csetw yields 0/1, then
// subtracting from zero turns 1 into the all-ones mask.
instruct cmpLTMask_reg_reg(iRegINoSp dst, iRegIorL2I p, iRegIorL2I q, rFlagsReg cr)
%{
  match(Set dst (CmpLTMask p q));
  effect(KILL cr);

  ins_cost(3 * INSN_COST);

  format %{ "cmpw $p, $q\t# cmpLTMask\n\t"
            "csetw $dst, lt\n\t"
            "subw $dst, zr, $dst"
  %}

  ins_encode %{
    __ cmpw(as_Register($p$$reg), as_Register($q$$reg));
    __ csetw(as_Register($dst$$reg), Assembler::LT);
    __ subw(as_Register($dst$$reg), zr, as_Register($dst$$reg));
  %}

  ins_pipe(ialu_reg_reg);
%}

// CmpLTMask against zero: the sign bit replicated across the word,
// i.e. dst = (src < 0) ? -1 : 0, via arithmetic shift right by 31.
instruct cmpLTMask_reg_zero(iRegINoSp dst, iRegIorL2I src, immI0 zero, rFlagsReg cr)
%{
  match(Set dst (CmpLTMask src zero));
  effect(KILL cr);

  ins_cost(INSN_COST);

  format %{ "asrw $dst, $src, #31\t# cmpLTMask0" %}

  ins_encode %{
    __ asrw(as_Register($dst$$reg), as_Register($src$$reg), 31);
  %}

  ins_pipe(ialu_reg_shift);
%}
13183 
13184 // ============================================================================
13185 // Max and Min
13186 
// Signed int minimum: dst = (src1 < src2) ? src1 : src2,
// as a compare followed by a conditional select (2 insns = size(8)).
instruct minI_rReg(iRegINoSp dst, iRegI src1, iRegI src2, rFlagsReg cr)
%{
  match(Set dst (MinI src1 src2));

  effect(DEF dst, USE src1, USE src2, KILL cr);
  size(8);

  ins_cost(INSN_COST * 3);
  format %{
    "cmpw $src1, $src2\t# signed int\n\t"
    "cselw $dst, $src1, $src2, lt"
  %}

  ins_encode %{
    __ cmpw(as_Register($src1$$reg),
            as_Register($src2$$reg));
    __ cselw(as_Register($dst$$reg),
             as_Register($src1$$reg),
             as_Register($src2$$reg),
             Assembler::LT);
  %}

  ins_pipe(ialu_reg_reg);
%}
13211 // FROM HERE
13212 
// Signed int maximum: dst = (src1 > src2) ? src1 : src2,
// as a compare followed by a conditional select (2 insns = size(8)).
instruct maxI_rReg(iRegINoSp dst, iRegI src1, iRegI src2, rFlagsReg cr)
%{
  match(Set dst (MaxI src1 src2));

  effect(DEF dst, USE src1, USE src2, KILL cr);
  size(8);

  ins_cost(INSN_COST * 3);
  format %{
    "cmpw $src1, $src2\t# signed int\n\t"
    "cselw $dst, $src1, $src2, gt"
  %}

  ins_encode %{
    __ cmpw(as_Register($src1$$reg),
            as_Register($src2$$reg));
    __ cselw(as_Register($dst$$reg),
             as_Register($src1$$reg),
             as_Register($src2$$reg),
             Assembler::GT);
  %}

  ins_pipe(ialu_reg_reg);
%}
13237 
13238 // ============================================================================
13239 // Branch Instructions
13240 
13241 // Direct Branch.
// Unconditional direct branch.
instruct branch(label lbl)
%{
  match(Goto);

  effect(USE lbl);

  ins_cost(BRANCH_COST);
  format %{ "b  $lbl" %}

  ins_encode(aarch64_enc_b(lbl));

  ins_pipe(pipe_branch);
%}

// Conditional Near Branch
instruct branchCon(cmpOp cmp, rFlagsReg cr, label lbl)
%{
  // Same match rule as `branchConFar'.
  match(If cmp cr);

  effect(USE lbl);

  ins_cost(BRANCH_COST);
  // If set to 1 this indicates that the current instruction is a
  // short variant of a long branch. This avoids using this
  // instruction in first-pass matching. It will then only be used in
  // the `Shorten_branches' pass.
  // ins_short_branch(1);
  format %{ "b$cmp  $lbl" %}

  ins_encode(aarch64_enc_br_con(cmp, lbl));

  ins_pipe(pipe_branch_cond);
%}

// Conditional Near Branch Unsigned
instruct branchConU(cmpOpU cmp, rFlagsRegU cr, label lbl)
%{
  // Same match rule as `branchConFar'.
  match(If cmp cr);

  effect(USE lbl);

  ins_cost(BRANCH_COST);
  // If set to 1 this indicates that the current instruction is a
  // short variant of a long branch. This avoids using this
  // instruction in first-pass matching. It will then only be used in
  // the `Shorten_branches' pass.
  // ins_short_branch(1);
  format %{ "b$cmp  $lbl\t# unsigned" %}

  ins_encode(aarch64_enc_br_conU(cmp, lbl));

  ins_pipe(pipe_branch_cond);
%}
13297 
13298 // Make use of CBZ and CBNZ.  These instructions, as well as being
13299 // shorter than (cmp; branch), have the additional benefit of not
13300 // killing the flags.
13301 
// int ==/!= 0 branch: single cbzw/cbnzw, no flags clobbered.
instruct cmpI_imm0_branch(cmpOp cmp, iRegIorL2I op1, immI0 op2, label labl, rFlagsReg cr) %{
  match(If cmp (CmpI op1 op2));
  predicate(n->in(1)->as_Bool()->_test._test == BoolTest::ne
            || n->in(1)->as_Bool()->_test._test == BoolTest::eq);
  effect(USE labl);

  ins_cost(BRANCH_COST);
  format %{ "cbw$cmp   $op1, $labl" %}
  ins_encode %{
    Label* L = $labl$$label;
    Assembler::Condition cond = (Assembler::Condition)$cmp$$cmpcode;
    if (cond == Assembler::EQ)
      __ cbzw($op1$$Register, *L);
    else
      __ cbnzw($op1$$Register, *L);
  %}
  ins_pipe(pipe_cmp_branch);
%}

// long ==/!= 0 branch: single cbz/cbnz.
instruct cmpL_imm0_branch(cmpOp cmp, iRegL op1, immL0 op2, label labl, rFlagsReg cr) %{
  match(If cmp (CmpL op1 op2));
  predicate(n->in(1)->as_Bool()->_test._test == BoolTest::ne
            || n->in(1)->as_Bool()->_test._test == BoolTest::eq);
  effect(USE labl);

  ins_cost(BRANCH_COST);
  format %{ "cb$cmp   $op1, $labl" %}
  ins_encode %{
    Label* L = $labl$$label;
    Assembler::Condition cond = (Assembler::Condition)$cmp$$cmpcode;
    if (cond == Assembler::EQ)
      __ cbz($op1$$Register, *L);
    else
      __ cbnz($op1$$Register, *L);
  %}
  ins_pipe(pipe_cmp_branch);
%}

// pointer ==/!= null branch.
instruct cmpP_imm0_branch(cmpOp cmp, iRegP op1, immP0 op2, label labl, rFlagsReg cr) %{
  match(If cmp (CmpP op1 op2));
  predicate(n->in(1)->as_Bool()->_test._test == BoolTest::ne
            || n->in(1)->as_Bool()->_test._test == BoolTest::eq);
  effect(USE labl);

  ins_cost(BRANCH_COST);
  format %{ "cb$cmp   $op1, $labl" %}
  ins_encode %{
    Label* L = $labl$$label;
    Assembler::Condition cond = (Assembler::Condition)$cmp$$cmpcode;
    if (cond == Assembler::EQ)
      __ cbz($op1$$Register, *L);
    else
      __ cbnz($op1$$Register, *L);
  %}
  ins_pipe(pipe_cmp_branch);
%}

// compressed pointer ==/!= null branch (32-bit test).
instruct cmpN_imm0_branch(cmpOp cmp, iRegN op1, immN0 op2, label labl, rFlagsReg cr) %{
  match(If cmp (CmpN op1 op2));
  predicate(n->in(1)->as_Bool()->_test._test == BoolTest::ne
            || n->in(1)->as_Bool()->_test._test == BoolTest::eq);
  effect(USE labl);

  ins_cost(BRANCH_COST);
  format %{ "cbw$cmp   $op1, $labl" %}
  ins_encode %{
    Label* L = $labl$$label;
    Assembler::Condition cond = (Assembler::Condition)$cmp$$cmpcode;
    if (cond == Assembler::EQ)
      __ cbzw($op1$$Register, *L);
    else
      __ cbnzw($op1$$Register, *L);
  %}
  ins_pipe(pipe_cmp_branch);
%}

// Null check on a decoded narrow oop: test the narrow register
// directly instead of decoding first (assumes null's narrow encoding
// is 0 — standard for compressed oops).
instruct cmpP_narrowOop_imm0_branch(cmpOp cmp, iRegN oop, immP0 zero, label labl, rFlagsReg cr) %{
  match(If cmp (CmpP (DecodeN oop) zero));
  predicate(n->in(1)->as_Bool()->_test._test == BoolTest::ne
            || n->in(1)->as_Bool()->_test._test == BoolTest::eq);
  effect(USE labl);

  ins_cost(BRANCH_COST);
  format %{ "cb$cmp   $oop, $labl" %}
  ins_encode %{
    Label* L = $labl$$label;
    Assembler::Condition cond = (Assembler::Condition)$cmp$$cmpcode;
    if (cond == Assembler::EQ)
      __ cbzw($oop$$Register, *L);
    else
      __ cbnzw($oop$$Register, *L);
  %}
  ins_pipe(pipe_cmp_branch);
%}

// Unsigned int compare against 0 branch. In addition to eq/ne this
// also handles gt/le: unsigned "u > 0" is "u != 0" and "u <= 0" is
// "u == 0", so LS maps to cbzw just like EQ.
instruct cmpUI_imm0_branch(cmpOpU cmp, iRegIorL2I op1, immI0 op2, label labl, rFlagsRegU cr) %{
  match(If cmp (CmpU op1 op2));
  predicate(n->in(1)->as_Bool()->_test._test == BoolTest::ne
            || n->in(1)->as_Bool()->_test._test == BoolTest::eq
            || n->in(1)->as_Bool()->_test._test == BoolTest::gt
            ||  n->in(1)->as_Bool()->_test._test == BoolTest::le);
  effect(USE labl);

  ins_cost(BRANCH_COST);
  format %{ "cbw$cmp   $op1, $labl" %}
  ins_encode %{
    Label* L = $labl$$label;
    Assembler::Condition cond = (Assembler::Condition)$cmp$$cmpcode;
    if (cond == Assembler::EQ || cond == Assembler::LS)
      __ cbzw($op1$$Register, *L);
    else
      __ cbnzw($op1$$Register, *L);
  %}
  ins_pipe(pipe_cmp_branch);
%}

// Unsigned long compare against 0 branch; same eq/ne/gt/le mapping
// as cmpUI_imm0_branch, 64-bit cbz/cbnz.
instruct cmpUL_imm0_branch(cmpOpU cmp, iRegL op1, immL0 op2, label labl, rFlagsRegU cr) %{
  match(If cmp (CmpUL op1 op2));
  predicate(n->in(1)->as_Bool()->_test._test == BoolTest::ne
            || n->in(1)->as_Bool()->_test._test == BoolTest::eq
            || n->in(1)->as_Bool()->_test._test == BoolTest::gt
            || n->in(1)->as_Bool()->_test._test == BoolTest::le);
  effect(USE labl);

  ins_cost(BRANCH_COST);
  format %{ "cb$cmp   $op1, $labl" %}
  ins_encode %{
    Label* L = $labl$$label;
    Assembler::Condition cond = (Assembler::Condition)$cmp$$cmpcode;
    if (cond == Assembler::EQ || cond == Assembler::LS)
      __ cbz($op1$$Register, *L);
    else
      __ cbnz($op1$$Register, *L);
  %}
  ins_pipe(pipe_cmp_branch);
%}
13438 
13439 // Test bit and Branch
13440 
13441 // Patterns for short (< 32KiB) variants
// long < 0 / >= 0 branch: test the sign bit (bit 63) with tbnz/tbz.
// LT maps to NE (bit set), GE to EQ (bit clear).
instruct cmpL_branch_sign(cmpOp cmp, iRegL op1, immL0 op2, label labl) %{
  match(If cmp (CmpL op1 op2));
  predicate(n->in(1)->as_Bool()->_test._test == BoolTest::lt
            || n->in(1)->as_Bool()->_test._test == BoolTest::ge);
  effect(USE labl);

  ins_cost(BRANCH_COST);
  format %{ "cb$cmp   $op1, $labl # long" %}
  ins_encode %{
    Label* L = $labl$$label;
    Assembler::Condition cond =
      ((Assembler::Condition)$cmp$$cmpcode == Assembler::LT) ? Assembler::NE : Assembler::EQ;
    __ tbr(cond, $op1$$Register, 63, *L);
  %}
  ins_pipe(pipe_cmp_branch);
  ins_short_branch(1);
%}

// int < 0 / >= 0 branch: test the sign bit (bit 31).
instruct cmpI_branch_sign(cmpOp cmp, iRegIorL2I op1, immI0 op2, label labl) %{
  match(If cmp (CmpI op1 op2));
  predicate(n->in(1)->as_Bool()->_test._test == BoolTest::lt
            || n->in(1)->as_Bool()->_test._test == BoolTest::ge);
  effect(USE labl);

  ins_cost(BRANCH_COST);
  format %{ "cb$cmp   $op1, $labl # int" %}
  ins_encode %{
    Label* L = $labl$$label;
    Assembler::Condition cond =
      ((Assembler::Condition)$cmp$$cmpcode == Assembler::LT) ? Assembler::NE : Assembler::EQ;
    __ tbr(cond, $op1$$Register, 31, *L);
  %}
  ins_pipe(pipe_cmp_branch);
  ins_short_branch(1);
%}

// (long & single-bit-mask) ==/!= 0 branch: tbz/tbnz on that bit.
// Only matches when the AND mask is a power of two.
instruct cmpL_branch_bit(cmpOp cmp, iRegL op1, immL op2, immL0 op3, label labl) %{
  match(If cmp (CmpL (AndL op1 op2) op3));
  predicate((n->in(1)->as_Bool()->_test._test == BoolTest::ne
            || n->in(1)->as_Bool()->_test._test == BoolTest::eq)
            && is_power_of_2(n->in(2)->in(1)->in(2)->get_long()));
  effect(USE labl);

  ins_cost(BRANCH_COST);
  format %{ "tb$cmp   $op1, $op2, $labl" %}
  ins_encode %{
    Label* L = $labl$$label;
    Assembler::Condition cond = (Assembler::Condition)$cmp$$cmpcode;
    int bit = exact_log2($op2$$constant);
    __ tbr(cond, $op1$$Register, bit, *L);
  %}
  ins_pipe(pipe_cmp_branch);
  ins_short_branch(1);
%}

// (int & single-bit-mask) ==/!= 0 branch: tbz/tbnz on that bit.
instruct cmpI_branch_bit(cmpOp cmp, iRegIorL2I op1, immI op2, immI0 op3, label labl) %{
  match(If cmp (CmpI (AndI op1 op2) op3));
  predicate((n->in(1)->as_Bool()->_test._test == BoolTest::ne
            || n->in(1)->as_Bool()->_test._test == BoolTest::eq)
            && is_power_of_2(n->in(2)->in(1)->in(2)->get_int()));
  effect(USE labl);

  ins_cost(BRANCH_COST);
  format %{ "tb$cmp   $op1, $op2, $labl" %}
  ins_encode %{
    Label* L = $labl$$label;
    Assembler::Condition cond = (Assembler::Condition)$cmp$$cmpcode;
    int bit = exact_log2($op2$$constant);
    __ tbr(cond, $op1$$Register, bit, *L);
  %}
  ins_pipe(pipe_cmp_branch);
  ins_short_branch(1);
%}
13515 
13516 // And far variants
// Far variant of cmpL_branch_sign: target may be outside the short
// tbz/tbnz range (see the "< 32KiB" note above), so tbr is asked to
// emit a far-capable sequence.
instruct far_cmpL_branch_sign(cmpOp cmp, iRegL op1, immL0 op2, label labl) %{
  match(If cmp (CmpL op1 op2));
  predicate(n->in(1)->as_Bool()->_test._test == BoolTest::lt
            || n->in(1)->as_Bool()->_test._test == BoolTest::ge);
  effect(USE labl);

  ins_cost(BRANCH_COST);
  format %{ "cb$cmp   $op1, $labl # long" %}
  ins_encode %{
    Label* L = $labl$$label;
    Assembler::Condition cond =
      ((Assembler::Condition)$cmp$$cmpcode == Assembler::LT) ? Assembler::NE : Assembler::EQ;
    __ tbr(cond, $op1$$Register, 63, *L, /*far*/true);
  %}
  ins_pipe(pipe_cmp_branch);
%}

// Far variant of cmpI_branch_sign (sign bit 31).
instruct far_cmpI_branch_sign(cmpOp cmp, iRegIorL2I op1, immI0 op2, label labl) %{
  match(If cmp (CmpI op1 op2));
  predicate(n->in(1)->as_Bool()->_test._test == BoolTest::lt
            || n->in(1)->as_Bool()->_test._test == BoolTest::ge);
  effect(USE labl);

  ins_cost(BRANCH_COST);
  format %{ "cb$cmp   $op1, $labl # int" %}
  ins_encode %{
    Label* L = $labl$$label;
    Assembler::Condition cond =
      ((Assembler::Condition)$cmp$$cmpcode == Assembler::LT) ? Assembler::NE : Assembler::EQ;
    __ tbr(cond, $op1$$Register, 31, *L, /*far*/true);
  %}
  ins_pipe(pipe_cmp_branch);
%}

// Far variant of cmpL_branch_bit (power-of-two mask test on a long).
instruct far_cmpL_branch_bit(cmpOp cmp, iRegL op1, immL op2, immL0 op3, label labl) %{
  match(If cmp (CmpL (AndL op1 op2) op3));
  predicate((n->in(1)->as_Bool()->_test._test == BoolTest::ne
            || n->in(1)->as_Bool()->_test._test == BoolTest::eq)
            && is_power_of_2(n->in(2)->in(1)->in(2)->get_long()));
  effect(USE labl);

  ins_cost(BRANCH_COST);
  format %{ "tb$cmp   $op1, $op2, $labl" %}
  ins_encode %{
    Label* L = $labl$$label;
    Assembler::Condition cond = (Assembler::Condition)$cmp$$cmpcode;
    int bit = exact_log2($op2$$constant);
    __ tbr(cond, $op1$$Register, bit, *L, /*far*/true);
  %}
  ins_pipe(pipe_cmp_branch);
%}

// Far variant of cmpI_branch_bit (power-of-two mask test on an int).
instruct far_cmpI_branch_bit(cmpOp cmp, iRegIorL2I op1, immI op2, immI0 op3, label labl) %{
  match(If cmp (CmpI (AndI op1 op2) op3));
  predicate((n->in(1)->as_Bool()->_test._test == BoolTest::ne
            || n->in(1)->as_Bool()->_test._test == BoolTest::eq)
            && is_power_of_2(n->in(2)->in(1)->in(2)->get_int()));
  effect(USE labl);

  ins_cost(BRANCH_COST);
  format %{ "tb$cmp   $op1, $op2, $labl" %}
  ins_encode %{
    Label* L = $labl$$label;
    Assembler::Condition cond = (Assembler::Condition)$cmp$$cmpcode;
    int bit = exact_log2($op2$$constant);
    __ tbr(cond, $op1$$Register, bit, *L, /*far*/true);
  %}
  ins_pipe(pipe_cmp_branch);
%}
13586 
13587 // Test bits
13588 
// (long & imm) compared to 0: single tst when the mask is encodable
// as a 64-bit logical immediate.
instruct cmpL_and(cmpOp cmp, iRegL op1, immL op2, immL0 op3, rFlagsReg cr) %{
  match(Set cr (CmpL (AndL op1 op2) op3));
  predicate(Assembler::operand_valid_for_logical_immediate
            (/*is_32*/false, n->in(1)->in(2)->get_long()));

  ins_cost(INSN_COST);
  format %{ "tst $op1, $op2 # long" %}
  ins_encode %{
    __ tst($op1$$Register, $op2$$constant);
  %}
  ins_pipe(ialu_reg_reg);
%}

// (int & imm) compared to 0: single tstw when the mask is encodable
// as a 32-bit logical immediate.
instruct cmpI_and(cmpOp cmp, iRegIorL2I op1, immI op2, immI0 op3, rFlagsReg cr) %{
  match(Set cr (CmpI (AndI op1 op2) op3));
  predicate(Assembler::operand_valid_for_logical_immediate
            (/*is_32*/true, n->in(1)->in(2)->get_int()));

  ins_cost(INSN_COST);
  format %{ "tst $op1, $op2 # int" %}
  ins_encode %{
    __ tstw($op1$$Register, $op2$$constant);
  %}
  ins_pipe(ialu_reg_reg);
%}

// (long & reg) compared to 0: register-register tst.
instruct cmpL_and_reg(cmpOp cmp, iRegL op1, iRegL op2, immL0 op3, rFlagsReg cr) %{
  match(Set cr (CmpL (AndL op1 op2) op3));

  ins_cost(INSN_COST);
  format %{ "tst $op1, $op2 # long" %}
  ins_encode %{
    __ tst($op1$$Register, $op2$$Register);
  %}
  ins_pipe(ialu_reg_reg);
%}

// (int & reg) compared to 0: register-register tstw.
instruct cmpI_and_reg(cmpOp cmp, iRegIorL2I op1, iRegIorL2I op2, immI0 op3, rFlagsReg cr) %{
  match(Set cr (CmpI (AndI op1 op2) op3));

  ins_cost(INSN_COST);
  format %{ "tstw $op1, $op2 # int" %}
  ins_encode %{
    __ tstw($op1$$Register, $op2$$Register);
  %}
  ins_pipe(ialu_reg_reg);
%}
13636 
13637 
13638 // Conditional Far Branch
13639 // Conditional Far Branch Unsigned
13640 // TODO: fixme
13641 
13642 // counted loop end branch near
// Counted-loop back branch (signed condition), near form.
instruct branchLoopEnd(cmpOp cmp, rFlagsReg cr, label lbl)
%{
  match(CountedLoopEnd cmp cr);

  effect(USE lbl);

  ins_cost(BRANCH_COST);
  // short variant.
  // ins_short_branch(1);
  format %{ "b$cmp $lbl \t// counted loop end" %}

  ins_encode(aarch64_enc_br_con(cmp, lbl));

  ins_pipe(pipe_branch);
%}

// Counted-loop back branch (unsigned condition), near form.
instruct branchLoopEndU(cmpOpU cmp, rFlagsRegU cr, label lbl)
%{
  match(CountedLoopEnd cmp cr);

  effect(USE lbl);

  ins_cost(BRANCH_COST);
  // short variant.
  // ins_short_branch(1);
  format %{ "b$cmp $lbl \t// counted loop end unsigned" %}

  ins_encode(aarch64_enc_br_conU(cmp, lbl));

  ins_pipe(pipe_branch);
%}
13675 
13676 // counted loop end branch far
13677 // counted loop end branch far unsigned
13678 // TODO: fixme
13679 
13680 // ============================================================================
13681 // inlined locking and unlocking
13682 
// Inlined fast-path monitor enter; result is communicated through the
// flags register. Clobbers both temps.
instruct cmpFastLock(rFlagsReg cr, iRegP object, iRegP box, iRegPNoSp tmp, iRegPNoSp tmp2)
%{
  match(Set cr (FastLock object box));
  effect(TEMP tmp, TEMP tmp2);

  // TODO
  // identify correct cost
  ins_cost(5 * INSN_COST);
  format %{ "fastlock $object,$box\t! kills $tmp,$tmp2" %}

  ins_encode(aarch64_enc_fast_lock(object, box, tmp, tmp2));

  ins_pipe(pipe_serial);
%}

// Inlined fast-path monitor exit; counterpart of cmpFastLock.
instruct cmpFastUnlock(rFlagsReg cr, iRegP object, iRegP box, iRegPNoSp tmp, iRegPNoSp tmp2)
%{
  match(Set cr (FastUnlock object box));
  effect(TEMP tmp, TEMP tmp2);

  ins_cost(5 * INSN_COST);
  format %{ "fastunlock $object,$box\t! kills $tmp, $tmp2" %}

  ins_encode(aarch64_enc_fast_unlock(object, box, tmp, tmp2));

  ins_pipe(pipe_serial);
%}
13710 
13711 
13712 // ============================================================================
13713 // Safepoint Instructions
13714 
13715 // TODO
13716 // provide a near and far version of this code
13717 
// Safepoint poll: load from the polling page; the load faults when a
// safepoint is pending, trapping into the VM.
instruct safePoint(rFlagsReg cr, iRegP poll)
%{
  match(SafePoint poll);
  effect(KILL cr);

  format %{
    "ldrw zr, [$poll]\t# Safepoint: poll for GC"
  %}
  ins_encode %{
    __ read_polling_page(as_Register($poll$$reg), relocInfo::poll_type);
  %}
  ins_pipe(pipe_serial); // ins_pipe(iload_reg_mem);
%}
13731 
13732 
13733 // ============================================================================
13734 // Procedure Call/Return Instructions
13735 
13736 // Call Java Static Instruction
13737 
// Static Java call (non-method-handle invokes only; the method-handle
// form is matched by CallStaticJavaDirectHandle).
instruct CallStaticJavaDirect(method meth)
%{
  match(CallStaticJava);

  effect(USE meth);

  predicate(!((CallStaticJavaNode*)n)->is_method_handle_invoke());

  ins_cost(CALL_COST);

  format %{ "call,static $meth \t// ==> " %}

  ins_encode( aarch64_enc_java_static_call(meth),
              aarch64_enc_call_epilog );

  ins_pipe(pipe_class_call);
%}
13755 
13756 // TO HERE
13757 
13758 // Call Java Static Instruction (method handle version)
13759 
// Direct call to a static Java method through a method handle invoke.
// NOTE(review): reg_mh_save is an FP-class operand that is otherwise
// unused here -- presumably it reserves rfp across the handle call
// (which may adjust SP); confirm against aarch64_enc_java_handle_call.
instruct CallStaticJavaDirectHandle(method meth, iRegP_FP reg_mh_save)
%{
  match(CallStaticJava);

  effect(USE meth);

  // Only the method-handle form of CallStaticJava.
  predicate(((CallStaticJavaNode*)n)->is_method_handle_invoke());

  ins_cost(CALL_COST);

  format %{ "call,static $meth \t// (methodhandle) ==> " %}

  ins_encode( aarch64_enc_java_handle_call(meth),
              aarch64_enc_call_epilog );

  ins_pipe(pipe_class_call);
%}
13777 
13778 // Call Java Dynamic Instruction
// Dynamically dispatched Java call; the dispatch mechanism (inline
// cache setup etc.) lives entirely in aarch64_enc_java_dynamic_call.
instruct CallDynamicJavaDirect(method meth)
%{
  match(CallDynamicJava);

  effect(USE meth);

  ins_cost(CALL_COST);

  format %{ "CALL,dynamic $meth \t// ==> " %}

  ins_encode( aarch64_enc_java_dynamic_call(meth),
               aarch64_enc_call_epilog );

  ins_pipe(pipe_class_call);
%}
13794 
13795 // Call Runtime Instruction
13796 
// Call from compiled Java code into a VM runtime routine.  Note there
// is no call epilog here, unlike the Java-to-Java call rules above.
instruct CallRuntimeDirect(method meth)
%{
  match(CallRuntime);

  effect(USE meth);

  ins_cost(CALL_COST);

  format %{ "CALL, runtime $meth" %}

  ins_encode( aarch64_enc_java_to_runtime(meth) );

  ins_pipe(pipe_class_call);
%}
13811 
// Call Runtime Leaf Instruction

// Call to a runtime leaf routine (one that makes no calls back into
// Java).  Shares the java-to-runtime encoding with CallRuntimeDirect.
instruct CallLeafDirect(method meth)
%{
  match(CallLeaf);

  effect(USE meth);

  ins_cost(CALL_COST);

  format %{ "CALL, runtime leaf $meth" %}

  ins_encode( aarch64_enc_java_to_runtime(meth) );

  ins_pipe(pipe_class_call);
%}
13828 
// Call Runtime Leaf Instruction (no floating point state involved)

// Call to a runtime leaf routine that involves no floating-point
// state.  Identical encoding to CallLeafDirect; the distinct ideal
// node lets the compiler track FP usage separately.
instruct CallLeafNoFPDirect(method meth)
%{
  match(CallLeafNoFP);

  effect(USE meth);

  ins_cost(CALL_COST);

  format %{ "CALL, runtime leaf nofp $meth" %}

  ins_encode( aarch64_enc_java_to_runtime(meth) );

  ins_pipe(pipe_class_call);
%}
13845 
13846 // Tail Call; Jump from runtime stub to Java code.
13847 // Also known as an 'interprocedural jump'.
13848 // Target of jump will eventually return to caller.
13849 // TailJump below removes the return address.
// Interprocedural jump (tail call) from a runtime stub into Java code.
// method_oop rides along in the inline-cache register so the callee
// can find its method; only jump_target is encoded (a br).
instruct TailCalljmpInd(iRegPNoSp jump_target, inline_cache_RegP method_oop)
%{
  match(TailCall jump_target method_oop);

  ins_cost(CALL_COST);

  format %{ "br $jump_target\t# $method_oop holds method oop" %}

  ins_encode(aarch64_enc_tail_call(jump_target));

  ins_pipe(pipe_class_call);
%}
13862 
// Tail jump used for exception forwarding: like TailCall but the
// return address has been removed and r0 carries the exception oop to
// the target handler.
instruct TailjmpInd(iRegPNoSp jump_target, iRegP_R0 ex_oop)
%{
  match(TailJump jump_target ex_oop);

  ins_cost(CALL_COST);

  format %{ "br $jump_target\t# $ex_oop holds exception oop" %}

  ins_encode(aarch64_enc_tail_jmp(jump_target));

  ins_pipe(pipe_class_call);
%}
13875 
13876 // Create exception oop: created by stack-crawling runtime code.
13877 // Created exception is now available to this handler, and is setup
13878 // just prior to jumping to this handler. No code emitted.
13879 // TODO check
13880 // should ex_oop be in r0? intel uses rax, ppc cannot use r0 so uses rarg1
// Zero-size marker: tells the register allocator the exception oop
// already sits in r0 on entry to the handler (placed there by the
// stack-crawling runtime); nothing is emitted.
instruct CreateException(iRegP_R0 ex_oop)
%{
  match(Set ex_oop (CreateEx));

  format %{ " -- \t// exception oop; no code emitted" %}

  size(0);

  ins_encode( /*empty*/ );

  ins_pipe(pipe_class_empty);
%}
13893 
13894 // Rethrow exception: The exception oop will come in the first
13895 // argument position. Then JUMP (not call) to the rethrow stub code.
// Rethrow the pending exception: an unconditional jump (not a call)
// to the shared rethrow stub; the exception oop arrives in the first
// argument register.
instruct RethrowException() %{
  match(Rethrow);
  ins_cost(CALL_COST);

  format %{ "b rethrow_stub" %}

  ins_encode( aarch64_enc_rethrow() );

  ins_pipe(pipe_class_call);
%}
13906 
13907 
13908 // Return Instruction
13909 // epilog node loads ret address into lr as part of frame pop
// Method return.  The epilog has already popped the frame and
// reloaded the return address into lr, so this is a bare ret.
instruct Ret()
%{
  match(Return);

  format %{ "ret\t// return register" %}

  ins_encode( aarch64_enc_ret() );

  ins_pipe(pipe_branch);
%}
13920 
13921 // Die now.
// Halt: emit a breakpoint (brk #999) so control can never fall
// through; hitting it indicates compiler-generated unreachable code
// was executed.
instruct ShouldNotReachHere() %{
  match(Halt);

  ins_cost(CALL_COST);
  format %{ "ShouldNotReachHere" %}

  ins_encode %{
    // TODO
    // implement proper trap call here
    __ brk(999);
  %}

  ins_pipe(pipe_class_default);
%}
13936 
13937 // ============================================================================
13938 // Partial Subtype Check
13939 // 
13940 // superklass array for an instance of the superklass.  Set a hidden
13941 // internal cache on a hit (cache is checked with exposed code in
13942 // gen_subtype_check()).  Return NZ for a miss or zero for a hit.  The
13943 // encoding ALSO sets flags.
13944 
// Partial subtype check with a register result.  The fixed registers
// (r4/r0/r2/r5) are pinned by the operand classes; NOTE(review):
// presumably they match the calling convention expected by
// aarch64_enc_partial_subtype_check -- confirm in the encoding.
instruct partialSubtypeCheck(iRegP_R4 sub, iRegP_R0 super, iRegP_R2 temp, iRegP_R5 result, rFlagsReg cr)
%{
  match(Set result (PartialSubtypeCheck sub super));
  effect(KILL cr, KILL temp);

  ins_cost(1100);  // slightly larger than the next version
  format %{ "partialSubtypeCheck $result, $sub, $super" %}

  ins_encode(aarch64_enc_partial_subtype_check(sub, super, temp, result));

  opcode(0x1); // Force zero of result reg on hit

  ins_pipe(pipe_class_memory);
%}
13959 
// Partial subtype check fused with a compare against zero: only the
// flags are consumed, so the result register is merely clobbered
// (KILL result) and the encoding is told not to zero it (opcode 0x0).
instruct partialSubtypeCheckVsZero(iRegP_R4 sub, iRegP_R0 super, iRegP_R2 temp, iRegP_R5 result, immP0 zero, rFlagsReg cr)
%{
  match(Set cr (CmpP (PartialSubtypeCheck sub super) zero));
  effect(KILL temp, KILL result);

  ins_cost(1100);  // slightly larger than the next version
  format %{ "partialSubtypeCheck $result, $sub, $super == 0" %}

  ins_encode(aarch64_enc_partial_subtype_check(sub, super, temp, result));

  opcode(0x0); // Don't zero result reg on hit

  ins_pipe(pipe_class_memory);
%}
13974 
// Lexicographic comparison of two strings (StrComp intrinsic).
// Register assignments are fixed by the operand classes to match the
// MacroAssembler::string_compare helper; all inputs and tmp1 are
// clobbered.
instruct string_compare(iRegP_R1 str1, iRegI_R2 cnt1, iRegP_R3 str2, iRegI_R4 cnt2,
                        iRegI_R0 result, iRegP_R10 tmp1, rFlagsReg cr)
%{
  match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
  effect(KILL tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);

  format %{ "String Compare $str1,$cnt1,$str2,$cnt2 -> $result   # KILL $tmp1" %}
  ins_encode %{
    __ string_compare($str1$$Register, $str2$$Register,
                      $cnt1$$Register, $cnt2$$Register, $result$$Register,
                      $tmp1$$Register);
  %}
  ins_pipe(pipe_class_memory);
%}
13989 
// String.indexOf with a variable-length substring.  Passes -1 as the
// constant-count argument so MacroAssembler::string_indexof reads the
// substring length from $cnt2 at runtime (contrast with
// string_indexof_con below).
instruct string_indexof(iRegP_R1 str1, iRegI_R4 cnt1, iRegP_R3 str2, iRegI_R2 cnt2,
       iRegI_R0 result, iRegINoSp tmp1, iRegINoSp tmp2, iRegINoSp tmp3, iRegINoSp tmp4, rFlagsReg cr)
%{
  match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 cnt2)));
  effect(USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2,
         TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, KILL cr);
  format %{ "String IndexOf $str1,$cnt1,$str2,$cnt2 -> $result" %}

  ins_encode %{
    __ string_indexof($str1$$Register, $str2$$Register,
                      $cnt1$$Register, $cnt2$$Register,
                      $tmp1$$Register, $tmp2$$Register,
                      $tmp3$$Register, $tmp4$$Register,
                      -1, $result$$Register);
  %}
  ins_pipe(pipe_class_memory);
%}
14007 
// String.indexOf specialised for a small constant substring length
// (immI_le_4, i.e. at most 4).  The count is baked into the code as
// icnt2 and zr is passed where the runtime count register would go.
instruct string_indexof_con(iRegP_R1 str1, iRegI_R4 cnt1, iRegP_R3 str2,
                 immI_le_4 int_cnt2, iRegI_R0 result, iRegINoSp tmp1, iRegINoSp tmp2,
                 iRegINoSp tmp3, iRegINoSp tmp4, rFlagsReg cr)
%{
  match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 int_cnt2)));
  effect(USE_KILL str1, USE_KILL str2, USE_KILL cnt1,
         TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, KILL cr);
  format %{ "String IndexOf $str1,$cnt1,$str2,$int_cnt2 -> $result" %}

  ins_encode %{
    // Compile-time substring length, passed to the helper as a constant.
    int icnt2 = (int)$int_cnt2$$constant;
    __ string_indexof($str1$$Register, $str2$$Register,
                      $cnt1$$Register, zr,
                      $tmp1$$Register, $tmp2$$Register,
                      $tmp3$$Register, $tmp4$$Register,
                      icnt2, $result$$Register);
  %}
  ins_pipe(pipe_class_memory);
%}
14027 
// String equality over $cnt characters (StrEquals intrinsic); result
// is set by MacroAssembler::string_equals.  All inputs and tmp are
// clobbered.
instruct string_equals(iRegP_R1 str1, iRegP_R3 str2, iRegI_R4 cnt,
                        iRegI_R0 result, iRegP_R10 tmp, rFlagsReg cr)
%{
  match(Set result (StrEquals (Binary str1 str2) cnt));
  effect(KILL tmp, USE_KILL str1, USE_KILL str2, USE_KILL cnt, KILL cr);

  format %{ "String Equals $str1,$str2,$cnt -> $result    // KILL $tmp" %}
  ins_encode %{
    __ string_equals($str1$$Register, $str2$$Register,
                      $cnt$$Register, $result$$Register,
                      $tmp$$Register);
  %}
  ins_pipe(pipe_class_memory);
%}
14042 
// char[] equality (AryEq intrinsic), delegated to
// MacroAssembler::char_arrays_equals; both array pointers and tmp are
// clobbered.
instruct array_equals(iRegP_R1 ary1, iRegP_R2 ary2, iRegI_R0 result,
                      iRegP_R10 tmp, rFlagsReg cr)
%{
  match(Set result (AryEq ary1 ary2));
  effect(KILL tmp, USE_KILL ary1, USE_KILL ary2, KILL cr);

  format %{ "Array Equals $ary1,ary2 -> $result    // KILL $tmp" %}
  ins_encode %{
    __ char_arrays_equals($ary1$$Register, $ary2$$Register,
                          $result$$Register, $tmp$$Register);
  %}
  ins_pipe(pipe_class_memory);
%}
14056 
14057 // encode char[] to byte[] in ISO_8859_1
// Encode a char[] into a byte[] as ISO-8859-1 using SIMD temporaries
// V0-V3.  NOTE(review): $result is set by the helper -- presumably the
// number of characters successfully encoded; confirm against
// MacroAssembler::encode_iso_array.
instruct encode_iso_array(iRegP_R2 src, iRegP_R1 dst, iRegI_R3 len,
                          vRegD_V0 Vtmp1, vRegD_V1 Vtmp2,
                          vRegD_V2 Vtmp3, vRegD_V3 Vtmp4,
                          iRegI_R0 result, rFlagsReg cr)
%{
  match(Set result (EncodeISOArray src (Binary dst len)));
  effect(USE_KILL src, USE_KILL dst, USE_KILL len,
         KILL Vtmp1, KILL Vtmp2, KILL Vtmp3, KILL Vtmp4, KILL cr);

  format %{ "Encode array $src,$dst,$len -> $result" %}
  ins_encode %{
    __ encode_iso_array($src$$Register, $dst$$Register, $len$$Register,
         $result$$Register, $Vtmp1$$FloatRegister,  $Vtmp2$$FloatRegister,
         $Vtmp3$$FloatRegister,  $Vtmp4$$FloatRegister);
  %}
  ins_pipe( pipe_class_memory );
%}
14075 
14076 // ============================================================================
14077 // This name is KNOWN by the ADLC and cannot be changed.
14078 // The ADLC forces a 'TypeRawPtr::BOTTOM' output type
14079 // for this guy.
// Zero-size rule: the dedicated thread register (thread_RegP) already
// holds Thread::current, so ThreadLocal costs nothing and emits no
// code -- the operand class alone pins the register.
instruct tlsLoadP(thread_RegP dst)
%{
  match(Set dst (ThreadLocal));

  ins_cost(0);

  format %{ " -- \t// $dst=Thread::current(), empty" %}

  size(0);

  ins_encode( /*empty*/ );

  ins_pipe(pipe_class_empty);
%}
14094 
14095 // ====================VECTOR INSTRUCTIONS=====================================
14096 
// Vector loads/stores.  Each rule selects on the vector's in-memory
// size via its predicate (4/8/16 bytes -> ldrs/ldrd/ldrq and
// strs/strd/strq).  NOTE(review): the vmem4/vmem8/vmem16 operands
// appear to restrict the addressing modes to those legal for the given
// access size -- see their definitions earlier in this file.

// Load vector (32 bits)
instruct loadV4(vecD dst, vmem4 mem)
%{
  predicate(n->as_LoadVector()->memory_size() == 4);
  match(Set dst (LoadVector mem));
  ins_cost(4 * INSN_COST);
  format %{ "ldrs   $dst,$mem\t# vector (32 bits)" %}
  ins_encode( aarch64_enc_ldrvS(dst, mem) );
  ins_pipe(vload_reg_mem64);
%}

// Load vector (64 bits)
instruct loadV8(vecD dst, vmem8 mem)
%{
  predicate(n->as_LoadVector()->memory_size() == 8);
  match(Set dst (LoadVector mem));
  ins_cost(4 * INSN_COST);
  format %{ "ldrd   $dst,$mem\t# vector (64 bits)" %}
  ins_encode( aarch64_enc_ldrvD(dst, mem) );
  ins_pipe(vload_reg_mem64);
%}

// Load Vector (128 bits)
instruct loadV16(vecX dst, vmem16 mem)
%{
  predicate(n->as_LoadVector()->memory_size() == 16);
  match(Set dst (LoadVector mem));
  ins_cost(4 * INSN_COST);
  format %{ "ldrq   $dst,$mem\t# vector (128 bits)" %}
  ins_encode( aarch64_enc_ldrvQ(dst, mem) );
  ins_pipe(vload_reg_mem128);
%}

// Store Vector (32 bits)
instruct storeV4(vecD src, vmem4 mem)
%{
  predicate(n->as_StoreVector()->memory_size() == 4);
  match(Set mem (StoreVector mem src));
  ins_cost(4 * INSN_COST);
  format %{ "strs   $mem,$src\t# vector (32 bits)" %}
  ins_encode( aarch64_enc_strvS(src, mem) );
  ins_pipe(vstore_reg_mem64);
%}

// Store Vector (64 bits)
instruct storeV8(vecD src, vmem8 mem)
%{
  predicate(n->as_StoreVector()->memory_size() == 8);
  match(Set mem (StoreVector mem src));
  ins_cost(4 * INSN_COST);
  format %{ "strd   $mem,$src\t# vector (64 bits)" %}
  ins_encode( aarch64_enc_strvD(src, mem) );
  ins_pipe(vstore_reg_mem64);
%}

// Store Vector (128 bits)
instruct storeV16(vecX src, vmem16 mem)
%{
  predicate(n->as_StoreVector()->memory_size() == 16);
  match(Set mem (StoreVector mem src));
  ins_cost(4 * INSN_COST);
  format %{ "strq   $mem,$src\t# vector (128 bits)" %}
  ins_encode( aarch64_enc_strvQ(src, mem) );
  ins_pipe(vstore_reg_mem128);
%}
14162 
// Replicate (broadcast) an integer scalar into every lane of a vector
// register.  The register forms use DUP from a general register; the
// _imm forms materialize a lane-sized constant (masked to lane width
// in the encoding).  D-register rules whose predicate accepts two
// lengths also serve the shorter vector that still fits in 64 bits.

// 4B or 8B byte broadcast -- both fit in a D register.
instruct replicate8B(vecD dst, iRegIorL2I src)
%{
  predicate(n->as_Vector()->length() == 4 ||
            n->as_Vector()->length() == 8);
  match(Set dst (ReplicateB src));
  ins_cost(INSN_COST);
  format %{ "dup  $dst, $src\t# vector (8B)" %}
  ins_encode %{
    __ dup(as_FloatRegister($dst$$reg), __ T8B, as_Register($src$$reg));
  %}
  ins_pipe(vdup_reg_reg64);
%}

instruct replicate16B(vecX dst, iRegIorL2I src)
%{
  predicate(n->as_Vector()->length() == 16);
  match(Set dst (ReplicateB src));
  ins_cost(INSN_COST);
  format %{ "dup  $dst, $src\t# vector (16B)" %}
  ins_encode %{
    __ dup(as_FloatRegister($dst$$reg), __ T16B, as_Register($src$$reg));
  %}
  ins_pipe(vdup_reg_reg128);
%}

// Byte-constant broadcast; constant truncated to 8 bits.
instruct replicate8B_imm(vecD dst, immI con)
%{
  predicate(n->as_Vector()->length() == 4 ||
            n->as_Vector()->length() == 8);
  match(Set dst (ReplicateB con));
  ins_cost(INSN_COST);
  format %{ "movi  $dst, $con\t# vector(8B)" %}
  ins_encode %{
    __ mov(as_FloatRegister($dst$$reg), __ T8B, $con$$constant & 0xff);
  %}
  ins_pipe(vmovi_reg_imm64);
%}

instruct replicate16B_imm(vecX dst, immI con)
%{
  predicate(n->as_Vector()->length() == 16);
  match(Set dst (ReplicateB con));
  ins_cost(INSN_COST);
  format %{ "movi  $dst, $con\t# vector(16B)" %}
  ins_encode %{
    __ mov(as_FloatRegister($dst$$reg), __ T16B, $con$$constant & 0xff);
  %}
  ins_pipe(vmovi_reg_imm128);
%}

// 2S or 4S short broadcast -- both fit in a D register.
instruct replicate4S(vecD dst, iRegIorL2I src)
%{
  predicate(n->as_Vector()->length() == 2 ||
            n->as_Vector()->length() == 4);
  match(Set dst (ReplicateS src));
  ins_cost(INSN_COST);
  format %{ "dup  $dst, $src\t# vector (4S)" %}
  ins_encode %{
    __ dup(as_FloatRegister($dst$$reg), __ T4H, as_Register($src$$reg));
  %}
  ins_pipe(vdup_reg_reg64);
%}

instruct replicate8S(vecX dst, iRegIorL2I src)
%{
  predicate(n->as_Vector()->length() == 8);
  match(Set dst (ReplicateS src));
  ins_cost(INSN_COST);
  format %{ "dup  $dst, $src\t# vector (8S)" %}
  ins_encode %{
    __ dup(as_FloatRegister($dst$$reg), __ T8H, as_Register($src$$reg));
  %}
  ins_pipe(vdup_reg_reg128);
%}

// Short-constant broadcast; constant truncated to 16 bits.
instruct replicate4S_imm(vecD dst, immI con)
%{
  predicate(n->as_Vector()->length() == 2 ||
            n->as_Vector()->length() == 4);
  match(Set dst (ReplicateS con));
  ins_cost(INSN_COST);
  format %{ "movi  $dst, $con\t# vector(4H)" %}
  ins_encode %{
    __ mov(as_FloatRegister($dst$$reg), __ T4H, $con$$constant & 0xffff);
  %}
  ins_pipe(vmovi_reg_imm64);
%}

instruct replicate8S_imm(vecX dst, immI con)
%{
  predicate(n->as_Vector()->length() == 8);
  match(Set dst (ReplicateS con));
  ins_cost(INSN_COST);
  format %{ "movi  $dst, $con\t# vector(8H)" %}
  ins_encode %{
    __ mov(as_FloatRegister($dst$$reg), __ T8H, $con$$constant & 0xffff);
  %}
  ins_pipe(vmovi_reg_imm128);
%}

instruct replicate2I(vecD dst, iRegIorL2I src)
%{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (ReplicateI src));
  ins_cost(INSN_COST);
  format %{ "dup  $dst, $src\t# vector (2I)" %}
  ins_encode %{
    __ dup(as_FloatRegister($dst$$reg), __ T2S, as_Register($src$$reg));
  %}
  ins_pipe(vdup_reg_reg64);
%}

instruct replicate4I(vecX dst, iRegIorL2I src)
%{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (ReplicateI src));
  ins_cost(INSN_COST);
  format %{ "dup  $dst, $src\t# vector (4I)" %}
  ins_encode %{
    __ dup(as_FloatRegister($dst$$reg), __ T4S, as_Register($src$$reg));
  %}
  ins_pipe(vdup_reg_reg128);
%}

instruct replicate2I_imm(vecD dst, immI con)
%{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (ReplicateI con));
  ins_cost(INSN_COST);
  format %{ "movi  $dst, $con\t# vector(2I)" %}
  ins_encode %{
    __ mov(as_FloatRegister($dst$$reg), __ T2S, $con$$constant);
  %}
  ins_pipe(vmovi_reg_imm64);
%}

instruct replicate4I_imm(vecX dst, immI con)
%{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (ReplicateI con));
  ins_cost(INSN_COST);
  format %{ "movi  $dst, $con\t# vector(4I)" %}
  ins_encode %{
    __ mov(as_FloatRegister($dst$$reg), __ T4S, $con$$constant);
  %}
  ins_pipe(vmovi_reg_imm128);
%}

// Long broadcast requires a full X register (two 64-bit lanes).
instruct replicate2L(vecX dst, iRegL src)
%{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (ReplicateL src));
  ins_cost(INSN_COST);
  format %{ "dup  $dst, $src\t# vector (2L)" %}
  ins_encode %{
    __ dup(as_FloatRegister($dst$$reg), __ T2D, as_Register($src$$reg));
  %}
  ins_pipe(vdup_reg_reg128);
%}
14322 
14323 instruct replicate2L_zero(vecX dst, immI0 zero)
14324 %{
14325   predicate(n->as_Vector()->length() == 2);
14326   match(Set dst (ReplicateI zero));
14327   ins_cost(INSN_COST);
14328   format %{ "movi  $dst, $zero\t# vector(4I)" %}
14329   ins_encode %{
14330     __ eor(as_FloatRegister($dst$$reg), __ T16B,
14331            as_FloatRegister($dst$$reg),
14332            as_FloatRegister($dst$$reg));
14333   %}
14334   ins_pipe(vmovi_reg_imm128);
14335 %}
14336 
// Floating-point broadcasts: DUP from an FP/SIMD source register
// (element 0) into every lane.

instruct replicate2F(vecD dst, vRegF src)
%{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (ReplicateF src));
  ins_cost(INSN_COST);
  format %{ "dup  $dst, $src\t# vector (2F)" %}
  ins_encode %{
    __ dup(as_FloatRegister($dst$$reg), __ T2S,
           as_FloatRegister($src$$reg));
  %}
  ins_pipe(vdup_reg_freg64);
%}

instruct replicate4F(vecX dst, vRegF src)
%{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (ReplicateF src));
  ins_cost(INSN_COST);
  format %{ "dup  $dst, $src\t# vector (4F)" %}
  ins_encode %{
    __ dup(as_FloatRegister($dst$$reg), __ T4S,
           as_FloatRegister($src$$reg));
  %}
  ins_pipe(vdup_reg_freg128);
%}

instruct replicate2D(vecX dst, vRegD src)
%{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (ReplicateD src));
  ins_cost(INSN_COST);
  format %{ "dup  $dst, $src\t# vector (2D)" %}
  ins_encode %{
    __ dup(as_FloatRegister($dst$$reg), __ T2D,
           as_FloatRegister($src$$reg));
  %}
  ins_pipe(vdup_reg_dreg128);
%}
14375 
14376 // ====================VECTOR ARITHMETIC=======================================
14377 
14378 // --------------------------------- ADD --------------------------------------
14379 
// Lane-wise vector addition.  Integer forms use SIMD ADD (addv with
// an arrangement), FP forms use FADD.  Each rule's predicate selects
// on lane count; D-register rules that accept two lengths also cover
// the shorter vector that fits in 64 bits.

instruct vadd8B(vecD dst, vecD src1, vecD src2)
%{
  predicate(n->as_Vector()->length() == 4 ||
            n->as_Vector()->length() == 8);
  match(Set dst (AddVB src1 src2));
  ins_cost(INSN_COST);
  format %{ "addv  $dst,$src1,$src2\t# vector (8B)" %}
  ins_encode %{
    __ addv(as_FloatRegister($dst$$reg), __ T8B,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vdop64);
%}

instruct vadd16B(vecX dst, vecX src1, vecX src2)
%{
  predicate(n->as_Vector()->length() == 16);
  match(Set dst (AddVB src1 src2));
  ins_cost(INSN_COST);
  format %{ "addv  $dst,$src1,$src2\t# vector (16B)" %}
  ins_encode %{
    __ addv(as_FloatRegister($dst$$reg), __ T16B,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vdop128);
%}

instruct vadd4S(vecD dst, vecD src1, vecD src2)
%{
  predicate(n->as_Vector()->length() == 2 ||
            n->as_Vector()->length() == 4);
  match(Set dst (AddVS src1 src2));
  ins_cost(INSN_COST);
  format %{ "addv  $dst,$src1,$src2\t# vector (4H)" %}
  ins_encode %{
    __ addv(as_FloatRegister($dst$$reg), __ T4H,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vdop64);
%}

instruct vadd8S(vecX dst, vecX src1, vecX src2)
%{
  predicate(n->as_Vector()->length() == 8);
  match(Set dst (AddVS src1 src2));
  ins_cost(INSN_COST);
  format %{ "addv  $dst,$src1,$src2\t# vector (8H)" %}
  ins_encode %{
    __ addv(as_FloatRegister($dst$$reg), __ T8H,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vdop128);
%}

instruct vadd2I(vecD dst, vecD src1, vecD src2)
%{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (AddVI src1 src2));
  ins_cost(INSN_COST);
  format %{ "addv  $dst,$src1,$src2\t# vector (2S)" %}
  ins_encode %{
    __ addv(as_FloatRegister($dst$$reg), __ T2S,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vdop64);
%}

instruct vadd4I(vecX dst, vecX src1, vecX src2)
%{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (AddVI src1 src2));
  ins_cost(INSN_COST);
  format %{ "addv  $dst,$src1,$src2\t# vector (4S)" %}
  ins_encode %{
    __ addv(as_FloatRegister($dst$$reg), __ T4S,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vdop128);
%}

instruct vadd2L(vecX dst, vecX src1, vecX src2)
%{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (AddVL src1 src2));
  ins_cost(INSN_COST);
  format %{ "addv  $dst,$src1,$src2\t# vector (2L)" %}
  ins_encode %{
    __ addv(as_FloatRegister($dst$$reg), __ T2D,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vdop128);
%}

instruct vadd2F(vecD dst, vecD src1, vecD src2)
%{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (AddVF src1 src2));
  ins_cost(INSN_COST);
  format %{ "fadd  $dst,$src1,$src2\t# vector (2S)" %}
  ins_encode %{
    __ fadd(as_FloatRegister($dst$$reg), __ T2S,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vdop_fp64);
%}

instruct vadd4F(vecX dst, vecX src1, vecX src2)
%{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (AddVF src1 src2));
  ins_cost(INSN_COST);
  format %{ "fadd  $dst,$src1,$src2\t# vector (4S)" %}
  ins_encode %{
    __ fadd(as_FloatRegister($dst$$reg), __ T4S,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vdop_fp128);
%}
14507 
14508 instruct vadd2D(vecX dst, vecX src1, vecX src2)
14509 %{
14510   match(Set dst (AddVD src1 src2));
14511   ins_cost(INSN_COST);
14512   format %{ "fadd  $dst,$src1,$src2\t# vector (2D)" %}
14513   ins_encode %{
14514     __ fadd(as_FloatRegister($dst$$reg), __ T2D,
14515             as_FloatRegister($src1$$reg),
14516             as_FloatRegister($src2$$reg));
14517   %}
14518   ins_pipe(vdop_fp128);
14519 %}
14520 
14521 // --------------------------------- SUB --------------------------------------
14522 
// Lane-wise vector subtraction.  Integer forms use SIMD SUB (subv),
// FP forms use FSUB; structure parallels the ADD family above.

instruct vsub8B(vecD dst, vecD src1, vecD src2)
%{
  predicate(n->as_Vector()->length() == 4 ||
            n->as_Vector()->length() == 8);
  match(Set dst (SubVB src1 src2));
  ins_cost(INSN_COST);
  format %{ "subv  $dst,$src1,$src2\t# vector (8B)" %}
  ins_encode %{
    __ subv(as_FloatRegister($dst$$reg), __ T8B,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vdop64);
%}

instruct vsub16B(vecX dst, vecX src1, vecX src2)
%{
  predicate(n->as_Vector()->length() == 16);
  match(Set dst (SubVB src1 src2));
  ins_cost(INSN_COST);
  format %{ "subv  $dst,$src1,$src2\t# vector (16B)" %}
  ins_encode %{
    __ subv(as_FloatRegister($dst$$reg), __ T16B,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vdop128);
%}

instruct vsub4S(vecD dst, vecD src1, vecD src2)
%{
  predicate(n->as_Vector()->length() == 2 ||
            n->as_Vector()->length() == 4);
  match(Set dst (SubVS src1 src2));
  ins_cost(INSN_COST);
  format %{ "subv  $dst,$src1,$src2\t# vector (4H)" %}
  ins_encode %{
    __ subv(as_FloatRegister($dst$$reg), __ T4H,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vdop64);
%}

instruct vsub8S(vecX dst, vecX src1, vecX src2)
%{
  predicate(n->as_Vector()->length() == 8);
  match(Set dst (SubVS src1 src2));
  ins_cost(INSN_COST);
  format %{ "subv  $dst,$src1,$src2\t# vector (8H)" %}
  ins_encode %{
    __ subv(as_FloatRegister($dst$$reg), __ T8H,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vdop128);
%}

instruct vsub2I(vecD dst, vecD src1, vecD src2)
%{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (SubVI src1 src2));
  ins_cost(INSN_COST);
  format %{ "subv  $dst,$src1,$src2\t# vector (2S)" %}
  ins_encode %{
    __ subv(as_FloatRegister($dst$$reg), __ T2S,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vdop64);
%}

instruct vsub4I(vecX dst, vecX src1, vecX src2)
%{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (SubVI src1 src2));
  ins_cost(INSN_COST);
  format %{ "subv  $dst,$src1,$src2\t# vector (4S)" %}
  ins_encode %{
    __ subv(as_FloatRegister($dst$$reg), __ T4S,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vdop128);
%}

instruct vsub2L(vecX dst, vecX src1, vecX src2)
%{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (SubVL src1 src2));
  ins_cost(INSN_COST);
  format %{ "subv  $dst,$src1,$src2\t# vector (2L)" %}
  ins_encode %{
    __ subv(as_FloatRegister($dst$$reg), __ T2D,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vdop128);
%}

instruct vsub2F(vecD dst, vecD src1, vecD src2)
%{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (SubVF src1 src2));
  ins_cost(INSN_COST);
  format %{ "fsub  $dst,$src1,$src2\t# vector (2S)" %}
  ins_encode %{
    __ fsub(as_FloatRegister($dst$$reg), __ T2S,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vdop_fp64);
%}

instruct vsub4F(vecX dst, vecX src1, vecX src2)
%{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (SubVF src1 src2));
  ins_cost(INSN_COST);
  format %{ "fsub  $dst,$src1,$src2\t# vector (4S)" %}
  ins_encode %{
    __ fsub(as_FloatRegister($dst$$reg), __ T4S,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vdop_fp128);
%}

instruct vsub2D(vecX dst, vecX src1, vecX src2)
%{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (SubVD src1 src2));
  ins_cost(INSN_COST);
  format %{ "fsub  $dst,$src1,$src2\t# vector (2D)" %}
  ins_encode %{
    __ fsub(as_FloatRegister($dst$$reg), __ T2D,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vdop_fp128);
%}
14664 
14665 // --------------------------------- MUL --------------------------------------
14666 
// Lane-wise vector multiplication.  Integer forms use SIMD MUL (mulv;
// note there is no MulVB rule -- C2 has no byte-lane multiply node
// here), FP forms use FMUL; multiplies run on the vmul/vmuldiv pipes.

instruct vmul4S(vecD dst, vecD src1, vecD src2)
%{
  predicate(n->as_Vector()->length() == 2 ||
            n->as_Vector()->length() == 4);
  match(Set dst (MulVS src1 src2));
  ins_cost(INSN_COST);
  format %{ "mulv  $dst,$src1,$src2\t# vector (4H)" %}
  ins_encode %{
    __ mulv(as_FloatRegister($dst$$reg), __ T4H,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vmul64);
%}

instruct vmul8S(vecX dst, vecX src1, vecX src2)
%{
  predicate(n->as_Vector()->length() == 8);
  match(Set dst (MulVS src1 src2));
  ins_cost(INSN_COST);
  format %{ "mulv  $dst,$src1,$src2\t# vector (8H)" %}
  ins_encode %{
    __ mulv(as_FloatRegister($dst$$reg), __ T8H,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vmul128);
%}

instruct vmul2I(vecD dst, vecD src1, vecD src2)
%{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (MulVI src1 src2));
  ins_cost(INSN_COST);
  format %{ "mulv  $dst,$src1,$src2\t# vector (2S)" %}
  ins_encode %{
    __ mulv(as_FloatRegister($dst$$reg), __ T2S,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vmul64);
%}

instruct vmul4I(vecX dst, vecX src1, vecX src2)
%{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (MulVI src1 src2));
  ins_cost(INSN_COST);
  format %{ "mulv  $dst,$src1,$src2\t# vector (4S)" %}
  ins_encode %{
    __ mulv(as_FloatRegister($dst$$reg), __ T4S,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vmul128);
%}

instruct vmul2F(vecD dst, vecD src1, vecD src2)
%{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (MulVF src1 src2));
  ins_cost(INSN_COST);
  format %{ "fmul  $dst,$src1,$src2\t# vector (2S)" %}
  ins_encode %{
    __ fmul(as_FloatRegister($dst$$reg), __ T2S,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vmuldiv_fp64);
%}

instruct vmul4F(vecX dst, vecX src1, vecX src2)
%{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (MulVF src1 src2));
  ins_cost(INSN_COST);
  format %{ "fmul  $dst,$src1,$src2\t# vector (4S)" %}
  ins_encode %{
    __ fmul(as_FloatRegister($dst$$reg), __ T4S,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vmuldiv_fp128);
%}
14751 
14752 instruct vmul2D(vecX dst, vecX src1, vecX src2)
14753 %{
14754   predicate(n->as_Vector()->length() == 2);
14755   match(Set dst (MulVD src1 src2));
14756   ins_cost(INSN_COST);
14757   format %{ "fmul  $dst,$src1,$src2\t# vector (2D)" %}
14758   ins_encode %{
14759     __ fmul(as_FloatRegister($dst$$reg), __ T2D,
14760             as_FloatRegister($src1$$reg),
14761             as_FloatRegister($src2$$reg));
14762   %}
14763   ins_pipe(vmuldiv_fp128);
14764 %}
14765 
14766 // --------------------------------- MLA --------------------------------------
14767 
14768 instruct vmla4S(vecD dst, vecD src1, vecD src2)
14769 %{
14770   predicate(n->as_Vector()->length() == 2 ||
14771             n->as_Vector()->length() == 4);
14772   match(Set dst (AddVS dst (MulVS src1 src2)));
14773   ins_cost(INSN_COST);
14774   format %{ "mlav  $dst,$src1,$src2\t# vector (4H)" %}
14775   ins_encode %{
14776     __ mlav(as_FloatRegister($dst$$reg), __ T4H,
14777             as_FloatRegister($src1$$reg),
14778             as_FloatRegister($src2$$reg));
14779   %}
14780   ins_pipe(vmla64);
14781 %}
14782 
14783 instruct vmla8S(vecX dst, vecX src1, vecX src2)
14784 %{
14785   predicate(n->as_Vector()->length() == 8);
14786   match(Set dst (AddVS dst (MulVS src1 src2)));
14787   ins_cost(INSN_COST);
14788   format %{ "mlav  $dst,$src1,$src2\t# vector (8H)" %}
14789   ins_encode %{
14790     __ mlav(as_FloatRegister($dst$$reg), __ T8H,
14791             as_FloatRegister($src1$$reg),
14792             as_FloatRegister($src2$$reg));
14793   %}
14794   ins_pipe(vmla128);
14795 %}
14796 
14797 instruct vmla2I(vecD dst, vecD src1, vecD src2)
14798 %{
14799   predicate(n->as_Vector()->length() == 2);
14800   match(Set dst (AddVI dst (MulVI src1 src2)));
14801   ins_cost(INSN_COST);
14802   format %{ "mlav  $dst,$src1,$src2\t# vector (2S)" %}
14803   ins_encode %{
14804     __ mlav(as_FloatRegister($dst$$reg), __ T2S,
14805             as_FloatRegister($src1$$reg),
14806             as_FloatRegister($src2$$reg));
14807   %}
14808   ins_pipe(vmla64);
14809 %}
14810 
14811 instruct vmla4I(vecX dst, vecX src1, vecX src2)
14812 %{
14813   predicate(n->as_Vector()->length() == 4);
14814   match(Set dst (AddVI dst (MulVI src1 src2)));
14815   ins_cost(INSN_COST);
14816   format %{ "mlav  $dst,$src1,$src2\t# vector (4S)" %}
14817   ins_encode %{
14818     __ mlav(as_FloatRegister($dst$$reg), __ T4S,
14819             as_FloatRegister($src1$$reg),
14820             as_FloatRegister($src2$$reg));
14821   %}
14822   ins_pipe(vmla128);
14823 %}
14824 
14825 // --------------------------------- MLS --------------------------------------
14826 
14827 instruct vmls4S(vecD dst, vecD src1, vecD src2)
14828 %{
14829   predicate(n->as_Vector()->length() == 2 ||
14830             n->as_Vector()->length() == 4);
14831   match(Set dst (SubVS dst (MulVS src1 src2)));
14832   ins_cost(INSN_COST);
14833   format %{ "mlsv  $dst,$src1,$src2\t# vector (4H)" %}
14834   ins_encode %{
14835     __ mlsv(as_FloatRegister($dst$$reg), __ T4H,
14836             as_FloatRegister($src1$$reg),
14837             as_FloatRegister($src2$$reg));
14838   %}
14839   ins_pipe(vmla64);
14840 %}
14841 
14842 instruct vmls8S(vecX dst, vecX src1, vecX src2)
14843 %{
14844   predicate(n->as_Vector()->length() == 8);
14845   match(Set dst (SubVS dst (MulVS src1 src2)));
14846   ins_cost(INSN_COST);
14847   format %{ "mlsv  $dst,$src1,$src2\t# vector (8H)" %}
14848   ins_encode %{
14849     __ mlsv(as_FloatRegister($dst$$reg), __ T8H,
14850             as_FloatRegister($src1$$reg),
14851             as_FloatRegister($src2$$reg));
14852   %}
14853   ins_pipe(vmla128);
14854 %}
14855 
14856 instruct vmls2I(vecD dst, vecD src1, vecD src2)
14857 %{
14858   predicate(n->as_Vector()->length() == 2);
14859   match(Set dst (SubVI dst (MulVI src1 src2)));
14860   ins_cost(INSN_COST);
14861   format %{ "mlsv  $dst,$src1,$src2\t# vector (2S)" %}
14862   ins_encode %{
14863     __ mlsv(as_FloatRegister($dst$$reg), __ T2S,
14864             as_FloatRegister($src1$$reg),
14865             as_FloatRegister($src2$$reg));
14866   %}
14867   ins_pipe(vmla64);
14868 %}
14869 
14870 instruct vmls4I(vecX dst, vecX src1, vecX src2)
14871 %{
14872   predicate(n->as_Vector()->length() == 4);
14873   match(Set dst (SubVI dst (MulVI src1 src2)));
14874   ins_cost(INSN_COST);
14875   format %{ "mlsv  $dst,$src1,$src2\t# vector (4S)" %}
14876   ins_encode %{
14877     __ mlsv(as_FloatRegister($dst$$reg), __ T4S,
14878             as_FloatRegister($src1$$reg),
14879             as_FloatRegister($src2$$reg));
14880   %}
14881   ins_pipe(vmla128);
14882 %}
14883 
14884 // --------------------------------- DIV --------------------------------------
14885 
14886 instruct vdiv2F(vecD dst, vecD src1, vecD src2)
14887 %{
14888   predicate(n->as_Vector()->length() == 2);
14889   match(Set dst (DivVF src1 src2));
14890   ins_cost(INSN_COST);
14891   format %{ "fdiv  $dst,$src1,$src2\t# vector (2S)" %}
14892   ins_encode %{
14893     __ fdiv(as_FloatRegister($dst$$reg), __ T2S,
14894             as_FloatRegister($src1$$reg),
14895             as_FloatRegister($src2$$reg));
14896   %}
14897   ins_pipe(vmuldiv_fp64);
14898 %}
14899 
14900 instruct vdiv4F(vecX dst, vecX src1, vecX src2)
14901 %{
14902   predicate(n->as_Vector()->length() == 4);
14903   match(Set dst (DivVF src1 src2));
14904   ins_cost(INSN_COST);
14905   format %{ "fdiv  $dst,$src1,$src2\t# vector (4S)" %}
14906   ins_encode %{
14907     __ fdiv(as_FloatRegister($dst$$reg), __ T4S,
14908             as_FloatRegister($src1$$reg),
14909             as_FloatRegister($src2$$reg));
14910   %}
14911   ins_pipe(vmuldiv_fp128);
14912 %}
14913 
14914 instruct vdiv2D(vecX dst, vecX src1, vecX src2)
14915 %{
14916   predicate(n->as_Vector()->length() == 2);
14917   match(Set dst (DivVD src1 src2));
14918   ins_cost(INSN_COST);
14919   format %{ "fdiv  $dst,$src1,$src2\t# vector (2D)" %}
14920   ins_encode %{
14921     __ fdiv(as_FloatRegister($dst$$reg), __ T2D,
14922             as_FloatRegister($src1$$reg),
14923             as_FloatRegister($src2$$reg));
14924   %}
14925   ins_pipe(vmuldiv_fp128);
14926 %}
14927 
14928 // --------------------------------- AND --------------------------------------
14929 
14930 instruct vand8B(vecD dst, vecD src1, vecD src2)
14931 %{
14932   predicate(n->as_Vector()->length_in_bytes() == 4 ||
14933             n->as_Vector()->length_in_bytes() == 8);
14934   match(Set dst (AndV src1 src2));
14935   ins_cost(INSN_COST);
14936   format %{ "and  $dst,$src1,$src2\t# vector (8B)" %}
14937   ins_encode %{
14938     __ andr(as_FloatRegister($dst$$reg), __ T8B,
14939             as_FloatRegister($src1$$reg),
14940             as_FloatRegister($src2$$reg));
14941   %}
14942   ins_pipe(vlogical64);
14943 %}
14944 
14945 instruct vand16B(vecX dst, vecX src1, vecX src2)
14946 %{
14947   predicate(n->as_Vector()->length_in_bytes() == 16);
14948   match(Set dst (AndV src1 src2));
14949   ins_cost(INSN_COST);
14950   format %{ "and  $dst,$src1,$src2\t# vector (16B)" %}
14951   ins_encode %{
14952     __ andr(as_FloatRegister($dst$$reg), __ T16B,
14953             as_FloatRegister($src1$$reg),
14954             as_FloatRegister($src2$$reg));
14955   %}
14956   ins_pipe(vlogical128);
14957 %}
14958 
14959 // --------------------------------- OR ---------------------------------------
14960 
// Bitwise OR of 4- or 8-byte vectors in a 64-bit D register.
// Fix: the format string previously read "and" (copy-paste from vand8B),
// which mislabeled the instruction in PrintAssembly/debug output; the
// emitted instruction is orr, consistent with vor16B below.
instruct vor8B(vecD dst, vecD src1, vecD src2)
%{
  predicate(n->as_Vector()->length_in_bytes() == 4 ||
            n->as_Vector()->length_in_bytes() == 8);
  match(Set dst (OrV src1 src2));
  ins_cost(INSN_COST);
  format %{ "orr  $dst,$src1,$src2\t# vector (8B)" %}
  ins_encode %{
    __ orr(as_FloatRegister($dst$$reg), __ T8B,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vlogical64);
%}
14975 
// Bitwise OR of 16-byte vectors in a 128-bit Q register.
instruct vor16B(vecX dst, vecX src1, vecX src2)
%{
  predicate(n->as_Vector()->length_in_bytes() == 16);
  match(Set dst (OrV src1 src2));
  ins_cost(INSN_COST);
  format %{ "orr  $dst,$src1,$src2\t# vector (16B)" %}
  ins_encode %{
    __ orr(as_FloatRegister($dst$$reg), __ T16B,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vlogical128);
%}
14989 
14990 // --------------------------------- XOR --------------------------------------
14991 
14992 instruct vxor8B(vecD dst, vecD src1, vecD src2)
14993 %{
14994   predicate(n->as_Vector()->length_in_bytes() == 4 ||
14995             n->as_Vector()->length_in_bytes() == 8);
14996   match(Set dst (XorV src1 src2));
14997   ins_cost(INSN_COST);
14998   format %{ "xor  $dst,$src1,$src2\t# vector (8B)" %}
14999   ins_encode %{
15000     __ eor(as_FloatRegister($dst$$reg), __ T8B,
15001             as_FloatRegister($src1$$reg),
15002             as_FloatRegister($src2$$reg));
15003   %}
15004   ins_pipe(vlogical64);
15005 %}
15006 
15007 instruct vxor16B(vecX dst, vecX src1, vecX src2)
15008 %{
15009   predicate(n->as_Vector()->length_in_bytes() == 16);
15010   match(Set dst (XorV src1 src2));
15011   ins_cost(INSN_COST);
15012   format %{ "xor  $dst,$src1,$src2\t# vector (16B)" %}
15013   ins_encode %{
15014     __ eor(as_FloatRegister($dst$$reg), __ T16B,
15015             as_FloatRegister($src1$$reg),
15016             as_FloatRegister($src2$$reg));
15017   %}
15018   ins_pipe(vlogical128);
15019 %}
15020 
15021 // ------------------------------ Shift ---------------------------------------
15022 instruct vshiftcnt8B(vecD dst, iRegIorL2I cnt) %{
15023   predicate(n->as_Vector()->length_in_bytes() == 8);
15024   match(Set dst (LShiftCntV cnt));
15025   match(Set dst (RShiftCntV cnt));
15026   format %{ "dup  $dst, $cnt\t# shift count vector (8B)" %}
15027   ins_encode %{
15028     __ dup(as_FloatRegister($dst$$reg), __ T8B, as_Register($cnt$$reg));
15029   %}
15030   ins_pipe(vdup_reg_reg64);
15031 %}
15032 
15033 instruct vshiftcnt16B(vecX dst, iRegIorL2I cnt) %{
15034   predicate(n->as_Vector()->length_in_bytes() == 16);
15035   match(Set dst (LShiftCntV cnt));
15036   match(Set dst (RShiftCntV cnt));
15037   format %{ "dup  $dst, $cnt\t# shift count vector (16B)" %}
15038   ins_encode %{
15039     __ dup(as_FloatRegister($dst$$reg), __ T16B, as_Register($cnt$$reg));
15040   %}
15041   ins_pipe(vdup_reg_reg128);
15042 %}
15043 
// Variable left shift of 4 or 8 bytes in a D register; sshl shifts each
// lane left by the count held in the corresponding lane of $shift.
instruct vsll8B(vecD dst, vecD src, vecD shift) %{
  predicate(n->as_Vector()->length() == 4 ||
            n->as_Vector()->length() == 8);
  match(Set dst (LShiftVB src shift));
  ins_cost(INSN_COST);
  format %{ "sshl  $dst,$src,$shift\t# vector (8B)" %}
  ins_encode %{
    __ sshl(as_FloatRegister($dst$$reg), __ T8B,
            as_FloatRegister($src$$reg),
            as_FloatRegister($shift$$reg));
  %}
  ins_pipe(vshift64);
%}

// Variable left shift of 16 bytes in a Q register.
instruct vsll16B(vecX dst, vecX src, vecX shift) %{
  predicate(n->as_Vector()->length() == 16);
  match(Set dst (LShiftVB src shift));
  ins_cost(INSN_COST);
  format %{ "sshl  $dst,$src,$shift\t# vector (16B)" %}
  ins_encode %{
    __ sshl(as_FloatRegister($dst$$reg), __ T16B,
            as_FloatRegister($src$$reg),
            as_FloatRegister($shift$$reg));
  %}
  ins_pipe(vshift128);
%}
15070 
15071 // Right shifts with vector shift count on aarch64 SIMD are implemented
15072 // as left shift by negative shift count.
15073 // There are two cases for vector shift count.
15074 //
15075 // Case 1: The vector shift count is from replication.
15076 //        |            |
15077 //    LoadVector  RShiftCntV
15078 //        |       /
15079 //     RShiftVI
15080 // Note: In inner loop, multiple neg instructions are used, which can be
15081 // moved to outer loop and merge into one neg instruction.
15082 //
15083 // Case 2: The vector shift count is from loading.
15084 // This case isn't supported by middle-end now. But it's supported by
15085 // panama/vectorIntrinsics(JEP 338: Vector API).
15086 //        |            |
15087 //    LoadVector  LoadVector
15088 //        |       /
15089 //     RShiftVI
15090 //
15091 
// Variable arithmetic right shift of 4 or 8 bytes: negate the shift-count
// vector (see comment block above) then sshl by the negative counts.
instruct vsra8B(vecD dst, vecD src, vecD shift, vecD tmp) %{
  predicate(n->as_Vector()->length() == 4 ||
            n->as_Vector()->length() == 8);
  match(Set dst (RShiftVB src shift));
  ins_cost(INSN_COST);
  effect(TEMP tmp);
  format %{ "negr  $tmp,$shift\t"
            "sshl  $dst,$src,$tmp\t# vector (8B)" %}
  ins_encode %{
    __ negr(as_FloatRegister($tmp$$reg), __ T8B,
            as_FloatRegister($shift$$reg));
    __ sshl(as_FloatRegister($dst$$reg), __ T8B,
            as_FloatRegister($src$$reg),
            as_FloatRegister($tmp$$reg));
  %}
  ins_pipe(vshift64);
%}

// Variable arithmetic right shift of 16 bytes (Q register).
instruct vsra16B(vecX dst, vecX src, vecX shift, vecX tmp) %{
  predicate(n->as_Vector()->length() == 16);
  match(Set dst (RShiftVB src shift));
  ins_cost(INSN_COST);
  effect(TEMP tmp);
  format %{ "negr  $tmp,$shift\t"
            "sshl  $dst,$src,$tmp\t# vector (16B)" %}
  ins_encode %{
    __ negr(as_FloatRegister($tmp$$reg), __ T16B,
            as_FloatRegister($shift$$reg));
    __ sshl(as_FloatRegister($dst$$reg), __ T16B,
            as_FloatRegister($src$$reg),
            as_FloatRegister($tmp$$reg));
  %}
  ins_pipe(vshift128);
%}
15126 
// Variable logical right shift of 4 or 8 bytes: negate the count vector,
// then ushl (unsigned shift) by the negative counts.
instruct vsrl8B(vecD dst, vecD src, vecD shift, vecD tmp) %{
  predicate(n->as_Vector()->length() == 4 ||
            n->as_Vector()->length() == 8);
  match(Set dst (URShiftVB src shift));
  ins_cost(INSN_COST);
  effect(TEMP tmp);
  format %{ "negr  $tmp,$shift\t"
            "ushl  $dst,$src,$tmp\t# vector (8B)" %}
  ins_encode %{
    __ negr(as_FloatRegister($tmp$$reg), __ T8B,
            as_FloatRegister($shift$$reg));
    __ ushl(as_FloatRegister($dst$$reg), __ T8B,
            as_FloatRegister($src$$reg),
            as_FloatRegister($tmp$$reg));
  %}
  ins_pipe(vshift64);
%}

// Variable logical right shift of 16 bytes (Q register).
instruct vsrl16B(vecX dst, vecX src, vecX shift, vecX tmp) %{
  predicate(n->as_Vector()->length() == 16);
  match(Set dst (URShiftVB src shift));
  ins_cost(INSN_COST);
  effect(TEMP tmp);
  format %{ "negr  $tmp,$shift\t"
            "ushl  $dst,$src,$tmp\t# vector (16B)" %}
  ins_encode %{
    __ negr(as_FloatRegister($tmp$$reg), __ T16B,
            as_FloatRegister($shift$$reg));
    __ ushl(as_FloatRegister($dst$$reg), __ T16B,
            as_FloatRegister($src$$reg),
            as_FloatRegister($tmp$$reg));
  %}
  ins_pipe(vshift128);
%}
15161 
// Immediate left shift of 4 or 8 bytes. Java's shift count is masked to
// 0..31, so counts >= element width (8) are possible; for those the Java
// result is all-zero, produced here by eor dst,src,src (dst = 0).
instruct vsll8B_imm(vecD dst, vecD src, immI shift) %{
  predicate(n->as_Vector()->length() == 4 ||
            n->as_Vector()->length() == 8);
  match(Set dst (LShiftVB src shift));
  ins_cost(INSN_COST);
  format %{ "shl    $dst, $src, $shift\t# vector (8B)" %}
  ins_encode %{
    int sh = (int)$shift$$constant & 31;
    if (sh >= 8) {
      __ eor(as_FloatRegister($dst$$reg), __ T8B,
             as_FloatRegister($src$$reg),
             as_FloatRegister($src$$reg));
    } else {
      __ shl(as_FloatRegister($dst$$reg), __ T8B,
             as_FloatRegister($src$$reg), sh);
    }
  %}
  ins_pipe(vshift64_imm);
%}

// Immediate left shift of 16 bytes; same width-saturation handling as above.
instruct vsll16B_imm(vecX dst, vecX src, immI shift) %{
  predicate(n->as_Vector()->length() == 16);
  match(Set dst (LShiftVB src shift));
  ins_cost(INSN_COST);
  format %{ "shl    $dst, $src, $shift\t# vector (16B)" %}
  ins_encode %{
    int sh = (int)$shift$$constant & 31;
    if (sh >= 8) {
      __ eor(as_FloatRegister($dst$$reg), __ T16B,
             as_FloatRegister($src$$reg),
             as_FloatRegister($src$$reg));
    } else {
      __ shl(as_FloatRegister($dst$$reg), __ T16B,
             as_FloatRegister($src$$reg), sh);
    }
  %}
  ins_pipe(vshift128_imm);
%}
15200 
// Immediate arithmetic right shift of 4 or 8 bytes. Counts >= 8 clamp to 7
// (shifting in sign bits, matching Java >> semantics for bytes).
// NOTE(review): the shift is then transformed with -sh & 7, which appears to
// be the negated/masked encoding this codebase's Assembler::sshr expects —
// confirm against assembler_aarch64 before touching.
instruct vsra8B_imm(vecD dst, vecD src, immI shift) %{
  predicate(n->as_Vector()->length() == 4 ||
            n->as_Vector()->length() == 8);
  match(Set dst (RShiftVB src shift));
  ins_cost(INSN_COST);
  format %{ "sshr    $dst, $src, $shift\t# vector (8B)" %}
  ins_encode %{
    int sh = (int)$shift$$constant & 31;
    if (sh >= 8) sh = 7;
    sh = -sh & 7;
    __ sshr(as_FloatRegister($dst$$reg), __ T8B,
           as_FloatRegister($src$$reg), sh);
  %}
  ins_pipe(vshift64_imm);
%}

// Immediate arithmetic right shift of 16 bytes; same clamp/encode as above.
instruct vsra16B_imm(vecX dst, vecX src, immI shift) %{
  predicate(n->as_Vector()->length() == 16);
  match(Set dst (RShiftVB src shift));
  ins_cost(INSN_COST);
  format %{ "sshr    $dst, $src, $shift\t# vector (16B)" %}
  ins_encode %{
    int sh = (int)$shift$$constant & 31;
    if (sh >= 8) sh = 7;
    sh = -sh & 7;
    __ sshr(as_FloatRegister($dst$$reg), __ T16B,
           as_FloatRegister($src$$reg), sh);
  %}
  ins_pipe(vshift128_imm);
%}
15231 
// Immediate logical right shift of 4 or 8 bytes. Counts >= 8 yield zero
// (eor dst,src,src); otherwise ushr with the count in the -sh & 7
// negated/masked form the assembler expects (see note on vsra8B_imm).
instruct vsrl8B_imm(vecD dst, vecD src, immI shift) %{
  predicate(n->as_Vector()->length() == 4 ||
            n->as_Vector()->length() == 8);
  match(Set dst (URShiftVB src shift));
  ins_cost(INSN_COST);
  format %{ "ushr    $dst, $src, $shift\t# vector (8B)" %}
  ins_encode %{
    int sh = (int)$shift$$constant & 31;
    if (sh >= 8) {
      __ eor(as_FloatRegister($dst$$reg), __ T8B,
             as_FloatRegister($src$$reg),
             as_FloatRegister($src$$reg));
    } else {
      __ ushr(as_FloatRegister($dst$$reg), __ T8B,
             as_FloatRegister($src$$reg), -sh & 7);
    }
  %}
  ins_pipe(vshift64_imm);
%}

// Immediate logical right shift of 16 bytes; same handling as above.
instruct vsrl16B_imm(vecX dst, vecX src, immI shift) %{
  predicate(n->as_Vector()->length() == 16);
  match(Set dst (URShiftVB src shift));
  ins_cost(INSN_COST);
  format %{ "ushr    $dst, $src, $shift\t# vector (16B)" %}
  ins_encode %{
    int sh = (int)$shift$$constant & 31;
    if (sh >= 8) {
      __ eor(as_FloatRegister($dst$$reg), __ T16B,
             as_FloatRegister($src$$reg),
             as_FloatRegister($src$$reg));
    } else {
      __ ushr(as_FloatRegister($dst$$reg), __ T16B,
             as_FloatRegister($src$$reg), -sh & 7);
    }
  %}
  ins_pipe(vshift128_imm);
%}
15270 
// Variable left shift of 2 or 4 shorts in a D register (4H arrangement).
instruct vsll4S(vecD dst, vecD src, vecD shift) %{
  predicate(n->as_Vector()->length() == 2 ||
            n->as_Vector()->length() == 4);
  match(Set dst (LShiftVS src shift));
  ins_cost(INSN_COST);
  format %{ "sshl  $dst,$src,$shift\t# vector (4H)" %}
  ins_encode %{
    __ sshl(as_FloatRegister($dst$$reg), __ T4H,
            as_FloatRegister($src$$reg),
            as_FloatRegister($shift$$reg));
  %}
  ins_pipe(vshift64);
%}

// Variable left shift of 8 shorts in a Q register (8H arrangement).
instruct vsll8S(vecX dst, vecX src, vecX shift) %{
  predicate(n->as_Vector()->length() == 8);
  match(Set dst (LShiftVS src shift));
  ins_cost(INSN_COST);
  format %{ "sshl  $dst,$src,$shift\t# vector (8H)" %}
  ins_encode %{
    __ sshl(as_FloatRegister($dst$$reg), __ T8H,
            as_FloatRegister($src$$reg),
            as_FloatRegister($shift$$reg));
  %}
  ins_pipe(vshift128);
%}
15297 
// Variable arithmetic right shift of 2 or 4 shorts: negate the count
// vector (byte-wise negr covers the whole register) then sshl as 4H.
instruct vsra4S(vecD dst, vecD src, vecD shift, vecD tmp) %{
  predicate(n->as_Vector()->length() == 2 ||
            n->as_Vector()->length() == 4);
  match(Set dst (RShiftVS src shift));
  ins_cost(INSN_COST);
  effect(TEMP tmp);
  format %{ "negr  $tmp,$shift\t"
            "sshl  $dst,$src,$tmp\t# vector (4H)" %}
  ins_encode %{
    __ negr(as_FloatRegister($tmp$$reg), __ T8B,
            as_FloatRegister($shift$$reg));
    __ sshl(as_FloatRegister($dst$$reg), __ T4H,
            as_FloatRegister($src$$reg),
            as_FloatRegister($tmp$$reg));
  %}
  ins_pipe(vshift64);
%}

// Variable arithmetic right shift of 8 shorts (Q register, 8H).
instruct vsra8S(vecX dst, vecX src, vecX shift, vecX tmp) %{
  predicate(n->as_Vector()->length() == 8);
  match(Set dst (RShiftVS src shift));
  ins_cost(INSN_COST);
  effect(TEMP tmp);
  format %{ "negr  $tmp,$shift\t"
            "sshl  $dst,$src,$tmp\t# vector (8H)" %}
  ins_encode %{
    __ negr(as_FloatRegister($tmp$$reg), __ T16B,
            as_FloatRegister($shift$$reg));
    __ sshl(as_FloatRegister($dst$$reg), __ T8H,
            as_FloatRegister($src$$reg),
            as_FloatRegister($tmp$$reg));
  %}
  ins_pipe(vshift128);
%}
15332 
// Variable logical right shift of 2 or 4 shorts: negate counts, ushl as 4H.
instruct vsrl4S(vecD dst, vecD src, vecD shift, vecD tmp) %{
  predicate(n->as_Vector()->length() == 2 ||
            n->as_Vector()->length() == 4);
  match(Set dst (URShiftVS src shift));
  ins_cost(INSN_COST);
  effect(TEMP tmp);
  format %{ "negr  $tmp,$shift\t"
            "ushl  $dst,$src,$tmp\t# vector (4H)" %}
  ins_encode %{
    __ negr(as_FloatRegister($tmp$$reg), __ T8B,
            as_FloatRegister($shift$$reg));
    __ ushl(as_FloatRegister($dst$$reg), __ T4H,
            as_FloatRegister($src$$reg),
            as_FloatRegister($tmp$$reg));
  %}
  ins_pipe(vshift64);
%}

// Variable logical right shift of 8 shorts (Q register, 8H).
instruct vsrl8S(vecX dst, vecX src, vecX shift, vecX tmp) %{
  predicate(n->as_Vector()->length() == 8);
  match(Set dst (URShiftVS src shift));
  ins_cost(INSN_COST);
  effect(TEMP tmp);
  format %{ "negr  $tmp,$shift\t"
            "ushl  $dst,$src,$tmp\t# vector (8H)" %}
  ins_encode %{
    __ negr(as_FloatRegister($tmp$$reg), __ T16B,
            as_FloatRegister($shift$$reg));
    __ ushl(as_FloatRegister($dst$$reg), __ T8H,
            as_FloatRegister($src$$reg),
            as_FloatRegister($tmp$$reg));
  %}
  ins_pipe(vshift128);
%}
15367 
// Immediate left shift of 2 or 4 shorts. Counts >= element width (16)
// produce zero via eor dst,src,src, matching Java semantics.
instruct vsll4S_imm(vecD dst, vecD src, immI shift) %{
  predicate(n->as_Vector()->length() == 2 ||
            n->as_Vector()->length() == 4);
  match(Set dst (LShiftVS src shift));
  ins_cost(INSN_COST);
  format %{ "shl    $dst, $src, $shift\t# vector (4H)" %}
  ins_encode %{
    int sh = (int)$shift$$constant & 31;
    if (sh >= 16) {
      __ eor(as_FloatRegister($dst$$reg), __ T8B,
             as_FloatRegister($src$$reg),
             as_FloatRegister($src$$reg));
    } else {
      __ shl(as_FloatRegister($dst$$reg), __ T4H,
             as_FloatRegister($src$$reg), sh);
    }
  %}
  ins_pipe(vshift64_imm);
%}

// Immediate left shift of 8 shorts; same width-saturation handling.
instruct vsll8S_imm(vecX dst, vecX src, immI shift) %{
  predicate(n->as_Vector()->length() == 8);
  match(Set dst (LShiftVS src shift));
  ins_cost(INSN_COST);
  format %{ "shl    $dst, $src, $shift\t# vector (8H)" %}
  ins_encode %{
    int sh = (int)$shift$$constant & 31;
    if (sh >= 16) {
      __ eor(as_FloatRegister($dst$$reg), __ T16B,
             as_FloatRegister($src$$reg),
             as_FloatRegister($src$$reg));
    } else {
      __ shl(as_FloatRegister($dst$$reg), __ T8H,
             as_FloatRegister($src$$reg), sh);
    }
  %}
  ins_pipe(vshift128_imm);
%}
15406 
// Immediate arithmetic right shift of 2 or 4 shorts. Counts >= 16 clamp to
// 15 (sign fill); the count is then passed as -sh & 15 — the negated/masked
// form this codebase's Assembler::sshr expects (see note on vsra8B_imm).
instruct vsra4S_imm(vecD dst, vecD src, immI shift) %{
  predicate(n->as_Vector()->length() == 2 ||
            n->as_Vector()->length() == 4);
  match(Set dst (RShiftVS src shift));
  ins_cost(INSN_COST);
  format %{ "sshr    $dst, $src, $shift\t# vector (4H)" %}
  ins_encode %{
    int sh = (int)$shift$$constant & 31;
    if (sh >= 16) sh = 15;
    sh = -sh & 15;
    __ sshr(as_FloatRegister($dst$$reg), __ T4H,
           as_FloatRegister($src$$reg), sh);
  %}
  ins_pipe(vshift64_imm);
%}

// Immediate arithmetic right shift of 8 shorts; same clamp/encode as above.
instruct vsra8S_imm(vecX dst, vecX src, immI shift) %{
  predicate(n->as_Vector()->length() == 8);
  match(Set dst (RShiftVS src shift));
  ins_cost(INSN_COST);
  format %{ "sshr    $dst, $src, $shift\t# vector (8H)" %}
  ins_encode %{
    int sh = (int)$shift$$constant & 31;
    if (sh >= 16) sh = 15;
    sh = -sh & 15;
    __ sshr(as_FloatRegister($dst$$reg), __ T8H,
           as_FloatRegister($src$$reg), sh);
  %}
  ins_pipe(vshift128_imm);
%}
15437 
// Immediate logical right shift of 2 or 4 shorts. Counts >= 16 yield zero
// (eor dst,src,src); otherwise ushr with the count in -sh & 15
// negated/masked form (see note on vsra8B_imm).
instruct vsrl4S_imm(vecD dst, vecD src, immI shift) %{
  predicate(n->as_Vector()->length() == 2 ||
            n->as_Vector()->length() == 4);
  match(Set dst (URShiftVS src shift));
  ins_cost(INSN_COST);
  format %{ "ushr    $dst, $src, $shift\t# vector (4H)" %}
  ins_encode %{
    int sh = (int)$shift$$constant & 31;
    if (sh >= 16) {
      __ eor(as_FloatRegister($dst$$reg), __ T8B,
             as_FloatRegister($src$$reg),
             as_FloatRegister($src$$reg));
    } else {
      __ ushr(as_FloatRegister($dst$$reg), __ T4H,
             as_FloatRegister($src$$reg), -sh & 15);
    }
  %}
  ins_pipe(vshift64_imm);
%}

// Immediate logical right shift of 8 shorts; same handling as above.
instruct vsrl8S_imm(vecX dst, vecX src, immI shift) %{
  predicate(n->as_Vector()->length() == 8);
  match(Set dst (URShiftVS src shift));
  ins_cost(INSN_COST);
  format %{ "ushr    $dst, $src, $shift\t# vector (8H)" %}
  ins_encode %{
    int sh = (int)$shift$$constant & 31;
    if (sh >= 16) {
      __ eor(as_FloatRegister($dst$$reg), __ T16B,
             as_FloatRegister($src$$reg),
             as_FloatRegister($src$$reg));
    } else {
      __ ushr(as_FloatRegister($dst$$reg), __ T8H,
             as_FloatRegister($src$$reg), -sh & 15);
    }
  %}
  ins_pipe(vshift128_imm);
%}
15476 
// Variable left shift of 2 ints in a D register (2S arrangement).
instruct vsll2I(vecD dst, vecD src, vecD shift) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (LShiftVI src shift));
  ins_cost(INSN_COST);
  format %{ "sshl  $dst,$src,$shift\t# vector (2S)" %}
  ins_encode %{
    __ sshl(as_FloatRegister($dst$$reg), __ T2S,
            as_FloatRegister($src$$reg),
            as_FloatRegister($shift$$reg));
  %}
  ins_pipe(vshift64);
%}

// Variable left shift of 4 ints in a Q register (4S arrangement).
instruct vsll4I(vecX dst, vecX src, vecX shift) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (LShiftVI src shift));
  ins_cost(INSN_COST);
  format %{ "sshl  $dst,$src,$shift\t# vector (4S)" %}
  ins_encode %{
    __ sshl(as_FloatRegister($dst$$reg), __ T4S,
            as_FloatRegister($src$$reg),
            as_FloatRegister($shift$$reg));
  %}
  ins_pipe(vshift128);
%}
15502 
// Variable arithmetic right shift of 2 ints: negate counts, sshl as 2S.
instruct vsra2I(vecD dst, vecD src, vecD shift, vecD tmp) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (RShiftVI src shift));
  ins_cost(INSN_COST);
  effect(TEMP tmp);
  format %{ "negr  $tmp,$shift\t"
            "sshl  $dst,$src,$tmp\t# vector (2S)" %}
  ins_encode %{
    __ negr(as_FloatRegister($tmp$$reg), __ T8B,
            as_FloatRegister($shift$$reg));
    __ sshl(as_FloatRegister($dst$$reg), __ T2S,
            as_FloatRegister($src$$reg),
            as_FloatRegister($tmp$$reg));
  %}
  ins_pipe(vshift64);
%}

// Variable arithmetic right shift of 4 ints (Q register, 4S).
instruct vsra4I(vecX dst, vecX src, vecX shift, vecX tmp) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (RShiftVI src shift));
  ins_cost(INSN_COST);
  effect(TEMP tmp);
  format %{ "negr  $tmp,$shift\t"
            "sshl  $dst,$src,$tmp\t# vector (4S)" %}
  ins_encode %{
    __ negr(as_FloatRegister($tmp$$reg), __ T16B,
            as_FloatRegister($shift$$reg));
    __ sshl(as_FloatRegister($dst$$reg), __ T4S,
            as_FloatRegister($src$$reg),
            as_FloatRegister($tmp$$reg));
  %}
  ins_pipe(vshift128);
%}
15536 
// Variable logical right shift of 2 ints: negate counts, ushl as 2S.
instruct vsrl2I(vecD dst, vecD src, vecD shift, vecD tmp) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (URShiftVI src shift));
  ins_cost(INSN_COST);
  effect(TEMP tmp);
  format %{ "negr  $tmp,$shift\t"
            "ushl  $dst,$src,$tmp\t# vector (2S)" %}
  ins_encode %{
    __ negr(as_FloatRegister($tmp$$reg), __ T8B,
            as_FloatRegister($shift$$reg));
    __ ushl(as_FloatRegister($dst$$reg), __ T2S,
            as_FloatRegister($src$$reg),
            as_FloatRegister($tmp$$reg));
  %}
  ins_pipe(vshift64);
%}

// Variable logical right shift of 4 ints (Q register, 4S).
instruct vsrl4I(vecX dst, vecX src, vecX shift, vecX tmp) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (URShiftVI src shift));
  ins_cost(INSN_COST);
  effect(TEMP tmp);
  format %{ "negr  $tmp,$shift\t"
            "ushl  $dst,$src,$tmp\t# vector (4S)" %}
  ins_encode %{
    __ negr(as_FloatRegister($tmp$$reg), __ T16B,
            as_FloatRegister($shift$$reg));
    __ ushl(as_FloatRegister($dst$$reg), __ T4S,
            as_FloatRegister($src$$reg),
            as_FloatRegister($tmp$$reg));
  %}
  ins_pipe(vshift128);
%}
15570 
// Left shift of 2 ints by an immediate count.
// The count is masked to 0..31, matching Java's shift semantics for int.
instruct vsll2I_imm(vecD dst, vecD src, immI shift) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (LShiftVI src shift));
  ins_cost(INSN_COST);
  format %{ "shl    $dst, $src, $shift\t# vector (2S)" %}
  ins_encode %{
    __ shl(as_FloatRegister($dst$$reg), __ T2S,
           as_FloatRegister($src$$reg),
           (int)$shift$$constant & 31);
  %}
  ins_pipe(vshift64_imm);
%}
15583 
// Left shift of 4 ints by an immediate count (128-bit variant of vsll2I_imm).
// The count is masked to 0..31, matching Java's shift semantics for int.
instruct vsll4I_imm(vecX dst, vecX src, immI shift) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (LShiftVI src shift));
  ins_cost(INSN_COST);
  format %{ "shl    $dst, $src, $shift\t# vector (4S)" %}
  ins_encode %{
    __ shl(as_FloatRegister($dst$$reg), __ T4S,
           as_FloatRegister($src$$reg),
           (int)$shift$$constant & 31);
  %}
  ins_pipe(vshift128_imm);
%}
15596 
// Arithmetic (signed) right shift of 2 ints by an immediate count.
// The count is passed as -c & 31 (== 32 - c for c in 1..31), which appears
// to be the pre-adjusted form the backend's sshr helper wants for A64's
// shift-right immediate encoding (immh:immb == 2*esize - count).
// NOTE(review): a masked count of 0 yields field value 0, i.e. a full
// 32-bit shift; presumably C2 folds identity shifts before matching — confirm.
instruct vsra2I_imm(vecD dst, vecD src, immI shift) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (RShiftVI src shift));
  ins_cost(INSN_COST);
  format %{ "sshr    $dst, $src, $shift\t# vector (2S)" %}
  ins_encode %{
    __ sshr(as_FloatRegister($dst$$reg), __ T2S,
            as_FloatRegister($src$$reg),
            -(int)$shift$$constant & 31);
  %}
  ins_pipe(vshift64_imm);
%}
15609 
// Arithmetic (signed) right shift of 4 ints by an immediate count
// (128-bit variant of vsra2I_imm). The count is passed negated and masked
// (-c & 31), the pre-adjusted form the backend's sshr helper expects.
instruct vsra4I_imm(vecX dst, vecX src, immI shift) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (RShiftVI src shift));
  ins_cost(INSN_COST);
  format %{ "sshr    $dst, $src, $shift\t# vector (4S)" %}
  ins_encode %{
    __ sshr(as_FloatRegister($dst$$reg), __ T4S,
            as_FloatRegister($src$$reg),
            -(int)$shift$$constant & 31);
  %}
  ins_pipe(vshift128_imm);
%}
15622 
// Logical (unsigned) right shift of 2 ints by an immediate count.
// The count is passed negated and masked (-c & 31), the pre-adjusted form
// the backend's ushr helper expects for A64's shift-right immediate encoding.
instruct vsrl2I_imm(vecD dst, vecD src, immI shift) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (URShiftVI src shift));
  ins_cost(INSN_COST);
  format %{ "ushr    $dst, $src, $shift\t# vector (2S)" %}
  ins_encode %{
    __ ushr(as_FloatRegister($dst$$reg), __ T2S,
            as_FloatRegister($src$$reg),
            -(int)$shift$$constant & 31);
  %}
  ins_pipe(vshift64_imm);
%}
15635 
// Logical (unsigned) right shift of 4 ints by an immediate count
// (128-bit variant of vsrl2I_imm). Count passed as -c & 31, see vsrl2I_imm.
instruct vsrl4I_imm(vecX dst, vecX src, immI shift) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (URShiftVI src shift));
  ins_cost(INSN_COST);
  format %{ "ushr    $dst, $src, $shift\t# vector (4S)" %}
  ins_encode %{
    __ ushr(as_FloatRegister($dst$$reg), __ T4S,
            as_FloatRegister($src$$reg),
            -(int)$shift$$constant & 31);
  %}
  ins_pipe(vshift128_imm);
%}
15648 
// Left shift of 2 longs by a variable vector count.
// SSHL with a positive per-lane count performs the left shift directly;
// no count negation is needed for the left-shift direction.
instruct vsll2L(vecX dst, vecX src, vecX shift) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (LShiftVL src shift));
  ins_cost(INSN_COST);
  format %{ "sshl  $dst,$src,$shift\t# vector (2D)" %}
  ins_encode %{
    __ sshl(as_FloatRegister($dst$$reg), __ T2D,
            as_FloatRegister($src$$reg),
            as_FloatRegister($shift$$reg));
  %}
  ins_pipe(vshift128);
%}
15661 
// Arithmetic (signed) right shift of 2 longs by a variable vector count.
// NEON has no right-shift-by-register; negate the counts into tmp and use
// SSHL, which shifts right when the per-lane count is negative.
instruct vsra2L(vecX dst, vecX src, vecX shift, vecX tmp) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (RShiftVL src shift));
  ins_cost(INSN_COST);
  // tmp holds the negated shift counts.
  effect(TEMP tmp);
  format %{ "negr  $tmp,$shift\t"
            "sshl  $dst,$src,$tmp\t# vector (2D)" %}
  ins_encode %{
    __ negr(as_FloatRegister($tmp$$reg), __ T16B,
            as_FloatRegister($shift$$reg));
    __ sshl(as_FloatRegister($dst$$reg), __ T2D,
            as_FloatRegister($src$$reg),
            as_FloatRegister($tmp$$reg));
  %}
  ins_pipe(vshift128);
%}
15678 
// Logical (unsigned) right shift of 2 longs by a variable vector count.
// Negate the counts into tmp, then USHL with the negative counts.
instruct vsrl2L(vecX dst, vecX src, vecX shift, vecX tmp) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (URShiftVL src shift));
  ins_cost(INSN_COST);
  // tmp holds the negated shift counts.
  effect(TEMP tmp);
  format %{ "negr  $tmp,$shift\t"
            "ushl  $dst,$src,$tmp\t# vector (2D)" %}
  ins_encode %{
    __ negr(as_FloatRegister($tmp$$reg), __ T16B,
            as_FloatRegister($shift$$reg));
    __ ushl(as_FloatRegister($dst$$reg), __ T2D,
            as_FloatRegister($src$$reg),
            as_FloatRegister($tmp$$reg));
  %}
  ins_pipe(vshift128);
%}
15695 
// Left shift of 2 longs by an immediate count.
// The count is masked to 0..63, matching Java's shift semantics for long.
instruct vsll2L_imm(vecX dst, vecX src, immI shift) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (LShiftVL src shift));
  ins_cost(INSN_COST);
  format %{ "shl    $dst, $src, $shift\t# vector (2D)" %}
  ins_encode %{
    __ shl(as_FloatRegister($dst$$reg), __ T2D,
           as_FloatRegister($src$$reg),
           (int)$shift$$constant & 63);
  %}
  ins_pipe(vshift128_imm);
%}
15708 
// Arithmetic (signed) right shift of 2 longs by an immediate count.
// The count is passed as -c & 63 (== 64 - c for c in 1..63), the
// pre-adjusted form the backend's sshr helper expects for A64's
// shift-right immediate encoding (immh:immb == 2*esize - count).
instruct vsra2L_imm(vecX dst, vecX src, immI shift) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (RShiftVL src shift));
  ins_cost(INSN_COST);
  format %{ "sshr    $dst, $src, $shift\t# vector (2D)" %}
  ins_encode %{
    __ sshr(as_FloatRegister($dst$$reg), __ T2D,
            as_FloatRegister($src$$reg),
            -(int)$shift$$constant & 63);
  %}
  ins_pipe(vshift128_imm);
%}
15721 
// Logical (unsigned) right shift of 2 longs by an immediate count.
// Count passed negated and masked (-c & 63); see vsra2L_imm for the
// encoding rationale.
instruct vsrl2L_imm(vecX dst, vecX src, immI shift) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (URShiftVL src shift));
  ins_cost(INSN_COST);
  format %{ "ushr    $dst, $src, $shift\t# vector (2D)" %}
  ins_encode %{
    __ ushr(as_FloatRegister($dst$$reg), __ T2D,
            as_FloatRegister($src$$reg),
            -(int)$shift$$constant & 63);
  %}
  ins_pipe(vshift128_imm);
%}
15734 
15735 //----------PEEPHOLE RULES-----------------------------------------------------
15736 // These must follow all instruction definitions as they use the names
15737 // defined in the instructions definitions.
15738 //
15739 // peepmatch ( root_instr_name [preceding_instruction]* );
15740 //
15741 // peepconstraint %{
15742 // (instruction_number.operand_name relational_op instruction_number.operand_name
15743 //  [, ...] );
15744 // // instruction numbers are zero-based using left to right order in peepmatch
15745 //
15746 // peepreplace ( instr_name  ( [instruction_number.operand_name]* ) );
15747 // // provide an instruction_number.operand_name for each operand that appears
15748 // // in the replacement instruction's match rule
15749 //
15750 // ---------VM FLAGS---------------------------------------------------------
15751 //
15752 // All peephole optimizations can be turned off using -XX:-OptoPeephole
15753 //
15754 // Each peephole rule is given an identifying number starting with zero and
15755 // increasing by one in the order seen by the parser.  An individual peephole
15756 // can be enabled, and all others disabled, by using -XX:OptoPeepholeAt=#
15757 // on the command-line.
15758 //
15759 // ---------CURRENT LIMITATIONS----------------------------------------------
15760 //
15761 // Only match adjacent instructions in same basic block
15762 // Only equality constraints
15763 // Only constraints between operands, not (0.dest_reg == RAX_enc)
15764 // Only one replacement instruction
15765 //
15766 // ---------EXAMPLE----------------------------------------------------------
15767 //
15768 // // pertinent parts of existing instructions in architecture description
15769 // instruct movI(iRegINoSp dst, iRegI src)
15770 // %{
15771 //   match(Set dst (CopyI src));
15772 // %}
15773 //
15774 // instruct incI_iReg(iRegINoSp dst, immI1 src, rFlagsReg cr)
15775 // %{
15776 //   match(Set dst (AddI dst src));
15777 //   effect(KILL cr);
15778 // %}
15779 //
15780 // // Change (inc mov) to lea
15781 // peephole %{
//   // increment preceded by register-register move
15783 //   peepmatch ( incI_iReg movI );
15784 //   // require that the destination register of the increment
15785 //   // match the destination register of the move
15786 //   peepconstraint ( 0.dst == 1.dst );
15787 //   // construct a replacement instruction that sets
15788 //   // the destination to ( move's source register + one )
15789 //   peepreplace ( leaI_iReg_immI( 0.dst 1.src 0.src ) );
15790 // %}
15791 //
15792 
15793 // Implementation no longer uses movX instructions since
15794 // machine-independent system no longer uses CopyX nodes.
15795 //
15796 // peephole
15797 // %{
15798 //   peepmatch (incI_iReg movI);
15799 //   peepconstraint (0.dst == 1.dst);
15800 //   peepreplace (leaI_iReg_immI(0.dst 1.src 0.src));
15801 // %}
15802 
15803 // peephole
15804 // %{
15805 //   peepmatch (decI_iReg movI);
15806 //   peepconstraint (0.dst == 1.dst);
15807 //   peepreplace (leaI_iReg_immI(0.dst 1.src 0.src));
15808 // %}
15809 
15810 // peephole
15811 // %{
15812 //   peepmatch (addI_iReg_imm movI);
15813 //   peepconstraint (0.dst == 1.dst);
15814 //   peepreplace (leaI_iReg_immI(0.dst 1.src 0.src));
15815 // %}
15816 
15817 // peephole
15818 // %{
15819 //   peepmatch (incL_iReg movL);
15820 //   peepconstraint (0.dst == 1.dst);
15821 //   peepreplace (leaL_iReg_immL(0.dst 1.src 0.src));
15822 // %}
15823 
15824 // peephole
15825 // %{
15826 //   peepmatch (decL_iReg movL);
15827 //   peepconstraint (0.dst == 1.dst);
15828 //   peepreplace (leaL_iReg_immL(0.dst 1.src 0.src));
15829 // %}
15830 
15831 // peephole
15832 // %{
15833 //   peepmatch (addL_iReg_imm movL);
15834 //   peepconstraint (0.dst == 1.dst);
15835 //   peepreplace (leaL_iReg_immL(0.dst 1.src 0.src));
15836 // %}
15837 
15838 // peephole
15839 // %{
15840 //   peepmatch (addP_iReg_imm movP);
15841 //   peepconstraint (0.dst == 1.dst);
15842 //   peepreplace (leaP_iReg_imm(0.dst 1.src 0.src));
15843 // %}
15844 
15845 // // Change load of spilled value to only a spill
15846 // instruct storeI(memory mem, iRegI src)
15847 // %{
15848 //   match(Set mem (StoreI mem src));
15849 // %}
15850 //
15851 // instruct loadI(iRegINoSp dst, memory mem)
15852 // %{
15853 //   match(Set dst (LoadI mem));
15854 // %}
15855 //
15856 
15857 //----------SMARTSPILL RULES---------------------------------------------------
15858 // These must follow all instruction definitions as they use the names
15859 // defined in the instructions definitions.
15860 
15861 // Local Variables:
15862 // mode: c++
15863 // End: