1 //
   2 // Copyright (c) 2003, 2019, Oracle and/or its affiliates. All rights reserved.
   3 // Copyright (c) 2014, 2019, Red Hat Inc.
   4 // All rights reserved.
   5 // DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   6 //
   7 // This code is free software; you can redistribute it and/or modify it
   8 // under the terms of the GNU General Public License version 2 only, as
   9 // published by the Free Software Foundation.
  10 //
  11 // This code is distributed in the hope that it will be useful, but WITHOUT
  12 // ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  13 // FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  14 // version 2 for more details (a copy is included in the LICENSE file that
  15 // accompanied this code).
  16 //
  17 // You should have received a copy of the GNU General Public License version
  18 // 2 along with this work; if not, write to the Free Software Foundation,
  19 // Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  20 //
  21 // Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  22 // or visit www.oracle.com if you need additional information or have any
  23 // questions.
  24 //
  25 //
  26 
  27 // AArch64 Architecture Description File
  28 
  29 //----------REGISTER DEFINITION BLOCK------------------------------------------
  30 // This information is used by the matcher and the register allocator to
  31 // describe individual registers and classes of registers within the target
// architecture.
  33 
  34 register %{
  35 //----------Architecture Description Register Definitions----------------------
  36 // General Registers
  37 // "reg_def"  name ( register save type, C convention save type,
  38 //                   ideal register type, encoding );
  39 // Register Save Types:
  40 //
  41 // NS  = No-Save:       The register allocator assumes that these registers
  42 //                      can be used without saving upon entry to the method, &
  43 //                      that they do not need to be saved at call sites.
  44 //
  45 // SOC = Save-On-Call:  The register allocator assumes that these registers
  46 //                      can be used without saving upon entry to the method,
  47 //                      but that they must be saved at call sites.
  48 //
  49 // SOE = Save-On-Entry: The register allocator assumes that these registers
  50 //                      must be saved before using them upon entry to the
  51 //                      method, but they do not need to be saved at call
  52 //                      sites.
  53 //
  54 // AS  = Always-Save:   The register allocator assumes that these registers
  55 //                      must be saved before using them upon entry to the
  56 //                      method, & that they must be saved at call sites.
  57 //
  58 // Ideal Register Type is used to determine how to save & restore a
  59 // register.  Op_RegI will get spilled with LoadI/StoreI, Op_RegP will get
  60 // spilled with LoadP/StoreP.  If the register supports both, use Op_RegI.
  61 //
  62 // The encoding number is the actual bit-pattern placed into the opcodes.
  63 
  64 // We must define the 64 bit int registers in two 32 bit halves, the
  65 // real lower register and a virtual upper half register. upper halves
  66 // are used by the register allocator but are not actually supplied as
  67 // operands to memory ops.
  68 //
  69 // follow the C1 compiler in making registers
  70 //
  71 //   r0-r7,r10-r26 volatile (caller save)
  72 //   r27-r32 system (no save, no allocate)
  73 //   r8-r9 invisible to the allocator (so we can use them as scratch regs)
  74 //
  75 // as regards Java usage. we don't use any callee save registers
  76 // because this makes it difficult to de-optimise a frame (see comment
  77 // in x86 implementation of Deoptimization::unwind_callee_save_values)
  78 //
  79 
  80 // General Registers
  81 
// Each 64-bit general register is modelled as a real low 32-bit half
// (Rn) plus a virtual high half (Rn_H); see the note above.  Columns
// are (Java save type, C save type, ideal type, encoding, VMReg).
reg_def R0      ( SOC, SOC, Op_RegI,  0, r0->as_VMReg()         );
reg_def R0_H    ( SOC, SOC, Op_RegI,  0, r0->as_VMReg()->next() );
reg_def R1      ( SOC, SOC, Op_RegI,  1, r1->as_VMReg()         );
reg_def R1_H    ( SOC, SOC, Op_RegI,  1, r1->as_VMReg()->next() );
reg_def R2      ( SOC, SOC, Op_RegI,  2, r2->as_VMReg()         );
reg_def R2_H    ( SOC, SOC, Op_RegI,  2, r2->as_VMReg()->next() );
reg_def R3      ( SOC, SOC, Op_RegI,  3, r3->as_VMReg()         );
reg_def R3_H    ( SOC, SOC, Op_RegI,  3, r3->as_VMReg()->next() );
reg_def R4      ( SOC, SOC, Op_RegI,  4, r4->as_VMReg()         );
reg_def R4_H    ( SOC, SOC, Op_RegI,  4, r4->as_VMReg()->next() );
reg_def R5      ( SOC, SOC, Op_RegI,  5, r5->as_VMReg()         );
reg_def R5_H    ( SOC, SOC, Op_RegI,  5, r5->as_VMReg()->next() );
reg_def R6      ( SOC, SOC, Op_RegI,  6, r6->as_VMReg()         );
reg_def R6_H    ( SOC, SOC, Op_RegI,  6, r6->as_VMReg()->next() );
reg_def R7      ( SOC, SOC, Op_RegI,  7, r7->as_VMReg()         );
reg_def R7_H    ( SOC, SOC, Op_RegI,  7, r7->as_VMReg()->next() );
// r8 and r9 are deliberately omitted here so that they stay invisible
// to the register allocator and remain usable as scratch registers
// (see the comment block above).
reg_def R10     ( SOC, SOC, Op_RegI, 10, r10->as_VMReg()        );
reg_def R10_H   ( SOC, SOC, Op_RegI, 10, r10->as_VMReg()->next());
reg_def R11     ( SOC, SOC, Op_RegI, 11, r11->as_VMReg()        );
reg_def R11_H   ( SOC, SOC, Op_RegI, 11, r11->as_VMReg()->next());
reg_def R12     ( SOC, SOC, Op_RegI, 12, r12->as_VMReg()        );
reg_def R12_H   ( SOC, SOC, Op_RegI, 12, r12->as_VMReg()->next());
reg_def R13     ( SOC, SOC, Op_RegI, 13, r13->as_VMReg()        );
reg_def R13_H   ( SOC, SOC, Op_RegI, 13, r13->as_VMReg()->next());
reg_def R14     ( SOC, SOC, Op_RegI, 14, r14->as_VMReg()        );
reg_def R14_H   ( SOC, SOC, Op_RegI, 14, r14->as_VMReg()->next());
reg_def R15     ( SOC, SOC, Op_RegI, 15, r15->as_VMReg()        );
reg_def R15_H   ( SOC, SOC, Op_RegI, 15, r15->as_VMReg()->next());
reg_def R16     ( SOC, SOC, Op_RegI, 16, r16->as_VMReg()        );
reg_def R16_H   ( SOC, SOC, Op_RegI, 16, r16->as_VMReg()->next());
reg_def R17     ( SOC, SOC, Op_RegI, 17, r17->as_VMReg()        );
reg_def R17_H   ( SOC, SOC, Op_RegI, 17, r17->as_VMReg()->next());
reg_def R18     ( SOC, SOC, Op_RegI, 18, r18->as_VMReg()        );
reg_def R18_H   ( SOC, SOC, Op_RegI, 18, r18->as_VMReg()->next());
// r19-r26: save-on-call for Java use, but callee save (SOE) under the
// C calling convention -- hence the differing second column below.
reg_def R19     ( SOC, SOE, Op_RegI, 19, r19->as_VMReg()        );
reg_def R19_H   ( SOC, SOE, Op_RegI, 19, r19->as_VMReg()->next());
reg_def R20     ( SOC, SOE, Op_RegI, 20, r20->as_VMReg()        ); // caller esp
reg_def R20_H   ( SOC, SOE, Op_RegI, 20, r20->as_VMReg()->next());
reg_def R21     ( SOC, SOE, Op_RegI, 21, r21->as_VMReg()        );
reg_def R21_H   ( SOC, SOE, Op_RegI, 21, r21->as_VMReg()->next());
reg_def R22     ( SOC, SOE, Op_RegI, 22, r22->as_VMReg()        );
reg_def R22_H   ( SOC, SOE, Op_RegI, 22, r22->as_VMReg()->next());
reg_def R23     ( SOC, SOE, Op_RegI, 23, r23->as_VMReg()        );
reg_def R23_H   ( SOC, SOE, Op_RegI, 23, r23->as_VMReg()->next());
reg_def R24     ( SOC, SOE, Op_RegI, 24, r24->as_VMReg()        );
reg_def R24_H   ( SOC, SOE, Op_RegI, 24, r24->as_VMReg()->next());
reg_def R25     ( SOC, SOE, Op_RegI, 25, r25->as_VMReg()        );
reg_def R25_H   ( SOC, SOE, Op_RegI, 25, r25->as_VMReg()->next());
reg_def R26     ( SOC, SOE, Op_RegI, 26, r26->as_VMReg()        );
reg_def R26_H   ( SOC, SOE, Op_RegI, 26, r26->as_VMReg()->next());
// r27-r31 have fixed roles and are never allocated for Java use (NS).
reg_def R27     (  NS, SOE, Op_RegI, 27, r27->as_VMReg()        ); // heapbase
reg_def R27_H   (  NS, SOE, Op_RegI, 27, r27->as_VMReg()->next());
reg_def R28     (  NS, SOE, Op_RegI, 28, r28->as_VMReg()        ); // thread
reg_def R28_H   (  NS, SOE, Op_RegI, 28, r28->as_VMReg()->next());
reg_def R29     (  NS,  NS, Op_RegI, 29, r29->as_VMReg()        ); // fp
reg_def R29_H   (  NS,  NS, Op_RegI, 29, r29->as_VMReg()->next());
reg_def R30     (  NS,  NS, Op_RegI, 30, r30->as_VMReg()        ); // lr
reg_def R30_H   (  NS,  NS, Op_RegI, 30, r30->as_VMReg()->next());
reg_def R31     (  NS,  NS, Op_RegI, 31, r31_sp->as_VMReg()     ); // sp
reg_def R31_H   (  NS,  NS, Op_RegI, 31, r31_sp->as_VMReg()->next());
 142 
 143 // ----------------------------
 144 // Float/Double Registers
 145 // ----------------------------
 146 
 147 // Double Registers
 148 
 149 // The rules of ADL require that double registers be defined in pairs.
 150 // Each pair must be two 32-bit values, but not necessarily a pair of
 151 // single float registers. In each pair, ADLC-assigned register numbers
 152 // must be adjacent, with the lower number even. Finally, when the
 153 // CPU stores such a register pair to memory, the word associated with
 154 // the lower ADLC-assigned number must be stored to the lower address.
 155 
 156 // AArch64 has 32 floating-point registers. Each can store a vector of
 157 // single or double precision floating-point values up to 8 * 32
 158 // floats, 4 * 64 bit floats or 2 * 128 bit floats.  We currently only
 159 // use the first float or double element of the vector.
 160 
// For Java use, float registers v0-v15 are always save-on-call
// (whereas the platform ABI treats v8-v15 as callee save).  Float
// registers v16-v31 are SOC as per the platform spec.
 164 
  // Each 128-bit vector register Vn is described by four 32-bit slots:
  // Vn is the lowest word, followed by Vn_H, Vn_J and Vn_K built with
  // as_VMReg()->next(1|2|3).  Scalar float/double code only touches the
  // first one or two slots; the extra slots exist for the allocator's
  // benefit when full vectors are live.
  reg_def V0   ( SOC, SOC, Op_RegF,  0, v0->as_VMReg()          );
  reg_def V0_H ( SOC, SOC, Op_RegF,  0, v0->as_VMReg()->next()  );
  reg_def V0_J ( SOC, SOC, Op_RegF,  0, v0->as_VMReg()->next(2) );
  reg_def V0_K ( SOC, SOC, Op_RegF,  0, v0->as_VMReg()->next(3) );

  reg_def V1   ( SOC, SOC, Op_RegF,  1, v1->as_VMReg()          );
  reg_def V1_H ( SOC, SOC, Op_RegF,  1, v1->as_VMReg()->next()  );
  reg_def V1_J ( SOC, SOC, Op_RegF,  1, v1->as_VMReg()->next(2) );
  reg_def V1_K ( SOC, SOC, Op_RegF,  1, v1->as_VMReg()->next(3) );

  reg_def V2   ( SOC, SOC, Op_RegF,  2, v2->as_VMReg()          );
  reg_def V2_H ( SOC, SOC, Op_RegF,  2, v2->as_VMReg()->next()  );
  reg_def V2_J ( SOC, SOC, Op_RegF,  2, v2->as_VMReg()->next(2) );
  reg_def V2_K ( SOC, SOC, Op_RegF,  2, v2->as_VMReg()->next(3) );

  reg_def V3   ( SOC, SOC, Op_RegF,  3, v3->as_VMReg()          );
  reg_def V3_H ( SOC, SOC, Op_RegF,  3, v3->as_VMReg()->next()  );
  reg_def V3_J ( SOC, SOC, Op_RegF,  3, v3->as_VMReg()->next(2) );
  reg_def V3_K ( SOC, SOC, Op_RegF,  3, v3->as_VMReg()->next(3) );

  reg_def V4   ( SOC, SOC, Op_RegF,  4, v4->as_VMReg()          );
  reg_def V4_H ( SOC, SOC, Op_RegF,  4, v4->as_VMReg()->next()  );
  reg_def V4_J ( SOC, SOC, Op_RegF,  4, v4->as_VMReg()->next(2) );
  reg_def V4_K ( SOC, SOC, Op_RegF,  4, v4->as_VMReg()->next(3) );

  reg_def V5   ( SOC, SOC, Op_RegF,  5, v5->as_VMReg()          );
  reg_def V5_H ( SOC, SOC, Op_RegF,  5, v5->as_VMReg()->next()  );
  reg_def V5_J ( SOC, SOC, Op_RegF,  5, v5->as_VMReg()->next(2) );
  reg_def V5_K ( SOC, SOC, Op_RegF,  5, v5->as_VMReg()->next(3) );

  reg_def V6   ( SOC, SOC, Op_RegF,  6, v6->as_VMReg()          );
  reg_def V6_H ( SOC, SOC, Op_RegF,  6, v6->as_VMReg()->next()  );
  reg_def V6_J ( SOC, SOC, Op_RegF,  6, v6->as_VMReg()->next(2) );
  reg_def V6_K ( SOC, SOC, Op_RegF,  6, v6->as_VMReg()->next(3) );

  reg_def V7   ( SOC, SOC, Op_RegF,  7, v7->as_VMReg()          );
  reg_def V7_H ( SOC, SOC, Op_RegF,  7, v7->as_VMReg()->next()  );
  reg_def V7_J ( SOC, SOC, Op_RegF,  7, v7->as_VMReg()->next(2) );
  reg_def V7_K ( SOC, SOC, Op_RegF,  7, v7->as_VMReg()->next(3) );

  // v8-v15 are callee save under the platform ABI but are treated as
  // SOC here for Java use (see the comment above these definitions).
  reg_def V8   ( SOC, SOC, Op_RegF,  8, v8->as_VMReg()          );
  reg_def V8_H ( SOC, SOC, Op_RegF,  8, v8->as_VMReg()->next()  );
  reg_def V8_J ( SOC, SOC, Op_RegF,  8, v8->as_VMReg()->next(2) );
  reg_def V8_K ( SOC, SOC, Op_RegF,  8, v8->as_VMReg()->next(3) );

  reg_def V9   ( SOC, SOC, Op_RegF,  9, v9->as_VMReg()          );
  reg_def V9_H ( SOC, SOC, Op_RegF,  9, v9->as_VMReg()->next()  );
  reg_def V9_J ( SOC, SOC, Op_RegF,  9, v9->as_VMReg()->next(2) );
  reg_def V9_K ( SOC, SOC, Op_RegF,  9, v9->as_VMReg()->next(3) );

  reg_def V10  ( SOC, SOC, Op_RegF, 10, v10->as_VMReg()         );
  reg_def V10_H( SOC, SOC, Op_RegF, 10, v10->as_VMReg()->next() );
  reg_def V10_J( SOC, SOC, Op_RegF, 10, v10->as_VMReg()->next(2));
  reg_def V10_K( SOC, SOC, Op_RegF, 10, v10->as_VMReg()->next(3));

  reg_def V11  ( SOC, SOC, Op_RegF, 11, v11->as_VMReg()         );
  reg_def V11_H( SOC, SOC, Op_RegF, 11, v11->as_VMReg()->next() );
  reg_def V11_J( SOC, SOC, Op_RegF, 11, v11->as_VMReg()->next(2));
  reg_def V11_K( SOC, SOC, Op_RegF, 11, v11->as_VMReg()->next(3));

  reg_def V12  ( SOC, SOC, Op_RegF, 12, v12->as_VMReg()         );
  reg_def V12_H( SOC, SOC, Op_RegF, 12, v12->as_VMReg()->next() );
  reg_def V12_J( SOC, SOC, Op_RegF, 12, v12->as_VMReg()->next(2));
  reg_def V12_K( SOC, SOC, Op_RegF, 12, v12->as_VMReg()->next(3));

  reg_def V13  ( SOC, SOC, Op_RegF, 13, v13->as_VMReg()         );
  reg_def V13_H( SOC, SOC, Op_RegF, 13, v13->as_VMReg()->next() );
  reg_def V13_J( SOC, SOC, Op_RegF, 13, v13->as_VMReg()->next(2));
  reg_def V13_K( SOC, SOC, Op_RegF, 13, v13->as_VMReg()->next(3));

  reg_def V14  ( SOC, SOC, Op_RegF, 14, v14->as_VMReg()         );
  reg_def V14_H( SOC, SOC, Op_RegF, 14, v14->as_VMReg()->next() );
  reg_def V14_J( SOC, SOC, Op_RegF, 14, v14->as_VMReg()->next(2));
  reg_def V14_K( SOC, SOC, Op_RegF, 14, v14->as_VMReg()->next(3));

  reg_def V15  ( SOC, SOC, Op_RegF, 15, v15->as_VMReg()         );
  reg_def V15_H( SOC, SOC, Op_RegF, 15, v15->as_VMReg()->next() );
  reg_def V15_J( SOC, SOC, Op_RegF, 15, v15->as_VMReg()->next(2));
  reg_def V15_K( SOC, SOC, Op_RegF, 15, v15->as_VMReg()->next(3));

  reg_def V16  ( SOC, SOC, Op_RegF, 16, v16->as_VMReg()         );
  reg_def V16_H( SOC, SOC, Op_RegF, 16, v16->as_VMReg()->next() );
  reg_def V16_J( SOC, SOC, Op_RegF, 16, v16->as_VMReg()->next(2));
  reg_def V16_K( SOC, SOC, Op_RegF, 16, v16->as_VMReg()->next(3));

  reg_def V17  ( SOC, SOC, Op_RegF, 17, v17->as_VMReg()         );
  reg_def V17_H( SOC, SOC, Op_RegF, 17, v17->as_VMReg()->next() );
  reg_def V17_J( SOC, SOC, Op_RegF, 17, v17->as_VMReg()->next(2));
  reg_def V17_K( SOC, SOC, Op_RegF, 17, v17->as_VMReg()->next(3));

  reg_def V18  ( SOC, SOC, Op_RegF, 18, v18->as_VMReg()         );
  reg_def V18_H( SOC, SOC, Op_RegF, 18, v18->as_VMReg()->next() );
  reg_def V18_J( SOC, SOC, Op_RegF, 18, v18->as_VMReg()->next(2));
  reg_def V18_K( SOC, SOC, Op_RegF, 18, v18->as_VMReg()->next(3));

  reg_def V19  ( SOC, SOC, Op_RegF, 19, v19->as_VMReg()         );
  reg_def V19_H( SOC, SOC, Op_RegF, 19, v19->as_VMReg()->next() );
  reg_def V19_J( SOC, SOC, Op_RegF, 19, v19->as_VMReg()->next(2));
  reg_def V19_K( SOC, SOC, Op_RegF, 19, v19->as_VMReg()->next(3));

  reg_def V20  ( SOC, SOC, Op_RegF, 20, v20->as_VMReg()         );
  reg_def V20_H( SOC, SOC, Op_RegF, 20, v20->as_VMReg()->next() );
  reg_def V20_J( SOC, SOC, Op_RegF, 20, v20->as_VMReg()->next(2));
  reg_def V20_K( SOC, SOC, Op_RegF, 20, v20->as_VMReg()->next(3));

  reg_def V21  ( SOC, SOC, Op_RegF, 21, v21->as_VMReg()         );
  reg_def V21_H( SOC, SOC, Op_RegF, 21, v21->as_VMReg()->next() );
  reg_def V21_J( SOC, SOC, Op_RegF, 21, v21->as_VMReg()->next(2));
  reg_def V21_K( SOC, SOC, Op_RegF, 21, v21->as_VMReg()->next(3));

  reg_def V22  ( SOC, SOC, Op_RegF, 22, v22->as_VMReg()         );
  reg_def V22_H( SOC, SOC, Op_RegF, 22, v22->as_VMReg()->next() );
  reg_def V22_J( SOC, SOC, Op_RegF, 22, v22->as_VMReg()->next(2));
  reg_def V22_K( SOC, SOC, Op_RegF, 22, v22->as_VMReg()->next(3));

  reg_def V23  ( SOC, SOC, Op_RegF, 23, v23->as_VMReg()         );
  reg_def V23_H( SOC, SOC, Op_RegF, 23, v23->as_VMReg()->next() );
  reg_def V23_J( SOC, SOC, Op_RegF, 23, v23->as_VMReg()->next(2));
  reg_def V23_K( SOC, SOC, Op_RegF, 23, v23->as_VMReg()->next(3));

  reg_def V24  ( SOC, SOC, Op_RegF, 24, v24->as_VMReg()         );
  reg_def V24_H( SOC, SOC, Op_RegF, 24, v24->as_VMReg()->next() );
  reg_def V24_J( SOC, SOC, Op_RegF, 24, v24->as_VMReg()->next(2));
  reg_def V24_K( SOC, SOC, Op_RegF, 24, v24->as_VMReg()->next(3));

  reg_def V25  ( SOC, SOC, Op_RegF, 25, v25->as_VMReg()         );
  reg_def V25_H( SOC, SOC, Op_RegF, 25, v25->as_VMReg()->next() );
  reg_def V25_J( SOC, SOC, Op_RegF, 25, v25->as_VMReg()->next(2));
  reg_def V25_K( SOC, SOC, Op_RegF, 25, v25->as_VMReg()->next(3));

  reg_def V26  ( SOC, SOC, Op_RegF, 26, v26->as_VMReg()         );
  reg_def V26_H( SOC, SOC, Op_RegF, 26, v26->as_VMReg()->next() );
  reg_def V26_J( SOC, SOC, Op_RegF, 26, v26->as_VMReg()->next(2));
  reg_def V26_K( SOC, SOC, Op_RegF, 26, v26->as_VMReg()->next(3));

  reg_def V27  ( SOC, SOC, Op_RegF, 27, v27->as_VMReg()         );
  reg_def V27_H( SOC, SOC, Op_RegF, 27, v27->as_VMReg()->next() );
  reg_def V27_J( SOC, SOC, Op_RegF, 27, v27->as_VMReg()->next(2));
  reg_def V27_K( SOC, SOC, Op_RegF, 27, v27->as_VMReg()->next(3));

  reg_def V28  ( SOC, SOC, Op_RegF, 28, v28->as_VMReg()         );
  reg_def V28_H( SOC, SOC, Op_RegF, 28, v28->as_VMReg()->next() );
  reg_def V28_J( SOC, SOC, Op_RegF, 28, v28->as_VMReg()->next(2));
  reg_def V28_K( SOC, SOC, Op_RegF, 28, v28->as_VMReg()->next(3));

  reg_def V29  ( SOC, SOC, Op_RegF, 29, v29->as_VMReg()         );
  reg_def V29_H( SOC, SOC, Op_RegF, 29, v29->as_VMReg()->next() );
  reg_def V29_J( SOC, SOC, Op_RegF, 29, v29->as_VMReg()->next(2));
  reg_def V29_K( SOC, SOC, Op_RegF, 29, v29->as_VMReg()->next(3));

  reg_def V30  ( SOC, SOC, Op_RegF, 30, v30->as_VMReg()         );
  reg_def V30_H( SOC, SOC, Op_RegF, 30, v30->as_VMReg()->next() );
  reg_def V30_J( SOC, SOC, Op_RegF, 30, v30->as_VMReg()->next(2));
  reg_def V30_K( SOC, SOC, Op_RegF, 30, v30->as_VMReg()->next(3));

  reg_def V31  ( SOC, SOC, Op_RegF, 31, v31->as_VMReg()         );
  reg_def V31_H( SOC, SOC, Op_RegF, 31, v31->as_VMReg()->next() );
  reg_def V31_J( SOC, SOC, Op_RegF, 31, v31->as_VMReg()->next(2));
  reg_def V31_K( SOC, SOC, Op_RegF, 31, v31->as_VMReg()->next(3));
 324 
 325 // ----------------------------
 326 // Special Registers
 327 // ----------------------------
 328 
// the AArch64 CPSR status flag register is not directly accessible as
// an instruction operand. the FPSR status flag register is a system
// register which can be written/read using MSR/MRS but again does not
// appear as an operand (a code identifying the FPSR occurs as an
// immediate value in the instruction).
 334 
// Placeholder for the condition flags: there is no directly addressable
// flags register (see above), so this is bound to VMRegImpl::Bad().
reg_def RFLAGS(SOC, SOC, 0, 32, VMRegImpl::Bad());
 336 
 337 
 338 // Specify priority of register selection within phases of register
 339 // allocation.  Highest priority is first.  A useful heuristic is to
 340 // give registers a low priority when they are required by machine
 341 // instructions, like EAX and EDX on I486, and choose no-save registers
 342 // before save-on-call, & save-on-call before save-on-entry.  Registers
 343 // which participate in fixed calling sequences should come last.
 344 // Registers which are used as pairs must fall on an even boundary.
 345 
// General-register allocation order (highest priority first, per the
// comment above): plain volatiles, then argument registers, then the
// C-ABI non-volatiles, and finally the fixed-role registers which are
// never allocated.
alloc_class chunk0(
    // volatiles
    R10, R10_H,
    R11, R11_H,
    R12, R12_H,
    R13, R13_H,
    R14, R14_H,
    R15, R15_H,
    R16, R16_H,
    R17, R17_H,
    R18, R18_H,

    // arg registers
    R0, R0_H,
    R1, R1_H,
    R2, R2_H,
    R3, R3_H,
    R4, R4_H,
    R5, R5_H,
    R6, R6_H,
    R7, R7_H,

    // non-volatiles
    R19, R19_H,
    R20, R20_H,
    R21, R21_H,
    R22, R22_H,
    R23, R23_H,
    R24, R24_H,
    R25, R25_H,
    R26, R26_H,

    // non-allocatable registers

    R27, R27_H, // heapbase
    R28, R28_H, // thread
    R29, R29_H, // fp
    R30, R30_H, // lr
    R31, R31_H, // sp
);
 386 
// Float-register allocation order: v16-v31 first (SOC per the platform
// spec), then the argument registers v0-v7, and last v8-v15 which the
// platform ABI treats as callee save.
alloc_class chunk1(

    // no save
    V16, V16_H, V16_J, V16_K,
    V17, V17_H, V17_J, V17_K,
    V18, V18_H, V18_J, V18_K,
    V19, V19_H, V19_J, V19_K,
    V20, V20_H, V20_J, V20_K,
    V21, V21_H, V21_J, V21_K,
    V22, V22_H, V22_J, V22_K,
    V23, V23_H, V23_J, V23_K,
    V24, V24_H, V24_J, V24_K,
    V25, V25_H, V25_J, V25_K,
    V26, V26_H, V26_J, V26_K,
    V27, V27_H, V27_J, V27_K,
    V28, V28_H, V28_J, V28_K,
    V29, V29_H, V29_J, V29_K,
    V30, V30_H, V30_J, V30_K,
    V31, V31_H, V31_J, V31_K,

    // arg registers
    V0, V0_H, V0_J, V0_K,
    V1, V1_H, V1_J, V1_K,
    V2, V2_H, V2_J, V2_K,
    V3, V3_H, V3_J, V3_K,
    V4, V4_H, V4_J, V4_K,
    V5, V5_H, V5_J, V5_K,
    V6, V6_H, V6_J, V6_K,
    V7, V7_H, V7_J, V7_K,

    // non-volatiles
    V8, V8_H, V8_J, V8_K,
    V9, V9_H, V9_J, V9_K,
    V10, V10_H, V10_J, V10_K,
    V11, V11_H, V11_J, V11_K,
    V12, V12_H, V12_J, V12_K,
    V13, V13_H, V13_J, V13_K,
    V14, V14_H, V14_J, V14_K,
    V15, V15_H, V15_J, V15_K,
);
 427 
// The condition flags live in their own allocation chunk.
alloc_class chunk2(RFLAGS);
 429 
 430 //----------Architecture Description Register Classes--------------------------
 431 // Several register classes are automatically defined based upon information in
 432 // this architecture description.
// 1) reg_class inline_cache_reg           ( /* as def'd in frame section */ )
// 2) reg_class compiler_method_oop_reg    ( /* as def'd in frame section */ )
// 3) reg_class interpreter_method_oop_reg ( /* as def'd in frame section */ )
// 4) reg_class stack_slots( /* one chunk of stack-based "registers" */ )
 437 //
 438 
 439 // Class for all 32 bit integer registers -- excludes SP which will
 440 // never be used as an integer register
// R0-R30 in encoding order; R31 (sp) is deliberately absent.
reg_class any_reg32(
    R0,
    R1,
    R2,
    R3,
    R4,
    R5,
    R6,
    R7,
    R10,
    R11,
    R12,
    R13,
    R14,
    R15,
    R16,
    R17,
    R18,
    R19,
    R20,
    R21,
    R22,
    R23,
    R24,
    R25,
    R26,
    R27,
    R28,
    R29,
    R30
);
 472 
// Singleton classes bind an operand to one specific register, for
// instructions/calling sequences that require a fixed register.

// Singleton class for R0 int register
reg_class int_r0_reg(R0);

// Singleton class for R2 int register
reg_class int_r2_reg(R2);

// Singleton class for R3 int register
reg_class int_r3_reg(R3);

// Singleton class for R4 int register
reg_class int_r4_reg(R4);
 484 
 485 // Class for all long integer registers (including RSP)
// Each 64-bit entry pairs the real low half Rn with its virtual high
// half Rn_H.  Unlike any_reg32 this class does include R31 (sp).
reg_class any_reg(
    R0, R0_H,
    R1, R1_H,
    R2, R2_H,
    R3, R3_H,
    R4, R4_H,
    R5, R5_H,
    R6, R6_H,
    R7, R7_H,
    R10, R10_H,
    R11, R11_H,
    R12, R12_H,
    R13, R13_H,
    R14, R14_H,
    R15, R15_H,
    R16, R16_H,
    R17, R17_H,
    R18, R18_H,
    R19, R19_H,
    R20, R20_H,
    R21, R21_H,
    R22, R22_H,
    R23, R23_H,
    R24, R24_H,
    R25, R25_H,
    R26, R26_H,
    R27, R27_H,
    R28, R28_H,
    R29, R29_H,
    R30, R30_H,
    R31, R31_H
);
 518 
 519 // Class for all non-special integer registers
// Excludes the fixed-role registers (heapbase, thread, fp, lr, sp),
// kept visible below as commented-out entries.
reg_class no_special_reg32(
    R0,
    R1,
    R2,
    R3,
    R4,
    R5,
    R6,
    R7,
    R10,
    R11,
    R12,                        // rmethod
    R13,
    R14,
    R15,
    R16,
    R17,
    R18,
    R19,
    R20,
    R21,
    R22,
    R23,
    R24,
    R25,
    R26
 /* R27, */                     // heapbase
 /* R28, */                     // thread
 /* R29, */                     // fp
 /* R30, */                     // lr
 /* R31 */                      // sp
);
 552 
 553 // Class for all non-special long integer registers
// 64-bit counterpart of no_special_reg32: same exclusions, with the
// virtual high halves included for each register.
reg_class no_special_reg(
    R0, R0_H,
    R1, R1_H,
    R2, R2_H,
    R3, R3_H,
    R4, R4_H,
    R5, R5_H,
    R6, R6_H,
    R7, R7_H,
    R10, R10_H,
    R11, R11_H,
    R12, R12_H,                 // rmethod
    R13, R13_H,
    R14, R14_H,
    R15, R15_H,
    R16, R16_H,
    R17, R17_H,
    R18, R18_H,
    R19, R19_H,
    R20, R20_H,
    R21, R21_H,
    R22, R22_H,
    R23, R23_H,
    R24, R24_H,
    R25, R25_H,
    R26, R26_H,
 /* R27, R27_H, */              // heapbase
 /* R28, R28_H, */              // thread
 /* R29, R29_H, */              // fp
 /* R30, R30_H, */              // lr
 /* R31, R31_H */               // sp
);
 586 
// Single-register 64-bit classes, used to pin operands to specific
// registers (argument registers, rmethod, and the fixed-role
// heapbase/thread/fp/lr/sp registers).

// Class for 64 bit register r0
reg_class r0_reg(
    R0, R0_H
);

// Class for 64 bit register r1
reg_class r1_reg(
    R1, R1_H
);

// Class for 64 bit register r2
reg_class r2_reg(
    R2, R2_H
);

// Class for 64 bit register r3
reg_class r3_reg(
    R3, R3_H
);

// Class for 64 bit register r4
reg_class r4_reg(
    R4, R4_H
);

// Class for 64 bit register r5
reg_class r5_reg(
    R5, R5_H
);

// Class for 64 bit register r10
reg_class r10_reg(
    R10, R10_H
);

// Class for 64 bit register r11
reg_class r11_reg(
    R11, R11_H
);

// Class for method register (r12 = rmethod)
reg_class method_reg(
    R12, R12_H
);

// Class for heapbase register (r27)
reg_class heapbase_reg(
    R27, R27_H
);

// Class for thread register (r28)
reg_class thread_reg(
    R28, R28_H
);

// Class for frame pointer register (r29)
reg_class fp_reg(
    R29, R29_H
);

// Class for link register (r30)
reg_class lr_reg(
    R30, R30_H
);

// Class for long sp register (r31)
reg_class sp_reg(
  R31, R31_H
);
 656 
 657 // Class for all pointer registers
// All general registers including the fixed-role ones and sp.
reg_class ptr_reg(
    R0, R0_H,
    R1, R1_H,
    R2, R2_H,
    R3, R3_H,
    R4, R4_H,
    R5, R5_H,
    R6, R6_H,
    R7, R7_H,
    R10, R10_H,
    R11, R11_H,
    R12, R12_H,
    R13, R13_H,
    R14, R14_H,
    R15, R15_H,
    R16, R16_H,
    R17, R17_H,
    R18, R18_H,
    R19, R19_H,
    R20, R20_H,
    R21, R21_H,
    R22, R22_H,
    R23, R23_H,
    R24, R24_H,
    R25, R25_H,
    R26, R26_H,
    R27, R27_H,
    R28, R28_H,
    R29, R29_H,
    R30, R30_H,
    R31, R31_H
);
 690 
 691 // Class for all non_special pointer registers
// Pointer counterpart of no_special_reg: excludes the fixed-role
// registers, kept visible below as commented-out entries.
reg_class no_special_ptr_reg(
    R0, R0_H,
    R1, R1_H,
    R2, R2_H,
    R3, R3_H,
    R4, R4_H,
    R5, R5_H,
    R6, R6_H,
    R7, R7_H,
    R10, R10_H,
    R11, R11_H,
    R12, R12_H,
    R13, R13_H,
    R14, R14_H,
    R15, R15_H,
    R16, R16_H,
    R17, R17_H,
    R18, R18_H,
    R19, R19_H,
    R20, R20_H,
    R21, R21_H,
    R22, R22_H,
    R23, R23_H,
    R24, R24_H,
    R25, R25_H,
    R26, R26_H,
 /* R27, R27_H, */              // heapbase
 /* R28, R28_H, */              // thread
 /* R29, R29_H, */              // fp
 /* R30, R30_H, */              // lr
 /* R31, R31_H */               // sp
);
 724 
 725 // Class for all float registers
// Single-precision values only need the lowest 32-bit slot of each
// vector register.
reg_class float_reg(
    V0,
    V1,
    V2,
    V3,
    V4,
    V5,
    V6,
    V7,
    V8,
    V9,
    V10,
    V11,
    V12,
    V13,
    V14,
    V15,
    V16,
    V17,
    V18,
    V19,
    V20,
    V21,
    V22,
    V23,
    V24,
    V25,
    V26,
    V27,
    V28,
    V29,
    V30,
    V31
);
 760 
 761 // Double precision float registers have virtual `high halves' that
 762 // are needed by the allocator.
 763 // Class for all double registers
// Each double occupies the two lowest 32-bit slots (Vn, Vn_H).
reg_class double_reg(
    V0, V0_H,
    V1, V1_H,
    V2, V2_H,
    V3, V3_H,
    V4, V4_H,
    V5, V5_H,
    V6, V6_H,
    V7, V7_H,
    V8, V8_H,
    V9, V9_H,
    V10, V10_H,
    V11, V11_H,
    V12, V12_H,
    V13, V13_H,
    V14, V14_H,
    V15, V15_H,
    V16, V16_H,
    V17, V17_H,
    V18, V18_H,
    V19, V19_H,
    V20, V20_H,
    V21, V21_H,
    V22, V22_H,
    V23, V23_H,
    V24, V24_H,
    V25, V25_H,
    V26, V26_H,
    V27, V27_H,
    V28, V28_H,
    V29, V29_H,
    V30, V30_H,
    V31, V31_H
);
 798 
 799 // Class for all 64bit vector registers
// A 64-bit vector occupies the two lowest 32-bit slots (Vn, Vn_H) --
// the same footprint as double_reg.
reg_class vectord_reg(
    V0, V0_H,
    V1, V1_H,
    V2, V2_H,
    V3, V3_H,
    V4, V4_H,
    V5, V5_H,
    V6, V6_H,
    V7, V7_H,
    V8, V8_H,
    V9, V9_H,
    V10, V10_H,
    V11, V11_H,
    V12, V12_H,
    V13, V13_H,
    V14, V14_H,
    V15, V15_H,
    V16, V16_H,
    V17, V17_H,
    V18, V18_H,
    V19, V19_H,
    V20, V20_H,
    V21, V21_H,
    V22, V22_H,
    V23, V23_H,
    V24, V24_H,
    V25, V25_H,
    V26, V26_H,
    V27, V27_H,
    V28, V28_H,
    V29, V29_H,
    V30, V30_H,
    V31, V31_H
);
 834 
 835 // Class for all 128bit vector registers
// A 128-bit vector occupies all four 32-bit slots (Vn .. Vn_K).
reg_class vectorx_reg(
    V0, V0_H, V0_J, V0_K,
    V1, V1_H, V1_J, V1_K,
    V2, V2_H, V2_J, V2_K,
    V3, V3_H, V3_J, V3_K,
    V4, V4_H, V4_J, V4_K,
    V5, V5_H, V5_J, V5_K,
    V6, V6_H, V6_J, V6_K,
    V7, V7_H, V7_J, V7_K,
    V8, V8_H, V8_J, V8_K,
    V9, V9_H, V9_J, V9_K,
    V10, V10_H, V10_J, V10_K,
    V11, V11_H, V11_J, V11_K,
    V12, V12_H, V12_J, V12_K,
    V13, V13_H, V13_J, V13_K,
    V14, V14_H, V14_J, V14_K,
    V15, V15_H, V15_J, V15_K,
    V16, V16_H, V16_J, V16_K,
    V17, V17_H, V17_J, V17_K,
    V18, V18_H, V18_J, V18_K,
    V19, V19_H, V19_J, V19_K,
    V20, V20_H, V20_J, V20_K,
    V21, V21_H, V21_J, V21_K,
    V22, V22_H, V22_J, V22_K,
    V23, V23_H, V23_J, V23_K,
    V24, V24_H, V24_J, V24_K,
    V25, V25_H, V25_J, V25_K,
    V26, V26_H, V26_J, V26_K,
    V27, V27_H, V27_J, V27_K,
    V28, V28_H, V28_J, V28_K,
    V29, V29_H, V29_J, V29_K,
    V30, V30_H, V30_J, V30_K,
    V31, V31_H, V31_J, V31_K
);
 870 
 871 // Class for 128 bit register v0
 872 reg_class v0_reg(
 873     V0, V0_H
 874 );
 875 
// Class for 128 bit register v1
// NOTE(review): only the V1/V1_H slot pair is listed, unlike
// vectorx_reg which also includes V1_J/V1_K -- confirm intentional
reg_class v1_reg(
    V1, V1_H
);
 880 
// Class for 128 bit register v2
// NOTE(review): only the V2/V2_H slot pair is listed, unlike
// vectorx_reg which also includes V2_J/V2_K -- confirm intentional
reg_class v2_reg(
    V2, V2_H
);
 885 
// Class for 128 bit register v3
// NOTE(review): only the V3/V3_H slot pair is listed, unlike
// vectorx_reg which also includes V3_J/V3_K -- confirm intentional
reg_class v3_reg(
    V3, V3_H
);
 890 
// Singleton class for condition codes
// (contains only the flags register RFLAGS)
reg_class int_flags(RFLAGS);
 893 
 894 %}
 895 
 896 //----------DEFINITION BLOCK---------------------------------------------------
 897 // Define name --> value mappings to inform the ADLC of an integer valued name
 898 // Current support includes integer values in the range [0, 0x7FFFFFFF]
 899 // Format:
 900 //        int_def  <name>         ( <int_value>, <expression>);
 901 // Generated Code in ad_<arch>.hpp
 902 //        #define  <name>   (<expression>)
 903 //        // value == <int_value>
 904 // Generated code in ad_<arch>.cpp adlc_verification()
 905 //        assert( <name> == <int_value>, "Expect (<expression>) to equal <int_value>");
 906 //
 907 
 908 // we follow the ppc-aix port in using a simple cost model which ranks
 909 // register operations as cheap, memory ops as more expensive and
 910 // branches as most expensive. the first two have a low as well as a
 911 // normal cost. huge cost appears to be a way of saying don't do
 912 // something
 913 
// Cost values used by the instruction rules; see the cost model note
// above: register ops cheap, memory more expensive, branches most
// expensive.
definitions %{
  // The default cost (of a register move instruction).
  int_def INSN_COST            (    100,     100);
  // branches and calls rank at twice the default cost
  int_def BRANCH_COST          (    200,     2 * INSN_COST);
  int_def CALL_COST            (    200,     2 * INSN_COST);
  // volatile references carry the highest (non-huge) cost
  int_def VOLATILE_REF_COST    (   1000,     10 * INSN_COST);
%}
 921 
 922 
 923 //----------SOURCE BLOCK-------------------------------------------------------
 924 // This is a block of C++ code which provides values, functions, and
 925 // definitions necessary in the rest of the architecture description
 926 
 927 source_hpp %{
 928 
 929 #include "opto/addnode.hpp"
 930 
// Platform hooks queried by Compile::shorten_branches. This port
// emits no call trampoline stubs, so both queries report zero.
class CallStubImpl {

  //--------------------------------------------------------------
  //---<  Used for optimization in Compile::shorten_branches  >---
  //--------------------------------------------------------------

 public:
  // Size of call trampoline stub.
  static uint size_call_trampoline() {
    return 0; // no call trampolines on this platform
  }

  // number of relocations needed by a call trampoline stub
  static uint reloc_call_trampoline() {
    return 0; // no call trampolines on this platform
  }
};
 948 
// Platform hooks providing the sizes and emitters for the exception
// and deopt handler stubs.
class HandlerImpl {

 public:

  // handler emitters (implementations not visible in this chunk)
  static int emit_exception_handler(CodeBuffer &cbuf);
  static int emit_deopt_handler(CodeBuffer& cbuf);

  // the exception handler is a single far branch
  static uint size_exception_handler() {
    return MacroAssembler::far_branch_size();
  }

  static uint size_deopt_handler() {
    // count one adr and one far branch instruction
    // return 4 * NativeInstruction::instruction_size;
    return NativeInstruction::instruction_size + MacroAssembler::far_branch_size();
  }
};
 966 
  // returns true if opcode is an atomic CAS/read-modify-write node
  // (see the implementation in the source block below)
  bool is_CAS(int opcode);

  // predicates controlling emit of ldr<x>/ldar<x> and associated dmb

  bool unnecessary_acquire(const Node *barrier);
  bool needs_acquiring_load(const Node *load);

  // predicates controlling emit of str<x>/stlr<x> and associated dmbs

  bool unnecessary_release(const Node *barrier);
  bool unnecessary_volatile(const Node *barrier);
  bool needs_releasing_store(const Node *store);

  // predicate controlling translation of CompareAndSwapX
  bool needs_acquiring_load_exclusive(const Node *load);

  // predicate controlling translation of StoreCM
  bool unnecessary_storestore(const Node *storecm);

  // predicate controlling addressing modes
  bool size_fits_all_mem_uses(AddPNode* addp, int shift);
 988 %}
 989 
 990 source %{
 991 
  // Optimization of volatile gets and puts
 993   // -------------------------------------
 994   //
 995   // AArch64 has ldar<x> and stlr<x> instructions which we can safely
 996   // use to implement volatile reads and writes. For a volatile read
 997   // we simply need
 998   //
 999   //   ldar<x>
1000   //
1001   // and for a volatile write we need
1002   //
1003   //   stlr<x>
1004   // 
1005   // Alternatively, we can implement them by pairing a normal
1006   // load/store with a memory barrier. For a volatile read we need
1007   // 
1008   //   ldr<x>
1009   //   dmb ishld
1010   //
1011   // for a volatile write
1012   //
1013   //   dmb ish
1014   //   str<x>
1015   //   dmb ish
1016   //
1017   // We can also use ldaxr and stlxr to implement compare and swap CAS
1018   // sequences. These are normally translated to an instruction
1019   // sequence like the following
1020   //
1021   //   dmb      ish
1022   // retry:
1023   //   ldxr<x>   rval raddr
1024   //   cmp       rval rold
1025   //   b.ne done
  //   stlxr<x>  rval, rnew, [raddr]
1027   //   cbnz      rval retry
1028   // done:
1029   //   cset      r0, eq
1030   //   dmb ishld
1031   //
1032   // Note that the exclusive store is already using an stlxr
1033   // instruction. That is required to ensure visibility to other
1034   // threads of the exclusive write (assuming it succeeds) before that
1035   // of any subsequent writes.
1036   //
1037   // The following instruction sequence is an improvement on the above
1038   //
1039   // retry:
1040   //   ldaxr<x>  rval raddr
1041   //   cmp       rval rold
1042   //   b.ne done
  //   stlxr<x>  rval, rnew, [raddr]
1044   //   cbnz      rval retry
1045   // done:
1046   //   cset      r0, eq
1047   //
1048   // We don't need the leading dmb ish since the stlxr guarantees
1049   // visibility of prior writes in the case that the swap is
1050   // successful. Crucially we don't have to worry about the case where
1051   // the swap is not successful since no valid program should be
1052   // relying on visibility of prior changes by the attempting thread
1053   // in the case where the CAS fails.
1054   //
1055   // Similarly, we don't need the trailing dmb ishld if we substitute
1056   // an ldaxr instruction since that will provide all the guarantees we
1057   // require regarding observation of changes made by other threads
1058   // before any change to the CAS address observed by the load.
1059   //
1060   // In order to generate the desired instruction sequence we need to
1061   // be able to identify specific 'signature' ideal graph node
  // sequences which i) occur as a translation of volatile reads or
1063   // writes or CAS operations and ii) do not occur through any other
1064   // translation or graph transformation. We can then provide
  // alternative adlc matching rules which translate these node
1066   // sequences to the desired machine code sequences. Selection of the
1067   // alternative rules can be implemented by predicates which identify
1068   // the relevant node sequences.
1069   //
1070   // The ideal graph generator translates a volatile read to the node
1071   // sequence
1072   //
1073   //   LoadX[mo_acquire]
1074   //   MemBarAcquire
1075   //
1076   // As a special case when using the compressed oops optimization we
1077   // may also see this variant
1078   //
1079   //   LoadN[mo_acquire]
1080   //   DecodeN
1081   //   MemBarAcquire
1082   //
1083   // A volatile write is translated to the node sequence
1084   //
1085   //   MemBarRelease
1086   //   StoreX[mo_release] {CardMark}-optional
1087   //   MemBarVolatile
1088   //
1089   // n.b. the above node patterns are generated with a strict
1090   // 'signature' configuration of input and output dependencies (see
1091   // the predicates below for exact details). The card mark may be as
1092   // simple as a few extra nodes or, in a few GC configurations, may
1093   // include more complex control flow between the leading and
1094   // trailing memory barriers. However, whatever the card mark
1095   // configuration these signatures are unique to translated volatile
1096   // reads/stores -- they will not appear as a result of any other
1097   // bytecode translation or inlining nor as a consequence of
1098   // optimizing transforms.
1099   //
1100   // We also want to catch inlined unsafe volatile gets and puts and
1101   // be able to implement them using either ldar<x>/stlr<x> or some
1102   // combination of ldr<x>/stlr<x> and dmb instructions.
1103   //
1104   // Inlined unsafe volatiles puts manifest as a minor variant of the
1105   // normal volatile put node sequence containing an extra cpuorder
1106   // membar
1107   //
1108   //   MemBarRelease
1109   //   MemBarCPUOrder
1110   //   StoreX[mo_release] {CardMark}-optional
1111   //   MemBarVolatile
1112   //
1113   // n.b. as an aside, the cpuorder membar is not itself subject to
1114   // matching and translation by adlc rules.  However, the rule
1115   // predicates need to detect its presence in order to correctly
1116   // select the desired adlc rules.
1117   //
1118   // Inlined unsafe volatile gets manifest as a somewhat different
1119   // node sequence to a normal volatile get
1120   //
1121   //   MemBarCPUOrder
1122   //        ||       \\
1123   //   MemBarAcquire LoadX[mo_acquire]
1124   //        ||
1125   //   MemBarCPUOrder
1126   //
1127   // In this case the acquire membar does not directly depend on the
1128   // load. However, we can be sure that the load is generated from an
1129   // inlined unsafe volatile get if we see it dependent on this unique
1130   // sequence of membar nodes. Similarly, given an acquire membar we
1131   // can know that it was added because of an inlined unsafe volatile
1132   // get if it is fed and feeds a cpuorder membar and if its feed
1133   // membar also feeds an acquiring load.
1134   //
1135   // Finally an inlined (Unsafe) CAS operation is translated to the
1136   // following ideal graph
1137   //
1138   //   MemBarRelease
1139   //   MemBarCPUOrder
1140   //   CompareAndSwapX {CardMark}-optional
1141   //   MemBarCPUOrder
1142   //   MemBarAcquire
1143   //
1144   // So, where we can identify these volatile read and write
1145   // signatures we can choose to plant either of the above two code
1146   // sequences. For a volatile read we can simply plant a normal
1147   // ldr<x> and translate the MemBarAcquire to a dmb. However, we can
1148   // also choose to inhibit translation of the MemBarAcquire and
1149   // inhibit planting of the ldr<x>, instead planting an ldar<x>.
1150   //
1151   // When we recognise a volatile store signature we can choose to
1152   // plant at a dmb ish as a translation for the MemBarRelease, a
1153   // normal str<x> and then a dmb ish for the MemBarVolatile.
1154   // Alternatively, we can inhibit translation of the MemBarRelease
1155   // and MemBarVolatile and instead plant a simple stlr<x>
1156   // instruction.
1157   //
1158   // when we recognise a CAS signature we can choose to plant a dmb
1159   // ish as a translation for the MemBarRelease, the conventional
1160   // macro-instruction sequence for the CompareAndSwap node (which
1161   // uses ldxr<x>) and then a dmb ishld for the MemBarAcquire.
1162   // Alternatively, we can elide generation of the dmb instructions
1163   // and plant the alternative CompareAndSwap macro-instruction
1164   // sequence (which uses ldaxr<x>).
1165   // 
1166   // Of course, the above only applies when we see these signature
1167   // configurations. We still want to plant dmb instructions in any
1168   // other cases where we may see a MemBarAcquire, MemBarRelease or
1169   // MemBarVolatile. For example, at the end of a constructor which
1170   // writes final/volatile fields we will see a MemBarRelease
1171   // instruction and this needs a 'dmb ish' lest we risk the
1172   // constructed object being visible without making the
1173   // final/volatile field writes visible.
1174   //
1175   // n.b. the translation rules below which rely on detection of the
1176   // volatile signatures and insert ldar<x> or stlr<x> are failsafe.
1177   // If we see anything other than the signature configurations we
1178   // always just translate the loads and stores to ldr<x> and str<x>
1179   // and translate acquire, release and volatile membars to the
1180   // relevant dmb instructions.
1181   //
1182 
1183   // is_CAS(int opcode)
1184   //
1185   // return true if opcode is one of the possible CompareAndSwapX
1186   // values otherwise false.
1187 
1188   bool is_CAS(int opcode)
1189   {
1190     switch(opcode) {
1191     // We handle these
1192     case Op_CompareAndSwapI:
1193     case Op_CompareAndSwapL:
1194     case Op_CompareAndSwapP:
1195     case Op_CompareAndSwapN:
1196     case Op_GetAndSetI:
1197     case Op_GetAndSetL:
1198     case Op_GetAndSetP:
1199     case Op_GetAndSetN:
1200     case Op_GetAndAddI:
1201     case Op_GetAndAddL:
1202       return true;
1203     default:
1204       return false;
1205     }
1206   }
1207 
// predicates controlling emit of ldr<x>/ldar<x> and associated dmb

// Returns true when the MemBarAcquire can be elided because the
// associated load (or CAS) will itself be emitted in an acquiring
// form (ldar<x> / ldaxr<x>), making the explicit dmb redundant.
bool unnecessary_acquire(const Node *barrier)
{
  assert(barrier->is_MemBar(), "expecting a membar");

  if (UseBarriersForVolatile) {
    // we need to plant a dmb
    return false;
  }

  MemBarNode* mb = barrier->as_MemBar();

  // trailing membar of a volatile read: the load is acquiring
  if (mb->trailing_load()) {
    return true;
  }

  // trailing membar of a CAS sequence: elide only for the opcodes
  // which translate to a ldaxr/stlxr macro sequence (see is_CAS)
  if (mb->trailing_load_store()) {
    Node* load_store = mb->in(MemBarNode::Precedent);
    assert(load_store->is_LoadStore(), "unexpected graph shape");
    return is_CAS(load_store->Opcode());
  }

  return false;
}
1233 
1234 bool needs_acquiring_load(const Node *n)
1235 {
1236   assert(n->is_Load(), "expecting a load");
1237   if (UseBarriersForVolatile) {
1238     // we use a normal load and a dmb
1239     return false;
1240   }
1241 
1242   LoadNode *ld = n->as_Load();
1243 
1244   return ld->is_acquire();
1245 }
1246 
1247 bool unnecessary_release(const Node *n)
1248 {
1249   assert((n->is_MemBar() &&
1250           n->Opcode() == Op_MemBarRelease),
1251          "expecting a release membar");
1252 
1253   if (UseBarriersForVolatile) {
1254     // we need to plant a dmb
1255     return false;
1256   }
1257 
1258   MemBarNode *barrier = n->as_MemBar();
1259 
1260   if (!barrier->leading()) {
1261     return false;
1262   } else {
1263     Node* trailing = barrier->trailing_membar();
1264     MemBarNode* trailing_mb = trailing->as_MemBar();
1265     assert(trailing_mb->trailing(), "Not a trailing membar?");
1266     assert(trailing_mb->leading_membar() == n, "inconsistent leading/trailing membars");
1267 
1268     Node* mem = trailing_mb->in(MemBarNode::Precedent);
1269     if (mem->is_Store()) {
1270       assert(mem->as_Store()->is_release(), "");
1271       assert(trailing_mb->Opcode() == Op_MemBarVolatile, "");
1272       return true;
1273     } else {
1274       assert(mem->is_LoadStore(), "");
1275       assert(trailing_mb->Opcode() == Op_MemBarAcquire, "");
1276       return is_CAS(mem->Opcode());
1277     }
1278   }
1279 
1280   return false;
1281 }
1282 
// Returns true when the trailing MemBarVolatile of a volatile store
// can be elided because the store will be emitted as stlr<x>.
bool unnecessary_volatile(const Node *n)
{
  // assert n->is_MemBar();
  if (UseBarriersForVolatile) {
    // we need to plant a dmb
    return false;
  }

  MemBarNode *mbvol = n->as_MemBar();

  // only elide when this is the trailing membar of a volatile store
  // signature
  bool release = mbvol->trailing_store();
  assert(!release || (mbvol->in(MemBarNode::Precedent)->is_Store() && mbvol->in(MemBarNode::Precedent)->as_Store()->is_release()), "");
#ifdef ASSERT
  if (release) {
    // sanity check the leading/trailing membar pairing
    Node* leading = mbvol->leading_membar();
    assert(leading->Opcode() == Op_MemBarRelease, "");
    assert(leading->as_MemBar()->leading_store(), "");
    assert(leading->as_MemBar()->trailing_membar() == mbvol, "");
   }
#endif

  return release;
}
1306 
1307 // predicates controlling emit of str<x>/stlr<x> and associated dmbs
1308 
1309 bool needs_releasing_store(const Node *n)
1310 {
1311   // assert n->is_Store();
1312   if (UseBarriersForVolatile) {
1313     // we use a normal store and dmb combination
1314     return false;
1315   }
1316 
1317   StoreNode *st = n->as_Store();
1318 
1319   return st->trailing_membar() != NULL;
1320 }
1321 
1322 // predicate controlling translation of CAS
1323 //
1324 // returns true if CAS needs to use an acquiring load otherwise false
1325 
1326 bool needs_acquiring_load_exclusive(const Node *n)
1327 {
1328   assert(is_CAS(n->Opcode()), "expecting a compare and swap");
1329   if (UseBarriersForVolatile) {
1330     return false;
1331   }
1332 
1333   LoadStoreNode* ldst = n->as_LoadStore();
1334   assert(ldst->trailing_membar() != NULL, "expected trailing membar");
1335 
1336   // so we can just return true here
1337   return true;
1338 }
1339 
// predicate controlling translation of StoreCM
//
// returns true if the StoreStore barrier normally associated with
// the card write is unnecessary and may be elided, otherwise false
// (n.b. the previous comment here had the sense inverted)

bool unnecessary_storestore(const Node *storecm)
{
  assert(storecm->Opcode()  == Op_StoreCM, "expecting a StoreCM");

  // we need to generate a dmb ishst between an object put and the
  // associated card mark when we are using CMS without conditional
  // card marking

  if (UseConcMarkSweepGC && !UseCondCardMark) {
    return false;
  }

  // a storestore is unnecessary in all other cases

  return true;
}
1361 
1362 
1363 #define __ _masm.
1364 
// forward declarations for helper functions to convert register
// indices to register objects
1367 
1368 // the ad file has to provide implementations of certain methods
1369 // expected by the generic code
1370 //
1371 // REQUIRED FUNCTIONALITY
1372 
1373 //=============================================================================
1374 
1375 // !!!!! Special hack to get all types of calls to specify the byte offset
1376 //       from the start of the call to the point where the return address
1377 //       will point.
1378 
1379 int MachCallStaticJavaNode::ret_addr_offset()
1380 {
1381   // call should be a simple bl
1382   // unless this is a method handle invoke in which case it is
1383   // mov(rfp, sp), bl, mov(sp, rfp)
1384   int off = 4;
1385   if (_method_handle_invoke) {
1386     off += 4;
1387   }
1388   return off;
1389 }
1390 
int MachCallDynamicJavaNode::ret_addr_offset()
{
  // a dynamic call is four 4-byte instructions
  return 16; // movz, movk, movk, bl
}
1395 
1396 int MachCallRuntimeNode::ret_addr_offset() {
1397   // for generated stubs the call will be
1398   //   bl(addr)
1399   // for real runtime callouts it will be six instructions
1400   // see aarch64_enc_java_to_runtime
1401   //   adr(rscratch2, retaddr)
1402   //   lea(rscratch1, RuntimeAddress(addr)
1403   //   stp(zr, rscratch2, Address(__ pre(sp, -2 * wordSize)))
1404   //   blr(rscratch1)
1405   CodeBlob *cb = CodeCache::find_blob(_entry_point);
1406   if (cb) {
1407     return MacroAssembler::far_branch_size();
1408   } else {
1409     return 6 * NativeInstruction::instruction_size;
1410   }
1411 }
1412 
1413 // Indicate if the safepoint node needs the polling page as an input
1414 
1415 // the shared code plants the oop data at the start of the generated
// code for the safepoint node and that needs to be at the load
1417 // instruction itself. so we cannot plant a mov of the safepoint poll
1418 // address followed by a load. setting this to true means the mov is
1419 // scheduled as a prior instruction. that's better for scheduling
1420 // anyway.
1421 
bool SafePointNode::needs_polling_address_input()
{
  // see the comment above: the poll address is a separate input so
  // the mov can be scheduled ahead of the load
  return true;
}
1426 
1427 //=============================================================================
1428 
1429 #ifndef PRODUCT
// debug printout for a breakpoint node
void MachBreakpointNode::format(PhaseRegAlloc *ra_, outputStream *st) const {
  st->print("BREAKPOINT");
}
1433 #endif
1434 
void MachBreakpointNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
  // plant a brk instruction with immediate 0
  MacroAssembler _masm(&cbuf);
  __ brk(0);
}
1439 
uint MachBreakpointNode::size(PhaseRegAlloc *ra_) const {
  // let the generic code compute the size from the emitted code
  return MachNode::size(ra_);
}
1443 
1444 //=============================================================================
1445 
1446 #ifndef PRODUCT
  // debug printout for a nop padding node
  // NOTE(review): the printed value is _count, which emit()/size()
  // treat as a count of nop instructions, while the text says
  // "bytes" -- confirm which is intended
  void MachNopNode::format(PhaseRegAlloc*, outputStream* st) const {
    st->print("nop \t# %d bytes pad for loops and calls", _count);
  }
1450 #endif
1451 
1452   void MachNopNode::emit(CodeBuffer &cbuf, PhaseRegAlloc*) const {
1453     MacroAssembler _masm(&cbuf);
1454     for (int i = 0; i < _count; i++) { 
1455       __ nop();
1456     }
1457   }
1458 
  uint MachNopNode::size(PhaseRegAlloc*) const {
    // one 4-byte instruction per nop
    return _count * NativeInstruction::instruction_size;
  }
1462 
1463 //=============================================================================
1464 const RegMask& MachConstantBaseNode::_out_RegMask = RegMask::Empty;
1465 
int Compile::ConstantTable::calculate_table_base_offset() const {
  return 0;  // absolute addressing, no offset
}
1469 
1470 bool MachConstantBaseNode::requires_postalloc_expand() const { return false; }
// never called since requires_postalloc_expand() returns false
void MachConstantBaseNode::postalloc_expand(GrowableArray <Node *> *nodes, PhaseRegAlloc *ra_) {
  ShouldNotReachHere();
}
1474 
void MachConstantBaseNode::emit(CodeBuffer& cbuf, PhaseRegAlloc* ra_) const {
  // Empty encoding: no code needed to establish the table base with
  // absolute addressing
}
1478 
uint MachConstantBaseNode::size(PhaseRegAlloc* ra_) const {
  // emits no code (see emit() above), so its size is zero
  return 0;
}
1482 
1483 #ifndef PRODUCT
// debug printout marking the (empty) constant base node
void MachConstantBaseNode::format(PhaseRegAlloc* ra_, outputStream* st) const {
  st->print("-- \t// MachConstantBaseNode (empty encoding)");
}
1487 #endif
1488 
1489 #ifndef PRODUCT
1490 void MachPrologNode::format(PhaseRegAlloc *ra_, outputStream *st) const {
1491   Compile* C = ra_->C;
1492 
1493   int framesize = C->frame_slots() << LogBytesPerInt;
1494 
1495   if (C->need_stack_bang(framesize))
1496     st->print("# stack bang size=%d\n\t", framesize);
1497 
1498   if (framesize == 0) {
1499     // Is this even possible?
1500     st->print("stp  lr, rfp, [sp, #%d]!", -(2 * wordSize)); 
1501   } else if (framesize < ((1 << 9) + 2 * wordSize)) {
1502     st->print("sub  sp, sp, #%d\n\t", framesize);
1503     st->print("stp  rfp, lr, [sp, #%d]", framesize - 2 * wordSize);
1504   } else {
1505     st->print("stp  lr, rfp, [sp, #%d]!\n\t", -(2 * wordSize)); 
1506     st->print("mov  rscratch1, #%d\n\t", framesize - 2 * wordSize);
1507     st->print("sub  sp, sp, rscratch1");
1508   }
1509 }
1510 #endif
1511 
// Emit the method prolog: patchable nop, optional stack bang, and
// frame construction.
void MachPrologNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
  Compile* C = ra_->C;
  MacroAssembler _masm(&cbuf);

  // n.b. frame size includes space for return pc and rfp
  long framesize = ((long)C->frame_slots()) << LogBytesPerInt;
  assert(framesize%(2*wordSize) == 0, "must preserve 2*wordSize alignment");

  // insert a nop at the start of the prolog so we can patch in a
  // branch if we need to invalidate the method later
  __ nop();

  if (C->need_stack_bang(framesize))
    __ generate_stack_overflow_check(framesize);

  __ build_frame(framesize);

  if (VerifyStackAtCalls) {
    Unimplemented();
  }

  // record where the frame becomes complete (after frame build)
  C->set_frame_complete(cbuf.insts_size());

  if (C->has_mach_constant_base_node()) {
    // NOTE: We set the table base offset here because users might be
    // emitted before MachConstantBaseNode.
    Compile::ConstantTable& constant_table = C->constant_table();
    constant_table.set_table_base_offset(constant_table.calculate_table_base_offset());
  }
}
1542 
uint MachPrologNode::size(PhaseRegAlloc* ra_) const
{
  // prolog size varies with frame size and stack-bang needs
  return MachNode::size(ra_); // too many variables; just compute it
                              // the hard way
}
1548 
int MachPrologNode::reloc() const
{
  // the prolog records no relocation entries
  return 0;
}
1553 
1554 //=============================================================================
1555 
1556 #ifndef PRODUCT
// debug printout mirroring the frame-teardown code in emit() below
void MachEpilogNode::format(PhaseRegAlloc *ra_, outputStream *st) const {
  Compile* C = ra_->C;
  // frame size in bytes (frame_slots counts 32-bit words)
  int framesize = C->frame_slots() << LogBytesPerInt;

  st->print("# pop frame %d\n\t",framesize);

  if (framesize == 0) {
    st->print("ldp  lr, rfp, [sp],#%d\n\t", (2 * wordSize));
  } else if (framesize < ((1 << 9) + 2 * wordSize)) {
    // frame offset fits the ldp immediate range
    st->print("ldp  lr, rfp, [sp,#%d]\n\t", framesize - 2 * wordSize);
    st->print("add  sp, sp, #%d\n\t", framesize);
  } else {
    // large frame: adjust sp via a scratch register
    st->print("mov  rscratch1, #%d\n\t", framesize - 2 * wordSize);
    st->print("add  sp, sp, rscratch1\n\t");
    st->print("ldp  lr, rfp, [sp],#%d\n\t", (2 * wordSize));
  }

  if (do_polling() && C->is_method_compilation()) {
    st->print("# touch polling page\n\t");
    st->print("mov  rscratch1, #" INTPTR_FORMAT "\n\t", p2i(os::get_polling_page()));
    st->print("ldr zr, [rscratch1]");
  }
}
1580 #endif
1581 
// Emit the method epilog: frame teardown plus (for method
// compilations) a return-poll of the safepoint polling page.
void MachEpilogNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
  Compile* C = ra_->C;
  MacroAssembler _masm(&cbuf);
  int framesize = C->frame_slots() << LogBytesPerInt;

  __ remove_frame(framesize);

  if (do_polling() && C->is_method_compilation()) {
    __ read_polling_page(rscratch1, os::get_polling_page(), relocInfo::poll_return_type);
  }
}
1593 
uint MachEpilogNode::size(PhaseRegAlloc *ra_) const {
  // Variable size. Determine dynamically.
  return MachNode::size(ra_);
}
1598 
int MachEpilogNode::reloc() const {
  // Return number of relocatable values contained in this instruction.
  // (the polling page address loaded in emit() is relocatable)
  return 1; // 1 for polling page.
}
1603 
const Pipeline * MachEpilogNode::pipeline() const {
  // use the default pipeline class for scheduling
  return MachNode::pipeline_class();
}
1607 
1608 // This method seems to be obsolete. It is declared in machnode.hpp
1609 // and defined in all *.ad files, but it is never called. Should we
1610 // get rid of it?
// see the note above: this method appears to be obsolete
int MachEpilogNode::safepoint_offset() const {
  assert(do_polling(), "no return for this epilog node");
  // 4 bytes == one instruction into the epilog
  return 4;
}
1615 
1616 //=============================================================================
1617 
// Figure out which register class each belongs in: rc_int, rc_float or
// rc_stack.
// (rc_bad marks OptoReg::Bad, i.e. no register -- see rc_class below)
enum RC { rc_bad, rc_int, rc_float, rc_stack };
1621 
// Map an allocator register index to its spill class.  The bounds
// below depend on the slot counts declared in the register classes
// above.
static enum RC rc_class(OptoReg::Name reg) {

  if (reg == OptoReg::Bad) {
    return rc_bad;
  }

  // we have 30 int registers * 2 halves
  // (rscratch1 and rscratch2 are omitted)

  if (reg < 60) {
    return rc_int;
  }

  // we have 32 float registers, each contributing 4 allocator slots
  // (Vn, Vn_H, Vn_J, Vn_K -- see vectorx_reg), i.e. 128 slots
  // (n.b. the previous comment said "2 halves", which does not match
  // the bound of 128)
  if (reg < 60 + 128) {
    return rc_float;
  }

  // Between float regs & stack is the flags regs.
  assert(OptoReg::is_stack(reg), "blow up if spilling flags");

  return rc_stack;
}
1645 
1646 uint MachSpillCopyNode::implementation(CodeBuffer *cbuf, PhaseRegAlloc *ra_, bool do_size, outputStream *st) const {
1647   Compile* C = ra_->C;
1648 
1649   // Get registers to move.
1650   OptoReg::Name src_hi = ra_->get_reg_second(in(1));
1651   OptoReg::Name src_lo = ra_->get_reg_first(in(1));
1652   OptoReg::Name dst_hi = ra_->get_reg_second(this);
1653   OptoReg::Name dst_lo = ra_->get_reg_first(this);
1654 
1655   enum RC src_hi_rc = rc_class(src_hi);
1656   enum RC src_lo_rc = rc_class(src_lo);
1657   enum RC dst_hi_rc = rc_class(dst_hi);
1658   enum RC dst_lo_rc = rc_class(dst_lo);
1659 
1660   assert(src_lo != OptoReg::Bad && dst_lo != OptoReg::Bad, "must move at least 1 register");
1661 
1662   if (src_hi != OptoReg::Bad) {
1663     assert((src_lo&1)==0 && src_lo+1==src_hi &&
1664            (dst_lo&1)==0 && dst_lo+1==dst_hi,
1665            "expected aligned-adjacent pairs");
1666   }
1667 
1668   if (src_lo == dst_lo && src_hi == dst_hi) {
1669     return 0;            // Self copy, no move.
1670   }
1671 
1672   bool is64 = (src_lo & 1) == 0 && src_lo + 1 == src_hi &&
1673               (dst_lo & 1) == 0 && dst_lo + 1 == dst_hi;
1674   int src_offset = ra_->reg2offset(src_lo);
1675   int dst_offset = ra_->reg2offset(dst_lo);
1676 
1677   if (bottom_type()->isa_vect() != NULL) {
1678     uint ireg = ideal_reg();
1679     assert(ireg == Op_VecD || ireg == Op_VecX, "must be 64 bit or 128 bit vector");
1680     if (cbuf) {
1681       MacroAssembler _masm(cbuf);
1682       assert((src_lo_rc != rc_int && dst_lo_rc != rc_int), "sanity");
1683       if (src_lo_rc == rc_stack && dst_lo_rc == rc_stack) {
1684         // stack->stack
1685         assert((src_offset & 7) == 0 && (dst_offset & 7) == 0, "unaligned stack offset");
1686         if (ireg == Op_VecD) {
1687           __ unspill(rscratch1, true, src_offset);
1688           __ spill(rscratch1, true, dst_offset);
1689         } else {
1690           __ spill_copy128(src_offset, dst_offset);
1691         }
1692       } else if (src_lo_rc == rc_float && dst_lo_rc == rc_float) {
1693         __ mov(as_FloatRegister(Matcher::_regEncode[dst_lo]),
1694                ireg == Op_VecD ? __ T8B : __ T16B,
1695                as_FloatRegister(Matcher::_regEncode[src_lo]));
1696       } else if (src_lo_rc == rc_float && dst_lo_rc == rc_stack) {
1697         __ spill(as_FloatRegister(Matcher::_regEncode[src_lo]),
1698                        ireg == Op_VecD ? __ D : __ Q,
1699                        ra_->reg2offset(dst_lo));
1700       } else if (src_lo_rc == rc_stack && dst_lo_rc == rc_float) {
1701         __ unspill(as_FloatRegister(Matcher::_regEncode[dst_lo]),
1702                        ireg == Op_VecD ? __ D : __ Q,
1703                        ra_->reg2offset(src_lo));
1704       } else {
1705         ShouldNotReachHere();
1706       }
1707     }
1708   } else if (cbuf) {
1709     MacroAssembler _masm(cbuf);
1710     switch (src_lo_rc) {
1711     case rc_int:
1712       if (dst_lo_rc == rc_int) {  // gpr --> gpr copy
1713         if (is64) {
1714             __ mov(as_Register(Matcher::_regEncode[dst_lo]),
1715                    as_Register(Matcher::_regEncode[src_lo]));
1716         } else {
1717             MacroAssembler _masm(cbuf);
1718             __ movw(as_Register(Matcher::_regEncode[dst_lo]),
1719                     as_Register(Matcher::_regEncode[src_lo]));
1720         }
1721       } else if (dst_lo_rc == rc_float) { // gpr --> fpr copy
1722         if (is64) {
1723             __ fmovd(as_FloatRegister(Matcher::_regEncode[dst_lo]),
1724                      as_Register(Matcher::_regEncode[src_lo]));
1725         } else {
1726             __ fmovs(as_FloatRegister(Matcher::_regEncode[dst_lo]),
1727                      as_Register(Matcher::_regEncode[src_lo]));
1728         }
1729       } else {                    // gpr --> stack spill
1730         assert(dst_lo_rc == rc_stack, "spill to bad register class");
1731         __ spill(as_Register(Matcher::_regEncode[src_lo]), is64, dst_offset);
1732       }
1733       break;
1734     case rc_float:
1735       if (dst_lo_rc == rc_int) {  // fpr --> gpr copy
1736         if (is64) {
1737             __ fmovd(as_Register(Matcher::_regEncode[dst_lo]),
1738                      as_FloatRegister(Matcher::_regEncode[src_lo]));
1739         } else {
1740             __ fmovs(as_Register(Matcher::_regEncode[dst_lo]),
1741                      as_FloatRegister(Matcher::_regEncode[src_lo]));
1742         }
1743       } else if (dst_lo_rc == rc_float) { // fpr --> fpr copy
1744           if (cbuf) {
1745             __ fmovd(as_FloatRegister(Matcher::_regEncode[dst_lo]),
1746                      as_FloatRegister(Matcher::_regEncode[src_lo]));
1747         } else {
1748             __ fmovs(as_FloatRegister(Matcher::_regEncode[dst_lo]),
1749                      as_FloatRegister(Matcher::_regEncode[src_lo]));
1750         }
1751       } else {                    // fpr --> stack spill
1752         assert(dst_lo_rc == rc_stack, "spill to bad register class");
1753         __ spill(as_FloatRegister(Matcher::_regEncode[src_lo]),
1754                  is64 ? __ D : __ S, dst_offset);
1755       }
1756       break;
1757     case rc_stack:
1758       if (dst_lo_rc == rc_int) {  // stack --> gpr load
1759         __ unspill(as_Register(Matcher::_regEncode[dst_lo]), is64, src_offset);
1760       } else if (dst_lo_rc == rc_float) { // stack --> fpr load
1761         __ unspill(as_FloatRegister(Matcher::_regEncode[dst_lo]),
1762                    is64 ? __ D : __ S, src_offset);
1763       } else {                    // stack --> stack copy
1764         assert(dst_lo_rc == rc_stack, "spill to bad register class");
1765         __ unspill(rscratch1, is64, src_offset);
1766         __ spill(rscratch1, is64, dst_offset);
1767       }
1768       break;
1769     default:
1770       assert(false, "bad rc_class for spill");
1771       ShouldNotReachHere();
1772     }
1773   }
1774 
1775   if (st) {
1776     st->print("spill ");
1777     if (src_lo_rc == rc_stack) {
1778       st->print("[sp, #%d] -> ", ra_->reg2offset(src_lo));
1779     } else {
1780       st->print("%s -> ", Matcher::regName[src_lo]);
1781     }
1782     if (dst_lo_rc == rc_stack) {
1783       st->print("[sp, #%d]", ra_->reg2offset(dst_lo));
1784     } else {
1785       st->print("%s", Matcher::regName[dst_lo]);
1786     }
1787     if (bottom_type()->isa_vect() != NULL) {
1788       st->print("\t# vector spill size = %d", ideal_reg()==Op_VecD ? 64:128);
1789     } else {
1790       st->print("\t# spill size = %d", is64 ? 64:32);
1791     }
1792   }
1793 
1794   return 0;
1795 
1796 }
1797 
1798 #ifndef PRODUCT
1799 void MachSpillCopyNode::format(PhaseRegAlloc *ra_, outputStream *st) const {
1800   if (!ra_)
1801     st->print("N%d = SpillCopy(N%d)", _idx, in(1)->_idx);
1802   else
1803     implementation(NULL, ra_, false, st);
1804 }
1805 #endif
1806 
// Emit the spill/copy instructions into the code buffer (no textual output).
void MachSpillCopyNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
  implementation(&cbuf, ra_, false, NULL);
}
1810 
// Size in bytes of the emitted spill code; delegate to the generic
// MachNode sizing.
uint MachSpillCopyNode::size(PhaseRegAlloc *ra_) const {
  return MachNode::size(ra_);
}
1814 
1815 //=============================================================================
1816 
1817 #ifndef PRODUCT
1818 void BoxLockNode::format(PhaseRegAlloc *ra_, outputStream *st) const {
1819   int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
1820   int reg = ra_->get_reg_first(this);
1821   st->print("add %s, rsp, #%d]\t# box lock",
1822             Matcher::regName[reg], offset);
1823 }
1824 #endif
1825 
void BoxLockNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
  MacroAssembler _masm(&cbuf);

  // Materialize the stack address of the lock box: reg = sp + offset.
  int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
  int reg    = ra_->get_encode(this);

  // This add will handle any 24-bit signed offset. 24 bits allows an
  // 8 megabyte stack frame.
  __ add(as_Register(reg), sp, offset);
}
1836 
uint BoxLockNode::size(PhaseRegAlloc *ra_) const {
  // BoxLockNode is not a MachNode, so we can't just call MachNode::size(ra_).
  int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());

  // One instruction when the offset fits an add/sub immediate, otherwise
  // two -- NOTE(review): presumably matching how MacroAssembler::add in
  // the emit path above expands large immediates; confirm.
  if (Assembler::operand_valid_for_add_sub_immediate(offset)) {
    return NativeInstruction::instruction_size;
  } else {
    return 2 * NativeInstruction::instruction_size;
  }
}
1847 
1848 //=============================================================================
1849 
1850 #ifndef PRODUCT
1851 void MachUEPNode::format(PhaseRegAlloc* ra_, outputStream* st) const
1852 {
1853   st->print_cr("# MachUEPNode");
1854   if (UseCompressedClassPointers) {
1855     st->print_cr("\tldrw rscratch1, j_rarg0 + oopDesc::klass_offset_in_bytes()]\t# compressed klass");
1856     if (Universe::narrow_klass_shift() != 0) {
1857       st->print_cr("\tdecode_klass_not_null rscratch1, rscratch1");
1858     }
1859   } else {
1860    st->print_cr("\tldr rscratch1, j_rarg0 + oopDesc::klass_offset_in_bytes()]\t# compressed klass");
1861   }
1862   st->print_cr("\tcmp r0, rscratch1\t # Inline cache check");
1863   st->print_cr("\tbne, SharedRuntime::_ic_miss_stub");
1864 }
1865 #endif
1866 
void MachUEPNode::emit(CodeBuffer& cbuf, PhaseRegAlloc* ra_) const
{
  // This is the unverified entry point.
  MacroAssembler _masm(&cbuf);

  // Compare the receiver's klass (loaded via j_rarg0) against the
  // expected klass in rscratch2 (rscratch1 used as a temp, presumably).
  __ cmp_klass(j_rarg0, rscratch2, rscratch1);
  Label skip;
  // TODO
  // can we avoid this skip and still use a reloc?
  // On mismatch, tail-jump to the inline-cache miss stub.
  __ br(Assembler::EQ, skip);
  __ far_jump(RuntimeAddress(SharedRuntime::get_ic_miss_stub()));
  __ bind(skip);
}
1880 
// Size in bytes of the UEP code; delegate to the generic MachNode sizing.
uint MachUEPNode::size(PhaseRegAlloc* ra_) const
{
  return MachNode::size(ra_);
}
1885 
1886 // REQUIRED EMIT CODE
1887 
1888 //=============================================================================
1889 
1890 // Emit exception handler code.
// Returns the offset of the handler within the stub section, or 0 on
// failure (code cache full).
int HandlerImpl::emit_exception_handler(CodeBuffer& cbuf)
{
  // mov rscratch1 #exception_blob_entry_point
  // br rscratch1
  // Note that the code buffer's insts_mark is always relative to insts.
  // That's why we must use the macroassembler to generate a handler.
  MacroAssembler _masm(&cbuf);
  // Reserve stub space; NULL means the code cache is exhausted.
  address base = __ start_a_stub(size_exception_handler());
  if (base == NULL) {
    ciEnv::current()->record_failure("CodeCache is full");
    return 0;  // CodeBuffer::expand failed
  }
  int offset = __ offset();
  __ far_jump(RuntimeAddress(OptoRuntime::exception_blob()->entry_point()));
  assert(__ offset() - offset <= (int) size_exception_handler(), "overflow");
  __ end_a_stub();
  return offset;
}
1909 
1910 // Emit deopt handler code.
// Returns the offset of the handler within the stub section, or 0 on
// failure (code cache full).
int HandlerImpl::emit_deopt_handler(CodeBuffer& cbuf)
{
  // Note that the code buffer's insts_mark is always relative to insts.
  // That's why we must use the macroassembler to generate a handler.
  MacroAssembler _masm(&cbuf);
  address base = __ start_a_stub(size_deopt_handler());
  if (base == NULL) {
    ciEnv::current()->record_failure("CodeCache is full");
    return 0;  // CodeBuffer::expand failed
  }
  int offset = __ offset();

  // Materialize the current pc in lr, then far-jump to the deopt blob's
  // unpack entry -- NOTE(review): the blob appears to use lr to identify
  // the deopt site; confirm against SharedRuntime::deopt_blob().
  __ adr(lr, __ pc());
  __ far_jump(RuntimeAddress(SharedRuntime::deopt_blob()->unpack()));

  assert(__ offset() - offset <= (int) size_deopt_handler(), "overflow");
  __ end_a_stub();
  return offset;
}
1930 
1931 // REQUIRED MATCHER CODE
1932 
1933 //=============================================================================
1934 
1935 const bool Matcher::match_rule_supported(int opcode) {
1936 
1937   // TODO 
1938   // identify extra cases that we might want to provide match rules for
1939   // e.g. Op_StrEquals and other intrinsics
1940   if (!has_match_rule(opcode)) {
1941     return false;
1942   }
1943 
1944   return true;  // Per default match rules are supported.
1945 }
1946 
// Not implemented on AArch64; aborts if ever called.
int Matcher::regnum_to_fpu_offset(int regnum)
{
  Unimplemented();
  return 0;
}
1952 
1953 // Is this branch offset short enough that a short branch can be used?
1954 //
1955 // NOTE: If the platform does not provide any short branch variants, then
1956 //       this method should return false for offset 0.
1957 bool Matcher::is_short_branch_offset(int rule, int br_size, int offset) {
1958   // The passed offset is relative to address of the branch.
1959 
1960   return (-32768 <= offset && offset < 32768);
1961 }
1962 
const bool Matcher::isSimpleConstant64(jlong value) {
  // Will one (StoreL ConL) be cheaper than two (StoreI ConI)?
  // Probably always true, even if a temp register is required.
  return true;
}

// true just means we have fast l2f conversion
const bool Matcher::convL2FSupported(void) {
  return true;
}
1973 
1974 // Vector width in bytes.
1975 const int Matcher::vector_width_in_bytes(BasicType bt) {
1976   int size = MIN2(16,(int)MaxVectorSize);
1977   // Minimum 2 values in vector
1978   if (size < 2*type2aelembytes(bt)) size = 0;
1979   // But never < 4
1980   if (size < 4) size = 0;
1981   return size;
1982 }
1983 
// Limits on vector size (number of elements) loaded into vector.
const int Matcher::max_vector_size(const BasicType bt) {
  // Max element count = total vector width / element width.
  return vector_width_in_bytes(bt)/type2aelembytes(bt);
}
1988 const int Matcher::min_vector_size(const BasicType bt) {
1989 //  For the moment limit the vector size to 8 bytes
1990     int size = 8 / type2aelembytes(bt);
1991     if (size < 2) size = 2;
1992     return size;
1993 }
1994 
1995 // Vector ideal reg.
1996 const uint Matcher::vector_ideal_reg(int len) {
1997   switch(len) {
1998     case  8: return Op_VecD;
1999     case 16: return Op_VecX;
2000   }
2001   ShouldNotReachHere();
2002   return 0;
2003 }
2004 
2005 const uint Matcher::vector_shift_count_ideal_reg(int size) {
2006   switch(size) {
2007     case  8: return Op_VecD;
2008     case 16: return Op_VecX;
2009   }
2010   ShouldNotReachHere();
2011   return 0;
2012 }
2013 
// AES support not yet implemented
const bool Matcher::pass_original_key_for_aes() {
  return false;
}

// Misaligned vector loads/stores are allowed unless the AlignVector
// flag requires alignment.  (Original comment said "x86" -- inherited
// from the x86 AD file.)
const bool Matcher::misaligned_vectors_ok() {
  return !AlignVector; // can be changed by flag
}
2023 
// false => size gets scaled to BytesPerLong, ok.
const bool Matcher::init_array_count_is_in_bytes = false;

// Threshold size for cleararray.
const int Matcher::init_array_short_size = 4 * BytesPerLong;

// Use conditional move (CMOVL)
const int Matcher::long_cmove_cost() {
  // long cmoves are no more expensive than int cmoves
  return 0;
}

// Use conditional move for floats/doubles.
const int Matcher::float_cmove_cost() {
  // float cmoves are no more expensive than int cmoves
  return 0;
}
2040 
// Does the CPU require late expand (see block.cpp for description of late expand)?
const bool Matcher::require_postalloc_expand = false;

// Should the Matcher clone shifts on addressing modes, expecting them
// to be subsumed into complex addressing expressions or compute them
// into registers?  True for Intel but false for most RISCs
const bool Matcher::clone_shift_expressions = false;

// Do we need to mask the count passed to shift instructions or does
// the cpu only look at the lower 5/6 bits anyway?
// false here: no explicit masking is emitted.
const bool Matcher::need_masked_shift_count = false;
2052 
// This affects two different things:
//  - how Decode nodes are matched
//  - how ImplicitNullCheck opportunities are recognized
// If true, the matcher will try to remove all Decodes and match them
// (as operands) into nodes. NullChecks are not prepared to deal with 
// Decodes by final_graph_reshaping().
// If false, final_graph_reshaping() forces the decode behind the Cmp
// for a NullCheck. The matcher matches the Decode node into a register.
// Implicit_null_check optimization moves the Decode along with the 
// memory operation back up before the NullCheck.
bool Matcher::narrow_oop_use_complex_address() {
  // Only when no decode shift is required.
  return Universe::narrow_oop_shift() == 0;
}

bool Matcher::narrow_klass_use_complex_address() {
// TODO
// decide whether we need to set this to true
  return false;
}
2072 
// Is it better to copy float constants, or load them directly from
// memory?  Intel can load a float constant from a direct address,
// requiring no extra registers.  Most RISCs will have to materialize
// an address into a register first, so they would do better to copy
// the constant from stack.
const bool Matcher::rematerialize_float_constants = false;

// If CPU can load and store mis-aligned doubles directly then no
// fixup is needed.  Else we split the double into 2 integer pieces
// and move it piece-by-piece.  Only happens when passing doubles into
// C code as the Java calling convention forces doubles to be aligned.
const bool Matcher::misaligned_doubles_ok = true;

// Not implemented on AArch64; aborts if ever called.
// (Previous comment "No-op on amd64" was inherited from the x86 file.)
void Matcher::pd_implicit_null_fixup(MachNode *node, uint idx) {
  Unimplemented();
}

// Advertise here if the CPU requires explicit rounding operations to
// implement the UseStrictFP mode.
const bool Matcher::strict_fp_requires_explicit_rounding = false;

// Are floats converted to double when stored to stack during
// deoptimization?
bool Matcher::float_in_double() { return true; }

// Do ints take an entire long register or just half?
// The relevant question is how the int is callee-saved:
// the whole long is written but de-opt'ing will have to extract
// the relevant 32 bits.
const bool Matcher::int_in_long = true;
2104 
// Return whether or not this register is ever used as an argument.
// This function is used on startup to build the trampoline stubs in
// generateOptoStub.  Registers not mentioned will be killed by the VM
// call in the trampoline, and arguments in those registers not be
// available to the callee.
bool Matcher::can_be_java_arg(int reg)
{
  // Integer args r0-r7 and FP/SIMD args v0-v7 (low and high halves).
  return
    reg ==  R0_num || reg == R0_H_num ||
    reg ==  R1_num || reg == R1_H_num ||
    reg ==  R2_num || reg == R2_H_num ||
    reg ==  R3_num || reg == R3_H_num ||
    reg ==  R4_num || reg == R4_H_num ||
    reg ==  R5_num || reg == R5_H_num ||
    reg ==  R6_num || reg == R6_H_num ||
    reg ==  R7_num || reg == R7_H_num ||
    reg ==  V0_num || reg == V0_H_num ||
    reg ==  V1_num || reg == V1_H_num ||
    reg ==  V2_num || reg == V2_H_num ||
    reg ==  V3_num || reg == V3_H_num ||
    reg ==  V4_num || reg == V4_H_num ||
    reg ==  V5_num || reg == V5_H_num ||
    reg ==  V6_num || reg == V6_H_num ||
    reg ==  V7_num || reg == V7_H_num;
}

// Any Java argument register may also hold a spilled value.
bool Matcher::is_spillable_arg(int reg)
{
  return can_be_java_arg(reg);
}
2135 
// No assembler fast path for long division by constant.
bool Matcher::use_asm_for_ldiv_by_con(jlong divisor) {
  return false;
}

// Register for DIVI projection of divmodI.
// (divmod projections are unused on AArch64; these all abort.)
RegMask Matcher::divI_proj_mask() {
  ShouldNotReachHere();
  return RegMask();
}

// Register for MODI projection of divmodI.
RegMask Matcher::modI_proj_mask() {
  ShouldNotReachHere();
  return RegMask();
}

// Register for DIVL projection of divmodL.
RegMask Matcher::divL_proj_mask() {
  ShouldNotReachHere();
  return RegMask();
}

// Register for MODL projection of divmodL.
RegMask Matcher::modL_proj_mask() {
  ShouldNotReachHere();
  return RegMask();
}

// Mask of the register that preserves SP across a method-handle invoke
// (the frame pointer here).
const RegMask Matcher::method_handle_invoke_SP_save_mask() {
  return FP_REG_mask();
}
2166 
2167 bool size_fits_all_mem_uses(AddPNode* addp, int shift) {
2168   for (DUIterator_Fast imax, i = addp->fast_outs(imax); i < imax; i++) {
2169     Node* u = addp->fast_out(i);
2170     if (u->is_LoadStore()) {
2171       // On AArch64, LoadStoreNodes (i.e. compare and swap
2172       // instructions) only take register indirect as an operand, so
2173       // any attempt to use an AddPNode as an input to a LoadStoreNode
2174       // must fail.
2175       return false;
2176     }
2177     if (u->is_Mem()) {
2178       int opsize = u->as_Mem()->memory_size();
2179       assert(opsize > 0, "unexpected memory operand size");
2180       if (u->as_Mem()->memory_size() != (1<<shift)) {
2181         return false;
2182       }
2183     }
2184   }
2185   return true;
2186 }
2187 
// Emit a volatile memory access.  Only plain register-indirect
// addressing is permitted; the guarantees enforce that no index,
// displacement, or scale was matched.
#define MOV_VOLATILE(REG, BASE, INDEX, SCALE, DISP, SCRATCH, INSN)      \
  MacroAssembler _masm(&cbuf);                                              \
  {                                                                     \
    guarantee(INDEX == -1, "mode not permitted for volatile");          \
    guarantee(DISP == 0, "mode not permitted for volatile");            \
    guarantee(SCALE == 0, "mode not permitted for volatile");           \
    __ INSN(REG, as_Register(BASE));                                    \
  }

// Pointer-to-member types for the MacroAssembler load/store routines
// taken by the loadStore() helpers below.
typedef void (MacroAssembler::* mem_insn)(Register Rt, const Address &adr);
typedef void (MacroAssembler::* mem_float_insn)(FloatRegister Rt, const Address &adr);
typedef void (MacroAssembler::* mem_vector_insn)(FloatRegister Rt,
                                  MacroAssembler::SIMD_RegVariant T, const Address &adr);
2201 
2202   // Used for all non-volatile memory accesses.  The use of
2203   // $mem->opcode() to discover whether this pattern uses sign-extended
2204   // offsets is something of a kludge.
2205   static void loadStore(MacroAssembler masm, mem_insn insn,
2206                          Register reg, int opcode,
2207                          Register base, int index, int size, int disp)
2208   {
2209     Address::extend scale;
2210 
2211     // Hooboy, this is fugly.  We need a way to communicate to the
2212     // encoder that the index needs to be sign extended, so we have to
2213     // enumerate all the cases.
2214     switch (opcode) {
2215     case INDINDEXSCALEDOFFSETI2L:
2216     case INDINDEXSCALEDI2L:
2217     case INDINDEXSCALEDOFFSETI2LN:
2218     case INDINDEXSCALEDI2LN:
2219     case INDINDEXOFFSETI2L:
2220     case INDINDEXOFFSETI2LN:
2221       scale = Address::sxtw(size);
2222       break;
2223     default:
2224       scale = Address::lsl(size);
2225     }
2226 
2227     if (index == -1) {
2228       (masm.*insn)(reg, Address(base, disp));
2229     } else {
2230       if (disp == 0) {
2231         (masm.*insn)(reg, Address(base, as_Register(index), scale));
2232       } else {
2233         masm.lea(rscratch1, Address(base, disp));
2234         (masm.*insn)(reg, Address(rscratch1, as_Register(index), scale));
2235       }
2236     }
2237   }
2238 
2239   static void loadStore(MacroAssembler masm, mem_float_insn insn,
2240                          FloatRegister reg, int opcode,
2241                          Register base, int index, int size, int disp)
2242   {
2243     Address::extend scale;
2244 
2245     switch (opcode) {
2246     case INDINDEXSCALEDOFFSETI2L:
2247     case INDINDEXSCALEDI2L:
2248     case INDINDEXSCALEDOFFSETI2LN:
2249     case INDINDEXSCALEDI2LN:
2250       scale = Address::sxtw(size);
2251       break;
2252     default:
2253       scale = Address::lsl(size);
2254     }
2255 
2256      if (index == -1) {
2257       (masm.*insn)(reg, Address(base, disp));
2258     } else {
2259       if (disp == 0) {
2260         (masm.*insn)(reg, Address(base, as_Register(index), scale));
2261       } else {
2262         masm.lea(rscratch1, Address(base, disp));
2263         (masm.*insn)(reg, Address(rscratch1, as_Register(index), scale));
2264       }
2265     }
2266   }
2267 
  // Vector variant of loadStore(): register-indirect with either a
  // displacement or a scaled index, but not both (asserted below).
  static void loadStore(MacroAssembler masm, mem_vector_insn insn,
                         FloatRegister reg, MacroAssembler::SIMD_RegVariant T,
                         int opcode, Register base, int index, int size, int disp)
  {
    if (index == -1) {
      (masm.*insn)(reg, T, Address(base, disp));
    } else {
      assert(disp == 0, "unsupported address mode");
      (masm.*insn)(reg, T, Address(base, as_Register(index), Address::lsl(size)));
    }
  }
2279 
2280 %}
2281 
2282 
2283 
2284 //----------ENCODING BLOCK-----------------------------------------------------
2285 // This block specifies the encoding classes used by the compiler to
2286 // output byte streams.  Encoding classes are parameterized macros
2287 // used by Machine Instruction Nodes in order to generate the bit
2288 // encoding of the instruction.  Operands specify their base encoding
// interface with the interface keyword.  Four interfaces are
// currently supported: REG_INTER, CONST_INTER, MEMORY_INTER, and
// COND_INTER.  REG_INTER causes an operand to generate a function
2292 // which returns its register number when queried.  CONST_INTER causes
2293 // an operand to generate a function which returns the value of the
2294 // constant when queried.  MEMORY_INTER causes an operand to generate
2295 // four functions which return the Base Register, the Index Register,
2296 // the Scale Value, and the Offset Value of the operand when queried.
2297 // COND_INTER causes an operand to generate six functions which return
2298 // the encoding code (ie - encoding bits for the instruction)
2299 // associated with each basic boolean condition for a conditional
2300 // instruction.
2301 //
2302 // Instructions specify two basic values for encoding.  Again, a
2303 // function is available to check if the constant displacement is an
2304 // oop. They use the ins_encode keyword to specify their encoding
2305 // classes (which must be a sequence of enc_class names, and their
2306 // parameters, specified in the encoding block), and they use the
2307 // opcode keyword to specify, in order, their primary, secondary, and
2308 // tertiary opcode.  Only the opcode sections which a particular
2309 // instruction needs for encoding need to be specified.
2310 encode %{
2311   // Build emit functions for each basic byte or larger field in the
2312   // intel encoding scheme (opcode, rm, sib, immediate), and call them
2313   // from C++ code in the enc_class source block.  Emit functions will
2314   // live in the main source block for now.  In future, we can
2315   // generalize this by adding a syntax that specifies the sizes of
2316   // fields in an order, so that the adlc can build the emit functions
2317   // automagically
2318 
  // Catch-all for unimplemented encodings: stops the VM via
  // MacroAssembler::unimplemented if this encoding is ever emitted.
  enc_class enc_unimplemented %{
    MacroAssembler _masm(&cbuf);
    __ unimplemented("C2 catch all");
  %}
2324 
2325   // BEGIN Non-volatile memory access
2326 
2327   enc_class aarch64_enc_ldrsbw(iRegI dst, memory mem) %{
2328     Register dst_reg = as_Register($dst$$reg);
2329     loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldrsbw, dst_reg, $mem->opcode(),
2330                as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
2331   %}
2332 
2333   enc_class aarch64_enc_ldrsb(iRegI dst, memory mem) %{
2334     Register dst_reg = as_Register($dst$$reg);
2335     loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldrsb, dst_reg, $mem->opcode(),
2336                as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
2337   %}
2338 
2339   enc_class aarch64_enc_ldrb(iRegI dst, memory mem) %{
2340     Register dst_reg = as_Register($dst$$reg);
2341     loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldrb, dst_reg, $mem->opcode(),
2342                as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
2343   %}
2344 
2345   enc_class aarch64_enc_ldrb(iRegL dst, memory mem) %{
2346     Register dst_reg = as_Register($dst$$reg);
2347     loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldrb, dst_reg, $mem->opcode(),
2348                as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
2349   %}
2350 
2351   enc_class aarch64_enc_ldrshw(iRegI dst, memory mem) %{
2352     Register dst_reg = as_Register($dst$$reg);
2353     loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldrshw, dst_reg, $mem->opcode(),
2354                as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
2355   %}
2356 
2357   enc_class aarch64_enc_ldrsh(iRegI dst, memory mem) %{
2358     Register dst_reg = as_Register($dst$$reg);
2359     loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldrsh, dst_reg, $mem->opcode(),
2360                as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
2361   %}
2362 
2363   enc_class aarch64_enc_ldrh(iRegI dst, memory mem) %{
2364     Register dst_reg = as_Register($dst$$reg);
2365     loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldrh, dst_reg, $mem->opcode(),
2366                as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
2367   %}
2368 
2369   enc_class aarch64_enc_ldrh(iRegL dst, memory mem) %{
2370     Register dst_reg = as_Register($dst$$reg);
2371     loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldrh, dst_reg, $mem->opcode(),
2372                as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
2373   %}
2374 
2375   enc_class aarch64_enc_ldrw(iRegI dst, memory mem) %{
2376     Register dst_reg = as_Register($dst$$reg);
2377     loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldrw, dst_reg, $mem->opcode(),
2378                as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
2379   %}
2380 
2381   enc_class aarch64_enc_ldrw(iRegL dst, memory mem) %{
2382     Register dst_reg = as_Register($dst$$reg);
2383     loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldrw, dst_reg, $mem->opcode(),
2384                as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
2385   %}
2386 
2387   enc_class aarch64_enc_ldrsw(iRegL dst, memory mem) %{
2388     Register dst_reg = as_Register($dst$$reg);
2389     loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldrsw, dst_reg, $mem->opcode(),
2390                as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
2391   %}
2392 
2393   enc_class aarch64_enc_ldr(iRegL dst, memory mem) %{
2394     Register dst_reg = as_Register($dst$$reg);
2395     loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldr, dst_reg, $mem->opcode(),
2396                as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
2397   %}
2398 
2399   enc_class aarch64_enc_ldrs(vRegF dst, memory mem) %{
2400     FloatRegister dst_reg = as_FloatRegister($dst$$reg);
2401     loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldrs, dst_reg, $mem->opcode(),
2402                as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
2403   %}
2404 
2405   enc_class aarch64_enc_ldrd(vRegD dst, memory mem) %{
2406     FloatRegister dst_reg = as_FloatRegister($dst$$reg);
2407     loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldrd, dst_reg, $mem->opcode(),
2408                as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
2409   %}
2410 
2411   enc_class aarch64_enc_ldrvS(vecD dst, memory mem) %{
2412     FloatRegister dst_reg = as_FloatRegister($dst$$reg);
2413     loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldr, dst_reg, MacroAssembler::S,
2414        $mem->opcode(), as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
2415   %}
2416 
2417   enc_class aarch64_enc_ldrvD(vecD dst, memory mem) %{
2418     FloatRegister dst_reg = as_FloatRegister($dst$$reg);
2419     loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldr, dst_reg, MacroAssembler::D,
2420        $mem->opcode(), as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
2421   %}
2422 
2423   enc_class aarch64_enc_ldrvQ(vecX dst, memory mem) %{
2424     FloatRegister dst_reg = as_FloatRegister($dst$$reg);
2425     loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldr, dst_reg, MacroAssembler::Q,
2426        $mem->opcode(), as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
2427   %}
2428 
2429   enc_class aarch64_enc_strb(iRegI src, memory mem) %{
2430     Register src_reg = as_Register($src$$reg);
2431     loadStore(MacroAssembler(&cbuf), &MacroAssembler::strb, src_reg, $mem->opcode(),
2432                as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
2433   %}
2434 
2435   enc_class aarch64_enc_strb0(memory mem) %{
2436     MacroAssembler _masm(&cbuf);
2437     loadStore(_masm, &MacroAssembler::strb, zr, $mem->opcode(),
2438                as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
2439   %}
2440 
2441   enc_class aarch64_enc_strb0_ordered(memory mem) %{
2442     MacroAssembler _masm(&cbuf);
2443     __ membar(Assembler::StoreStore);
2444     loadStore(_masm, &MacroAssembler::strb, zr, $mem->opcode(),
2445                as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
2446   %}
2447 
2448   enc_class aarch64_enc_strh(iRegI src, memory mem) %{
2449     Register src_reg = as_Register($src$$reg);
2450     loadStore(MacroAssembler(&cbuf), &MacroAssembler::strh, src_reg, $mem->opcode(),
2451                as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
2452   %}
2453 
2454   enc_class aarch64_enc_strh0(memory mem) %{
2455     MacroAssembler _masm(&cbuf);
2456     loadStore(_masm, &MacroAssembler::strh, zr, $mem->opcode(),
2457                as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
2458   %}
2459 
2460   enc_class aarch64_enc_strw(iRegI src, memory mem) %{
2461     Register src_reg = as_Register($src$$reg);
2462     loadStore(MacroAssembler(&cbuf), &MacroAssembler::strw, src_reg, $mem->opcode(),
2463                as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
2464   %}
2465 
2466   enc_class aarch64_enc_strw0(memory mem) %{
2467     MacroAssembler _masm(&cbuf);
2468     loadStore(_masm, &MacroAssembler::strw, zr, $mem->opcode(),
2469                as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
2470   %}
2471 
  enc_class aarch64_enc_str(iRegL src, memory mem) %{
    Register src_reg = as_Register($src$$reg);
    // we sometimes get asked to store the stack pointer into the
    // current thread -- we cannot do that directly on AArch64
    // (presumably str cannot take sp as its source register), so the
    // value is routed through rscratch2 instead.
    if (src_reg == r31_sp) {
      MacroAssembler _masm(&cbuf);
      assert(as_Register($mem$$base) == rthread, "unexpected store for sp");
      __ mov(rscratch2, sp);
      src_reg = rscratch2;
    }
    loadStore(MacroAssembler(&cbuf), &MacroAssembler::str, src_reg, $mem->opcode(),
               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}
2485 
  // Store zero (zr) to memory; the access width is selected from the
  // ideal opcode by loadStore.
  enc_class aarch64_enc_str0(memory mem) %{
    MacroAssembler _masm(&cbuf);
    loadStore(_masm, &MacroAssembler::str, zr, $mem->opcode(),
               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}

  // Store a single-precision float register to memory.
  enc_class aarch64_enc_strs(vRegF src, memory mem) %{
    FloatRegister src_reg = as_FloatRegister($src$$reg);
    loadStore(MacroAssembler(&cbuf), &MacroAssembler::strs, src_reg, $mem->opcode(),
               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}

  // Store a double-precision float register to memory.
  enc_class aarch64_enc_strd(vRegD src, memory mem) %{
    FloatRegister src_reg = as_FloatRegister($src$$reg);
    loadStore(MacroAssembler(&cbuf), &MacroAssembler::strd, src_reg, $mem->opcode(),
               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}

  // Store a 32-bit (S) vector register slice to memory.
  enc_class aarch64_enc_strvS(vecD src, memory mem) %{
    FloatRegister src_reg = as_FloatRegister($src$$reg);
    loadStore(MacroAssembler(&cbuf), &MacroAssembler::str, src_reg, MacroAssembler::S,
       $mem->opcode(), as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}

  // Store a 64-bit (D) vector register to memory.
  enc_class aarch64_enc_strvD(vecD src, memory mem) %{
    FloatRegister src_reg = as_FloatRegister($src$$reg);
    loadStore(MacroAssembler(&cbuf), &MacroAssembler::str, src_reg, MacroAssembler::D,
       $mem->opcode(), as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}

  // Store a 128-bit (Q) vector register to memory.
  enc_class aarch64_enc_strvQ(vecX src, memory mem) %{
    FloatRegister src_reg = as_FloatRegister($src$$reg);
    loadStore(MacroAssembler(&cbuf), &MacroAssembler::str, src_reg, MacroAssembler::Q,
       $mem->opcode(), as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}
2521 
2522   // END Non-volatile memory access
2523 
2524   // this encoding writes the address of the first instruction in the
2525   // call sequence for the runtime call into the anchor pc slot. this
2526   // address allows the runtime to i) locate the code buffer for the
2527   // caller (any address in the buffer would do) and ii) find the oop
2528   // map associated with the call (has to address the instruction
2529   // following the call). note that we have to store the address which
2530   // follows the actual call.
2531   // 
2532   // the offset from the current pc can be computed by considering
2533   // what gets generated between this point up to and including the
2534   // call. it looks like this
2535   //
2536   //   movz xscratch1 0xnnnn        <-- current pc is here
2537   //   movk xscratch1 0xnnnn
2538   //   movk xscratch1 0xnnnn
2539   //   str xscratch1, [xthread,#anchor_pc_off]
2540   //   mov xscratch2, sp
  //   str xscratch2, [xthread,#anchor_sp_off]
2542   //   mov x0, x1
2543   //   . . .
2544   //   mov xn-1, xn
2545   //   mov xn, thread            <-- always passed
2546   //   mov xn+1, rfp             <-- optional iff primary == 1
2547   //   movz xscratch1 0xnnnn
2548   //   movk xscratch1 0xnnnn
2549   //   movk xscratch1 0xnnnn
2550   //   blr  xscratch1
2551   //   . . .
2552   //
2553   // where the called routine has n args (including the thread and,
2554   // possibly the stub's caller return address currently in rfp).  we
2555   // can compute n by looking at the number of args passed into the
  // stub. we assert that nargs is <= 8 (see aarch64_enc_save_pc below).
2557   //
2558   // so the offset we need to add to the pc (in 32-bit words) is
2559   //   3 +        <-- load 48-bit constant return pc
2560   //   1 +        <-- write anchor pc
2561   //   1 +        <-- copy sp
2562   //   1 +        <-- write anchor sp
2563   //   nargs +    <-- java stub arg count
2564   //   1 +        <-- extra thread arg
2565   // [ 1 + ]      <-- optional ret address of stub caller
2566   //   3 +        <-- load 64 bit call target address
2567   //   1          <-- blr instruction
2568   //
2569   // i.e we need to add (nargs + 11) * 4 bytes or (nargs + 12) * 4 bytes
2570   //
2571 
  // Write the return pc of the upcoming runtime call into the thread's
  // frame-anchor last_Java_pc slot. The pc is computed as a fixed offset
  // from here; the offset depends on the exact instruction sequence
  // itemized in the comment above -- keep them in sync.
  enc_class aarch64_enc_save_pc() %{
    Compile* C = ra_->C;
    // number of stub args; primary == 1 adds one for the stub caller's
    // return address passed in rfp
    int nargs = C->tf()->domain()->cnt() - TypeFunc::Parms;
    if ($primary) { nargs++; }
    assert(nargs <= 8, "opto runtime stub has more than 8 args!");
    MacroAssembler _masm(&cbuf);
    address pc = __ pc();
    // (nargs + 11) instructions lie between this point and the address
    // just after the blr (see itemized count in the comment above)
    int call_offset = (nargs + 11) * 4;
    int field_offset = in_bytes(JavaThread::frame_anchor_offset()) +
                       in_bytes(JavaFrameAnchor::last_Java_pc_offset());
    __ lea(rscratch1, InternalAddress(pc + call_offset));
    __ str(rscratch1, Address(rthread, field_offset));
  %}
2585 
2586   // volatile loads and stores
2587 
  // Store-release byte. MOV_VOLATILE forms the address in rscratch1 and
  // emits the stlrb.
  // NOTE(review): the trailing dmb when CPU_DMB_ATOMICS is set presumably
  // strengthens ordering on CPUs where stlr alone is insufficient -- verify
  // against VM_Version.
  enc_class aarch64_enc_stlrb(iRegI src, memory mem) %{
    MOV_VOLATILE(as_Register($src$$reg), $mem$$base, $mem$$index, $mem$$scale, $mem$$disp,
                 rscratch1, stlrb);
    if (VM_Version::cpu_cpuFeatures() & VM_Version::CPU_DMB_ATOMICS)
      __ dmb(__ ISH);
  %}

  // Store-release halfword (see note on aarch64_enc_stlrb re the dmb).
  enc_class aarch64_enc_stlrh(iRegI src, memory mem) %{
    MOV_VOLATILE(as_Register($src$$reg), $mem$$base, $mem$$index, $mem$$scale, $mem$$disp,
                 rscratch1, stlrh);
    if (VM_Version::cpu_cpuFeatures() & VM_Version::CPU_DMB_ATOMICS)
      __ dmb(__ ISH);
  %}

  // Store-release 32-bit word (see note on aarch64_enc_stlrb re the dmb).
  enc_class aarch64_enc_stlrw(iRegI src, memory mem) %{
    MOV_VOLATILE(as_Register($src$$reg), $mem$$base, $mem$$index, $mem$$scale, $mem$$disp,
                 rscratch1, stlrw);
    if (VM_Version::cpu_cpuFeatures() & VM_Version::CPU_DMB_ATOMICS)
      __ dmb(__ ISH);
  %}
2608 
2609 
  // Load-acquire byte, then sign-extend into a 32-bit register.
  enc_class aarch64_enc_ldarsbw(iRegI dst, memory mem) %{
    Register dst_reg = as_Register($dst$$reg);
    MOV_VOLATILE(dst_reg, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp,
             rscratch1, ldarb);
    __ sxtbw(dst_reg, dst_reg);
  %}

  // Load-acquire byte, then sign-extend into a 64-bit register.
  enc_class aarch64_enc_ldarsb(iRegL dst, memory mem) %{
    Register dst_reg = as_Register($dst$$reg);
    MOV_VOLATILE(dst_reg, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp,
             rscratch1, ldarb);
    __ sxtb(dst_reg, dst_reg);
  %}

  // Load-acquire byte into a 32-bit register (ldarb zero-extends).
  enc_class aarch64_enc_ldarbw(iRegI dst, memory mem) %{
    MOV_VOLATILE(as_Register($dst$$reg), $mem$$base, $mem$$index, $mem$$scale, $mem$$disp,
             rscratch1, ldarb);
  %}

  // Load-acquire byte into a 64-bit register.
  enc_class aarch64_enc_ldarb(iRegL dst, memory mem) %{
    MOV_VOLATILE(as_Register($dst$$reg), $mem$$base, $mem$$index, $mem$$scale, $mem$$disp,
             rscratch1, ldarb);
  %}

  // Load-acquire halfword, then sign-extend into a 32-bit register.
  enc_class aarch64_enc_ldarshw(iRegI dst, memory mem) %{
    Register dst_reg = as_Register($dst$$reg);
    MOV_VOLATILE(dst_reg, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp,
             rscratch1, ldarh);
    __ sxthw(dst_reg, dst_reg);
  %}

  // Load-acquire halfword, then sign-extend into a 64-bit register.
  enc_class aarch64_enc_ldarsh(iRegL dst, memory mem) %{
    Register dst_reg = as_Register($dst$$reg);
    MOV_VOLATILE(dst_reg, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp,
             rscratch1, ldarh);
    __ sxth(dst_reg, dst_reg);
  %}

  // Load-acquire halfword into a 32-bit register.
  enc_class aarch64_enc_ldarhw(iRegI dst, memory mem) %{
    MOV_VOLATILE(as_Register($dst$$reg), $mem$$base, $mem$$index, $mem$$scale, $mem$$disp,
             rscratch1, ldarh);
  %}

  // Load-acquire halfword into a 64-bit register.
  enc_class aarch64_enc_ldarh(iRegL dst, memory mem) %{
    MOV_VOLATILE(as_Register($dst$$reg), $mem$$base, $mem$$index, $mem$$scale, $mem$$disp,
             rscratch1, ldarh);
  %}

  // Load-acquire 32-bit word.
  enc_class aarch64_enc_ldarw(iRegI dst, memory mem) %{
    MOV_VOLATILE(as_Register($dst$$reg), $mem$$base, $mem$$index, $mem$$scale, $mem$$disp,
             rscratch1, ldarw);
  %}

  // NOTE(review): same enc_class name as the iRegI variant above, differing
  // only in the declared operand type -- verify adlc resolves this as
  // intended (or whether a distinct name was meant).
  enc_class aarch64_enc_ldarw(iRegL dst, memory mem) %{
    MOV_VOLATILE(as_Register($dst$$reg), $mem$$base, $mem$$index, $mem$$scale, $mem$$disp,
             rscratch1, ldarw);
  %}

  // Load-acquire 64-bit doubleword.
  enc_class aarch64_enc_ldar(iRegL dst, memory mem) %{
    MOV_VOLATILE(as_Register($dst$$reg), $mem$$base, $mem$$index, $mem$$scale, $mem$$disp,
             rscratch1, ldar);
  %}

  // Volatile float load: ldarw into rscratch1, then move to the FP register.
  enc_class aarch64_enc_fldars(vRegF dst, memory mem) %{
    MOV_VOLATILE(rscratch1, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp,
             rscratch1, ldarw);
    __ fmovs(as_FloatRegister($dst$$reg), rscratch1);
  %}

  // Volatile double load: ldar into rscratch1, then move to the FP register.
  enc_class aarch64_enc_fldard(vRegD dst, memory mem) %{
    MOV_VOLATILE(rscratch1, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp,
             rscratch1, ldar);
    __ fmovd(as_FloatRegister($dst$$reg), rscratch1);
  %}
2684 
  // Store-release of a 64-bit register. sp cannot be used directly as a
  // store source, so when asked to store sp (only expected when writing
  // the thread anchor) it is first copied into rscratch2.
  enc_class aarch64_enc_stlr(iRegL src, memory mem) %{
    Register src_reg = as_Register($src$$reg);
    // we sometimes get asked to store the stack pointer into the
    // current thread -- we cannot do that directly on AArch64
    if (src_reg == r31_sp) {
        MacroAssembler _masm(&cbuf);
      assert(as_Register($mem$$base) == rthread, "unexpected store for sp");
      __ mov(rscratch2, sp);
      src_reg = rscratch2;
    }
    MOV_VOLATILE(src_reg, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp,
                 rscratch1, stlr);
    // NOTE(review): extra dmb when CPU_DMB_ATOMICS is set -- presumably a
    // CPU-specific ordering strengthening; confirm against VM_Version.
    if (VM_Version::cpu_cpuFeatures() & VM_Version::CPU_DMB_ATOMICS)
      __ dmb(__ ISH);
  %}

  // Volatile float store: move bits to rscratch2, then store-release a word.
  enc_class aarch64_enc_fstlrs(vRegF src, memory mem) %{
    {
      MacroAssembler _masm(&cbuf);
      FloatRegister src_reg = as_FloatRegister($src$$reg);
      __ fmovs(rscratch2, src_reg);
    }
    MOV_VOLATILE(rscratch2, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp,
                 rscratch1, stlrw);
    if (VM_Version::cpu_cpuFeatures() & VM_Version::CPU_DMB_ATOMICS)
      __ dmb(__ ISH);
  %}

  // Volatile double store: move bits to rscratch2, then store-release 64 bits.
  enc_class aarch64_enc_fstlrd(vRegD src, memory mem) %{
    {
      MacroAssembler _masm(&cbuf);
      FloatRegister src_reg = as_FloatRegister($src$$reg);
      __ fmovd(rscratch2, src_reg);
    }
    MOV_VOLATILE(rscratch2, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp,
                 rscratch1, stlr);
    if (VM_Version::cpu_cpuFeatures() & VM_Version::CPU_DMB_ATOMICS)
      __ dmb(__ ISH);
  %}
2724 
2725   // synchronized read/update encodings
2726 
  // Load-exclusive with acquire. ldaxr only takes a bare register address,
  // so any displacement / scaled index is first folded into rscratch1.
  enc_class aarch64_enc_ldaxr(iRegL dst, memory mem) %{
    MacroAssembler _masm(&cbuf);
    Register dst_reg = as_Register($dst$$reg);
    Register base = as_Register($mem$$base);
    int index = $mem$$index;
    int scale = $mem$$scale;
    int disp = $mem$$disp;
    if (index == -1) {
       if (disp != 0) {
        __ lea(rscratch1, Address(base, disp));
        __ ldaxr(dst_reg, rscratch1);
      } else {
        // TODO
        // should we ever get anything other than this case?
        __ ldaxr(dst_reg, base);
      }
    } else {
      Register index_reg = as_Register(index);
      if (disp == 0) {
        __ lea(rscratch1, Address(base, index_reg, Address::lsl(scale)));
        __ ldaxr(dst_reg, rscratch1);
      } else {
        // base + disp first, then add the scaled index
        __ lea(rscratch1, Address(base, disp));
        __ lea(rscratch1, Address(rscratch1, index_reg, Address::lsl(scale)));
        __ ldaxr(dst_reg, rscratch1);
      }
    }
  %}
2755 
  // Store-exclusive with release. The effective address is formed in
  // rscratch2; rscratch1 receives the stlxr status word (0 == success).
  // The final cmpw sets the flags so consumers can branch on success.
  enc_class aarch64_enc_stlxr(iRegLNoSp src, memory mem) %{
    MacroAssembler _masm(&cbuf);
    Register src_reg = as_Register($src$$reg);
    Register base = as_Register($mem$$base);
    int index = $mem$$index;
    int scale = $mem$$scale;
    int disp = $mem$$disp;
    if (index == -1) {
       if (disp != 0) {
        __ lea(rscratch2, Address(base, disp));
        __ stlxr(rscratch1, src_reg, rscratch2);
      } else {
        // TODO
        // should we ever get anything other than this case?
        __ stlxr(rscratch1, src_reg, base);
      }
    } else {
      Register index_reg = as_Register(index);
      if (disp == 0) {
        __ lea(rscratch2, Address(base, index_reg, Address::lsl(scale)));
        __ stlxr(rscratch1, src_reg, rscratch2);
      } else {
        // base + disp first, then add the scaled index
        __ lea(rscratch2, Address(base, disp));
        __ lea(rscratch2, Address(rscratch2, index_reg, Address::lsl(scale)));
        __ stlxr(rscratch1, src_reg, rscratch2);
      }
    }
    __ cmpw(rscratch1, zr);
  %}
2785 
  // 64-bit compare-and-swap at [base]; the memory operand must be a bare
  // base register (no index/displacement). Release-only ordering.
  // NOTE(review): presumably leaves the flags indicating success for
  // aarch64_enc_cset_eq -- confirm in MacroAssembler::cmpxchg.
  enc_class aarch64_enc_cmpxchg(memory mem, iRegLNoSp oldval, iRegLNoSp newval) %{
    MacroAssembler _masm(&cbuf);
    guarantee($mem$$index == -1 && $mem$$disp == 0, "impossible encoding");
    __ cmpxchg($mem$$base$$Register, $oldval$$Register, $newval$$Register,
               Assembler::xword, /*acquire*/ false, /*release*/ true);
  %}

  // 32-bit variant of aarch64_enc_cmpxchg.
  enc_class aarch64_enc_cmpxchgw(memory mem, iRegINoSp oldval, iRegINoSp newval) %{
    MacroAssembler _masm(&cbuf);
    guarantee($mem$$index == -1 && $mem$$disp == 0, "impossible encoding");
    __ cmpxchg($mem$$base$$Register, $oldval$$Register, $newval$$Register,
               Assembler::word, /*acquire*/ false, /*release*/ true);
  %}
2799 
2800 
2801   // The only difference between aarch64_enc_cmpxchg and
2802   // aarch64_enc_cmpxchg_acq is that we use load-acquire in the
2803   // CompareAndSwap sequence to serve as a barrier on acquiring a
2804   // lock.
  // 64-bit CAS with acquire AND release ordering (used for lock acquire).
  enc_class aarch64_enc_cmpxchg_acq(memory mem, iRegLNoSp oldval, iRegLNoSp newval) %{
    MacroAssembler _masm(&cbuf);
    guarantee($mem$$index == -1 && $mem$$disp == 0, "impossible encoding");
    __ cmpxchg($mem$$base$$Register, $oldval$$Register, $newval$$Register,
               Assembler::xword, /*acquire*/ true, /*release*/ true);
  %}

  // 32-bit variant of aarch64_enc_cmpxchg_acq.
  enc_class aarch64_enc_cmpxchgw_acq(memory mem, iRegINoSp oldval, iRegINoSp newval) %{
    MacroAssembler _masm(&cbuf);
    guarantee($mem$$index == -1 && $mem$$disp == 0, "impossible encoding");
    __ cmpxchg($mem$$base$$Register, $oldval$$Register, $newval$$Register,
               Assembler::word, /*acquire*/ true, /*release*/ true);
  %}
2818 
2819   // auxiliary used for CompareAndSwapX to set result register
2820   enc_class aarch64_enc_cset_eq(iRegINoSp res) %{
2821     MacroAssembler _masm(&cbuf);
2822     Register res_reg = as_Register($res$$reg);
2823     __ cset(res_reg, Assembler::EQ);
2824   %}
2825 
2826   // prefetch encodings
2827 
  // Prefetch for read (PLDL1KEEP). prfm supports base+imm and
  // base+index<<scale addressing, but not both at once, so the combined
  // case folds base+disp into rscratch1 first.
  enc_class aarch64_enc_prefetchr(memory mem) %{
    MacroAssembler _masm(&cbuf);
    Register base = as_Register($mem$$base);
    int index = $mem$$index;
    int scale = $mem$$scale;
    int disp = $mem$$disp;
    if (index == -1) {
      __ prfm(Address(base, disp), PLDL1KEEP);
    } else {
      Register index_reg = as_Register(index);
      if (disp == 0) {
        __ prfm(Address(base, index_reg, Address::lsl(scale)), PLDL1KEEP);
      } else {
        __ lea(rscratch1, Address(base, disp));
        __ prfm(Address(rscratch1, index_reg, Address::lsl(scale)), PLDL1KEEP);
      }
    }
  %}

  // Prefetch for write (PSTL1KEEP); same addressing strategy as above.
  enc_class aarch64_enc_prefetchw(memory mem) %{
    MacroAssembler _masm(&cbuf);
    Register base = as_Register($mem$$base);
    int index = $mem$$index;
    int scale = $mem$$scale;
    int disp = $mem$$disp;
    if (index == -1) {
      __ prfm(Address(base, disp), PSTL1KEEP);
    } else {
      Register index_reg = as_Register(index);
      if (disp == 0) {
        __ prfm(Address(base, index_reg, Address::lsl(scale)), PSTL1KEEP);
      } else {
        __ lea(rscratch1, Address(base, disp));
        __ prfm(Address(rscratch1, index_reg, Address::lsl(scale)), PSTL1KEEP);
      }
    }
  %}

  // Non-temporal (streaming) write prefetch, PSTL1STRM.
  enc_class aarch64_enc_prefetchnta(memory mem) %{
    MacroAssembler _masm(&cbuf);
    Register base = as_Register($mem$$base);
    int index = $mem$$index;
    int scale = $mem$$scale;
    int disp = $mem$$disp;
    if (index == -1) {
      __ prfm(Address(base, disp), PSTL1STRM);
    } else {
      Register index_reg = as_Register(index);
      if (disp == 0) {
        __ prfm(Address(base, index_reg, Address::lsl(scale)), PSTL1STRM);
        // NOTE(review): purpose of this nop is unclear (size padding or an
        // errata workaround?) -- confirm before removing.
        __ nop();
      } else {
        __ lea(rscratch1, Address(base, disp));
        __ prfm(Address(rscratch1, index_reg, Address::lsl(scale)), PSTL1STRM);
      }
    }
  %}
2885 
2886   /// mov envcodings
2887 
  // Move a 32-bit immediate into a register; zero is moved from zr.
  enc_class aarch64_enc_movw_imm(iRegI dst, immI src) %{
    MacroAssembler _masm(&cbuf);
    u_int32_t con = (u_int32_t)$src$$constant;
    Register dst_reg = as_Register($dst$$reg);
    if (con == 0) {
      __ movw(dst_reg, zr);
    } else {
      __ movw(dst_reg, con);
    }
  %}

  // Move a 64-bit immediate into a register; zero is moved from zr.
  enc_class aarch64_enc_mov_imm(iRegL dst, immL src) %{
    MacroAssembler _masm(&cbuf);
    Register dst_reg = as_Register($dst$$reg);
    u_int64_t con = (u_int64_t)$src$$constant;
    if (con == 0) {
      __ mov(dst_reg, zr);
    } else {
      __ mov(dst_reg, con);
    }
  %}
2909 
  // Load a pointer constant, dispatching on its relocation type: embedded
  // oop, metadata, or a plain address. Constants 0 and 1 are handled by
  // the dedicated mov_p0 / mov_p1 encodings below.
  enc_class aarch64_enc_mov_p(iRegP dst, immP src) %{
    MacroAssembler _masm(&cbuf);
    Register dst_reg = as_Register($dst$$reg);
    address con = (address)$src$$constant;
    if (con == NULL || con == (address)1) {
      ShouldNotReachHere();
    } else {
      relocInfo::relocType rtype = $src->constant_reloc();
      if (rtype == relocInfo::oop_type) {
        __ movoop(dst_reg, (jobject)con, /*immediate*/true);
      } else if (rtype == relocInfo::metadata_type) {
        __ mov_metadata(dst_reg, (Metadata*)con);
      } else {
        assert(rtype == relocInfo::none, "unexpected reloc type");
        // small addresses (below the first page) via a plain mov;
        // everything else via adrp+add so the code stays relocatable
        if (con < (address)(uintptr_t)os::vm_page_size()) {
          __ mov(dst_reg, con);
        } else {
          unsigned long offset;
          __ adrp(dst_reg, con, offset);
          __ add(dst_reg, dst_reg, offset);
        }
      }
    }
  %}
2934 
  // Pointer constant 0 (null).
  enc_class aarch64_enc_mov_p0(iRegP dst, immP0 src) %{
    MacroAssembler _masm(&cbuf);
    Register dst_reg = as_Register($dst$$reg);
    __ mov(dst_reg, zr);
  %}

  // Pointer constant 1.
  enc_class aarch64_enc_mov_p1(iRegP dst, immP_1 src) %{
    MacroAssembler _masm(&cbuf);
    Register dst_reg = as_Register($dst$$reg);
    __ mov(dst_reg, (u_int64_t)1);
  %}

  // Address of the safepoint polling page (page-aligned, so adrp alone
  // suffices; the assert guards that assumption).
  enc_class aarch64_enc_mov_poll_page(iRegP dst, immPollPage src) %{
    MacroAssembler _masm(&cbuf);
    address page = (address)$src$$constant;
    Register dst_reg = as_Register($dst$$reg);
    unsigned long off;
    __ adrp(dst_reg, Address(page, relocInfo::poll_type), off);
    assert(off == 0, "assumed offset == 0");
  %}

  // Card-table byte map base address.
  enc_class aarch64_enc_mov_byte_map_base(iRegP dst, immByteMapBase src) %{
    MacroAssembler _masm(&cbuf);
    __ load_byte_map_base($dst$$Register);
  %}

  // Narrow (compressed) oop constant; must carry an oop relocation.
  enc_class aarch64_enc_mov_n(iRegN dst, immN src) %{
    MacroAssembler _masm(&cbuf);
    Register dst_reg = as_Register($dst$$reg);
    address con = (address)$src$$constant;
    if (con == NULL) {
      ShouldNotReachHere();
    } else {
      relocInfo::relocType rtype = $src->constant_reloc();
      assert(rtype == relocInfo::oop_type, "unexpected reloc type");
      __ set_narrow_oop(dst_reg, (jobject)con);
    }
  %}

  // Narrow oop constant 0.
  enc_class aarch64_enc_mov_n0(iRegN dst, immN0 src) %{
    MacroAssembler _masm(&cbuf);
    Register dst_reg = as_Register($dst$$reg);
    __ mov(dst_reg, zr);
  %}

  // Narrow (compressed) klass constant; must carry a metadata relocation.
  enc_class aarch64_enc_mov_nk(iRegN dst, immNKlass src) %{
    MacroAssembler _masm(&cbuf);
    Register dst_reg = as_Register($dst$$reg);
    address con = (address)$src$$constant;
    if (con == NULL) {
      ShouldNotReachHere();
    } else {
      relocInfo::relocType rtype = $src->constant_reloc();
      assert(rtype == relocInfo::metadata_type, "unexpected reloc type");
      __ set_narrow_klass(dst_reg, (Klass *)con);
    }
  %}
2992 
2993   // arithmetic encodings
2994 
  // 32-bit add/subtract of an arithmetic immediate. One encoding serves
  // both ideal ops: primary == 1 (subtract) negates the constant, and a
  // negative constant is emitted as the opposite instruction so the
  // immediate stays positive and encodable.
  enc_class aarch64_enc_addsubw_imm(iRegI dst, iRegI src1, immIAddSub src2) %{
    MacroAssembler _masm(&cbuf);
    Register dst_reg = as_Register($dst$$reg);
    Register src_reg = as_Register($src1$$reg);
    int32_t con = (int32_t)$src2$$constant;
    // add has primary == 0, subtract has primary == 1
    if ($primary) { con = -con; }
    if (con < 0) {
      __ subw(dst_reg, src_reg, -con);
    } else {
      __ addw(dst_reg, src_reg, con);
    }
  %}

  // 64-bit variant of aarch64_enc_addsubw_imm.
  enc_class aarch64_enc_addsub_imm(iRegL dst, iRegL src1, immLAddSub src2) %{
    MacroAssembler _masm(&cbuf);
    Register dst_reg = as_Register($dst$$reg);
    Register src_reg = as_Register($src1$$reg);
    int32_t con = (int32_t)$src2$$constant;
    // add has primary == 0, subtract has primary == 1
    if ($primary) { con = -con; }
    if (con < 0) {
      __ sub(dst_reg, src_reg, -con);
    } else {
      __ add(dst_reg, src_reg, con);
    }
  %}
3022 
3023   enc_class aarch64_enc_divw(iRegI dst, iRegI src1, iRegI src2) %{
3024     MacroAssembler _masm(&cbuf);
3025    Register dst_reg = as_Register($dst$$reg);
3026    Register src1_reg = as_Register($src1$$reg);
3027    Register src2_reg = as_Register($src2$$reg);
3028     __ corrected_idivl(dst_reg, src1_reg, src2_reg, false, rscratch1);
3029   %}
3030 
3031   enc_class aarch64_enc_div(iRegI dst, iRegI src1, iRegI src2) %{
3032     MacroAssembler _masm(&cbuf);
3033    Register dst_reg = as_Register($dst$$reg);
3034    Register src1_reg = as_Register($src1$$reg);
3035    Register src2_reg = as_Register($src2$$reg);
3036     __ corrected_idivq(dst_reg, src1_reg, src2_reg, false, rscratch1);
3037   %}
3038 
3039   enc_class aarch64_enc_modw(iRegI dst, iRegI src1, iRegI src2) %{
3040     MacroAssembler _masm(&cbuf);
3041    Register dst_reg = as_Register($dst$$reg);
3042    Register src1_reg = as_Register($src1$$reg);
3043    Register src2_reg = as_Register($src2$$reg);
3044     __ corrected_idivl(dst_reg, src1_reg, src2_reg, true, rscratch1);
3045   %}
3046 
3047   enc_class aarch64_enc_mod(iRegI dst, iRegI src1, iRegI src2) %{
3048     MacroAssembler _masm(&cbuf);
3049    Register dst_reg = as_Register($dst$$reg);
3050    Register src1_reg = as_Register($src1$$reg);
3051    Register src2_reg = as_Register($src2$$reg);
3052     __ corrected_idivq(dst_reg, src1_reg, src2_reg, true, rscratch1);
3053   %}
3054 
3055   // compare instruction encodings
3056 
3057   enc_class aarch64_enc_cmpw(iRegI src1, iRegI src2) %{
3058     MacroAssembler _masm(&cbuf);
3059     Register reg1 = as_Register($src1$$reg);
3060     Register reg2 = as_Register($src2$$reg);
3061     __ cmpw(reg1, reg2);
3062   %}
3063 
3064   enc_class aarch64_enc_cmpw_imm_addsub(iRegI src1, immIAddSub src2) %{
3065     MacroAssembler _masm(&cbuf);
3066     Register reg = as_Register($src1$$reg);
3067     int32_t val = $src2$$constant;
3068     if (val >= 0) {
3069       __ subsw(zr, reg, val);
3070     } else {
3071       __ addsw(zr, reg, -val);
3072     }
3073   %}
3074 
3075   enc_class aarch64_enc_cmpw_imm(iRegI src1, immI src2) %{
3076     MacroAssembler _masm(&cbuf);
3077     Register reg1 = as_Register($src1$$reg);
3078     u_int32_t val = (u_int32_t)$src2$$constant;
3079     __ movw(rscratch1, val);
3080     __ cmpw(reg1, rscratch1);
3081   %}
3082 
3083   enc_class aarch64_enc_cmp(iRegL src1, iRegL src2) %{
3084     MacroAssembler _masm(&cbuf);
3085     Register reg1 = as_Register($src1$$reg);
3086     Register reg2 = as_Register($src2$$reg);
3087     __ cmp(reg1, reg2);
3088   %}
3089 
3090   enc_class aarch64_enc_cmp_imm_addsub(iRegL src1, immL12 src2) %{
3091     MacroAssembler _masm(&cbuf);
3092     Register reg = as_Register($src1$$reg);
3093     int64_t val = $src2$$constant;
3094     if (val >= 0) {
3095       __ subs(zr, reg, val);
3096     } else if (val != -val) {
3097       __ adds(zr, reg, -val);
3098     } else {
3099     // aargh, Long.MIN_VALUE is a special case
3100       __ orr(rscratch1, zr, (u_int64_t)val);
3101       __ subs(zr, reg, rscratch1);
3102     }
3103   %}
3104 
3105   enc_class aarch64_enc_cmp_imm(iRegL src1, immL src2) %{
3106     MacroAssembler _masm(&cbuf);
3107     Register reg1 = as_Register($src1$$reg);
3108     u_int64_t val = (u_int64_t)$src2$$constant;
3109     __ mov(rscratch1, val);
3110     __ cmp(reg1, rscratch1);
3111   %}
3112 
3113   enc_class aarch64_enc_cmpp(iRegP src1, iRegP src2) %{
3114     MacroAssembler _masm(&cbuf);
3115     Register reg1 = as_Register($src1$$reg);
3116     Register reg2 = as_Register($src2$$reg);
3117     __ cmp(reg1, reg2);
3118   %}
3119 
3120   enc_class aarch64_enc_cmpn(iRegN src1, iRegN src2) %{
3121     MacroAssembler _masm(&cbuf);
3122     Register reg1 = as_Register($src1$$reg);
3123     Register reg2 = as_Register($src2$$reg);
3124     __ cmpw(reg1, reg2);
3125   %}
3126 
3127   enc_class aarch64_enc_testp(iRegP src) %{
3128     MacroAssembler _masm(&cbuf);
3129     Register reg = as_Register($src$$reg);
3130     __ cmp(reg, zr);
3131   %}
3132 
3133   enc_class aarch64_enc_testn(iRegN src) %{
3134     MacroAssembler _masm(&cbuf);
3135     Register reg = as_Register($src$$reg);
3136     __ cmpw(reg, zr);
3137   %}
3138 
3139   enc_class aarch64_enc_b(label lbl) %{
3140     MacroAssembler _masm(&cbuf);
3141     Label *L = $lbl$$label;
3142     __ b(*L);
3143   %}
3144 
3145   enc_class aarch64_enc_br_con(cmpOp cmp, label lbl) %{
3146     MacroAssembler _masm(&cbuf);
3147     Label *L = $lbl$$label;
3148     __ br ((Assembler::Condition)$cmp$$cmpcode, *L);
3149   %}
3150 
3151   enc_class aarch64_enc_br_conU(cmpOpU cmp, label lbl) %{
3152     MacroAssembler _masm(&cbuf);
3153     Label *L = $lbl$$label;
3154     __ br ((Assembler::Condition)$cmp$$cmpcode, *L);
3155   %}
3156 
  // Slow-path subtype check, delegated to
  // MacroAssembler::check_klass_subtype_slow_path. The miss label is bound
  // at the end so both hit and miss fall out here, distinguished by the
  // condition codes / result register. primary == 1 additionally zeroes
  // the result register on the hit path.
  enc_class aarch64_enc_partial_subtype_check(iRegP sub, iRegP super, iRegP temp, iRegP result)
  %{
     Register sub_reg = as_Register($sub$$reg);
     Register super_reg = as_Register($super$$reg);
     Register temp_reg = as_Register($temp$$reg);
     Register result_reg = as_Register($result$$reg);

     Label miss;
     MacroAssembler _masm(&cbuf);
     __ check_klass_subtype_slow_path(sub_reg, super_reg, temp_reg, result_reg,
                                     NULL, &miss,
                                     /*set_cond_codes:*/ true);
     if ($primary) {
       __ mov(result_reg, zr);
     }
     __ bind(miss);
  %}
3174 
  // Static / opt-virtual / runtime call via a trampoline (reachable from
  // anywhere in the code cache). On code cache exhaustion the compile is
  // bailed out instead of emitting a truncated call.
  enc_class aarch64_enc_java_static_call(method meth) %{
    MacroAssembler _masm(&cbuf);

    address mark = __ pc();
    address addr = (address)$meth$$method;
    address call;
    if (!_method) {
      // A call to a runtime wrapper, e.g. new, new_typeArray_Java, uncommon_trap.
      call = __ trampoline_call(Address(addr, relocInfo::runtime_call_type), &cbuf);
    } else if (_optimized_virtual) {
      call = __ trampoline_call(Address(addr, relocInfo::opt_virtual_call_type), &cbuf);
    } else {
      call = __ trampoline_call(Address(addr, relocInfo::static_call_type), &cbuf);
    }
    if (call == NULL) {
      ciEnv::current()->record_failure("CodeCache is full"); 
      return;
    }

    if (_method) {
      // Emit stub for static call
      address stub = CompiledStaticCall::emit_to_interp_stub(cbuf, mark);
      if (stub == NULL) {
        ciEnv::current()->record_failure("CodeCache is full"); 
        return;
      }
    }
  %}
3203 
3204   enc_class aarch64_enc_java_handle_call(method meth) %{
3205     MacroAssembler _masm(&cbuf);
3206     relocInfo::relocType reloc;
3207 
3208     // RFP is preserved across all calls, even compiled calls.
3209     // Use it to preserve SP.
3210     __ mov(rfp, sp);
3211 
3212     address mark = __ pc();
3213     address addr = (address)$meth$$method;
3214     address call;
3215     if (!_method) {
3216       // A call to a runtime wrapper, e.g. new, new_typeArray_Java, uncommon_trap.
3217       call = __ trampoline_call(Address(addr, relocInfo::runtime_call_type), &cbuf);
3218     } else if (_optimized_virtual) {
3219       call = __ trampoline_call(Address(addr, relocInfo::opt_virtual_call_type), &cbuf);
3220     } else {
3221       call = __ trampoline_call(Address(addr, relocInfo::static_call_type), &cbuf);
3222     }
3223     if (call == NULL) {
3224       ciEnv::current()->record_failure("CodeCache is full"); 
3225       return;
3226     }
3227 
3228     if (_method) {
3229       // Emit stub for static call
3230       address stub = CompiledStaticCall::emit_to_interp_stub(cbuf, mark);
3231       if (stub == NULL) {
3232         ciEnv::current()->record_failure("CodeCache is full"); 
3233         return;
3234       }
3235     }
3236 
3237     // now restore sp
3238     __ mov(sp, rfp);
3239   %}
3240 
  // Virtual (inline-cache) call; bails out the compile if the ic call
  // cannot be emitted because the code cache is full.
  enc_class aarch64_enc_java_dynamic_call(method meth) %{
    MacroAssembler _masm(&cbuf);
    address call = __ ic_call((address)$meth$$method);
    if (call == NULL) {
      ciEnv::current()->record_failure("CodeCache is full"); 
      return;
    }
  %}

  // Post-call epilog; under VerifyStackAtCalls the stack-depth check is
  // not implemented on this port.
  enc_class aarch64_enc_call_epilog() %{
    MacroAssembler _masm(&cbuf);
    if (VerifyStackAtCalls) {
      // Check that stack depth is unchanged: find majik cookie on stack
      __ call_Unimplemented();
    }
  %}
3257 
  // Call from compiled Java code into the runtime (e.g. arraycopy stubs).
  enc_class aarch64_enc_java_to_runtime(method meth) %{
    MacroAssembler _masm(&cbuf);

    // some calls to generated routines (arraycopy code) are scheduled
    // by C2 as runtime calls. if so we can call them using a br (they
    // will be in a reachable segment) otherwise we have to use a blr
    // which loads the absolute address into a register.
    address entry = (address)$meth$$method;
    CodeBlob *cb = CodeCache::find_blob(entry);
    if (cb) {
      // target is inside the code cache: trampoline call is sufficient
      address call = __ trampoline_call(Address(entry, relocInfo::runtime_call_type));
      if (call == NULL) {
        ciEnv::current()->record_failure("CodeCache is full"); 
        return;
      }
    } else {
      // target outside the code cache: indirect call via rscratch1, with
      // the return address pushed so the stack walker can find it
      Label retaddr;
      __ adr(rscratch2, retaddr);
      __ lea(rscratch1, RuntimeAddress(entry));
      // Leave a breadcrumb for JavaFrameAnchor::capture_last_Java_pc()
      __ stp(zr, rscratch2, Address(__ pre(sp, -2 * wordSize)));
      __ blr(rscratch1);
      __ bind(retaddr);
      __ add(sp, sp, 2 * wordSize);
    }
  %}
3284 
  // Jump to the rethrow stub (far jump: stub may be out of branch range).
  enc_class aarch64_enc_rethrow() %{
    MacroAssembler _masm(&cbuf);
    __ far_jump(RuntimeAddress(OptoRuntime::rethrow_stub()));
  %}

  // Method return.
  enc_class aarch64_enc_ret() %{
    MacroAssembler _masm(&cbuf);
    __ ret(lr);
  %}

  // Tail call: jump to the target address in a register.
  enc_class aarch64_enc_tail_call(iRegP jump_target) %{
    MacroAssembler _masm(&cbuf);
    Register target_reg = as_Register($jump_target$$reg);
    __ br(target_reg);
  %}

  // Tail jump used for exception forwarding.
  enc_class aarch64_enc_tail_jmp(iRegP jump_target) %{
    MacroAssembler _masm(&cbuf);
    Register target_reg = as_Register($jump_target$$reg);
    // exception oop should be in r0
    // ret addr has been popped into lr
    // callee expects it in r3
    __ mov(r3, lr);
    __ br(target_reg);
  %}
3310 
  // Fast-path monitor enter for the C2 FastLock node.  Tries, in order:
  // biased locking (optional), thin (stack) locking via CAS on the mark
  // word, the recursive stack-lock case, and finally CASing the owner
  // field of an already-inflated monitor.  The outcome is reported in
  // the condition flags (see the comments at label 'cont' below):
  //   flag == EQ -> lock acquired on the fast path
  //   flag == NE -> caller must take the slow (runtime) path
  // Register roles: oop = object to lock, box = on-stack BasicLock,
  // disp_hdr and tmp are scratch.
  enc_class aarch64_enc_fast_lock(iRegP object, iRegP box, iRegP tmp, iRegP tmp2) %{
    MacroAssembler _masm(&cbuf);
    Register oop = as_Register($object$$reg);
    Register box = as_Register($box$$reg);
    Register disp_hdr = as_Register($tmp$$reg);
    Register tmp = as_Register($tmp2$$reg);
    Label cont;
    Label object_has_monitor;
    Label cas_failed;

    assert_different_registers(oop, box, tmp, disp_hdr);

    // Load markOop from object into displaced_header.
    __ ldr(disp_hdr, Address(oop, oopDesc::mark_offset_in_bytes()));

    // Always do locking in runtime.
    if (EmitSync & 0x01) {
      // oop is never null here, so comparing it to zr forces flag == NE,
      // which routes every lock through the runtime slow path.
      __ cmp(oop, zr);
      return;
    }

    if (UseBiasedLocking && !UseOptoBiasInlining) {
      // May branch directly to 'cont' when the bias is acquired/valid.
      __ biased_locking_enter(box, oop, disp_hdr, tmp, true, cont);
    }

    // Handle existing monitor
    if ((EmitSync & 0x02) == 0) {
      // Mark word has the monitor bit set -> object is inflated.
      __ tbnz(disp_hdr, exact_log2(markOopDesc::monitor_value), object_has_monitor);
    }

    // Set tmp to be (markOop of object | UNLOCK_VALUE).
    __ orr(tmp, disp_hdr, markOopDesc::unlocked_value);

    // Load Compare Value application register.

    // Initialize the box. (Must happen before we update the object mark!)
    __ str(tmp, Address(box, BasicLock::displaced_header_offset_in_bytes()));

    // Compare object markOop with an unlocked value (tmp) and if
    // equal exchange the stack address of our box with object markOop.
    // On failure disp_hdr contains the possibly locked markOop.
    if (UseLSE) {
      // ARMv8.1 atomics: single CASAL instruction replaces the LL/SC loop.
      __ mov(disp_hdr, tmp);
      __ casal(Assembler::xword, disp_hdr, box, oop);  // Updates disp_hdr
      __ cmp(tmp, disp_hdr);
      __ br(Assembler::EQ, cont);
    } else {
      Label retry_load;
      if ((VM_Version::cpu_cpuFeatures() & VM_Version::CPU_STXR_PREFETCH))
        __ prfm(Address(oop), PSTL1STRM);
      __ bind(retry_load);
      __ ldaxr(disp_hdr, oop);
      __ cmp(tmp, disp_hdr);
      __ br(Assembler::NE, cas_failed);
      // use stlxr to ensure update is immediately visible
      __ stlxr(disp_hdr, box, oop);
      // stlxr writes 0 on success; cmp above already left flag == EQ.
      __ cbzw(disp_hdr, cont);
      __ b(retry_load);
    }

    assert(oopDesc::mark_offset_in_bytes() == 0, "offset of _mark is not 0");

    // If the compare-and-exchange succeeded, then we found an unlocked
    // object, will have now locked it will continue at label cont

    __ bind(cas_failed);
    // We did not see an unlocked object so try the fast recursive case.

    // Check if the owner is self by comparing the value in the
    // markOop of object (disp_hdr) with the stack pointer.
    __ mov(rscratch1, sp);
    __ sub(disp_hdr, disp_hdr, rscratch1);
    __ mov(tmp, (address) (~(os::vm_page_size()-1) | (uintptr_t)markOopDesc::lock_mask_in_place));
    // If condition is true we are cont and hence we can store 0 as the
    // displaced header in the box, which indicates that it is a recursive lock.
    __ ands(tmp/*==0?*/, disp_hdr, tmp);
    __ str(tmp/*==0, perhaps*/, Address(box, BasicLock::displaced_header_offset_in_bytes()));

    // Handle existing monitor.
    if ((EmitSync & 0x02) == 0) {
      __ b(cont);

      __ bind(object_has_monitor);
      // The object's monitor m is unlocked iff m->owner == NULL,
      // otherwise m->owner may contain a thread or a stack address.
      //
      // Try to CAS m->owner from NULL to current thread.
      __ add(tmp, disp_hdr, (ObjectMonitor::owner_offset_in_bytes()-markOopDesc::monitor_value));
      __ mov(disp_hdr, zr);

      if (UseLSE) {
        __ mov(rscratch1, disp_hdr);
        __ casal(Assembler::xword, rscratch1, rthread, tmp);
        // Sets flag == EQ iff the owner was NULL and is now rthread.
        __ cmp(rscratch1, disp_hdr);
      } else {
        Label retry_load, fail;
        if ((VM_Version::cpu_cpuFeatures() & VM_Version::CPU_STXR_PREFETCH))
          __ prfm(Address(tmp), PSTL1STRM);
        __ bind(retry_load);
        __ ldaxr(rscratch1, tmp);
        __ cmp(disp_hdr, rscratch1);
        __ br(Assembler::NE, fail);
        // use stlxr to ensure update is immediately visible
        __ stlxr(rscratch1, rthread, tmp);
        __ cbnzw(rscratch1, retry_load);
        __ bind(fail);
      }

      // Store a non-null value into the box to avoid looking like a re-entrant
      // lock. The fast-path monitor unlock code checks for
      // markOopDesc::monitor_value so use markOopDesc::unused_mark which has the
      // relevant bit set, and also matches ObjectSynchronizer::slow_enter.
      __ mov(tmp, (address)markOopDesc::unused_mark());
      __ str(tmp, Address(box, BasicLock::displaced_header_offset_in_bytes()));
    }

    __ bind(cont);
    // flag == EQ indicates success
    // flag == NE indicates failure
  %}
3431 
  // Fast-path monitor exit for the C2 FastUnlock node, the inverse of
  // aarch64_enc_fast_lock.  Handles, in order: biased-lock exit
  // (optional), recursive stack unlock (displaced header == 0), thin
  // unlock via CAS restoring the displaced mark word, and inflated
  // monitor exit when no waiters are queued.  Outcome in flags:
  //   flag == EQ -> unlocked on the fast path
  //   flag == NE -> caller must take the runtime slow path
  enc_class aarch64_enc_fast_unlock(iRegP object, iRegP box, iRegP tmp, iRegP tmp2) %{
    MacroAssembler _masm(&cbuf);
    Register oop = as_Register($object$$reg);
    Register box = as_Register($box$$reg);
    Register disp_hdr = as_Register($tmp$$reg);
    Register tmp = as_Register($tmp2$$reg);
    Label cont;
    Label object_has_monitor;

    assert_different_registers(oop, box, tmp, disp_hdr);

    // Always do locking in runtime.
    if (EmitSync & 0x01) {
      __ cmp(oop, zr); // Oop can't be 0 here => always false.
      return;
    }

    if (UseBiasedLocking && !UseOptoBiasInlining) {
      // May branch to 'cont' with flag == EQ when the bias is released.
      __ biased_locking_exit(oop, tmp, cont);
    }

    // Find the lock address and load the displaced header from the stack.
    __ ldr(disp_hdr, Address(box, BasicLock::displaced_header_offset_in_bytes()));

    // If the displaced header is 0, we have a recursive unlock.
    __ cmp(disp_hdr, zr);
    __ br(Assembler::EQ, cont);

    // Handle existing monitor.
    if ((EmitSync & 0x02) == 0) {
      __ ldr(tmp, Address(oop, oopDesc::mark_offset_in_bytes()));
      __ tbnz(disp_hdr, exact_log2(markOopDesc::monitor_value), object_has_monitor);
    }

    // Check if it is still a light weight lock, this is true if we
    // see the stack address of the basicLock in the markOop of the
    // object.

    if (UseLSE) {
      // CAS the mark word back from our box address to the displaced
      // header; release semantics only (casl) — no acquire needed on exit.
      __ mov(tmp, box);
      __ casl(Assembler::xword, tmp, disp_hdr, oop);
      __ cmp(tmp, box);
      __ b(cont);
    } else {
      Label retry_load;
      if ((VM_Version::cpu_cpuFeatures() & VM_Version::CPU_STXR_PREFETCH))
        __ prfm(Address(oop), PSTL1STRM);
      __ bind(retry_load);
      __ ldxr(tmp, oop);
      __ cmp(box, tmp);
      __ br(Assembler::NE, cont);
      // use stlxr to ensure update is immediately visible
      __ stlxr(tmp, disp_hdr, oop);
      __ cbzw(tmp, cont);
      __ b(retry_load);
    }

    assert(oopDesc::mark_offset_in_bytes() == 0, "offset of _mark is not 0");

    // Handle existing monitor.
    if ((EmitSync & 0x02) == 0) {
      __ bind(object_has_monitor);
      __ add(tmp, tmp, -markOopDesc::monitor_value); // monitor
      __ ldr(rscratch1, Address(tmp, ObjectMonitor::owner_offset_in_bytes()));
      __ ldr(disp_hdr, Address(tmp, ObjectMonitor::recursions_offset_in_bytes()));
      __ eor(rscratch1, rscratch1, rthread); // Will be 0 if we are the owner.
      __ orr(rscratch1, rscratch1, disp_hdr); // Will be 0 if there are 0 recursions
      __ cmp(rscratch1, zr);
      __ br(Assembler::NE, cont);

      __ ldr(rscratch1, Address(tmp, ObjectMonitor::EntryList_offset_in_bytes()));
      __ ldr(disp_hdr, Address(tmp, ObjectMonitor::cxq_offset_in_bytes()));
      __ orr(rscratch1, rscratch1, disp_hdr); // Will be 0 if both are 0.
      __ cmp(rscratch1, zr);
      __ br(Assembler::NE, cont);
      // need a release store here
      __ lea(tmp, Address(tmp, ObjectMonitor::owner_offset_in_bytes()));
      __ stlr(zr, tmp); // set unowned
    }

    __ bind(cont);
    // flag == EQ indicates success
    // flag == NE indicates failure
  %}
3516 
3517 %}
3518 
3519 //----------FRAME--------------------------------------------------------------
3520 // Definition of frame structure and management information.
3521 //
3522 //  S T A C K   L A Y O U T    Allocators stack-slot number
3523 //                             |   (to get allocators register number
3524 //  G  Owned by    |        |  v    add OptoReg::stack0())
3525 //  r   CALLER     |        |
3526 //  o     |        +--------+      pad to even-align allocators stack-slot
3527 //  w     V        |  pad0  |        numbers; owned by CALLER
3528 //  t   -----------+--------+----> Matcher::_in_arg_limit, unaligned
3529 //  h     ^        |   in   |  5
3530 //        |        |  args  |  4   Holes in incoming args owned by SELF
3531 //  |     |        |        |  3
3532 //  |     |        +--------+
3533 //  V     |        | old out|      Empty on Intel, window on Sparc
3534 //        |    old |preserve|      Must be even aligned.
3535 //        |     SP-+--------+----> Matcher::_old_SP, even aligned
3536 //        |        |   in   |  3   area for Intel ret address
3537 //     Owned by    |preserve|      Empty on Sparc.
3538 //       SELF      +--------+
3539 //        |        |  pad2  |  2   pad to align old SP
3540 //        |        +--------+  1
3541 //        |        | locks  |  0
3542 //        |        +--------+----> OptoReg::stack0(), even aligned
3543 //        |        |  pad1  | 11   pad to align new SP
3544 //        |        +--------+
3545 //        |        |        | 10
3546 //        |        | spills |  9   spills
3547 //        V        |        |  8   (pad0 slot for callee)
3548 //      -----------+--------+----> Matcher::_out_arg_limit, unaligned
3549 //        ^        |  out   |  7
3550 //        |        |  args  |  6   Holes in outgoing args owned by CALLEE
3551 //     Owned by    +--------+
3552 //      CALLEE     | new out|  6   Empty on Intel, window on Sparc
3553 //        |    new |preserve|      Must be even-aligned.
3554 //        |     SP-+--------+----> Matcher::_new_SP, even aligned
3555 //        |        |        |
3556 //
3557 // Note 1: Only region 8-11 is determined by the allocator.  Region 0-5 is
3558 //         known from SELF's arguments and the Java calling convention.
3559 //         Region 6-7 is determined per call site.
// Note 2: If the calling convention leaves holes in the incoming argument
//         area, those holes are owned by SELF.  Holes in the outgoing area
//         are owned by the CALLEE.  Holes should not be necessary in the
//         incoming area, as the Java calling convention is completely under
//         the control of the AD file.  Doubles can be sorted and packed to
//         avoid holes.  Holes in the outgoing arguments may be necessary for
//         varargs C calling conventions.
3567 // Note 3: Region 0-3 is even aligned, with pad2 as needed.  Region 3-5 is
3568 //         even aligned with pad0 as needed.
3569 //         Region 6 is even aligned.  Region 6-7 is NOT even aligned;
3570 //           (the latter is true on Intel but is it false on AArch64?)
3571 //         region 6-11 is even aligned; it may be padded out more so that
3572 //         the region from SP to FP meets the minimum stack alignment.
3573 // Note 4: For I2C adapters, the incoming FP may not meet the minimum stack
3574 //         alignment.  Region 11, pad1, may be dynamically extended so that
3575 //         SP meets the minimum alignment.
3576 
// Describes the stack frame layout and calling conventions that the
// matcher and register allocator assume for this port.
frame %{
  // What direction does stack grow in (assumed to be same for C & Java)
  stack_direction(TOWARDS_LOW);

  // These three registers define part of the calling convention
  // between compiled code and the interpreter.

  // Inline Cache Register or methodOop for I2C.
  inline_cache_reg(R12);

  // Method Oop Register when calling interpreter.
  interpreter_method_oop_reg(R12);

  // Number of stack slots consumed by locking an object
  sync_stack_slots(2);

  // Compiled code's Frame Pointer
  frame_pointer(R31);

  // Interpreter stores its frame pointer in a register which is
  // stored to the stack by I2CAdaptors.
  // I2CAdaptors convert from interpreted java to compiled java.
  interpreter_frame_pointer(R29);

  // Stack alignment requirement
  stack_alignment(StackAlignmentInBytes); // Alignment size in bytes (128-bit -> 16 bytes)

  // Number of stack slots between incoming argument block and the start of
  // a new frame.  The PROLOG must add this many slots to the stack.  The
  // EPILOG must remove this many slots. aarch64 needs two slots for
  // return address and fp.
  // TODO think this is correct but check
  in_preserve_stack_slots(4);

  // Number of outgoing stack slots killed above the out_preserve_stack_slots
  // for calls to C.  Supports the var-args backing area for register parms.
  varargs_C_out_slots_killed(frame::arg_reg_save_area_bytes/BytesPerInt);

  // The after-PROLOG location of the return address.  Location of
  // return address specifies a type (REG or STACK) and a number
  // representing the register number (i.e. - use a register name) or
  // stack slot.
  // Ret Addr is on stack in slot 0 if no locks or verification or alignment.
  // Otherwise, it is above the locks and verification slot and alignment word
  // TODO this may well be correct but need to check why that - 2 is there
  // ppc port uses 0 but we definitely need to allow for fixed_slots
  // which folds in the space used for monitors
  return_addr(STACK - 2 +
              round_to((Compile::current()->in_preserve_stack_slots() +
                        Compile::current()->fixed_slots()),
                       stack_alignment_in_slots()));

  // Body of function which returns an integer array locating
  // arguments either in registers or in stack slots.  Passed an array
  // of ideal registers called "sig" and a "length" count.  Stack-slot
  // offsets are based on outgoing arguments, i.e. a CALLER setting up
  // arguments for a CALLEE.  Incoming stack arguments are
  // automatically biased by the preserve_stack_slots field above.

  calling_convention
  %{
    // No difference between incoming/outgoing, just pass false
    SharedRuntime::java_calling_convention(sig_bt, regs, length, false);
  %}

  c_calling_convention
  %{
    // This is obviously always outgoing
    (void) SharedRuntime::c_calling_convention(sig_bt, regs, NULL, length);
  %}

  // Location of compiled Java return values.  Same as C for now.
  return_value
  %{
    // TODO do we allow ideal_reg == Op_RegN???
    assert(ideal_reg >= Op_RegI && ideal_reg <= Op_RegL,
           "only return normal values");

    // Low half of the return register pair, indexed by ideal reg type.
    static const int lo[Op_RegL + 1] = { // enum name
      0,                                 // Op_Node
      0,                                 // Op_Set
      R0_num,                            // Op_RegN
      R0_num,                            // Op_RegI
      R0_num,                            // Op_RegP
      V0_num,                            // Op_RegF
      V0_num,                            // Op_RegD
      R0_num                             // Op_RegL
    };

    // High half; OptoReg::Bad marks 32-bit values with no high half.
    static const int hi[Op_RegL + 1] = { // enum name
      0,                                 // Op_Node
      0,                                 // Op_Set
      OptoReg::Bad,                       // Op_RegN
      OptoReg::Bad,                      // Op_RegI
      R0_H_num,                          // Op_RegP
      OptoReg::Bad,                      // Op_RegF
      V0_H_num,                          // Op_RegD
      R0_H_num                           // Op_RegL
    };

    return OptoRegPair(hi[ideal_reg], lo[ideal_reg]);
  %}
%}
3680 
3681 //----------ATTRIBUTES---------------------------------------------------------
3682 //----------Operand Attributes-------------------------------------------------
op_attrib op_cost(1);        // Required cost attribute

//----------Instruction Attributes---------------------------------------------
ins_attrib ins_cost(INSN_COST); // Required cost attribute
ins_attrib ins_size(32);        // Required size attribute (in bits)
ins_attrib ins_short_branch(0); // Required flag: is this instruction
                                // a non-matching short branch variant
                                // of some long branch?
ins_attrib ins_alignment(4);    // Required alignment attribute (must
                                // be a power of 2) specifies the
                                // alignment that some part of the
                                // instruction (not necessarily the
                                // start) requires.  If > 1, a
                                // compute_padding() function must be
                                // provided for the instruction
3698 
3699 //----------OPERANDS-----------------------------------------------------------
3700 // Operand definitions must precede instruction definitions for correct parsing
3701 // in the ADLC because operands constitute user defined types which are used in
3702 // instruction definitions.
3703 
3704 //----------Simple Operands----------------------------------------------------
3705 
// Integer operands 32 bit
// 32 bit immediate
operand immI()
%{
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// 32 bit zero
operand immI0()
%{
  predicate(n->get_int() == 0);
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// 32 bit unit increment
operand immI_1()
%{
  predicate(n->get_int() == 1);
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// 32 bit unit decrement
operand immI_M1()
%{
  predicate(n->get_int() == -1);
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// 32 bit constant no greater than 4 (note: no lower bound on the value)
operand immI_le_4()
%{
  predicate(n->get_int() <= 4);
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// 32 bit constant 31
operand immI_31()
%{
  predicate(n->get_int() == 31);
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// 32 bit constant 8
operand immI_8()
%{
  predicate(n->get_int() == 8);
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// 32 bit constant 16
operand immI_16()
%{
  predicate(n->get_int() == 16);
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// 32 bit constant 24
operand immI_24()
%{
  predicate(n->get_int() == 24);
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// 32 bit constant 32
operand immI_32()
%{
  predicate(n->get_int() == 32);
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// 32 bit constant 48
operand immI_48()
%{
  predicate(n->get_int() == 48);
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// 32 bit constant 56
operand immI_56()
%{
  predicate(n->get_int() == 56);
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// 32 bit constant 64
operand immI_64()
%{
  predicate(n->get_int() == 64);
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// 32 bit constant 255 (low-byte mask)
operand immI_255()
%{
  predicate(n->get_int() == 255);
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// 32 bit constant 65535 (low-halfword mask)
operand immI_65535()
%{
  predicate(n->get_int() == 65535);
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}
3859 
// Constant 63.
// NOTE(review): despite the immL_ name, this (and immL_255 below) matches
// a 32-bit ConI via get_int(), not a ConL — presumably because long-shift
// counts are supplied by C2 as ints; confirm against the rules using it.
operand immL_63()
%{
  predicate(n->get_int() == 63);
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Constant 255 — matches a 32-bit ConI (see note on immL_63 above).
operand immL_255()
%{
  predicate(n->get_int() == 255);
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// 64 bit constant 65535 (low-halfword mask)
operand immL_65535()
%{
  predicate(n->get_long() == 65535L);
  match(ConL);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// 64 bit constant 0xFFFFFFFF (low-word mask)
operand immL_4294967295()
%{
  predicate(n->get_long() == 4294967295L);
  match(ConL);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}
3899 
// 64 bit non-zero mask of contiguous low-order ones (value+1 is a power
// of two), with the top two bits clear.
operand immL_bitmask()
%{
  predicate((n->get_long() != 0)
            && ((n->get_long() & 0xc000000000000000l) == 0)
            && is_power_of_2(n->get_long() + 1));
  match(ConL);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// 32 bit non-zero mask of contiguous low-order ones, top two bits clear.
operand immI_bitmask()
%{
  predicate((n->get_int() != 0)
            && ((n->get_int() & 0xc0000000) == 0)
            && is_power_of_2(n->get_int() + 1));
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}
3923 
// Scale values for scaled offset addressing modes (up to long but not quad)
operand immIScale()
%{
  predicate(0 <= n->get_int() && (n->get_int() <= 3));
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// 26 bit signed offset -- for pc-relative branches
operand immI26()
%{
  predicate(((-(1 << 25)) <= n->get_int()) && (n->get_int() < (1 << 25)));
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// 19 bit signed offset -- for pc-relative loads
operand immI19()
%{
  predicate(((-(1 << 18)) <= n->get_int()) && (n->get_int() < (1 << 18)));
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// 12 bit unsigned offset -- for base plus immediate loads
operand immIU12()
%{
  predicate((0 <= n->get_int()) && (n->get_int() < (1 << 12)));
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// 12 bit unsigned offset, long form
operand immLU12()
%{
  predicate((0 <= n->get_long()) && (n->get_long() < (1 << 12)));
  match(ConL);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Offset for scaled or unscaled immediate loads and stores
operand immIOffset()
%{
  predicate(Address::offset_ok_for_immed(n->get_int()));
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Offset valid for a 4-byte (shift == 2) scaled access
operand immIOffset4()
%{
  predicate(Address::offset_ok_for_immed(n->get_int(), 2));
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Offset valid for an 8-byte (shift == 3) scaled access
operand immIOffset8()
%{
  predicate(Address::offset_ok_for_immed(n->get_int(), 3));
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Offset valid for a 16-byte (shift == 4) scaled access
operand immIOffset16()
%{
  predicate(Address::offset_ok_for_immed(n->get_int(), 4));
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Long variants of the offset operands above
operand immLoffset()
%{
  predicate(Address::offset_ok_for_immed(n->get_long()));
  match(ConL);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

operand immLoffset4()
%{
  predicate(Address::offset_ok_for_immed(n->get_long(), 2));
  match(ConL);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

operand immLoffset8()
%{
  predicate(Address::offset_ok_for_immed(n->get_long(), 3));
  match(ConL);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

operand immLoffset16()
%{
  predicate(Address::offset_ok_for_immed(n->get_long(), 4));
  match(ConL);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}
4058 
// 32 bit integer valid for add sub immediate
operand immIAddSub()
%{
  predicate(Assembler::operand_valid_for_add_sub_immediate((long)n->get_int()));
  match(ConI);
  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// 32 bit unsigned integer valid for logical immediate
// TODO -- check this is right when e.g the mask is 0x80000000
operand immILog()
%{
  predicate(Assembler::operand_valid_for_logical_immediate(/*is32*/true, (unsigned long)n->get_int()));
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Integer operands 64 bit
// 64 bit immediate
operand immL()
%{
  match(ConL);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// 64 bit zero
operand immL0()
%{
  predicate(n->get_long() == 0);
  match(ConL);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// 64 bit unit increment
operand immL_1()
%{
  predicate(n->get_long() == 1);
  match(ConL);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// 64 bit unit decrement
operand immL_M1()
%{
  predicate(n->get_long() == -1);
  match(ConL);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// 32 bit offset of pc in thread anchor
// (the byte offset of last_Java_pc within the JavaThread's frame anchor)
operand immL_pc_off()
%{
  predicate(n->get_long() == in_bytes(JavaThread::frame_anchor_offset()) +
                             in_bytes(JavaFrameAnchor::last_Java_pc_offset()));
  match(ConL);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// 64 bit integer valid for add sub immediate
operand immLAddSub()
%{
  predicate(Assembler::operand_valid_for_add_sub_immediate(n->get_long()));
  match(ConL);
  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// 64 bit integer valid for logical immediate
operand immLLog()
%{
  predicate(Assembler::operand_valid_for_logical_immediate(/*is32*/false, (unsigned long)n->get_long()));
  match(ConL);
  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Long Immediate: low 32-bit mask
operand immL_32bits()
%{
  predicate(n->get_long() == 0xFFFFFFFFL);
  match(ConL);
  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}
4167 
// Pointer operands
// Pointer Immediate
operand immP()
%{
  match(ConP);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// NULL Pointer Immediate
operand immP0()
%{
  predicate(n->get_ptr() == 0);
  match(ConP);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Pointer Immediate One
// this is used in object initialization (initial object header)
operand immP_1()
%{
  predicate(n->get_ptr() == 1);
  match(ConP);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Polling Page Pointer Immediate
operand immPollPage()
%{
  predicate((address)n->get_ptr() == os::get_polling_page());
  match(ConP);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Card Table Byte Map Base
operand immByteMapBase()
%{
  // Get base of card map
  predicate((jbyte*)n->get_ptr() ==
        ((CardTableModRefBS*)(Universe::heap()->barrier_set()))->byte_map_base);
  match(ConP);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Pointer Immediate Minus One
// this is used when we want to write the current PC to the thread anchor
operand immP_M1()
%{
  predicate(n->get_ptr() == -1);
  match(ConP);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Pointer Immediate Minus Two
// this is used when we want to write the current PC to the thread anchor
operand immP_M2()
%{
  predicate(n->get_ptr() == -2);
  match(ConP);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}
4249 
// Float and Double operands
// Double Immediate
operand immD()
%{
  match(ConD);
  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// constant 'double +0.0'.
operand immD0()
%{
  predicate((n->getd() == 0) &&
            (fpclassify(n->getd()) == FP_ZERO) && (signbit(n->getd()) == 0));
  match(ConD);
  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Double immediate representable as an FMOV (immediate) encoding.
operand immDPacked()
%{
  predicate(Assembler::operand_valid_for_float_immediate(n->getd()));
  match(ConD);
  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Float Immediate
operand immF()
%{
  match(ConF);
  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// constant 'float +0.0'.
operand immF0()
%{
  predicate((n->getf() == 0) &&
            (fpclassify(n->getf()) == FP_ZERO) && (signbit(n->getf()) == 0));
  match(ConF);
  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Float immediate representable as an FMOV (immediate) encoding.
operand immFPacked()
%{
  predicate(Assembler::operand_valid_for_float_immediate((double)n->getf()));
  match(ConF);
  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}
4310 
// Narrow pointer operands
// Narrow Pointer Immediate
operand immN()
%{
  match(ConN);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Narrow NULL Pointer Immediate
operand immN0()
%{
  predicate(n->get_narrowcon() == 0);
  match(ConN);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Narrow (compressed) Klass pointer immediate
operand immNKlass()
%{
  match(ConNKlass);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}
4341 
// Integer 32 bit Register Operands
// Integer 32 bitRegister (excludes SP)
operand iRegI()
%{
  constraint(ALLOC_IN_RC(any_reg32));
  match(RegI);
  match(iRegINoSp);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}

// Integer 32 bit Register not Special
operand iRegINoSp()
%{
  constraint(ALLOC_IN_RC(no_special_reg32));
  match(RegI);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}

// Integer 64 bit Register Operands
// Integer 64 bit Register (includes SP)
operand iRegL()
%{
  constraint(ALLOC_IN_RC(any_reg));
  match(RegL);
  match(iRegLNoSp);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}

// Integer 64 bit Register not Special
// NOTE(review): unlike the sibling operands this omits op_cost(0);
// presumably ADLC defaults it — confirm this is intentional.
operand iRegLNoSp()
%{
  constraint(ALLOC_IN_RC(no_special_reg));
  match(RegL);
  format %{ %}
  interface(REG_INTER);
%}

// Pointer Register Operands
// Pointer Register
operand iRegP()
%{
  constraint(ALLOC_IN_RC(ptr_reg));
  match(RegP);
  match(iRegPNoSp);
  match(iRegP_R0);
  //match(iRegP_R2);
  //match(iRegP_R4);
  //match(iRegP_R5);
  match(thread_RegP);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}

// Pointer 64 bit Register not Special
operand iRegPNoSp()
%{
  constraint(ALLOC_IN_RC(no_special_ptr_reg));
  match(RegP);
  // match(iRegP);
  // match(iRegP_R0);
  // match(iRegP_R2);
  // match(iRegP_R4);
  // match(iRegP_R5);
  // match(thread_RegP);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}

// Pointer 64 bit Register R0 only
operand iRegP_R0()
%{
  constraint(ALLOC_IN_RC(r0_reg));
  match(RegP);
  // match(iRegP);
  match(iRegPNoSp);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}

// Pointer 64 bit Register R1 only
operand iRegP_R1()
%{
  constraint(ALLOC_IN_RC(r1_reg));
  match(RegP);
  // match(iRegP);
  match(iRegPNoSp);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}

// Pointer 64 bit Register R2 only
operand iRegP_R2()
%{
  constraint(ALLOC_IN_RC(r2_reg));
  match(RegP);
  // match(iRegP);
  match(iRegPNoSp);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}

// Pointer 64 bit Register R3 only
operand iRegP_R3()
%{
  constraint(ALLOC_IN_RC(r3_reg));
  match(RegP);
  // match(iRegP);
  match(iRegPNoSp);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}

// Pointer 64 bit Register R4 only
operand iRegP_R4()
%{
  constraint(ALLOC_IN_RC(r4_reg));
  match(RegP);
  // match(iRegP);
  match(iRegPNoSp);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}
4477 
4478 // Pointer 64 bit Register R5 only
4479 operand iRegP_R5()
4480 %{
4481   constraint(ALLOC_IN_RC(r5_reg));
4482   match(RegP);
4483   // match(iRegP);
4484   match(iRegPNoSp);
4485   op_cost(0);
4486   format %{ %}
4487   interface(REG_INTER);
4488 %}
4489 
4490 // Pointer 64 bit Register R10 only
4491 operand iRegP_R10()
4492 %{
4493   constraint(ALLOC_IN_RC(r10_reg));
4494   match(RegP);
4495   // match(iRegP);
4496   match(iRegPNoSp);
4497   op_cost(0);
4498   format %{ %}
4499   interface(REG_INTER);
4500 %}
4501 
4502 // Long 64 bit Register R11 only
4503 operand iRegL_R11()
4504 %{
4505   constraint(ALLOC_IN_RC(r11_reg));
4506   match(RegL);
4507   match(iRegLNoSp);
4508   op_cost(0);
4509   format %{ %}
4510   interface(REG_INTER);
4511 %}
4512 
4513 // Pointer 64 bit Register FP only
4514 operand iRegP_FP()
4515 %{
4516   constraint(ALLOC_IN_RC(fp_reg));
4517   match(RegP);
4518   // match(iRegP);
4519   op_cost(0);
4520   format %{ %}
4521   interface(REG_INTER);
4522 %}
4523 
4524 // Register R0 only
4525 operand iRegI_R0()
4526 %{
4527   constraint(ALLOC_IN_RC(int_r0_reg));
4528   match(RegI);
4529   match(iRegINoSp);
4530   op_cost(0);
4531   format %{ %}
4532   interface(REG_INTER);
4533 %}
4534 
4535 // Register R2 only
4536 operand iRegI_R2()
4537 %{
4538   constraint(ALLOC_IN_RC(int_r2_reg));
4539   match(RegI);
4540   match(iRegINoSp);
4541   op_cost(0);
4542   format %{ %}
4543   interface(REG_INTER);
4544 %}
4545 
4546 // Register R3 only
4547 operand iRegI_R3()
4548 %{
4549   constraint(ALLOC_IN_RC(int_r3_reg));
4550   match(RegI);
4551   match(iRegINoSp);
4552   op_cost(0);
4553   format %{ %}
4554   interface(REG_INTER);
4555 %}
4556 
4557 
// Register R4 only
4559 operand iRegI_R4()
4560 %{
4561   constraint(ALLOC_IN_RC(int_r4_reg));
4562   match(RegI);
4563   match(iRegINoSp);
4564   op_cost(0);
4565   format %{ %}
4566   interface(REG_INTER);
4567 %}
4568 
4569 
4570 // Pointer Register Operands
4571 // Narrow Pointer Register
4572 operand iRegN()
4573 %{
4574   constraint(ALLOC_IN_RC(any_reg32));
4575   match(RegN);
4576   match(iRegNNoSp);
4577   op_cost(0);
4578   format %{ %}
4579   interface(REG_INTER);
4580 %}
4581 
// Narrow Pointer Register not Special
4583 operand iRegNNoSp()
4584 %{
4585   constraint(ALLOC_IN_RC(no_special_reg32));
4586   match(RegN);
4587   op_cost(0);
4588   format %{ %}
4589   interface(REG_INTER);
4590 %}
4591 
4592 // heap base register -- used for encoding immN0
4593 
4594 operand iRegIHeapbase()
4595 %{
4596   constraint(ALLOC_IN_RC(heapbase_reg));
4597   match(RegI);
4598   op_cost(0);
4599   format %{ %}
4600   interface(REG_INTER);
4601 %}
4602 
4603 // Float Register
4604 // Float register operands
4605 operand vRegF()
4606 %{
4607   constraint(ALLOC_IN_RC(float_reg));
4608   match(RegF);
4609 
4610   op_cost(0);
4611   format %{ %}
4612   interface(REG_INTER);
4613 %}
4614 
4615 // Double Register
4616 // Double register operands
4617 operand vRegD()
4618 %{
4619   constraint(ALLOC_IN_RC(double_reg));
4620   match(RegD);
4621 
4622   op_cost(0);
4623   format %{ %}
4624   interface(REG_INTER);
4625 %}
4626 
4627 operand vecD()
4628 %{
4629   constraint(ALLOC_IN_RC(vectord_reg));
4630   match(VecD);
4631 
4632   op_cost(0);
4633   format %{ %}
4634   interface(REG_INTER);
4635 %}
4636 
4637 operand vecX()
4638 %{
4639   constraint(ALLOC_IN_RC(vectorx_reg));
4640   match(VecX);
4641 
4642   op_cost(0);
4643   format %{ %}
4644   interface(REG_INTER);
4645 %}
4646 
4647 operand vRegD_V0()
4648 %{
4649   constraint(ALLOC_IN_RC(v0_reg));
4650   match(RegD);
4651   op_cost(0);
4652   format %{ %}
4653   interface(REG_INTER);
4654 %}
4655 
4656 operand vRegD_V1()
4657 %{
4658   constraint(ALLOC_IN_RC(v1_reg));
4659   match(RegD);
4660   op_cost(0);
4661   format %{ %}
4662   interface(REG_INTER);
4663 %}
4664 
4665 operand vRegD_V2()
4666 %{
4667   constraint(ALLOC_IN_RC(v2_reg));
4668   match(RegD);
4669   op_cost(0);
4670   format %{ %}
4671   interface(REG_INTER);
4672 %}
4673 
4674 operand vRegD_V3()
4675 %{
4676   constraint(ALLOC_IN_RC(v3_reg));
4677   match(RegD);
4678   op_cost(0);
4679   format %{ %}
4680   interface(REG_INTER);
4681 %}
4682 
4683 // Flags register, used as output of signed compare instructions
4684 
// note that on AArch64 we also use this register as the output
// for floating point compare instructions (CmpF CmpD). this ensures
4687 // that ordered inequality tests use GT, GE, LT or LE none of which
4688 // pass through cases where the result is unordered i.e. one or both
4689 // inputs to the compare is a NaN. this means that the ideal code can
4690 // replace e.g. a GT with an LE and not end up capturing the NaN case
4691 // (where the comparison should always fail). EQ and NE tests are
4692 // always generated in ideal code so that unordered folds into the NE
4693 // case, matching the behaviour of AArch64 NE.
4694 //
4695 // This differs from x86 where the outputs of FP compares use a
4696 // special FP flags registers and where compares based on this
4697 // register are distinguished into ordered inequalities (cmpOpUCF) and
4698 // EQ/NEQ tests (cmpOpUCF2). x86 has to special case the latter tests
4699 // to explicitly handle the unordered case in branches. x86 also has
4700 // to include extra CMoveX rules to accept a cmpOpUCF input.
4701 
4702 operand rFlagsReg()
4703 %{
4704   constraint(ALLOC_IN_RC(int_flags));
4705   match(RegFlags);
4706 
4707   op_cost(0);
4708   format %{ "RFLAGS" %}
4709   interface(REG_INTER);
4710 %}
4711 
4712 // Flags register, used as output of unsigned compare instructions
4713 operand rFlagsRegU()
4714 %{
4715   constraint(ALLOC_IN_RC(int_flags));
4716   match(RegFlags);
4717 
4718   op_cost(0);
4719   format %{ "RFLAGSU" %}
4720   interface(REG_INTER);
4721 %}
4722 
4723 // Special Registers
4724 
4725 // Method Register
4726 operand inline_cache_RegP(iRegP reg)
4727 %{
4728   constraint(ALLOC_IN_RC(method_reg)); // inline_cache_reg
4729   match(reg);
4730   match(iRegPNoSp);
4731   op_cost(0);
4732   format %{ %}
4733   interface(REG_INTER);
4734 %}
4735 
4736 operand interpreter_method_oop_RegP(iRegP reg)
4737 %{
4738   constraint(ALLOC_IN_RC(method_reg)); // interpreter_method_oop_reg
4739   match(reg);
4740   match(iRegPNoSp);
4741   op_cost(0);
4742   format %{ %}
4743   interface(REG_INTER);
4744 %}
4745 
// Thread Register
operand thread_RegP(iRegP reg)
%{
  // NOTE(review): the old trailing comment here said "link_reg",
  // apparently copied from lr_RegP below; the class used is thread_reg.
  constraint(ALLOC_IN_RC(thread_reg)); // thread_reg
  match(reg);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}
4755 
4756 operand lr_RegP(iRegP reg)
4757 %{
4758   constraint(ALLOC_IN_RC(lr_reg)); // link_reg
4759   match(reg);
4760   op_cost(0);
4761   format %{ %}
4762   interface(REG_INTER);
4763 %}
4764 
4765 //----------Memory Operands----------------------------------------------------
4766 
// Simple register-indirect addressing: [$reg] with no index, scale or
// displacement.
operand indirect(iRegP reg)
%{
  constraint(ALLOC_IN_RC(ptr_reg));
  match(reg);
  op_cost(0);
  format %{ "[$reg]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index(0xffffffff); // 0xffffffff encodes "no index register"
    scale(0x0);
    disp(0x0);
  %}
%}
4780 
// Base + (long index << scale) + unsigned 12-bit int offset.
// The predicate admits this form only when the shift amount is
// compatible with the access size of every memory user of the AddP
// (size_fits_all_mem_uses on the constant shift in the Offset input).
operand indIndexScaledOffsetI(iRegP reg, iRegL lreg, immIScale scale, immIU12 off)
%{
  predicate(size_fits_all_mem_uses(n->as_AddP(),
                                   n->in(AddPNode::Address)->in(AddPNode::Offset)->in(2)->get_int()));
  constraint(ALLOC_IN_RC(ptr_reg));
  match(AddP (AddP reg (LShiftL lreg scale)) off);
  op_cost(INSN_COST);
  format %{ "$reg, $lreg lsl($scale), $off" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index($lreg);
    scale($scale);
    disp($off);
  %}
%}
4796 
4797 operand indIndexScaledOffsetL(iRegP reg, iRegL lreg, immIScale scale, immLU12 off)
4798 %{
4799   predicate(size_fits_all_mem_uses(n->as_AddP(),
4800                                    n->in(AddPNode::Address)->in(AddPNode::Offset)->in(2)->get_int()));
4801   constraint(ALLOC_IN_RC(ptr_reg));
4802   match(AddP (AddP reg (LShiftL lreg scale)) off);
4803   op_cost(INSN_COST);
4804   format %{ "$reg, $lreg lsl($scale), $off" %}
4805   interface(MEMORY_INTER) %{
4806     base($reg);
4807     index($lreg);
4808     scale($scale);
4809     disp($off);
4810   %}
4811 %}
4812 
4813 operand indIndexOffsetI2L(iRegP reg, iRegI ireg, immLU12 off)
4814 %{
4815   constraint(ALLOC_IN_RC(ptr_reg));
4816   match(AddP (AddP reg (ConvI2L ireg)) off);
4817   op_cost(INSN_COST);
4818   format %{ "$reg, $ireg, $off I2L" %}
4819   interface(MEMORY_INTER) %{
4820     base($reg);
4821     index($ireg);
4822     scale(0x0);
4823     disp($off);
4824   %}
4825 %}
4826 
4827 operand indIndexScaledOffsetI2L(iRegP reg, iRegI ireg, immIScale scale, immLU12 off)
4828 %{
4829   predicate(size_fits_all_mem_uses(n->as_AddP(),
4830                                    n->in(AddPNode::Address)->in(AddPNode::Offset)->in(2)->get_int()));
4831   constraint(ALLOC_IN_RC(ptr_reg));
4832   match(AddP (AddP reg (LShiftL (ConvI2L ireg) scale)) off);
4833   op_cost(INSN_COST);
4834   format %{ "$reg, $ireg sxtw($scale), $off I2L" %}
4835   interface(MEMORY_INTER) %{
4836     base($reg);
4837     index($ireg);
4838     scale($scale);
4839     disp($off);
4840   %}
4841 %}
4842 
4843 operand indIndexScaledI2L(iRegP reg, iRegI ireg, immIScale scale)
4844 %{
4845   predicate(size_fits_all_mem_uses(n->as_AddP(),
4846                                    n->in(AddPNode::Offset)->in(2)->get_int()));
4847   constraint(ALLOC_IN_RC(ptr_reg));
4848   match(AddP reg (LShiftL (ConvI2L ireg) scale));
4849   op_cost(0);
4850   format %{ "$reg, $ireg sxtw($scale), 0, I2L" %}
4851   interface(MEMORY_INTER) %{
4852     base($reg);
4853     index($ireg);
4854     scale($scale);
4855     disp(0x0);
4856   %}
4857 %}
4858 
4859 operand indIndexScaled(iRegP reg, iRegL lreg, immIScale scale)
4860 %{
4861   predicate(size_fits_all_mem_uses(n->as_AddP(),
4862                                    n->in(AddPNode::Offset)->in(2)->get_int()));
4863   constraint(ALLOC_IN_RC(ptr_reg));
4864   match(AddP reg (LShiftL lreg scale));
4865   op_cost(0);
4866   format %{ "$reg, $lreg lsl($scale)" %}
4867   interface(MEMORY_INTER) %{
4868     base($reg);
4869     index($lreg);
4870     scale($scale);
4871     disp(0x0);
4872   %}
4873 %}
4874 
4875 operand indIndex(iRegP reg, iRegL lreg)
4876 %{
4877   constraint(ALLOC_IN_RC(ptr_reg));
4878   match(AddP reg lreg);
4879   op_cost(0);
4880   format %{ "$reg, $lreg" %}
4881   interface(MEMORY_INTER) %{
4882     base($reg);
4883     index($lreg);
4884     scale(0x0);
4885     disp(0x0);
4886   %}
4887 %}
4888 
4889 operand indOffI(iRegP reg, immIOffset off)
4890 %{
4891   constraint(ALLOC_IN_RC(ptr_reg));
4892   match(AddP reg off);
4893   op_cost(0);
4894   format %{ "[$reg, $off]" %}
4895   interface(MEMORY_INTER) %{
4896     base($reg);
4897     index(0xffffffff);
4898     scale(0x0);
4899     disp($off);
4900   %}
4901 %}
4902 
4903 operand indOffI4(iRegP reg, immIOffset4 off)
4904 %{
4905   constraint(ALLOC_IN_RC(ptr_reg));
4906   match(AddP reg off);
4907   op_cost(0);
4908   format %{ "[$reg, $off]" %}
4909   interface(MEMORY_INTER) %{
4910     base($reg);
4911     index(0xffffffff);
4912     scale(0x0);
4913     disp($off);
4914   %}
4915 %}
4916 
4917 operand indOffI8(iRegP reg, immIOffset8 off)
4918 %{
4919   constraint(ALLOC_IN_RC(ptr_reg));
4920   match(AddP reg off);
4921   op_cost(0);
4922   format %{ "[$reg, $off]" %}
4923   interface(MEMORY_INTER) %{
4924     base($reg);
4925     index(0xffffffff);
4926     scale(0x0);
4927     disp($off);
4928   %}
4929 %}
4930 
4931 operand indOffI16(iRegP reg, immIOffset16 off)
4932 %{
4933   constraint(ALLOC_IN_RC(ptr_reg));
4934   match(AddP reg off);
4935   op_cost(0);
4936   format %{ "[$reg, $off]" %}
4937   interface(MEMORY_INTER) %{
4938     base($reg);
4939     index(0xffffffff);
4940     scale(0x0);
4941     disp($off);
4942   %}
4943 %}
4944 
4945 operand indOffL(iRegP reg, immLoffset off)
4946 %{
4947   constraint(ALLOC_IN_RC(ptr_reg));
4948   match(AddP reg off);
4949   op_cost(0);
4950   format %{ "[$reg, $off]" %}
4951   interface(MEMORY_INTER) %{
4952     base($reg);
4953     index(0xffffffff);
4954     scale(0x0);
4955     disp($off);
4956   %}
4957 %}
4958 
4959 operand indOffL4(iRegP reg, immLoffset4 off)
4960 %{
4961   constraint(ALLOC_IN_RC(ptr_reg));
4962   match(AddP reg off);
4963   op_cost(0);
4964   format %{ "[$reg, $off]" %}
4965   interface(MEMORY_INTER) %{
4966     base($reg);
4967     index(0xffffffff);
4968     scale(0x0);
4969     disp($off);
4970   %}
4971 %}
4972 
4973 operand indOffL8(iRegP reg, immLoffset8 off)
4974 %{
4975   constraint(ALLOC_IN_RC(ptr_reg));
4976   match(AddP reg off);
4977   op_cost(0);
4978   format %{ "[$reg, $off]" %}
4979   interface(MEMORY_INTER) %{
4980     base($reg);
4981     index(0xffffffff);
4982     scale(0x0);
4983     disp($off);
4984   %}
4985 %}
4986 
4987 operand indOffL16(iRegP reg, immLoffset16 off)
4988 %{
4989   constraint(ALLOC_IN_RC(ptr_reg));
4990   match(AddP reg off);
4991   op_cost(0);
4992   format %{ "[$reg, $off]" %}
4993   interface(MEMORY_INTER) %{
4994     base($reg);
4995     index(0xffffffff);
4996     scale(0x0);
4997     disp($off);
4998   %}
4999 %}
5000 
// Register-indirect addressing through a narrow (compressed) oop base.
// Only legal when compressed oops use a zero shift, per the predicate.
operand indirectN(iRegN reg)
%{
  predicate(Universe::narrow_oop_shift() == 0);
  constraint(ALLOC_IN_RC(ptr_reg));
  match(DecodeN reg);
  op_cost(0);
  format %{ "[$reg]\t# narrow" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index(0xffffffff); // no index register
    scale(0x0);
    disp(0x0);
  %}
%}
5015 
5016 operand indIndexScaledOffsetIN(iRegN reg, iRegL lreg, immIScale scale, immIU12 off)
5017 %{
5018   predicate(Universe::narrow_oop_shift() == 0 &&
5019             size_fits_all_mem_uses(n->as_AddP(),
5020                                    n->in(AddPNode::Address)->in(AddPNode::Offset)->in(2)->get_int()));
5021   constraint(ALLOC_IN_RC(ptr_reg));
5022   match(AddP (AddP (DecodeN reg) (LShiftL lreg scale)) off);
5023   op_cost(0);
5024   format %{ "$reg, $lreg lsl($scale), $off\t# narrow" %}
5025   interface(MEMORY_INTER) %{
5026     base($reg);
5027     index($lreg);
5028     scale($scale);
5029     disp($off);
5030   %}
5031 %}
5032 
5033 operand indIndexScaledOffsetLN(iRegN reg, iRegL lreg, immIScale scale, immLU12 off)
5034 %{
5035   predicate(Universe::narrow_oop_shift() == 0 &&
5036             size_fits_all_mem_uses(n->as_AddP(),
5037                                    n->in(AddPNode::Address)->in(AddPNode::Offset)->in(2)->get_int()));
5038   constraint(ALLOC_IN_RC(ptr_reg));
5039   match(AddP (AddP (DecodeN reg) (LShiftL lreg scale)) off);
5040   op_cost(INSN_COST);
5041   format %{ "$reg, $lreg lsl($scale), $off\t# narrow" %}
5042   interface(MEMORY_INTER) %{
5043     base($reg);
5044     index($lreg);
5045     scale($scale);
5046     disp($off);
5047   %}
5048 %}
5049 
5050 operand indIndexOffsetI2LN(iRegN reg, iRegI ireg, immLU12 off)
5051 %{
5052   predicate(Universe::narrow_oop_shift() == 0);
5053   constraint(ALLOC_IN_RC(ptr_reg));
5054   match(AddP (AddP (DecodeN reg) (ConvI2L ireg)) off);
5055   op_cost(INSN_COST);
5056   format %{ "$reg, $ireg, $off I2L\t# narrow" %}
5057   interface(MEMORY_INTER) %{
5058     base($reg);
5059     index($ireg);
5060     scale(0x0);
5061     disp($off);
5062   %}
5063 %}
5064 
5065 operand indIndexScaledOffsetI2LN(iRegN reg, iRegI ireg, immIScale scale, immLU12 off)
5066 %{
5067   predicate(Universe::narrow_oop_shift() == 0 &&
5068             size_fits_all_mem_uses(n->as_AddP(),
5069                                    n->in(AddPNode::Address)->in(AddPNode::Offset)->in(2)->get_int()));
5070   constraint(ALLOC_IN_RC(ptr_reg));
5071   match(AddP (AddP (DecodeN reg) (LShiftL (ConvI2L ireg) scale)) off);
5072   op_cost(INSN_COST);
5073   format %{ "$reg, $ireg sxtw($scale), $off I2L\t# narrow" %}
5074   interface(MEMORY_INTER) %{
5075     base($reg);
5076     index($ireg);
5077     scale($scale);
5078     disp($off);
5079   %}
5080 %}
5081 
5082 operand indIndexScaledI2LN(iRegN reg, iRegI ireg, immIScale scale)
5083 %{
5084   predicate(Universe::narrow_oop_shift() == 0 &&
5085             size_fits_all_mem_uses(n->as_AddP(),
5086                                    n->in(AddPNode::Offset)->in(2)->get_int()));
5087   constraint(ALLOC_IN_RC(ptr_reg));
5088   match(AddP (DecodeN reg) (LShiftL (ConvI2L ireg) scale));
5089   op_cost(0);
5090   format %{ "$reg, $ireg sxtw($scale), 0, I2L\t# narrow" %}
5091   interface(MEMORY_INTER) %{
5092     base($reg);
5093     index($ireg);
5094     scale($scale);
5095     disp(0x0);
5096   %}
5097 %}
5098 
5099 operand indIndexScaledN(iRegN reg, iRegL lreg, immIScale scale)
5100 %{
5101   predicate(Universe::narrow_oop_shift() == 0 &&
5102             size_fits_all_mem_uses(n->as_AddP(),
5103                                    n->in(AddPNode::Offset)->in(2)->get_int()));
5104   constraint(ALLOC_IN_RC(ptr_reg));
5105   match(AddP (DecodeN reg) (LShiftL lreg scale));
5106   op_cost(0);
5107   format %{ "$reg, $lreg lsl($scale)\t# narrow" %}
5108   interface(MEMORY_INTER) %{
5109     base($reg);
5110     index($lreg);
5111     scale($scale);
5112     disp(0x0);
5113   %}
5114 %}
5115 
5116 operand indIndexN(iRegN reg, iRegL lreg)
5117 %{
5118   predicate(Universe::narrow_oop_shift() == 0);
5119   constraint(ALLOC_IN_RC(ptr_reg));
5120   match(AddP (DecodeN reg) lreg);
5121   op_cost(0);
5122   format %{ "$reg, $lreg\t# narrow" %}
5123   interface(MEMORY_INTER) %{
5124     base($reg);
5125     index($lreg);
5126     scale(0x0);
5127     disp(0x0);
5128   %}
5129 %}
5130 
5131 operand indOffIN(iRegN reg, immIOffset off)
5132 %{
5133   predicate(Universe::narrow_oop_shift() == 0);
5134   constraint(ALLOC_IN_RC(ptr_reg));
5135   match(AddP (DecodeN reg) off);
5136   op_cost(0);
5137   format %{ "[$reg, $off]\t# narrow" %}
5138   interface(MEMORY_INTER) %{
5139     base($reg);
5140     index(0xffffffff);
5141     scale(0x0);
5142     disp($off);
5143   %}
5144 %}
5145 
5146 operand indOffLN(iRegN reg, immLoffset off)
5147 %{
5148   predicate(Universe::narrow_oop_shift() == 0);
5149   constraint(ALLOC_IN_RC(ptr_reg));
5150   match(AddP (DecodeN reg) off);
5151   op_cost(0);
5152   format %{ "[$reg, $off]\t# narrow" %}
5153   interface(MEMORY_INTER) %{
5154     base($reg);
5155     index(0xffffffff);
5156     scale(0x0);
5157     disp($off);
5158   %}
5159 %}
5160 
5161 
5162 
5163 // AArch64 opto stubs need to write to the pc slot in the thread anchor
5164 operand thread_anchor_pc(thread_RegP reg, immL_pc_off off)
5165 %{
5166   constraint(ALLOC_IN_RC(ptr_reg));
5167   match(AddP reg off);
5168   op_cost(0);
5169   format %{ "[$reg, $off]" %}
5170   interface(MEMORY_INTER) %{
5171     base($reg);
5172     index(0xffffffff);
5173     scale(0x0);
5174     disp($off);
5175   %}
5176 %}
5177 
//----------Special Memory Operands--------------------------------------------
// Stack Slot Operand - This operand is used for loading and storing temporary
//                      values on the stack where a match requires a value to
//                      flow through memory.
operand stackSlotP(sRegP reg)
%{
  constraint(ALLOC_IN_RC(stack_slots));
  op_cost(100);
  // No match rule because this operand is only generated in matching
  // match(RegP);
  format %{ "[$reg]" %}
  interface(MEMORY_INTER) %{
    // NOTE(review): the base comments below previously said "RSP", an
    // x86 leftover; on AArch64 encoding 0x1e should denote SP -- confirm.
    base(0x1e);  // SP
    index(0x0);  // No Index
    scale(0x0);  // No Scale
    disp($reg);  // Stack Offset
  %}
%}

operand stackSlotI(sRegI reg)
%{
  constraint(ALLOC_IN_RC(stack_slots));
  // No match rule because this operand is only generated in matching
  // match(RegI);
  format %{ "[$reg]" %}
  interface(MEMORY_INTER) %{
    base(0x1e);  // SP
    index(0x0);  // No Index
    scale(0x0);  // No Scale
    disp($reg);  // Stack Offset
  %}
%}

operand stackSlotF(sRegF reg)
%{
  constraint(ALLOC_IN_RC(stack_slots));
  // No match rule because this operand is only generated in matching
  // match(RegF);
  format %{ "[$reg]" %}
  interface(MEMORY_INTER) %{
    base(0x1e);  // SP
    index(0x0);  // No Index
    scale(0x0);  // No Scale
    disp($reg);  // Stack Offset
  %}
%}

operand stackSlotD(sRegD reg)
%{
  constraint(ALLOC_IN_RC(stack_slots));
  // No match rule because this operand is only generated in matching
  // match(RegD);
  format %{ "[$reg]" %}
  interface(MEMORY_INTER) %{
    base(0x1e);  // SP
    index(0x0);  // No Index
    scale(0x0);  // No Scale
    disp($reg);  // Stack Offset
  %}
%}

operand stackSlotL(sRegL reg)
%{
  constraint(ALLOC_IN_RC(stack_slots));
  // No match rule because this operand is only generated in matching
  // match(RegL);
  format %{ "[$reg]" %}
  interface(MEMORY_INTER) %{
    base(0x1e);  // SP
    index(0x0);  // No Index
    scale(0x0);  // No Scale
    disp($reg);  // Stack Offset
  %}
%}
5252 
5253 // Operands for expressing Control Flow
5254 // NOTE: Label is a predefined operand which should not be redefined in
5255 //       the AD file. It is generically handled within the ADLC.
5256 
5257 //----------Conditional Branch Operands----------------------------------------
5258 // Comparison Op  - This is the operation of the comparison, and is limited to
5259 //                  the following set of codes:
5260 //                  L (<), LE (<=), G (>), GE (>=), E (==), NE (!=)
5261 //
5262 // Other attributes of the comparison, such as unsignedness, are specified
5263 // by the comparison instruction that sets a condition code flags register.
5264 // That result is represented by a flags operand whose subtype is appropriate
5265 // to the unsignedness (etc.) of the comparison.
5266 //
5267 // Later, the instruction which matches both the Comparison Op (a Bool) and
5268 // the flags (produced by the Cmp) specifies the coding of the comparison op
5269 // by matching a specific subtype of Bool operand below, such as cmpOpU.
5270 
5271 // used for signed integral comparisons and fp comparisons
5272 
operand cmpOp()
%{
  match(Bool);

  format %{ "" %}
  // The hex values are the AArch64 condition-code encodings:
  // EQ=0x0, NE=0x1, LT=0xb, GE=0xa, LE=0xd, GT=0xc, VS=0x6, VC=0x7.
  interface(COND_INTER) %{
    equal(0x0, "eq");
    not_equal(0x1, "ne");
    less(0xb, "lt");
    greater_equal(0xa, "ge");
    less_equal(0xd, "le");
    greater(0xc, "gt");
    overflow(0x6, "vs");
    no_overflow(0x7, "vc");
  %}
%}
5289 
5290 // used for unsigned integral comparisons
5291 
operand cmpOpU()
%{
  match(Bool);

  format %{ "" %}
  // Unsigned flavours of the AArch64 condition-code encodings:
  // LO/CC=0x3, HS/CS=0x2, LS=0x9, HI=0x8 (EQ/NE/VS/VC as in cmpOp).
  interface(COND_INTER) %{
    equal(0x0, "eq");
    not_equal(0x1, "ne");
    less(0x3, "lo");
    greater_equal(0x2, "hs");
    less_equal(0x9, "ls");
    greater(0x8, "hi");
    overflow(0x6, "vs");
    no_overflow(0x7, "vc");
  %}
%}
5308 
// Special operand allowing long args to int ops to be truncated for free

// Matches (ConvL2I reg) directly so the narrowing is folded into the
// consuming 32-bit instruction instead of planting a separate l2i/movw
// (see the iRegIorL2I commentary further below).
operand iRegL2I(iRegL reg) %{

  op_cost(0);

  match(ConvL2I reg);

  format %{ "l2i($reg)" %}

  interface(REG_INTER)
%}
5321 
// Memory-operand classes for vector loads/stores of 4, 8 and 16 bytes;
// each admits only the immediate-offset operand flavours of the
// matching size (indOffI4/indOffL4, indOffI8/indOffL8, ...).
opclass vmem4(indirect, indIndex, indOffI4, indOffL4);
opclass vmem8(indirect, indIndex, indOffI8, indOffL8);
opclass vmem16(indirect, indIndex, indOffI16, indOffL16);
5325 
5326 //----------OPERAND CLASSES----------------------------------------------------
5327 // Operand Classes are groups of operands that are used as to simplify
5328 // instruction definitions by not requiring the AD writer to specify
5329 // separate instructions for every form of operand when the
5330 // instruction accepts multiple operand types with the same basic
5331 // encoding and format. The classic case of this is memory operands.
5332 
5333 // memory is used to define read/write location for load/store
5334 // instruction defs. we can turn a memory op into an Address
5335 
5336 opclass memory(indirect, indIndexScaledOffsetI, indIndexScaledOffsetL, indIndexOffsetI2L, indIndexScaledOffsetI2L, indIndexScaled, indIndexScaledI2L, indIndex, indOffI, indOffL,
5337                indirectN, indIndexScaledOffsetIN, indIndexScaledOffsetLN, indIndexOffsetI2LN, indIndexScaledOffsetI2LN, indIndexScaledN, indIndexScaledI2LN, indIndexN, indOffIN, indOffLN);
5338  
5342 // iRegIorL2I is used for src inputs in rules for 32 bit int (I)
5343 // operations. it allows the src to be either an iRegI or a (ConvL2I
5344 // iRegL). in the latter case the l2i normally planted for a ConvL2I
5345 // can be elided because the 32-bit instruction will just employ the
5346 // lower 32 bits anyway.
5347 //
5348 // n.b. this does not elide all L2I conversions. if the truncated
5349 // value is consumed by more than one operation then the ConvL2I
5350 // cannot be bundled into the consuming nodes so an l2i gets planted
5351 // (actually a movw $dst $src) and the downstream instructions consume
5352 // the result of the l2i as an iRegI input. That's a shame since the
5353 // movw is actually redundant but its not too costly.
5354 
5355 opclass iRegIorL2I(iRegI, iRegL2I);
5356 
5357 //----------PIPELINE-----------------------------------------------------------
5358 // Rules which define the behavior of the target architectures pipeline.
5359 
5360 // For specific pipelines, eg A53, define the stages of that pipeline
5361 //pipe_desc(ISS, EX1, EX2, WR);
5362 #define ISS S0
5363 #define EX1 S1
5364 #define EX2 S2
5365 #define WR  S3
5366 
5367 // Integer ALU reg operation
5368 pipeline %{
5369 
attributes %{
  // ARM instructions are of fixed length
  fixed_size_instructions;        // Fixed size instructions
  max_instructions_per_bundle = 2;   // A53 = 2, A57 = 4
  // ARM instructions come in 32-bit word units
  instruction_unit_size = 4;         // An instruction is 4 bytes long
  instruction_fetch_unit_size = 64;  // The processor fetches one line
  instruction_fetch_units = 1;       // of 64 bytes

  // List of nop instructions
  nops( MachNop );
%}
5382 
5383 // We don't use an actual pipeline model so don't care about resources
5384 // or description. we do use pipeline classes to introduce fixed
5385 // latencies
5386 
5387 //----------RESOURCES----------------------------------------------------------
5388 // Resources are the functional units available to the machine
5389 
5390 resources( INS0, INS1, INS01 = INS0 | INS1,
5391            ALU0, ALU1, ALU = ALU0 | ALU1,
5392            MAC,
5393            DIV,
5394            BRANCH,
5395            LDST,
5396            NEON_FP);
5397 
5398 //----------PIPELINE DESCRIPTION-----------------------------------------------
5399 // Pipeline Description specifies the stages in the machine's pipeline
5400 
5401 // Define the pipeline as a generic 6 stage pipeline
5402 pipe_desc(S0, S1, S2, S3, S4, S5);
5403 
//----------PIPELINE CLASSES---------------------------------------------------
// Pipeline Classes describe the stages in which input and output are
// referenced by the hardware pipeline.
//
// Notation: "opnd : Sn(read|write)" records the stage at which an
// operand is consumed or produced; plain resource lines (e.g.
// "INS01 : ISS") record the functional unit an instruction occupies.

// FP two-source single-precision op: reads sources early, result
// available at S5 on the NEON/FP unit.
pipe_class fp_dop_reg_reg_s(vRegF dst, vRegF src1, vRegF src2)
%{
  single_instruction;
  src1   : S1(read);
  src2   : S2(read);
  dst    : S5(write);
  INS01  : ISS;
  NEON_FP : S5;
%}
5417 
5418 pipe_class fp_dop_reg_reg_d(vRegD dst, vRegD src1, vRegD src2)
5419 %{
5420   single_instruction;
5421   src1   : S1(read);
5422   src2   : S2(read);
5423   dst    : S5(write);
5424   INS01  : ISS;
5425   NEON_FP : S5;
5426 %}
5427 
5428 pipe_class fp_uop_s(vRegF dst, vRegF src)
5429 %{
5430   single_instruction;
5431   src    : S1(read);
5432   dst    : S5(write);
5433   INS01  : ISS;
5434   NEON_FP : S5;
5435 %}
5436 
5437 pipe_class fp_uop_d(vRegD dst, vRegD src)
5438 %{
5439   single_instruction;
5440   src    : S1(read);
5441   dst    : S5(write);
5442   INS01  : ISS;
5443   NEON_FP : S5;
5444 %}
5445 
5446 pipe_class fp_d2f(vRegF dst, vRegD src)
5447 %{
5448   single_instruction;
5449   src    : S1(read);
5450   dst    : S5(write);
5451   INS01  : ISS;
5452   NEON_FP : S5;
5453 %}
5454 
5455 pipe_class fp_f2d(vRegD dst, vRegF src)
5456 %{
5457   single_instruction;
5458   src    : S1(read);
5459   dst    : S5(write);
5460   INS01  : ISS;
5461   NEON_FP : S5;
5462 %}
5463 
5464 pipe_class fp_f2i(iRegINoSp dst, vRegF src)
5465 %{
5466   single_instruction;
5467   src    : S1(read);
5468   dst    : S5(write);
5469   INS01  : ISS;
5470   NEON_FP : S5;
5471 %}
5472 
5473 pipe_class fp_f2l(iRegLNoSp dst, vRegF src)
5474 %{
5475   single_instruction;
5476   src    : S1(read);
5477   dst    : S5(write);
5478   INS01  : ISS;
5479   NEON_FP : S5;
5480 %}
5481 
5482 pipe_class fp_i2f(vRegF dst, iRegIorL2I src)
5483 %{
5484   single_instruction;
5485   src    : S1(read);
5486   dst    : S5(write);
5487   INS01  : ISS;
5488   NEON_FP : S5;
5489 %}
5490 
5491 pipe_class fp_l2f(vRegF dst, iRegL src)
5492 %{
5493   single_instruction;
5494   src    : S1(read);
5495   dst    : S5(write);
5496   INS01  : ISS;
5497   NEON_FP : S5;
5498 %}
5499 
5500 pipe_class fp_d2i(iRegINoSp dst, vRegD src)
5501 %{
5502   single_instruction;
5503   src    : S1(read);
5504   dst    : S5(write);
5505   INS01  : ISS;
5506   NEON_FP : S5;
5507 %}
5508 
5509 pipe_class fp_d2l(iRegLNoSp dst, vRegD src)
5510 %{
5511   single_instruction;
5512   src    : S1(read);
5513   dst    : S5(write);
5514   INS01  : ISS;
5515   NEON_FP : S5;
5516 %}
5517 
5518 pipe_class fp_i2d(vRegD dst, iRegIorL2I src)
5519 %{
5520   single_instruction;
5521   src    : S1(read);
5522   dst    : S5(write);
5523   INS01  : ISS;
5524   NEON_FP : S5;
5525 %}
5526 
5527 pipe_class fp_l2d(vRegD dst, iRegIorL2I src)
5528 %{
5529   single_instruction;
5530   src    : S1(read);
5531   dst    : S5(write);
5532   INS01  : ISS;
5533   NEON_FP : S5;
5534 %}
5535 
5536 pipe_class fp_div_s(vRegF dst, vRegF src1, vRegF src2)
5537 %{
5538   single_instruction;
5539   src1   : S1(read);
5540   src2   : S2(read);
5541   dst    : S5(write);
5542   INS0   : ISS;
5543   NEON_FP : S5;
5544 %}
5545 
5546 pipe_class fp_div_d(vRegD dst, vRegD src1, vRegD src2)
5547 %{
5548   single_instruction;
5549   src1   : S1(read);
5550   src2   : S2(read);
5551   dst    : S5(write);
5552   INS0   : ISS;
5553   NEON_FP : S5;
5554 %}
5555 
5556 pipe_class fp_cond_reg_reg_s(vRegF dst, vRegF src1, vRegF src2, rFlagsReg cr)
5557 %{
5558   single_instruction;
5559   cr     : S1(read);
5560   src1   : S1(read);
5561   src2   : S1(read);
5562   dst    : S3(write);
5563   INS01  : ISS;
5564   NEON_FP : S3;
5565 %}
5566 
5567 pipe_class fp_cond_reg_reg_d(vRegD dst, vRegD src1, vRegD src2, rFlagsReg cr)
5568 %{
5569   single_instruction;
5570   cr     : S1(read);
5571   src1   : S1(read);
5572   src2   : S1(read);
5573   dst    : S3(write);
5574   INS01  : ISS;
5575   NEON_FP : S3;
5576 %}
5577 
5578 pipe_class fp_imm_s(vRegF dst)
5579 %{
5580   single_instruction;
5581   dst    : S3(write);
5582   INS01  : ISS;
5583   NEON_FP : S3;
5584 %}
5585 
5586 pipe_class fp_imm_d(vRegD dst)
5587 %{
5588   single_instruction;
5589   dst    : S3(write);
5590   INS01  : ISS;
5591   NEON_FP : S3;
5592 %}
5593 
5594 pipe_class fp_load_constant_s(vRegF dst)
5595 %{
5596   single_instruction;
5597   dst    : S4(write);
5598   INS01  : ISS;
5599   NEON_FP : S4;
5600 %}
5601 
5602 pipe_class fp_load_constant_d(vRegD dst)
5603 %{
5604   single_instruction;
5605   dst    : S4(write);
5606   INS01  : ISS;
5607   NEON_FP : S4;
5608 %}
5609 
5610 pipe_class vmul64(vecD dst, vecD src1, vecD src2)
5611 %{
5612   single_instruction;
5613   dst    : S5(write);
5614   src1   : S1(read);
5615   src2   : S1(read);
5616   INS01  : ISS;
5617   NEON_FP : S5;
5618 %}
5619 
5620 pipe_class vmul128(vecX dst, vecX src1, vecX src2)
5621 %{
5622   single_instruction;
5623   dst    : S5(write);
5624   src1   : S1(read);
5625   src2   : S1(read);
5626   INS0   : ISS;
5627   NEON_FP : S5;
5628 %}
5629 
5630 pipe_class vmla64(vecD dst, vecD src1, vecD src2)
5631 %{
5632   single_instruction;
5633   dst    : S5(write);
5634   src1   : S1(read);
5635   src2   : S1(read);
5636   dst    : S1(read);
5637   INS01  : ISS;
5638   NEON_FP : S5;
5639 %}
5640 
5641 pipe_class vmla128(vecX dst, vecX src1, vecX src2)
5642 %{
5643   single_instruction;
5644   dst    : S5(write);
5645   src1   : S1(read);
5646   src2   : S1(read);
5647   dst    : S1(read);
5648   INS0   : ISS;
5649   NEON_FP : S5;
5650 %}
5651 
5652 pipe_class vdop64(vecD dst, vecD src1, vecD src2)
5653 %{
5654   single_instruction;
5655   dst    : S4(write);
5656   src1   : S2(read);
5657   src2   : S2(read);
5658   INS01  : ISS;
5659   NEON_FP : S4;
5660 %}
5661 
5662 pipe_class vdop128(vecX dst, vecX src1, vecX src2)
5663 %{
5664   single_instruction;
5665   dst    : S4(write);
5666   src1   : S2(read);
5667   src2   : S2(read);
5668   INS0   : ISS;
5669   NEON_FP : S4;
5670 %}
5671 
5672 pipe_class vlogical64(vecD dst, vecD src1, vecD src2)
5673 %{
5674   single_instruction;
5675   dst    : S3(write);
5676   src1   : S2(read);
5677   src2   : S2(read);
5678   INS01  : ISS;
5679   NEON_FP : S3;
5680 %}
5681 
5682 pipe_class vlogical128(vecX dst, vecX src1, vecX src2)
5683 %{
5684   single_instruction;
5685   dst    : S3(write);
5686   src1   : S2(read);
5687   src2   : S2(read);
5688   INS0   : ISS;
5689   NEON_FP : S3;
5690 %}
5691 
5692 pipe_class vshift64(vecD dst, vecD src, vecX shift)
5693 %{
5694   single_instruction;
5695   dst    : S3(write);
5696   src    : S1(read);
5697   shift  : S1(read);
5698   INS01  : ISS;
5699   NEON_FP : S3;
5700 %}
5701 
5702 pipe_class vshift128(vecX dst, vecX src, vecX shift)
5703 %{
5704   single_instruction;
5705   dst    : S3(write);
5706   src    : S1(read);
5707   shift  : S1(read);
5708   INS0   : ISS;
5709   NEON_FP : S3;
5710 %}
5711 
5712 pipe_class vshift64_imm(vecD dst, vecD src, immI shift)
5713 %{
5714   single_instruction;
5715   dst    : S3(write);
5716   src    : S1(read);
5717   INS01  : ISS;
5718   NEON_FP : S3;
5719 %}
5720 
5721 pipe_class vshift128_imm(vecX dst, vecX src, immI shift)
5722 %{
5723   single_instruction;
5724   dst    : S3(write);
5725   src    : S1(read);
5726   INS0   : ISS;
5727   NEON_FP : S3;
5728 %}
5729 
5730 pipe_class vdop_fp64(vecD dst, vecD src1, vecD src2)
5731 %{
5732   single_instruction;
5733   dst    : S5(write);
5734   src1   : S1(read);
5735   src2   : S1(read);
5736   INS01  : ISS;
5737   NEON_FP : S5;
5738 %}
5739 
5740 pipe_class vdop_fp128(vecX dst, vecX src1, vecX src2)
5741 %{
5742   single_instruction;
5743   dst    : S5(write);
5744   src1   : S1(read);
5745   src2   : S1(read);
5746   INS0   : ISS;
5747   NEON_FP : S5;
5748 %}
5749 
5750 pipe_class vmuldiv_fp64(vecD dst, vecD src1, vecD src2)
5751 %{
5752   single_instruction;
5753   dst    : S5(write);
5754   src1   : S1(read);
5755   src2   : S1(read);
5756   INS0   : ISS;
5757   NEON_FP : S5;
5758 %}
5759 
5760 pipe_class vmuldiv_fp128(vecX dst, vecX src1, vecX src2)
5761 %{
5762   single_instruction;
5763   dst    : S5(write);
5764   src1   : S1(read);
5765   src2   : S1(read);
5766   INS0   : ISS;
5767   NEON_FP : S5;
5768 %}
5769 
5770 pipe_class vsqrt_fp128(vecX dst, vecX src)
5771 %{
5772   single_instruction;
5773   dst    : S5(write);
5774   src    : S1(read);
5775   INS0   : ISS;
5776   NEON_FP : S5;
5777 %}
5778 
5779 pipe_class vunop_fp64(vecD dst, vecD src)
5780 %{
5781   single_instruction;
5782   dst    : S5(write);
5783   src    : S1(read);
5784   INS01  : ISS;
5785   NEON_FP : S5;
5786 %}
5787 
5788 pipe_class vunop_fp128(vecX dst, vecX src)
5789 %{
5790   single_instruction;
5791   dst    : S5(write);
5792   src    : S1(read);
5793   INS0   : ISS;
5794   NEON_FP : S5;
5795 %}
5796 
5797 pipe_class vdup_reg_reg64(vecD dst, iRegI src)
5798 %{
5799   single_instruction;
5800   dst    : S3(write);
5801   src    : S1(read);
5802   INS01  : ISS;
5803   NEON_FP : S3;
5804 %}
5805 
5806 pipe_class vdup_reg_reg128(vecX dst, iRegI src)
5807 %{
5808   single_instruction;
5809   dst    : S3(write);
5810   src    : S1(read);
5811   INS01  : ISS;
5812   NEON_FP : S3;
5813 %}
5814 
5815 pipe_class vdup_reg_freg64(vecD dst, vRegF src)
5816 %{
5817   single_instruction;
5818   dst    : S3(write);
5819   src    : S1(read);
5820   INS01  : ISS;
5821   NEON_FP : S3;
5822 %}
5823 
5824 pipe_class vdup_reg_freg128(vecX dst, vRegF src)
5825 %{
5826   single_instruction;
5827   dst    : S3(write);
5828   src    : S1(read);
5829   INS01  : ISS;
5830   NEON_FP : S3;
5831 %}
5832 
5833 pipe_class vdup_reg_dreg128(vecX dst, vRegD src)
5834 %{
5835   single_instruction;
5836   dst    : S3(write);
5837   src    : S1(read);
5838   INS01  : ISS;
5839   NEON_FP : S3;
5840 %}
5841 
5842 pipe_class vmovi_reg_imm64(vecD dst)
5843 %{
5844   single_instruction;
5845   dst    : S3(write);
5846   INS01  : ISS;
5847   NEON_FP : S3;
5848 %}
5849 
5850 pipe_class vmovi_reg_imm128(vecX dst)
5851 %{
5852   single_instruction;
5853   dst    : S3(write);
5854   INS0   : ISS;
5855   NEON_FP : S3;
5856 %}
5857 
5858 pipe_class vload_reg_mem64(vecD dst, vmem8 mem)
5859 %{
5860   single_instruction;
5861   dst    : S5(write);
5862   mem    : ISS(read);
5863   INS01  : ISS;
5864   NEON_FP : S3;
5865 %}
5866 
5867 pipe_class vload_reg_mem128(vecX dst, vmem16 mem)
5868 %{
5869   single_instruction;
5870   dst    : S5(write);
5871   mem    : ISS(read);
5872   INS01  : ISS;
5873   NEON_FP : S3;
5874 %}
5875 
5876 pipe_class vstore_reg_mem64(vecD src, vmem8 mem)
5877 %{
5878   single_instruction;
5879   mem    : ISS(read);
5880   src    : S2(read);
5881   INS01  : ISS;
5882   NEON_FP : S3;
5883 %}
5884 
5885 pipe_class vstore_reg_mem128(vecD src, vmem16 mem)
5886 %{
5887   single_instruction;
5888   mem    : ISS(read);
5889   src    : S2(read);
5890   INS01  : ISS;
5891   NEON_FP : S3;
5892 %}
5893 
5894 //------- Integer ALU operations --------------------------
5895 
5896 // Integer ALU reg-reg operation
5897 // Operands needed in EX1, result generated in EX2
5898 // Eg.  ADD     x0, x1, x2
5899 pipe_class ialu_reg_reg(iRegI dst, iRegI src1, iRegI src2)
5900 %{
5901   single_instruction;
5902   dst    : EX2(write);
5903   src1   : EX1(read);
5904   src2   : EX1(read);
5905   INS01  : ISS; // Dual issue as instruction 0 or 1
5906   ALU    : EX2;
5907 %}
5908 
5909 // Integer ALU reg-reg operation with constant shift
5910 // Shifted register must be available in LATE_ISS instead of EX1
5911 // Eg.  ADD     x0, x1, x2, LSL #2
5912 pipe_class ialu_reg_reg_shift(iRegI dst, iRegI src1, iRegI src2, immI shift)
5913 %{
5914   single_instruction;
5915   dst    : EX2(write);
5916   src1   : EX1(read);
5917   src2   : ISS(read);
5918   INS01  : ISS;
5919   ALU    : EX2;
5920 %}
5921 
5922 // Integer ALU reg operation with constant shift
5923 // Eg.  LSL     x0, x1, #shift
5924 pipe_class ialu_reg_shift(iRegI dst, iRegI src1)
5925 %{
5926   single_instruction;
5927   dst    : EX2(write);
5928   src1   : ISS(read);
5929   INS01  : ISS;
5930   ALU    : EX2;
5931 %}
5932 
5933 // Integer ALU reg-reg operation with variable shift
5934 // Both operands must be available in LATE_ISS instead of EX1
5935 // Result is available in EX1 instead of EX2
5936 // Eg.  LSLV    x0, x1, x2
5937 pipe_class ialu_reg_reg_vshift(iRegI dst, iRegI src1, iRegI src2)
5938 %{
5939   single_instruction;
5940   dst    : EX1(write);
5941   src1   : ISS(read);
5942   src2   : ISS(read);
5943   INS01  : ISS;
5944   ALU    : EX1;
5945 %}
5946 
5947 // Integer ALU reg-reg operation with extract
5948 // As for _vshift above, but result generated in EX2
5949 // Eg.  EXTR    x0, x1, x2, #N
5950 pipe_class ialu_reg_reg_extr(iRegI dst, iRegI src1, iRegI src2)
5951 %{
5952   single_instruction;
5953   dst    : EX2(write);
5954   src1   : ISS(read);
5955   src2   : ISS(read);
5956   INS1   : ISS; // Can only dual issue as Instruction 1
5957   ALU    : EX1;
5958 %}
5959 
5960 // Integer ALU reg operation
5961 // Eg.  NEG     x0, x1
5962 pipe_class ialu_reg(iRegI dst, iRegI src)
5963 %{
5964   single_instruction;
5965   dst    : EX2(write);
5966   src    : EX1(read);
5967   INS01  : ISS;
5968   ALU    : EX2;
5969 %}
5970 
5971 // Integer ALU reg mmediate operation
5972 // Eg.  ADD     x0, x1, #N
5973 pipe_class ialu_reg_imm(iRegI dst, iRegI src1)
5974 %{
5975   single_instruction;
5976   dst    : EX2(write);
5977   src1   : EX1(read);
5978   INS01  : ISS;
5979   ALU    : EX2;
5980 %}
5981 
5982 // Integer ALU immediate operation (no source operands)
5983 // Eg.  MOV     x0, #N
5984 pipe_class ialu_imm(iRegI dst)
5985 %{
5986   single_instruction;
5987   dst    : EX1(write);
5988   INS01  : ISS;
5989   ALU    : EX1;
5990 %}
5991 
5992 //------- Compare operation -------------------------------
5993 
5994 // Compare reg-reg
5995 // Eg.  CMP     x0, x1
5996 pipe_class icmp_reg_reg(rFlagsReg cr, iRegI op1, iRegI op2)
5997 %{
5998   single_instruction;
5999 //  fixed_latency(16);
6000   cr     : EX2(write);
6001   op1    : EX1(read);
6002   op2    : EX1(read);
6003   INS01  : ISS;
6004   ALU    : EX2;
6005 %}
6006 
6007 // Compare reg-reg
6008 // Eg.  CMP     x0, #N
6009 pipe_class icmp_reg_imm(rFlagsReg cr, iRegI op1)
6010 %{
6011   single_instruction;
6012 //  fixed_latency(16);
6013   cr     : EX2(write);
6014   op1    : EX1(read);
6015   INS01  : ISS;
6016   ALU    : EX2;
6017 %}
6018 
6019 //------- Conditional instructions ------------------------
6020 
6021 // Conditional no operands
6022 // Eg.  CSINC   x0, zr, zr, <cond>
6023 pipe_class icond_none(iRegI dst, rFlagsReg cr)
6024 %{
6025   single_instruction;
6026   cr     : EX1(read);
6027   dst    : EX2(write);
6028   INS01  : ISS;
6029   ALU    : EX2;
6030 %}
6031 
6032 // Conditional 2 operand
6033 // EG.  CSEL    X0, X1, X2, <cond>
6034 pipe_class icond_reg_reg(iRegI dst, iRegI src1, iRegI src2, rFlagsReg cr)
6035 %{
6036   single_instruction;
6037   cr     : EX1(read);
6038   src1   : EX1(read);
6039   src2   : EX1(read);
6040   dst    : EX2(write);
6041   INS01  : ISS;
6042   ALU    : EX2;
6043 %}
6044 
6045 // Conditional 2 operand
6046 // EG.  CSEL    X0, X1, X2, <cond>
6047 pipe_class icond_reg(iRegI dst, iRegI src, rFlagsReg cr)
6048 %{
6049   single_instruction;
6050   cr     : EX1(read);
6051   src    : EX1(read);
6052   dst    : EX2(write);
6053   INS01  : ISS;
6054   ALU    : EX2;
6055 %}
6056 
6057 //------- Multiply pipeline operations --------------------
6058 
6059 // Multiply reg-reg
6060 // Eg.  MUL     w0, w1, w2
6061 pipe_class imul_reg_reg(iRegI dst, iRegI src1, iRegI src2)
6062 %{
6063   single_instruction;
6064   dst    : WR(write);
6065   src1   : ISS(read);
6066   src2   : ISS(read);
6067   INS01  : ISS;
6068   MAC    : WR;
6069 %}
6070 
6071 // Multiply accumulate
6072 // Eg.  MADD    w0, w1, w2, w3
6073 pipe_class imac_reg_reg(iRegI dst, iRegI src1, iRegI src2, iRegI src3)
6074 %{
6075   single_instruction;
6076   dst    : WR(write);
6077   src1   : ISS(read);
6078   src2   : ISS(read);
6079   src3   : ISS(read);
6080   INS01  : ISS;
6081   MAC    : WR;
6082 %}
6083 
6084 // Eg.  MUL     w0, w1, w2
6085 pipe_class lmul_reg_reg(iRegI dst, iRegI src1, iRegI src2)
6086 %{
6087   single_instruction;
6088   fixed_latency(3); // Maximum latency for 64 bit mul
6089   dst    : WR(write);
6090   src1   : ISS(read);
6091   src2   : ISS(read);
6092   INS01  : ISS;
6093   MAC    : WR;
6094 %}
6095 
6096 // Multiply accumulate
6097 // Eg.  MADD    w0, w1, w2, w3
6098 pipe_class lmac_reg_reg(iRegI dst, iRegI src1, iRegI src2, iRegI src3)
6099 %{
6100   single_instruction;
6101   fixed_latency(3); // Maximum latency for 64 bit mul
6102   dst    : WR(write);
6103   src1   : ISS(read);
6104   src2   : ISS(read);
6105   src3   : ISS(read);
6106   INS01  : ISS;
6107   MAC    : WR;
6108 %}
6109 
6110 //------- Divide pipeline operations --------------------
6111 
6112 // Eg.  SDIV    w0, w1, w2
6113 pipe_class idiv_reg_reg(iRegI dst, iRegI src1, iRegI src2)
6114 %{
6115   single_instruction;
6116   fixed_latency(8); // Maximum latency for 32 bit divide
6117   dst    : WR(write);
6118   src1   : ISS(read);
6119   src2   : ISS(read);
6120   INS0   : ISS; // Can only dual issue as instruction 0
6121   DIV    : WR;
6122 %}
6123 
6124 // Eg.  SDIV    x0, x1, x2
6125 pipe_class ldiv_reg_reg(iRegI dst, iRegI src1, iRegI src2)
6126 %{
6127   single_instruction;
6128   fixed_latency(16); // Maximum latency for 64 bit divide
6129   dst    : WR(write);
6130   src1   : ISS(read);
6131   src2   : ISS(read);
6132   INS0   : ISS; // Can only dual issue as instruction 0
6133   DIV    : WR;
6134 %}
6135 
6136 //------- Load pipeline operations ------------------------
6137 
6138 // Load - prefetch
6139 // Eg.  PFRM    <mem>
6140 pipe_class iload_prefetch(memory mem)
6141 %{
6142   single_instruction;
6143   mem    : ISS(read);
6144   INS01  : ISS;
6145   LDST   : WR;
6146 %}
6147 
6148 // Load - reg, mem
6149 // Eg.  LDR     x0, <mem>
6150 pipe_class iload_reg_mem(iRegI dst, memory mem)
6151 %{
6152   single_instruction;
6153   dst    : WR(write);
6154   mem    : ISS(read);
6155   INS01  : ISS;
6156   LDST   : WR;
6157 %}
6158 
6159 // Load - reg, reg
6160 // Eg.  LDR     x0, [sp, x1]
6161 pipe_class iload_reg_reg(iRegI dst, iRegI src)
6162 %{
6163   single_instruction;
6164   dst    : WR(write);
6165   src    : ISS(read);
6166   INS01  : ISS;
6167   LDST   : WR;
6168 %}
6169 
6170 //------- Store pipeline operations -----------------------
6171 
6172 // Store - zr, mem
6173 // Eg.  STR     zr, <mem>
6174 pipe_class istore_mem(memory mem)
6175 %{
6176   single_instruction;
6177   mem    : ISS(read);
6178   INS01  : ISS;
6179   LDST   : WR;
6180 %}
6181 
6182 // Store - reg, mem
6183 // Eg.  STR     x0, <mem>
6184 pipe_class istore_reg_mem(iRegI src, memory mem)
6185 %{
6186   single_instruction;
6187   mem    : ISS(read);
6188   src    : EX2(read);
6189   INS01  : ISS;
6190   LDST   : WR;
6191 %}
6192 
6193 // Store - reg, reg
6194 // Eg. STR      x0, [sp, x1]
6195 pipe_class istore_reg_reg(iRegI dst, iRegI src)
6196 %{
6197   single_instruction;
6198   dst    : ISS(read);
6199   src    : EX2(read);
6200   INS01  : ISS;
6201   LDST   : WR;
6202 %}
6203 
6204 //------- Store pipeline operations -----------------------
6205 
6206 // Branch
6207 pipe_class pipe_branch()
6208 %{
6209   single_instruction;
6210   INS01  : ISS;
6211   BRANCH : EX1;
6212 %}
6213 
6214 // Conditional branch
6215 pipe_class pipe_branch_cond(rFlagsReg cr)
6216 %{
6217   single_instruction;
6218   cr     : EX1(read);
6219   INS01  : ISS;
6220   BRANCH : EX1;
6221 %}
6222 
6223 // Compare & Branch
6224 // EG.  CBZ/CBNZ
6225 pipe_class pipe_cmp_branch(iRegI op1)
6226 %{
6227   single_instruction;
6228   op1    : EX1(read);
6229   INS01  : ISS;
6230   BRANCH : EX1;
6231 %}
6232 
6233 //------- Synchronisation operations ----------------------
6234 
6235 // Any operation requiring serialization.
6236 // EG.  DMB/Atomic Ops/Load Acquire/Str Release
6237 pipe_class pipe_serial()
6238 %{
6239   single_instruction;
6240   force_serialization;
6241   fixed_latency(16);
6242   INS01  : ISS(2); // Cannot dual issue with any other instruction
6243   LDST   : WR;
6244 %}
6245 
6246 // Generic big/slow expanded idiom - also serialized
6247 pipe_class pipe_slow()
6248 %{
6249   instruction_count(10);
6250   multiple_bundles;
6251   force_serialization;
6252   fixed_latency(16);
6253   INS01  : ISS(2); // Cannot dual issue with any other instruction
6254   LDST   : WR;
6255 %}
6256 
6257 // Empty pipeline class
6258 pipe_class pipe_class_empty()
6259 %{
6260   single_instruction;
6261   fixed_latency(0);
6262 %}
6263 
6264 // Default pipeline class.
6265 pipe_class pipe_class_default()
6266 %{
6267   single_instruction;
6268   fixed_latency(2);
6269 %}
6270 
6271 // Pipeline class for compares.
6272 pipe_class pipe_class_compare()
6273 %{
6274   single_instruction;
6275   fixed_latency(16);
6276 %}
6277 
6278 // Pipeline class for memory operations.
6279 pipe_class pipe_class_memory()
6280 %{
6281   single_instruction;
6282   fixed_latency(16);
6283 %}
6284 
6285 // Pipeline class for call.
6286 pipe_class pipe_class_call()
6287 %{
6288   single_instruction;
6289   fixed_latency(100);
6290 %}
6291 
6292 // Define the class for the Nop node.
6293 define %{
6294    MachNop = pipe_class_empty;
6295 %}
6296 
6297 %}
6298 //----------INSTRUCTIONS-------------------------------------------------------
6299 //
6300 // match      -- States which machine-independent subtree may be replaced
6301 //               by this instruction.
6302 // ins_cost   -- The estimated cost of this instruction is used by instruction
6303 //               selection to identify a minimum cost tree of machine
6304 //               instructions that matches a tree of machine-independent
6305 //               instructions.
6306 // format     -- A string providing the disassembly for this instruction.
6307 //               The value of an instruction's operand may be inserted
6308 //               by referring to it with a '$' prefix.
6309 // opcode     -- Three instruction opcodes may be provided.  These are referred
6310 //               to within an encode class as $primary, $secondary, and $tertiary
//               respectively.  The primary opcode is commonly used to
6312 //               indicate the type of machine instruction, while secondary
6313 //               and tertiary are often used for prefix options or addressing
6314 //               modes.
6315 // ins_encode -- A list of encode classes with parameters. The encode class
6316 //               name must have been defined in an 'enc_class' specification
6317 //               in the encode section of the architecture description.
6318 
6319 // ============================================================================
6320 // Memory (Load/Store) Instructions
6321 
6322 // Load Instructions
6323 
6324 // Load Byte (8 bit signed)
6325 instruct loadB(iRegINoSp dst, memory mem)
6326 %{
6327   match(Set dst (LoadB mem));
6328   predicate(!needs_acquiring_load(n));
6329 
6330   ins_cost(4 * INSN_COST);
6331   format %{ "ldrsbw  $dst, $mem\t# byte" %}
6332 
6333   ins_encode(aarch64_enc_ldrsbw(dst, mem));
6334 
6335   ins_pipe(iload_reg_mem);
6336 %}
6337 
6338 // Load Byte (8 bit signed) into long
6339 instruct loadB2L(iRegLNoSp dst, memory mem)
6340 %{
6341   match(Set dst (ConvI2L (LoadB mem)));
6342   predicate(!needs_acquiring_load(n->in(1)));
6343 
6344   ins_cost(4 * INSN_COST);
6345   format %{ "ldrsb  $dst, $mem\t# byte" %}
6346 
6347   ins_encode(aarch64_enc_ldrsb(dst, mem));
6348 
6349   ins_pipe(iload_reg_mem);
6350 %}
6351 
6352 // Load Byte (8 bit unsigned)
6353 instruct loadUB(iRegINoSp dst, memory mem)
6354 %{
6355   match(Set dst (LoadUB mem));
6356   predicate(!needs_acquiring_load(n));
6357 
6358   ins_cost(4 * INSN_COST);
6359   format %{ "ldrbw  $dst, $mem\t# byte" %}
6360 
6361   ins_encode(aarch64_enc_ldrb(dst, mem));
6362 
6363   ins_pipe(iload_reg_mem);
6364 %}
6365 
6366 // Load Byte (8 bit unsigned) into long
6367 instruct loadUB2L(iRegLNoSp dst, memory mem)
6368 %{
6369   match(Set dst (ConvI2L (LoadUB mem)));
6370   predicate(!needs_acquiring_load(n->in(1)));
6371 
6372   ins_cost(4 * INSN_COST);
6373   format %{ "ldrb  $dst, $mem\t# byte" %}
6374 
6375   ins_encode(aarch64_enc_ldrb(dst, mem));
6376 
6377   ins_pipe(iload_reg_mem);
6378 %}
6379 
6380 // Load Short (16 bit signed)
6381 instruct loadS(iRegINoSp dst, memory mem)
6382 %{
6383   match(Set dst (LoadS mem));
6384   predicate(!needs_acquiring_load(n));
6385 
6386   ins_cost(4 * INSN_COST);
6387   format %{ "ldrshw  $dst, $mem\t# short" %}
6388 
6389   ins_encode(aarch64_enc_ldrshw(dst, mem));
6390 
6391   ins_pipe(iload_reg_mem);
6392 %}
6393 
6394 // Load Short (16 bit signed) into long
6395 instruct loadS2L(iRegLNoSp dst, memory mem)
6396 %{
6397   match(Set dst (ConvI2L (LoadS mem)));
6398   predicate(!needs_acquiring_load(n->in(1)));
6399 
6400   ins_cost(4 * INSN_COST);
6401   format %{ "ldrsh  $dst, $mem\t# short" %}
6402 
6403   ins_encode(aarch64_enc_ldrsh(dst, mem));
6404 
6405   ins_pipe(iload_reg_mem);
6406 %}
6407 
6408 // Load Char (16 bit unsigned)
6409 instruct loadUS(iRegINoSp dst, memory mem)
6410 %{
6411   match(Set dst (LoadUS mem));
6412   predicate(!needs_acquiring_load(n));
6413 
6414   ins_cost(4 * INSN_COST);
6415   format %{ "ldrh  $dst, $mem\t# short" %}
6416 
6417   ins_encode(aarch64_enc_ldrh(dst, mem));
6418 
6419   ins_pipe(iload_reg_mem);
6420 %}
6421 
6422 // Load Short/Char (16 bit unsigned) into long
6423 instruct loadUS2L(iRegLNoSp dst, memory mem)
6424 %{
6425   match(Set dst (ConvI2L (LoadUS mem)));
6426   predicate(!needs_acquiring_load(n->in(1)));
6427 
6428   ins_cost(4 * INSN_COST);
6429   format %{ "ldrh  $dst, $mem\t# short" %}
6430 
6431   ins_encode(aarch64_enc_ldrh(dst, mem));
6432 
6433   ins_pipe(iload_reg_mem);
6434 %}
6435 
6436 // Load Integer (32 bit signed)
6437 instruct loadI(iRegINoSp dst, memory mem)
6438 %{
6439   match(Set dst (LoadI mem));
6440   predicate(!needs_acquiring_load(n));
6441 
6442   ins_cost(4 * INSN_COST);
6443   format %{ "ldrw  $dst, $mem\t# int" %}
6444 
6445   ins_encode(aarch64_enc_ldrw(dst, mem));
6446 
6447   ins_pipe(iload_reg_mem);
6448 %}
6449 
6450 // Load Integer (32 bit signed) into long
6451 instruct loadI2L(iRegLNoSp dst, memory mem)
6452 %{
6453   match(Set dst (ConvI2L (LoadI mem)));
6454   predicate(!needs_acquiring_load(n->in(1)));
6455 
6456   ins_cost(4 * INSN_COST);
6457   format %{ "ldrsw  $dst, $mem\t# int" %}
6458 
6459   ins_encode(aarch64_enc_ldrsw(dst, mem));
6460 
6461   ins_pipe(iload_reg_mem);
6462 %}
6463 
6464 // Load Integer (32 bit unsigned) into long
6465 instruct loadUI2L(iRegLNoSp dst, memory mem, immL_32bits mask)
6466 %{
6467   match(Set dst (AndL (ConvI2L (LoadI mem)) mask));
6468   predicate(!needs_acquiring_load(n->in(1)->in(1)->as_Load()));
6469 
6470   ins_cost(4 * INSN_COST);
6471   format %{ "ldrw  $dst, $mem\t# int" %}
6472 
6473   ins_encode(aarch64_enc_ldrw(dst, mem));
6474 
6475   ins_pipe(iload_reg_mem);
6476 %}
6477 
6478 // Load Long (64 bit signed)
6479 instruct loadL(iRegLNoSp dst, memory mem)
6480 %{
6481   match(Set dst (LoadL mem));
6482   predicate(!needs_acquiring_load(n));
6483 
6484   ins_cost(4 * INSN_COST);
6485   format %{ "ldr  $dst, $mem\t# int" %}
6486 
6487   ins_encode(aarch64_enc_ldr(dst, mem));
6488 
6489   ins_pipe(iload_reg_mem);
6490 %}
6491 
6492 // Load Range
6493 instruct loadRange(iRegINoSp dst, memory mem)
6494 %{
6495   match(Set dst (LoadRange mem));
6496 
6497   ins_cost(4 * INSN_COST);
6498   format %{ "ldrw  $dst, $mem\t# range" %}
6499 
6500   ins_encode(aarch64_enc_ldrw(dst, mem));
6501 
6502   ins_pipe(iload_reg_mem);
6503 %}
6504 
6505 // Load Pointer
6506 instruct loadP(iRegPNoSp dst, memory mem)
6507 %{
6508   match(Set dst (LoadP mem));
6509   predicate(!needs_acquiring_load(n));
6510 
6511   ins_cost(4 * INSN_COST);
6512   format %{ "ldr  $dst, $mem\t# ptr" %}
6513 
6514   ins_encode(aarch64_enc_ldr(dst, mem));
6515 
6516   ins_pipe(iload_reg_mem);
6517 %}
6518 
6519 // Load Compressed Pointer
6520 instruct loadN(iRegNNoSp dst, memory mem)
6521 %{
6522   match(Set dst (LoadN mem));
6523   predicate(!needs_acquiring_load(n));
6524 
6525   ins_cost(4 * INSN_COST);
6526   format %{ "ldrw  $dst, $mem\t# compressed ptr" %}
6527 
6528   ins_encode(aarch64_enc_ldrw(dst, mem));
6529 
6530   ins_pipe(iload_reg_mem);
6531 %}
6532 
6533 // Load Klass Pointer
6534 instruct loadKlass(iRegPNoSp dst, memory mem)
6535 %{
6536   match(Set dst (LoadKlass mem));
6537   predicate(!needs_acquiring_load(n));
6538 
6539   ins_cost(4 * INSN_COST);
6540   format %{ "ldr  $dst, $mem\t# class" %}
6541 
6542   ins_encode(aarch64_enc_ldr(dst, mem));
6543 
6544   ins_pipe(iload_reg_mem);
6545 %}
6546 
6547 // Load Narrow Klass Pointer
6548 instruct loadNKlass(iRegNNoSp dst, memory mem)
6549 %{
6550   match(Set dst (LoadNKlass mem));
6551   predicate(!needs_acquiring_load(n));
6552 
6553   ins_cost(4 * INSN_COST);
6554   format %{ "ldrw  $dst, $mem\t# compressed class ptr" %}
6555 
6556   ins_encode(aarch64_enc_ldrw(dst, mem));
6557 
6558   ins_pipe(iload_reg_mem);
6559 %}
6560 
6561 // Load Float
6562 instruct loadF(vRegF dst, memory mem)
6563 %{
6564   match(Set dst (LoadF mem));
6565   predicate(!needs_acquiring_load(n));
6566 
6567   ins_cost(4 * INSN_COST);
6568   format %{ "ldrs  $dst, $mem\t# float" %}
6569 
6570   ins_encode( aarch64_enc_ldrs(dst, mem) );
6571 
6572   ins_pipe(pipe_class_memory);
6573 %}
6574 
6575 // Load Double
6576 instruct loadD(vRegD dst, memory mem)
6577 %{
6578   match(Set dst (LoadD mem));
6579   predicate(!needs_acquiring_load(n));
6580 
6581   ins_cost(4 * INSN_COST);
6582   format %{ "ldrd  $dst, $mem\t# double" %}
6583 
6584   ins_encode( aarch64_enc_ldrd(dst, mem) );
6585 
6586   ins_pipe(pipe_class_memory);
6587 %}
6588 
6589 
6590 // Load Int Constant
6591 instruct loadConI(iRegINoSp dst, immI src)
6592 %{
6593   match(Set dst src);
6594 
6595   ins_cost(INSN_COST);
6596   format %{ "mov $dst, $src\t# int" %}
6597 
6598   ins_encode( aarch64_enc_movw_imm(dst, src) );
6599 
6600   ins_pipe(ialu_imm);
6601 %}
6602 
6603 // Load Long Constant
6604 instruct loadConL(iRegLNoSp dst, immL src)
6605 %{
6606   match(Set dst src);
6607 
6608   ins_cost(INSN_COST);
6609   format %{ "mov $dst, $src\t# long" %}
6610 
6611   ins_encode( aarch64_enc_mov_imm(dst, src) );
6612 
6613   ins_pipe(ialu_imm);
6614 %}
6615 
6616 // Load Pointer Constant
6617 
6618 instruct loadConP(iRegPNoSp dst, immP con)
6619 %{
6620   match(Set dst con);
6621 
6622   ins_cost(INSN_COST * 4);
6623   format %{
6624     "mov  $dst, $con\t# ptr\n\t"
6625   %}
6626 
6627   ins_encode(aarch64_enc_mov_p(dst, con));
6628 
6629   ins_pipe(ialu_imm);
6630 %}
6631 
6632 // Load Null Pointer Constant
6633 
6634 instruct loadConP0(iRegPNoSp dst, immP0 con)
6635 %{
6636   match(Set dst con);
6637 
6638   ins_cost(INSN_COST);
6639   format %{ "mov  $dst, $con\t# NULL ptr" %}
6640 
6641   ins_encode(aarch64_enc_mov_p0(dst, con));
6642 
6643   ins_pipe(ialu_imm);
6644 %}
6645 
6646 // Load Pointer Constant One
6647 
6648 instruct loadConP1(iRegPNoSp dst, immP_1 con)
6649 %{
6650   match(Set dst con);
6651 
6652   ins_cost(INSN_COST);
6653   format %{ "mov  $dst, $con\t# NULL ptr" %}
6654 
6655   ins_encode(aarch64_enc_mov_p1(dst, con));
6656 
6657   ins_pipe(ialu_imm);
6658 %}
6659 
6660 // Load Poll Page Constant
6661 
6662 instruct loadConPollPage(iRegPNoSp dst, immPollPage con)
6663 %{
6664   match(Set dst con);
6665 
6666   ins_cost(INSN_COST);
6667   format %{ "adr  $dst, $con\t# Poll Page Ptr" %}
6668 
6669   ins_encode(aarch64_enc_mov_poll_page(dst, con));
6670 
6671   ins_pipe(ialu_imm);
6672 %}
6673 
6674 // Load Byte Map Base Constant
6675 
6676 instruct loadByteMapBase(iRegPNoSp dst, immByteMapBase con)
6677 %{
6678   match(Set dst con);
6679 
6680   ins_cost(INSN_COST);
6681   format %{ "adr  $dst, $con\t# Byte Map Base" %}
6682 
6683   ins_encode(aarch64_enc_mov_byte_map_base(dst, con));
6684 
6685   ins_pipe(ialu_imm);
6686 %}
6687 
6688 // Load Narrow Pointer Constant
6689 
6690 instruct loadConN(iRegNNoSp dst, immN con)
6691 %{
6692   match(Set dst con);
6693 
6694   ins_cost(INSN_COST * 4);
6695   format %{ "mov  $dst, $con\t# compressed ptr" %}
6696 
6697   ins_encode(aarch64_enc_mov_n(dst, con));
6698 
6699   ins_pipe(ialu_imm);
6700 %}
6701 
6702 // Load Narrow Null Pointer Constant
6703 
6704 instruct loadConN0(iRegNNoSp dst, immN0 con)
6705 %{
6706   match(Set dst con);
6707 
6708   ins_cost(INSN_COST);
6709   format %{ "mov  $dst, $con\t# compressed NULL ptr" %}
6710 
6711   ins_encode(aarch64_enc_mov_n0(dst, con));
6712 
6713   ins_pipe(ialu_imm);
6714 %}
6715 
6716 // Load Narrow Klass Constant
6717 
6718 instruct loadConNKlass(iRegNNoSp dst, immNKlass con)
6719 %{
6720   match(Set dst con);
6721 
6722   ins_cost(INSN_COST);
6723   format %{ "mov  $dst, $con\t# compressed klass ptr" %}
6724 
6725   ins_encode(aarch64_enc_mov_nk(dst, con));
6726 
6727   ins_pipe(ialu_imm);
6728 %}
6729 
6730 // Load Packed Float Constant
6731 
6732 instruct loadConF_packed(vRegF dst, immFPacked con) %{
6733   match(Set dst con);
6734   ins_cost(INSN_COST * 4);
6735   format %{ "fmovs  $dst, $con"%}
6736   ins_encode %{
6737     __ fmovs(as_FloatRegister($dst$$reg), (double)$con$$constant);
6738   %}
6739 
6740   ins_pipe(fp_imm_s);
6741 %}
6742 
6743 // Load Float Constant
6744 
6745 instruct loadConF(vRegF dst, immF con) %{
6746   match(Set dst con);
6747 
6748   ins_cost(INSN_COST * 4);
6749 
6750   format %{
6751     "ldrs $dst, [$constantaddress]\t# load from constant table: float=$con\n\t"
6752   %}
6753 
6754   ins_encode %{
6755     __ ldrs(as_FloatRegister($dst$$reg), $constantaddress($con));
6756   %}
6757 
6758   ins_pipe(fp_load_constant_s);
6759 %}
6760 
6761 // Load Packed Double Constant
6762 
6763 instruct loadConD_packed(vRegD dst, immDPacked con) %{
6764   match(Set dst con);
6765   ins_cost(INSN_COST);
6766   format %{ "fmovd  $dst, $con"%}
6767   ins_encode %{
6768     __ fmovd(as_FloatRegister($dst$$reg), $con$$constant);
6769   %}
6770 
6771   ins_pipe(fp_imm_d);
6772 %}
6773 
6774 // Load Double Constant
6775 
6776 instruct loadConD(vRegD dst, immD con) %{
6777   match(Set dst con);
6778 
6779   ins_cost(INSN_COST * 5);
6780   format %{
6781     "ldrd $dst, [$constantaddress]\t# load from constant table: float=$con\n\t"
6782   %}
6783 
6784   ins_encode %{
6785     __ ldrd(as_FloatRegister($dst$$reg), $constantaddress($con));
6786   %}
6787 
6788   ins_pipe(fp_load_constant_d);
6789 %}
6790 
// Store Instructions
//
// Plain (non-releasing) stores. Each carries predicate
// !needs_releasing_store(n) so that volatile stores fall through to the
// stlr-based rules further below. The *imm*0 forms store the zero
// register instead of consuming a general-purpose register.

// Store CMS card-mark Immediate
// (predicate proves no preceding StoreStore barrier is required)
instruct storeimmCM0(immI0 zero, memory mem)
%{
  match(Set mem (StoreCM mem zero));
  predicate(unnecessary_storestore(n));

  ins_cost(INSN_COST);
  format %{ "strb zr, $mem\t# byte" %}

  ins_encode(aarch64_enc_strb0(mem));

  ins_pipe(istore_mem);
%}

// Store CMS card-mark Immediate with intervening StoreStore
// needed when using CMS with no conditional card marking
instruct storeimmCM0_ordered(immI0 zero, memory mem)
%{
  match(Set mem (StoreCM mem zero));

  ins_cost(INSN_COST * 2);
  format %{ "dmb ishst"
      "\n\tstrb zr, $mem\t# byte" %}

  ins_encode(aarch64_enc_strb0_ordered(mem));

  ins_pipe(istore_mem);
%}

// Store Byte
instruct storeB(iRegIorL2I src, memory mem)
%{
  match(Set mem (StoreB mem src));
  predicate(!needs_releasing_store(n));

  ins_cost(INSN_COST);
  format %{ "strb  $src, $mem\t# byte" %}

  ins_encode(aarch64_enc_strb(src, mem));

  ins_pipe(istore_reg_mem);
%}


// Store zero Byte
instruct storeimmB0(immI0 zero, memory mem)
%{
  match(Set mem (StoreB mem zero));
  predicate(!needs_releasing_store(n));

  ins_cost(INSN_COST);
  format %{ "strb zr, $mem\t# byte" %}

  ins_encode(aarch64_enc_strb0(mem));

  ins_pipe(istore_mem);
%}

// Store Char/Short
instruct storeC(iRegIorL2I src, memory mem)
%{
  match(Set mem (StoreC mem src));
  predicate(!needs_releasing_store(n));

  ins_cost(INSN_COST);
  format %{ "strh  $src, $mem\t# short" %}

  ins_encode(aarch64_enc_strh(src, mem));

  ins_pipe(istore_reg_mem);
%}

// Store zero Char/Short
instruct storeimmC0(immI0 zero, memory mem)
%{
  match(Set mem (StoreC mem zero));
  predicate(!needs_releasing_store(n));

  ins_cost(INSN_COST);
  format %{ "strh  zr, $mem\t# short" %}

  ins_encode(aarch64_enc_strh0(mem));

  ins_pipe(istore_mem);
%}

// Store Integer

instruct storeI(iRegIorL2I src, memory mem)
%{
  match(Set mem(StoreI mem src));
  predicate(!needs_releasing_store(n));

  ins_cost(INSN_COST);
  format %{ "strw  $src, $mem\t# int" %}

  ins_encode(aarch64_enc_strw(src, mem));

  ins_pipe(istore_reg_mem);
%}

// Store zero Integer
instruct storeimmI0(immI0 zero, memory mem)
%{
  match(Set mem(StoreI mem zero));
  predicate(!needs_releasing_store(n));

  ins_cost(INSN_COST);
  format %{ "strw  zr, $mem\t# int" %}

  ins_encode(aarch64_enc_strw0(mem));

  ins_pipe(istore_mem);
%}
6904 
// Store Long (64 bit signed)
instruct storeL(iRegL src, memory mem)
%{
  match(Set mem (StoreL mem src));
  predicate(!needs_releasing_store(n));

  ins_cost(INSN_COST);
  // Format comment fixed: this is a 64-bit long store, not an int store.
  format %{ "str  $src, $mem\t# long" %}

  ins_encode(aarch64_enc_str(src, mem));

  ins_pipe(istore_reg_mem);
%}
6918 
// Store zero Long (uses the zero register)
instruct storeimmL0(immL0 zero, memory mem)
%{
  match(Set mem (StoreL mem zero));
  predicate(!needs_releasing_store(n));

  ins_cost(INSN_COST);
  // Format comment fixed: this is a 64-bit long store, not an int store.
  format %{ "str  zr, $mem\t# long" %}

  ins_encode(aarch64_enc_str0(mem));

  ins_pipe(istore_mem);
%}
6932 
// Store Pointer
instruct storeP(iRegP src, memory mem)
%{
  match(Set mem (StoreP mem src));
  predicate(!needs_releasing_store(n));

  ins_cost(INSN_COST);
  format %{ "str  $src, $mem\t# ptr" %}

  ins_encode(aarch64_enc_str(src, mem));

  ins_pipe(istore_reg_mem);
%}

// Store null Pointer (zero-register form)
instruct storeimmP0(immP0 zero, memory mem)
%{
  match(Set mem (StoreP mem zero));
  predicate(!needs_releasing_store(n));

  ins_cost(INSN_COST);
  format %{ "str zr, $mem\t# ptr" %}

  ins_encode(aarch64_enc_str0(mem));

  ins_pipe(istore_mem);
%}

// Store Compressed Pointer
instruct storeN(iRegN src, memory mem)
%{
  match(Set mem (StoreN mem src));
  predicate(!needs_releasing_store(n));

  ins_cost(INSN_COST);
  format %{ "strw  $src, $mem\t# compressed ptr" %}

  ins_encode(aarch64_enc_strw(src, mem));

  ins_pipe(istore_reg_mem);
%}

// Store compressed null: when both the narrow-oop and narrow-klass bases
// are NULL, rheapbase is known to hold zero, so store it directly.
instruct storeImmN0(iRegIHeapbase heapbase, immN0 zero, memory mem)
%{
  match(Set mem (StoreN mem zero));
  predicate(Universe::narrow_oop_base() == NULL &&
            Universe::narrow_klass_base() == NULL  &&
            (!needs_releasing_store(n)));

  ins_cost(INSN_COST);
  format %{ "strw  rheapbase, $mem\t# compressed ptr (rheapbase==0)" %}

  ins_encode(aarch64_enc_strw(heapbase, mem));

  ins_pipe(istore_reg_mem);
%}
6989 
// Store Float
instruct storeF(vRegF src, memory mem)
%{
  match(Set mem (StoreF mem src));
  predicate(!needs_releasing_store(n));

  ins_cost(INSN_COST);
  format %{ "strs  $src, $mem\t# float" %}

  ins_encode( aarch64_enc_strs(src, mem) );

  // FP stores use the generic memory pipe class.
  ins_pipe(pipe_class_memory);
%}

// TODO
// implement storeImmF0 and storeFImmPacked

// Store Double
instruct storeD(vRegD src, memory mem)
%{
  match(Set mem (StoreD mem src));
  predicate(!needs_releasing_store(n));

  ins_cost(INSN_COST);
  format %{ "strd  $src, $mem\t# double" %}

  ins_encode( aarch64_enc_strd(src, mem) );

  ins_pipe(pipe_class_memory);
%}

// Store Compressed Klass Pointer
instruct storeNKlass(iRegN src, memory mem)
%{
  predicate(!needs_releasing_store(n));
  match(Set mem (StoreNKlass mem src));

  ins_cost(INSN_COST);
  format %{ "strw  $src, $mem\t# compressed klass ptr" %}

  ins_encode(aarch64_enc_strw(src, mem));

  ins_pipe(istore_reg_mem);
%}

// TODO
// implement storeImmD0 and storeDImmPacked
7037 
// prefetch instructions
// Must be safe to execute with invalid address (cannot fault).

instruct prefetchr( memory mem ) %{
  match(PrefetchRead mem);

  ins_cost(INSN_COST);
  format %{ "prfm $mem, PLDL1KEEP\t# Prefetch into level 1 cache read keep" %}

  ins_encode( aarch64_enc_prefetchr(mem) );

  ins_pipe(iload_prefetch);
%}

instruct prefetchw( memory mem ) %{
  match(PrefetchAllocation mem);

  ins_cost(INSN_COST);
  format %{ "prfm $mem, PSTL1KEEP\t# Prefetch into level 1 cache write keep" %}

  ins_encode( aarch64_enc_prefetchw(mem) );

  ins_pipe(iload_prefetch);
%}

// n.b. x86-derived rule name ("nta" = non-temporal); on AArch64 the
// PrefetchWrite node maps to a streaming (STRM) write prefetch.
instruct prefetchnta( memory mem ) %{
  match(PrefetchWrite mem);

  ins_cost(INSN_COST);
  format %{ "prfm $mem, PSTL1STRM\t# Prefetch into level 1 cache write streaming" %}

  ins_encode( aarch64_enc_prefetchnta(mem) );

  ins_pipe(iload_prefetch);
%}
7073 
//  ---------------- volatile loads and stores ----------------

// Volatile loads use acquiring loads (ldar*) on a plain register-indirect
// address; they serialize the pipe and carry VOLATILE_REF_COST.

// Load Byte (8 bit signed)
instruct loadB_volatile(iRegINoSp dst, /* sync_memory*/indirect mem)
%{
  match(Set dst (LoadB mem));

  ins_cost(VOLATILE_REF_COST);
  format %{ "ldarsb  $dst, $mem\t# byte" %}

  ins_encode(aarch64_enc_ldarsb(dst, mem));

  ins_pipe(pipe_serial);
%}

// Load Byte (8 bit signed) into long
instruct loadB2L_volatile(iRegLNoSp dst, /* sync_memory*/indirect mem)
%{
  match(Set dst (ConvI2L (LoadB mem)));

  ins_cost(VOLATILE_REF_COST);
  format %{ "ldarsb  $dst, $mem\t# byte" %}

  ins_encode(aarch64_enc_ldarsb(dst, mem));

  ins_pipe(pipe_serial);
%}

// Load Byte (8 bit unsigned)
instruct loadUB_volatile(iRegINoSp dst, /* sync_memory*/indirect mem)
%{
  match(Set dst (LoadUB mem));

  ins_cost(VOLATILE_REF_COST);
  format %{ "ldarb  $dst, $mem\t# byte" %}

  ins_encode(aarch64_enc_ldarb(dst, mem));

  ins_pipe(pipe_serial);
%}

// Load Byte (8 bit unsigned) into long
instruct loadUB2L_volatile(iRegLNoSp dst, /* sync_memory*/indirect mem)
%{
  match(Set dst (ConvI2L (LoadUB mem)));

  ins_cost(VOLATILE_REF_COST);
  format %{ "ldarb  $dst, $mem\t# byte" %}

  ins_encode(aarch64_enc_ldarb(dst, mem));

  ins_pipe(pipe_serial);
%}

// Load Short (16 bit signed)
instruct loadS_volatile(iRegINoSp dst, /* sync_memory*/indirect mem)
%{
  match(Set dst (LoadS mem));

  ins_cost(VOLATILE_REF_COST);
  format %{ "ldarshw  $dst, $mem\t# short" %}

  ins_encode(aarch64_enc_ldarshw(dst, mem));

  ins_pipe(pipe_serial);
%}

// Load Char (16 bit unsigned)
instruct loadUS_volatile(iRegINoSp dst, /* sync_memory*/indirect mem)
%{
  match(Set dst (LoadUS mem));

  ins_cost(VOLATILE_REF_COST);
  format %{ "ldarhw  $dst, $mem\t# short" %}

  ins_encode(aarch64_enc_ldarhw(dst, mem));

  ins_pipe(pipe_serial);
%}

// Load Short/Char (16 bit unsigned) into long
instruct loadUS2L_volatile(iRegLNoSp dst, /* sync_memory*/indirect mem)
%{
  match(Set dst (ConvI2L (LoadUS mem)));

  ins_cost(VOLATILE_REF_COST);
  format %{ "ldarh  $dst, $mem\t# short" %}

  ins_encode(aarch64_enc_ldarh(dst, mem));

  ins_pipe(pipe_serial);
%}
7165 
// Load Short/Char (16 bit signed) into long
instruct loadS2L_volatile(iRegLNoSp dst, /* sync_memory*/indirect mem)
%{
  match(Set dst (ConvI2L (LoadS mem)));

  ins_cost(VOLATILE_REF_COST);
  // Format fixed to match the sign-extending encoder (aarch64_enc_ldarsh)
  // and the ldarsb/ldarshw pseudo-mnemonic convention used by the sibling
  // rules; the old "ldarh" wrongly suggested a zero-extending load.
  format %{ "ldarsh  $dst, $mem\t# short" %}

  ins_encode(aarch64_enc_ldarsh(dst, mem));

  ins_pipe(pipe_serial);
%}
7178 
// Load Integer (32 bit signed)
instruct loadI_volatile(iRegINoSp dst, /* sync_memory*/indirect mem)
%{
  match(Set dst (LoadI mem));

  ins_cost(VOLATILE_REF_COST);
  format %{ "ldarw  $dst, $mem\t# int" %}

  ins_encode(aarch64_enc_ldarw(dst, mem));

  ins_pipe(pipe_serial);
%}

// Load Integer (32 bit unsigned) into long
// (the AndL with the 32-bit mask is subsumed by ldarw's zero extension)
instruct loadUI2L_volatile(iRegLNoSp dst, /* sync_memory*/indirect mem, immL_32bits mask)
%{
  match(Set dst (AndL (ConvI2L (LoadI mem)) mask));

  ins_cost(VOLATILE_REF_COST);
  format %{ "ldarw  $dst, $mem\t# int" %}

  ins_encode(aarch64_enc_ldarw(dst, mem));

  ins_pipe(pipe_serial);
%}
7204 
// Load Long (64 bit signed)
instruct loadL_volatile(iRegLNoSp dst, /* sync_memory*/indirect mem)
%{
  match(Set dst (LoadL mem));

  ins_cost(VOLATILE_REF_COST);
  // Format comment fixed: this is a 64-bit long load, not an int load.
  format %{ "ldar  $dst, $mem\t# long" %}

  ins_encode(aarch64_enc_ldar(dst, mem));

  ins_pipe(pipe_serial);
%}
7217 
// Load Pointer
instruct loadP_volatile(iRegPNoSp dst, /* sync_memory*/indirect mem)
%{
  match(Set dst (LoadP mem));

  ins_cost(VOLATILE_REF_COST);
  format %{ "ldar  $dst, $mem\t# ptr" %}

  ins_encode(aarch64_enc_ldar(dst, mem));

  ins_pipe(pipe_serial);
%}

// Load Compressed Pointer
instruct loadN_volatile(iRegNNoSp dst, /* sync_memory*/indirect mem)
%{
  match(Set dst (LoadN mem));

  ins_cost(VOLATILE_REF_COST);
  format %{ "ldarw  $dst, $mem\t# compressed ptr" %}

  ins_encode(aarch64_enc_ldarw(dst, mem));

  ins_pipe(pipe_serial);
%}

// Load Float
// (acquiring load into an FP register via the fldars encoding)
instruct loadF_volatile(vRegF dst, /* sync_memory*/indirect mem)
%{
  match(Set dst (LoadF mem));

  ins_cost(VOLATILE_REF_COST);
  format %{ "ldars  $dst, $mem\t# float" %}

  ins_encode( aarch64_enc_fldars(dst, mem) );

  ins_pipe(pipe_serial);
%}

// Load Double
instruct loadD_volatile(vRegD dst, /* sync_memory*/indirect mem)
%{
  match(Set dst (LoadD mem));

  ins_cost(VOLATILE_REF_COST);
  format %{ "ldard  $dst, $mem\t# double" %}

  ins_encode( aarch64_enc_fldard(dst, mem) );

  ins_pipe(pipe_serial);
%}
7269 
// Store Byte
// Volatile stores use releasing stores (stlr*); no predicate is needed
// because the plain-store rules exclude the releasing case.
instruct storeB_volatile(iRegIorL2I src, /* sync_memory*/indirect mem)
%{
  match(Set mem (StoreB mem src));

  ins_cost(VOLATILE_REF_COST);
  format %{ "stlrb  $src, $mem\t# byte" %}

  ins_encode(aarch64_enc_stlrb(src, mem));

  ins_pipe(pipe_class_memory);
%}

// Store Char/Short
instruct storeC_volatile(iRegIorL2I src, /* sync_memory*/indirect mem)
%{
  match(Set mem (StoreC mem src));

  ins_cost(VOLATILE_REF_COST);
  format %{ "stlrh  $src, $mem\t# short" %}

  ins_encode(aarch64_enc_stlrh(src, mem));

  ins_pipe(pipe_class_memory);
%}

// Store Integer

instruct storeI_volatile(iRegIorL2I src, /* sync_memory*/indirect mem)
%{
  match(Set mem(StoreI mem src));

  ins_cost(VOLATILE_REF_COST);
  format %{ "stlrw  $src, $mem\t# int" %}

  ins_encode(aarch64_enc_stlrw(src, mem));

  ins_pipe(pipe_class_memory);
%}
7309 
// Store Long (64 bit signed)
instruct storeL_volatile(iRegL src, /* sync_memory*/indirect mem)
%{
  match(Set mem (StoreL mem src));

  ins_cost(VOLATILE_REF_COST);
  // Format comment fixed: this is a 64-bit long store, not an int store.
  format %{ "stlr  $src, $mem\t# long" %}

  ins_encode(aarch64_enc_stlr(src, mem));

  ins_pipe(pipe_class_memory);
%}
7322 
// Store Pointer
instruct storeP_volatile(iRegP src, /* sync_memory*/indirect mem)
%{
  match(Set mem (StoreP mem src));

  ins_cost(VOLATILE_REF_COST);
  format %{ "stlr  $src, $mem\t# ptr" %}

  ins_encode(aarch64_enc_stlr(src, mem));

  ins_pipe(pipe_class_memory);
%}

// Store Compressed Pointer
instruct storeN_volatile(iRegN src, /* sync_memory*/indirect mem)
%{
  match(Set mem (StoreN mem src));

  ins_cost(VOLATILE_REF_COST);
  format %{ "stlrw  $src, $mem\t# compressed ptr" %}

  ins_encode(aarch64_enc_stlrw(src, mem));

  ins_pipe(pipe_class_memory);
%}

// Store Float
// (releasing store from an FP register via the fstlrs encoding)
instruct storeF_volatile(vRegF src, /* sync_memory*/indirect mem)
%{
  match(Set mem (StoreF mem src));

  ins_cost(VOLATILE_REF_COST);
  format %{ "stlrs  $src, $mem\t# float" %}

  ins_encode( aarch64_enc_fstlrs(src, mem) );

  ins_pipe(pipe_class_memory);
%}

// TODO
// implement storeImmF0 and storeFImmPacked

// Store Double
instruct storeD_volatile(vRegD src, /* sync_memory*/indirect mem)
%{
  match(Set mem (StoreD mem src));

  ins_cost(VOLATILE_REF_COST);
  format %{ "stlrd  $src, $mem\t# double" %}

  ins_encode( aarch64_enc_fstlrd(src, mem) );

  ins_pipe(pipe_class_memory);
%}
7377 
//  ---------------- end of volatile loads and stores ----------------

// ============================================================================
// BSWAP Instructions

instruct bytes_reverse_int(iRegINoSp dst, iRegIorL2I src) %{
  match(Set dst (ReverseBytesI src));

  ins_cost(INSN_COST);
  format %{ "revw  $dst, $src" %}

  ins_encode %{
    __ revw(as_Register($dst$$reg), as_Register($src$$reg));
  %}

  ins_pipe(ialu_reg);
%}

instruct bytes_reverse_long(iRegLNoSp dst, iRegL src) %{
  match(Set dst (ReverseBytesL src));

  ins_cost(INSN_COST);
  format %{ "rev  $dst, $src" %}

  ins_encode %{
    __ rev(as_Register($dst$$reg), as_Register($src$$reg));
  %}

  ins_pipe(ialu_reg);
%}

instruct bytes_reverse_unsigned_short(iRegINoSp dst, iRegIorL2I src) %{
  match(Set dst (ReverseBytesUS src));

  ins_cost(INSN_COST);
  format %{ "rev16w  $dst, $src" %}

  ins_encode %{
    __ rev16w(as_Register($dst$$reg), as_Register($src$$reg));
  %}

  ins_pipe(ialu_reg);
%}

// Signed variant: byte-swap the low 16 bits, then sign-extend bits 0..15
// into the full word with sbfmw.
instruct bytes_reverse_short(iRegINoSp dst, iRegIorL2I src) %{
  match(Set dst (ReverseBytesS src));

  ins_cost(INSN_COST);
  format %{ "rev16w  $dst, $src\n\t"
            "sbfmw $dst, $dst, #0, #15" %}

  ins_encode %{
    __ rev16w(as_Register($dst$$reg), as_Register($src$$reg));
    __ sbfmw(as_Register($dst$$reg), as_Register($dst$$reg), 0U, 15U);
  %}

  ins_pipe(ialu_reg);
%}
7436 
7437 // ============================================================================
7438 // Zero Count Instructions
7439 
7440 instruct countLeadingZerosI(iRegINoSp dst, iRegIorL2I src) %{
7441   match(Set dst (CountLeadingZerosI src));
7442 
7443   ins_cost(INSN_COST);
7444   format %{ "clzw  $dst, $src" %}
7445   ins_encode %{
7446     __ clzw(as_Register($dst$$reg), as_Register($src$$reg));
7447   %}
7448 
7449   ins_pipe(ialu_reg);
7450 %}
7451 
7452 instruct countLeadingZerosL(iRegINoSp dst, iRegL src) %{
7453   match(Set dst (CountLeadingZerosL src));
7454 
7455   ins_cost(INSN_COST);
7456   format %{ "clz   $dst, $src" %}
7457   ins_encode %{
7458     __ clz(as_Register($dst$$reg), as_Register($src$$reg));
7459   %}
7460 
7461   ins_pipe(ialu_reg);
7462 %}
7463 
7464 instruct countTrailingZerosI(iRegINoSp dst, iRegIorL2I src) %{
7465   match(Set dst (CountTrailingZerosI src));
7466 
7467   ins_cost(INSN_COST * 2);
7468   format %{ "rbitw  $dst, $src\n\t"
7469             "clzw   $dst, $dst" %}
7470   ins_encode %{
7471     __ rbitw(as_Register($dst$$reg), as_Register($src$$reg));
7472     __ clzw(as_Register($dst$$reg), as_Register($dst$$reg));
7473   %}
7474 
7475   ins_pipe(ialu_reg);
7476 %}
7477 
7478 instruct countTrailingZerosL(iRegINoSp dst, iRegL src) %{
7479   match(Set dst (CountTrailingZerosL src));
7480 
7481   ins_cost(INSN_COST * 2);
7482   format %{ "rbit   $dst, $src\n\t"
7483             "clz    $dst, $dst" %}
7484   ins_encode %{
7485     __ rbit(as_Register($dst$$reg), as_Register($src$$reg));
7486     __ clz(as_Register($dst$$reg), as_Register($dst$$reg));
7487   %}
7488 
7489   ins_pipe(ialu_reg);
7490 %}
7491 
//---------- Population Count Instructions -------------------------------------
//
// Population count is done via the NEON cnt instruction: move the value
// into a vector register, count bits per byte, then sum the bytes with
// addv and move the result back.

instruct popCountI(iRegINoSp dst, iRegIorL2I src, vRegF tmp) %{
  predicate(UsePopCountInstruction);
  match(Set dst (PopCountI src));
  effect(TEMP tmp);
  ins_cost(INSN_COST * 13);

  format %{ "movw   $src, $src\n\t"
            "mov    $tmp, $src\t# vector (1D)\n\t"
            "cnt    $tmp, $tmp\t# vector (8B)\n\t"
            "addv   $tmp, $tmp\t# vector (8B)\n\t"
            "mov    $dst, $tmp\t# vector (1D)" %}
  ins_encode %{
    // n.b. writes $src in place (movw zero-extends, 32-bit value unchanged).
    __ movw($src$$Register, $src$$Register); // ensure top 32 bits 0
    __ mov($tmp$$FloatRegister, __ T1D, 0, $src$$Register);
    __ cnt($tmp$$FloatRegister, __ T8B, $tmp$$FloatRegister);
    __ addv($tmp$$FloatRegister, __ T8B, $tmp$$FloatRegister);
    __ mov($dst$$Register, $tmp$$FloatRegister, __ T1D, 0);
  %}

  ins_pipe(pipe_class_default);
%}

// Memory-operand form: load the int straight into the vector register.
instruct popCountI_mem(iRegINoSp dst, memory mem, vRegF tmp) %{
  predicate(UsePopCountInstruction);
  match(Set dst (PopCountI (LoadI mem)));
  effect(TEMP tmp);
  ins_cost(INSN_COST * 13);

  format %{ "ldrs   $tmp, $mem\n\t"
            "cnt    $tmp, $tmp\t# vector (8B)\n\t"
            "addv   $tmp, $tmp\t# vector (8B)\n\t"
            "mov    $dst, $tmp\t# vector (1D)" %}
  ins_encode %{
    FloatRegister tmp_reg = as_FloatRegister($tmp$$reg);
    loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldrs, tmp_reg, $mem->opcode(),
               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
    __ cnt($tmp$$FloatRegister, __ T8B, $tmp$$FloatRegister);
    __ addv($tmp$$FloatRegister, __ T8B, $tmp$$FloatRegister);
    __ mov($dst$$Register, $tmp$$FloatRegister, __ T1D, 0);
  %}

  ins_pipe(pipe_class_default);
%}

// Note: Long.bitCount(long) returns an int.
instruct popCountL(iRegINoSp dst, iRegL src, vRegD tmp) %{
  predicate(UsePopCountInstruction);
  match(Set dst (PopCountL src));
  effect(TEMP tmp);
  ins_cost(INSN_COST * 13);

  format %{ "mov    $tmp, $src\t# vector (1D)\n\t"
            "cnt    $tmp, $tmp\t# vector (8B)\n\t"
            "addv   $tmp, $tmp\t# vector (8B)\n\t"
            "mov    $dst, $tmp\t# vector (1D)" %}
  ins_encode %{
    __ mov($tmp$$FloatRegister, __ T1D, 0, $src$$Register);
    __ cnt($tmp$$FloatRegister, __ T8B, $tmp$$FloatRegister);
    __ addv($tmp$$FloatRegister, __ T8B, $tmp$$FloatRegister);
    __ mov($dst$$Register, $tmp$$FloatRegister, __ T1D, 0);
  %}

  ins_pipe(pipe_class_default);
%}

// Memory-operand form: load the long straight into the vector register.
instruct popCountL_mem(iRegINoSp dst, memory mem, vRegD tmp) %{
  predicate(UsePopCountInstruction);
  match(Set dst (PopCountL (LoadL mem)));
  effect(TEMP tmp);
  ins_cost(INSN_COST * 13);

  format %{ "ldrd   $tmp, $mem\n\t"
            "cnt    $tmp, $tmp\t# vector (8B)\n\t"
            "addv   $tmp, $tmp\t# vector (8B)\n\t"
            "mov    $dst, $tmp\t# vector (1D)" %}
  ins_encode %{
    FloatRegister tmp_reg = as_FloatRegister($tmp$$reg);
    loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldrd, tmp_reg, $mem->opcode(),
               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
    __ cnt($tmp$$FloatRegister, __ T8B, $tmp$$FloatRegister);
    __ addv($tmp$$FloatRegister, __ T8B, $tmp$$FloatRegister);
    __ mov($dst$$Register, $tmp$$FloatRegister, __ T1D, 0);
  %}

  ins_pipe(pipe_class_default);
%}
7581 
7582 // ============================================================================
7583 // MemBar Instruction
7584 
7585 instruct load_fence() %{
7586   match(LoadFence);
7587   ins_cost(VOLATILE_REF_COST);
7588 
7589   format %{ "load_fence" %}
7590 
7591   ins_encode %{
7592     __ membar(Assembler::LoadLoad|Assembler::LoadStore);
7593   %}
7594   ins_pipe(pipe_serial);
7595 %}
7596 
7597 instruct unnecessary_membar_acquire() %{
7598   predicate(unnecessary_acquire(n));
7599   match(MemBarAcquire);
7600   ins_cost(0);
7601 
7602   format %{ "membar_acquire (elided)" %}
7603 
7604   ins_encode %{
7605     __ block_comment("membar_acquire (elided)");
7606   %}
7607 
7608   ins_pipe(pipe_class_empty);
7609 %}
7610 
7611 instruct membar_acquire() %{
7612   match(MemBarAcquire);
7613   ins_cost(VOLATILE_REF_COST);
7614 
7615   format %{ "membar_acquire" %}
7616 
7617   ins_encode %{
7618     __ block_comment("membar_acquire");
7619     __ membar(Assembler::LoadLoad|Assembler::LoadStore);
7620   %}
7621 
7622   ins_pipe(pipe_serial);
7623 %}
7624 
7625 
7626 instruct membar_acquire_lock() %{
7627   match(MemBarAcquireLock);
7628   ins_cost(VOLATILE_REF_COST);
7629 
7630   format %{ "membar_acquire_lock (elided)" %}
7631 
7632   ins_encode %{
7633     __ block_comment("membar_acquire_lock (elided)");
7634   %}
7635 
7636   ins_pipe(pipe_serial);
7637 %}
7638 
7639 instruct store_fence() %{
7640   match(StoreFence);
7641   ins_cost(VOLATILE_REF_COST);
7642 
7643   format %{ "store_fence" %}
7644 
7645   ins_encode %{
7646     __ membar(Assembler::LoadStore|Assembler::StoreStore);
7647   %}
7648   ins_pipe(pipe_serial);
7649 %}
7650 
7651 instruct unnecessary_membar_release() %{
7652   predicate(unnecessary_release(n));
7653   match(MemBarRelease);
7654   ins_cost(0);
7655 
7656   format %{ "membar_release (elided)" %}
7657 
7658   ins_encode %{
7659     __ block_comment("membar_release (elided)");
7660   %}
7661   ins_pipe(pipe_serial);
7662 %}
7663 
7664 instruct membar_release() %{
7665   match(MemBarRelease);
7666   ins_cost(VOLATILE_REF_COST);
7667 
7668   format %{ "membar_release" %}
7669 
7670   ins_encode %{
7671     __ block_comment("membar_release");
7672     __ membar(Assembler::LoadStore|Assembler::StoreStore);
7673   %}
7674   ins_pipe(pipe_serial);
7675 %}
7676 
7677 instruct membar_storestore() %{
7678   match(MemBarStoreStore);
7679   ins_cost(VOLATILE_REF_COST);
7680 
7681   format %{ "MEMBAR-store-store" %}
7682 
7683   ins_encode %{
7684     __ membar(Assembler::StoreStore);
7685   %}
7686   ins_pipe(pipe_serial);
7687 %}
7688 
7689 instruct membar_release_lock() %{
7690   match(MemBarReleaseLock);
7691   ins_cost(VOLATILE_REF_COST);
7692 
7693   format %{ "membar_release_lock (elided)" %}
7694 
7695   ins_encode %{
7696     __ block_comment("membar_release_lock (elided)");
7697   %}
7698 
7699   ins_pipe(pipe_serial);
7700 %}
7701 
7702 instruct unnecessary_membar_volatile() %{
7703   predicate(unnecessary_volatile(n));
7704   match(MemBarVolatile);
7705   ins_cost(0);
7706 
7707   format %{ "membar_volatile (elided)" %}
7708 
7709   ins_encode %{
7710     __ block_comment("membar_volatile (elided)");
7711   %}
7712 
7713   ins_pipe(pipe_serial);
7714 %}
7715 
7716 instruct membar_volatile() %{
7717   match(MemBarVolatile);
7718   ins_cost(VOLATILE_REF_COST*100);
7719 
7720   format %{ "membar_volatile" %}
7721 
7722   ins_encode %{
7723     __ block_comment("membar_volatile");
7724     __ membar(Assembler::StoreLoad);
7725     %}
7726 
7727   ins_pipe(pipe_serial);
7728 %}
7729 
7730 // ============================================================================
7731 // Cast/Convert Instructions
7732 
7733 instruct castX2P(iRegPNoSp dst, iRegL src) %{
7734   match(Set dst (CastX2P src));
7735 
7736   ins_cost(INSN_COST);
7737   format %{ "mov $dst, $src\t# long -> ptr" %}
7738 
7739   ins_encode %{
7740     if ($dst$$reg != $src$$reg) {
7741       __ mov(as_Register($dst$$reg), as_Register($src$$reg));
7742     }
7743   %}
7744 
7745   ins_pipe(ialu_reg);
7746 %}
7747 
7748 instruct castP2X(iRegLNoSp dst, iRegP src) %{
7749   match(Set dst (CastP2X src));
7750 
7751   ins_cost(INSN_COST);
7752   format %{ "mov $dst, $src\t# ptr -> long" %}
7753 
7754   ins_encode %{
7755     if ($dst$$reg != $src$$reg) {
7756       __ mov(as_Register($dst$$reg), as_Register($src$$reg));
7757     }
7758   %}
7759 
7760   ins_pipe(ialu_reg);
7761 %}
7762 
7763 // Convert oop into int for vectors alignment masking
7764 instruct convP2I(iRegINoSp dst, iRegP src) %{
7765   match(Set dst (ConvL2I (CastP2X src)));
7766 
7767   ins_cost(INSN_COST);
7768   format %{ "movw $dst, $src\t# ptr -> int" %}
7769   ins_encode %{
7770     __ movw($dst$$Register, $src$$Register);
7771   %}
7772 
7773   ins_pipe(ialu_reg);
7774 %}
7775 
// Convert compressed oop into int for vectors alignment masking
// in case of 32bit oops (heap < 4Gb).
instruct convN2I(iRegINoSp dst, iRegN src)
%{
  predicate(Universe::narrow_oop_shift() == 0);
  match(Set dst (ConvL2I (CastP2X (DecodeN src))));

  ins_cost(INSN_COST);
  // Format fixed: use the $dst replacement variable (was the literal text
  // "dst") and the mnemonic actually emitted below (movw, not mov).
  format %{ "movw $dst, $src\t# compressed ptr -> int" %}
  ins_encode %{
    __ movw($dst$$Register, $src$$Register);
  %}

  ins_pipe(ialu_reg);
%}
7791 
7792 
// Convert oop pointer into compressed form
// May-be-null case: encode_heap_oop tests for null, hence KILL cr.
instruct encodeHeapOop(iRegNNoSp dst, iRegP src, rFlagsReg cr) %{
  predicate(n->bottom_type()->make_ptr()->ptr() != TypePtr::NotNull);
  match(Set dst (EncodeP src));
  effect(KILL cr);
  ins_cost(INSN_COST * 3);
  format %{ "encode_heap_oop $dst, $src" %}
  ins_encode %{
    Register s = $src$$Register;
    Register d = $dst$$Register;
    __ encode_heap_oop(d, s);
  %}
  ins_pipe(ialu_reg);
%}

// Known-not-null case: no null check required, flags untouched.
instruct encodeHeapOop_not_null(iRegNNoSp dst, iRegP src, rFlagsReg cr) %{
  predicate(n->bottom_type()->make_ptr()->ptr() == TypePtr::NotNull);
  match(Set dst (EncodeP src));
  ins_cost(INSN_COST * 3);
  format %{ "encode_heap_oop_not_null $dst, $src" %}
  ins_encode %{
    __ encode_heap_oop_not_null($dst$$Register, $src$$Register);
  %}
  ins_pipe(ialu_reg);
%}

instruct decodeHeapOop(iRegPNoSp dst, iRegN src, rFlagsReg cr) %{
  predicate(n->bottom_type()->is_ptr()->ptr() != TypePtr::NotNull &&
            n->bottom_type()->is_ptr()->ptr() != TypePtr::Constant);
  match(Set dst (DecodeN src));
  ins_cost(INSN_COST * 3);
  format %{ "decode_heap_oop $dst, $src" %}
  ins_encode %{
    Register s = $src$$Register;
    Register d = $dst$$Register;
    __ decode_heap_oop(d, s);
  %}
  ins_pipe(ialu_reg);
%}

instruct decodeHeapOop_not_null(iRegPNoSp dst, iRegN src, rFlagsReg cr) %{
  predicate(n->bottom_type()->is_ptr()->ptr() == TypePtr::NotNull ||
            n->bottom_type()->is_ptr()->ptr() == TypePtr::Constant);
  match(Set dst (DecodeN src));
  ins_cost(INSN_COST * 3);
  format %{ "decode_heap_oop_not_null $dst, $src" %}
  ins_encode %{
    Register s = $src$$Register;
    Register d = $dst$$Register;
    __ decode_heap_oop_not_null(d, s);
  %}
  ins_pipe(ialu_reg);
%}
7846 
// n.b. AArch64 implementations of encode_klass_not_null and
// decode_klass_not_null do not modify the flags register so, unlike
// Intel, we don't kill CR as a side effect here

instruct encodeKlass_not_null(iRegNNoSp dst, iRegP src) %{
  match(Set dst (EncodePKlass src));

  ins_cost(INSN_COST * 3);
  format %{ "encode_klass_not_null $dst,$src" %}

  ins_encode %{
    Register src_reg = as_Register($src$$reg);
    Register dst_reg = as_Register($dst$$reg);
    __ encode_klass_not_null(dst_reg, src_reg);
  %}

   ins_pipe(ialu_reg);
%}

instruct decodeKlass_not_null(iRegPNoSp dst, iRegN src) %{
  match(Set dst (DecodeNKlass src));

  ins_cost(INSN_COST * 3);
  format %{ "decode_klass_not_null $dst,$src" %}

  ins_encode %{
    Register src_reg = as_Register($src$$reg);
    Register dst_reg = as_Register($dst$$reg);
    // In-place decode uses the single-register MacroAssembler variant.
    if (dst_reg != src_reg) {
      __ decode_klass_not_null(dst_reg, src_reg);
    } else {
      __ decode_klass_not_null(dst_reg);
    }
  %}

   ins_pipe(ialu_reg);
%}
7884 
// The Cast*/CheckCastPP nodes are compile-time type assertions only;
// they emit no machine code (size 0, empty encoding).
instruct checkCastPP(iRegPNoSp dst)
%{
  match(Set dst (CheckCastPP dst));

  size(0);
  format %{ "# checkcastPP of $dst" %}
  ins_encode(/* empty encoding */);
  ins_pipe(pipe_class_empty);
%}

instruct castPP(iRegPNoSp dst)
%{
  match(Set dst (CastPP dst));

  size(0);
  format %{ "# castPP of $dst" %}
  ins_encode(/* empty encoding */);
  ins_pipe(pipe_class_empty);
%}

instruct castII(iRegI dst)
%{
  match(Set dst (CastII dst));

  size(0);
  format %{ "# castII of $dst" %}
  ins_encode(/* empty encoding */);
  ins_cost(0);
  ins_pipe(pipe_class_empty);
%}
7915 
7916 // ============================================================================
7917 // Atomic operation instructions
7918 //
7919 // Intel and SPARC both implement Ideal Node LoadPLocked and
7920 // Store{PIL}Conditional instructions using a normal load for the
7921 // LoadPLocked and a CAS for the Store{PIL}Conditional.
7922 //
7923 // The ideal code appears only to use LoadPLocked/StorePLocked as a
7924 // pair to lock object allocations from Eden space when not using
7925 // TLABs.
7926 //
7927 // There does not appear to be a Load{IL}Locked Ideal Node and the
7928 // Ideal code appears to use Store{IL}Conditional as an alias for CAS
7929 // and to use StoreIConditional only for 32-bit and StoreLConditional
7930 // only for 64-bit.
7931 //
7932 // We implement LoadPLocked and StorePLocked instructions using,
7933 // respectively the AArch64 hw load-exclusive and store-conditional
7934 // instructions. Whereas we must implement each of
7935 // Store{IL}Conditional using a CAS which employs a pair of
7936 // instructions comprising a load-exclusive followed by a
7937 // store-conditional.
7938 
7939 
7940 // Locked-load (linked load) of the current heap-top
7941 // used when updating the eden heap top
7942 // implemented using ldaxr on AArch64
7943 
// LoadPLocked: load-exclusive of a pointer through an indirect address.
// Encoded as ldaxr, i.e. a load-acquire exclusive, which both marks the
// location for a later store-conditional and provides acquire ordering.
instruct loadPLocked(iRegPNoSp dst, indirect mem)
%{
  match(Set dst (LoadPLocked mem));

  ins_cost(VOLATILE_REF_COST);

  format %{ "ldaxr $dst, $mem\t# ptr linked acquire" %}

  ins_encode(aarch64_enc_ldaxr(dst, mem));

  // Exclusive accesses are serialised through the pipeline.
  ins_pipe(pipe_serial);
%}
7956 
7957 // Conditional-store of the updated heap-top.
7958 // Used during allocation of the shared heap.
7959 // Sets flag (EQ) on success.
7960 // implemented using stlxr on AArch64.
7961 
// StorePConditional: store-conditional of $newval to the heap-top slot,
// paired with the loadPLocked rule above. Per the format string the
// encoding uses stlxr (store-release exclusive) followed by a cmpw of the
// status register against zr, so the flags end up EQ exactly on a
// successful write. Note $oldval is carried by the ideal node but is not
// used by the encoding -- the exclusive monitor set up by ldaxr provides
// the compare semantics.
instruct storePConditional(memory heap_top_ptr, iRegP oldval, iRegP newval, rFlagsReg cr)
%{
  match(Set cr (StorePConditional heap_top_ptr (Binary oldval newval)));

  ins_cost(VOLATILE_REF_COST);

 // TODO
 // do we need to do a store-conditional release or can we just use a
 // plain store-conditional?

  format %{
    "stlxr rscratch1, $newval, $heap_top_ptr\t# ptr cond release"
    "cmpw rscratch1, zr\t# EQ on successful write"
  %}

  ins_encode(aarch64_enc_stlxr(newval, heap_top_ptr));

  ins_pipe(pipe_serial);
%}
7981 
7982 
7983 // storeLConditional is used by PhaseMacroExpand::expand_lock_node
7984 // when attempting to rebias a lock towards the current thread.  We
7985 // must use the acquire form of cmpxchg in order to guarantee acquire
7986 // semantics in this case.
// StoreLConditional: 64-bit conditional store implemented as a full CAS
// (see the section comment above). The acquire form of cmpxchg is required
// because this rule is used by lock rebias, which needs acquire semantics.
// Sets flags EQ on success via the trailing cmpw in the encoding.
instruct storeLConditional(indirect mem, iRegLNoSp oldval, iRegLNoSp newval, rFlagsReg cr)
%{
  match(Set cr (StoreLConditional mem (Binary oldval newval)));

  ins_cost(VOLATILE_REF_COST);

  format %{
    "cmpxchg rscratch1, $mem, $oldval, $newval, $mem\t# if $mem == $oldval then $mem <-- $newval"
    "cmpw rscratch1, zr\t# EQ on successful write"
  %}

  ins_encode(aarch64_enc_cmpxchg_acq(mem, oldval, newval));

  ins_pipe(pipe_slow);
%}
8002 
8003 // storeIConditional also has acquire semantics, for no better reason
8004 // than matching storeLConditional.  At the time of writing this
8005 // comment storeIConditional was not used anywhere by AArch64.
// StoreIConditional: 32-bit analogue of storeLConditional above, likewise
// encoded as an acquiring CAS (cmpxchgw_acq) with flags EQ on success.
instruct storeIConditional(indirect mem, iRegINoSp oldval, iRegINoSp newval, rFlagsReg cr)
%{
  match(Set cr (StoreIConditional mem (Binary oldval newval)));

  ins_cost(VOLATILE_REF_COST);

  format %{
    "cmpxchgw rscratch1, $mem, $oldval, $newval, $mem\t# if $mem == $oldval then $mem <-- $newval"
    "cmpw rscratch1, zr\t# EQ on successful write"
  %}

  ins_encode(aarch64_enc_cmpxchgw_acq(mem, oldval, newval));

  ins_pipe(pipe_slow);
%}
8021 
8022 // XXX No flag versions for CompareAndSwap{I,L,P,N} because matcher
8023 // can't match them
8024 
8025 // standard CompareAndSwapX when we are using barriers
8026 // these have higher priority than the rules selected by a predicate
8027 
// CompareAndSwap{I,L,P,N}, relaxed (barrier-surrounded) forms.
// Each emits a CAS on $mem then materialises the boolean result in $res
// with cset on EQ. The flags are clobbered (KILL cr). The word-sized
// encoding (cmpxchgw) is used for int and narrow oop, the doubleword
// encoding (cmpxchg) for long and ptr.

// 32-bit int CAS; $res <- (old == $oldval) ? 1 : 0.
instruct compareAndSwapI(iRegINoSp res, indirect mem, iRegINoSp oldval, iRegINoSp newval, rFlagsReg cr) %{

  match(Set res (CompareAndSwapI mem (Binary oldval newval)));
  ins_cost(2 * VOLATILE_REF_COST);

  effect(KILL cr);

 format %{
    "cmpxchgw $mem, $oldval, $newval\t# (int) if $mem == $oldval then $mem <-- $newval"
    "cset $res, EQ\t# $res <-- (EQ ? 1 : 0)"
 %}

 ins_encode(aarch64_enc_cmpxchgw(mem, oldval, newval),
            aarch64_enc_cset_eq(res));

  ins_pipe(pipe_slow);
%}

// 64-bit long CAS.
instruct compareAndSwapL(iRegINoSp res, indirect mem, iRegLNoSp oldval, iRegLNoSp newval, rFlagsReg cr) %{

  match(Set res (CompareAndSwapL mem (Binary oldval newval)));
  ins_cost(2 * VOLATILE_REF_COST);

  effect(KILL cr);

 format %{
    "cmpxchg $mem, $oldval, $newval\t# (long) if $mem == $oldval then $mem <-- $newval"
    "cset $res, EQ\t# $res <-- (EQ ? 1 : 0)"
 %}

 ins_encode(aarch64_enc_cmpxchg(mem, oldval, newval),
            aarch64_enc_cset_eq(res));

  ins_pipe(pipe_slow);
%}

// Full-width pointer CAS.
instruct compareAndSwapP(iRegINoSp res, indirect mem, iRegP oldval, iRegP newval, rFlagsReg cr) %{

  match(Set res (CompareAndSwapP mem (Binary oldval newval)));
  ins_cost(2 * VOLATILE_REF_COST);

  effect(KILL cr);

 format %{
    "cmpxchg $mem, $oldval, $newval\t# (ptr) if $mem == $oldval then $mem <-- $newval"
    "cset $res, EQ\t# $res <-- (EQ ? 1 : 0)"
 %}

 ins_encode(aarch64_enc_cmpxchg(mem, oldval, newval),
            aarch64_enc_cset_eq(res));

  ins_pipe(pipe_slow);
%}

// Narrow-oop (compressed pointer) CAS; 32-bit wide.
instruct compareAndSwapN(iRegINoSp res, indirect mem, iRegNNoSp oldval, iRegNNoSp newval, rFlagsReg cr) %{

  match(Set res (CompareAndSwapN mem (Binary oldval newval)));
  ins_cost(2 * VOLATILE_REF_COST);

  effect(KILL cr);

 format %{
    "cmpxchgw $mem, $oldval, $newval\t# (narrow oop) if $mem == $oldval then $mem <-- $newval"
    "cset $res, EQ\t# $res <-- (EQ ? 1 : 0)"
 %}

 ins_encode(aarch64_enc_cmpxchgw(mem, oldval, newval),
            aarch64_enc_cset_eq(res));

  ins_pipe(pipe_slow);
%}
8099 
8100 
8101 // alternative CompareAndSwapX when we are eliding barriers
8102 
// CompareAndSwap{I,L,P,N}Acq: acquiring forms, selected by the
// needs_acquiring_load_exclusive predicate when the surrounding barriers
// have been elided, so the CAS itself must supply the ordering. Note the
// lower ins_cost (VOLATILE_REF_COST vs 2x) gives these priority over the
// relaxed rules above when the predicate holds.

// Acquiring 32-bit int CAS.
instruct compareAndSwapIAcq(iRegINoSp res, indirect mem, iRegINoSp oldval, iRegINoSp newval, rFlagsReg cr) %{

  predicate(needs_acquiring_load_exclusive(n));
  match(Set res (CompareAndSwapI mem (Binary oldval newval)));
  ins_cost(VOLATILE_REF_COST);

  effect(KILL cr);

 format %{
    "cmpxchgw_acq $mem, $oldval, $newval\t# (int) if $mem == $oldval then $mem <-- $newval"
    "cset $res, EQ\t# $res <-- (EQ ? 1 : 0)"
 %}

 ins_encode(aarch64_enc_cmpxchgw_acq(mem, oldval, newval),
            aarch64_enc_cset_eq(res));

  ins_pipe(pipe_slow);
%}

// Acquiring 64-bit long CAS.
instruct compareAndSwapLAcq(iRegINoSp res, indirect mem, iRegLNoSp oldval, iRegLNoSp newval, rFlagsReg cr) %{

  predicate(needs_acquiring_load_exclusive(n));
  match(Set res (CompareAndSwapL mem (Binary oldval newval)));
  ins_cost(VOLATILE_REF_COST);

  effect(KILL cr);

 format %{
    "cmpxchg_acq $mem, $oldval, $newval\t# (long) if $mem == $oldval then $mem <-- $newval"
    "cset $res, EQ\t# $res <-- (EQ ? 1 : 0)"
 %}

 ins_encode(aarch64_enc_cmpxchg_acq(mem, oldval, newval),
            aarch64_enc_cset_eq(res));

  ins_pipe(pipe_slow);
%}

// Acquiring pointer CAS.
instruct compareAndSwapPAcq(iRegINoSp res, indirect mem, iRegP oldval, iRegP newval, rFlagsReg cr) %{

  predicate(needs_acquiring_load_exclusive(n));
  match(Set res (CompareAndSwapP mem (Binary oldval newval)));
  ins_cost(VOLATILE_REF_COST);

  effect(KILL cr);

 format %{
    "cmpxchg_acq $mem, $oldval, $newval\t# (ptr) if $mem == $oldval then $mem <-- $newval"
    "cset $res, EQ\t# $res <-- (EQ ? 1 : 0)"
 %}

 ins_encode(aarch64_enc_cmpxchg_acq(mem, oldval, newval),
            aarch64_enc_cset_eq(res));

  ins_pipe(pipe_slow);
%}

// Acquiring narrow-oop CAS.
instruct compareAndSwapNAcq(iRegINoSp res, indirect mem, iRegNNoSp oldval, iRegNNoSp newval, rFlagsReg cr) %{

  predicate(needs_acquiring_load_exclusive(n));
  match(Set res (CompareAndSwapN mem (Binary oldval newval)));
  ins_cost(VOLATILE_REF_COST);

  effect(KILL cr);

 format %{
    "cmpxchgw_acq $mem, $oldval, $newval\t# (narrow oop) if $mem == $oldval then $mem <-- $newval"
    "cset $res, EQ\t# $res <-- (EQ ? 1 : 0)"
 %}

 ins_encode(aarch64_enc_cmpxchgw_acq(mem, oldval, newval),
            aarch64_enc_cset_eq(res));

  ins_pipe(pipe_slow);
%}
8178 
8179 
// GetAndSet{I,L,N,P}, relaxed forms: atomic exchange of $newv with the
// value at [$mem], previous value returned in $prev. Word-sized exchange
// (atomic_xchgw) for int and narrow oop, doubleword (atomic_xchg) for
// long and ptr.

// Atomic 32-bit exchange.
instruct get_and_setI(indirect mem, iRegI newv, iRegINoSp prev) %{
  match(Set prev (GetAndSetI mem newv));
  ins_cost(2 * VOLATILE_REF_COST);
  format %{ "atomic_xchgw  $prev, $newv, [$mem]" %}
  ins_encode %{
    __ atomic_xchgw($prev$$Register, $newv$$Register, as_Register($mem$$base));
  %}
  ins_pipe(pipe_serial);
%}

// Atomic 64-bit exchange.
instruct get_and_setL(indirect mem, iRegL newv, iRegLNoSp prev) %{
  match(Set prev (GetAndSetL mem newv));
  ins_cost(2 * VOLATILE_REF_COST);
  format %{ "atomic_xchg  $prev, $newv, [$mem]" %}
  ins_encode %{
    __ atomic_xchg($prev$$Register, $newv$$Register, as_Register($mem$$base));
  %}
  ins_pipe(pipe_serial);
%}

// Atomic narrow-oop exchange (32-bit wide).
instruct get_and_setN(indirect mem, iRegN newv, iRegINoSp prev) %{
  match(Set prev (GetAndSetN mem newv));
  ins_cost(2 * VOLATILE_REF_COST);
  format %{ "atomic_xchgw $prev, $newv, [$mem]" %}
  ins_encode %{
    __ atomic_xchgw($prev$$Register, $newv$$Register, as_Register($mem$$base));
  %}
  ins_pipe(pipe_serial);
%}

// Atomic pointer exchange.
instruct get_and_setP(indirect mem, iRegP newv, iRegPNoSp prev) %{
  match(Set prev (GetAndSetP mem newv));
  ins_cost(2 * VOLATILE_REF_COST);
  format %{ "atomic_xchg  $prev, $newv, [$mem]" %}
  ins_encode %{
    __ atomic_xchg($prev$$Register, $newv$$Register, as_Register($mem$$base));
  %}
  ins_pipe(pipe_serial);
%}
8219 
// GetAndSet{I,L,N,P}Acq: acquiring exchange variants, selected when
// needs_acquiring_load_exclusive(n) holds (barriers elided). These call the
// *al (acquire/release) MacroAssembler exchanges and carry a lower cost so
// they take priority over the relaxed rules above.

// Acquiring 32-bit exchange.
instruct get_and_setIAcq(indirect mem, iRegI newv, iRegINoSp prev) %{
  predicate(needs_acquiring_load_exclusive(n));
  match(Set prev (GetAndSetI mem newv));
  ins_cost(VOLATILE_REF_COST);
  format %{ "atomic_xchgw_acq  $prev, $newv, [$mem]" %}
  ins_encode %{
    __ atomic_xchgalw($prev$$Register, $newv$$Register, as_Register($mem$$base));
  %}
  ins_pipe(pipe_serial);
%}

// Acquiring 64-bit exchange.
instruct get_and_setLAcq(indirect mem, iRegL newv, iRegLNoSp prev) %{
  predicate(needs_acquiring_load_exclusive(n));
  match(Set prev (GetAndSetL mem newv));
  ins_cost(VOLATILE_REF_COST);
  format %{ "atomic_xchg_acq  $prev, $newv, [$mem]" %}
  ins_encode %{
    __ atomic_xchgal($prev$$Register, $newv$$Register, as_Register($mem$$base));
  %}
  ins_pipe(pipe_serial);
%}

// Acquiring narrow-oop exchange.
instruct get_and_setNAcq(indirect mem, iRegN newv, iRegINoSp prev) %{
  predicate(needs_acquiring_load_exclusive(n));
  match(Set prev (GetAndSetN mem newv));
  ins_cost(VOLATILE_REF_COST);
  format %{ "atomic_xchgw_acq $prev, $newv, [$mem]" %}
  ins_encode %{
    __ atomic_xchgalw($prev$$Register, $newv$$Register, as_Register($mem$$base));
  %}
  ins_pipe(pipe_serial);
%}

// Acquiring pointer exchange.
instruct get_and_setPAcq(indirect mem, iRegP newv, iRegPNoSp prev) %{
  predicate(needs_acquiring_load_exclusive(n));
  match(Set prev (GetAndSetP mem newv));
  ins_cost(VOLATILE_REF_COST);
  format %{ "atomic_xchg_acq  $prev, $newv, [$mem]" %}
  ins_encode %{
    __ atomic_xchgal($prev$$Register, $newv$$Register, as_Register($mem$$base));
  %}
  ins_pipe(pipe_serial);
%}
8263 
8264 
// GetAndAdd{L,I}, relaxed forms: atomic fetch-and-add of $incr to [$mem].
// Naming scheme: a trailing "i" means the addend is an immediate
// (immLAddSub/immIAddSub) rather than a register; "_no_res" variants match
// only when the node's result is unused (result_not_used predicate) and
// pass noreg so no old value is produced -- note their ins_cost is one
// lower so they are preferred when applicable.

// long += register, old value in $newval.
instruct get_and_addL(indirect mem, iRegLNoSp newval, iRegL incr) %{
  match(Set newval (GetAndAddL mem incr));
  ins_cost(2 * VOLATILE_REF_COST + 1);
  format %{ "get_and_addL $newval, [$mem], $incr" %}
  ins_encode %{
    __ atomic_add($newval$$Register, $incr$$Register, as_Register($mem$$base));
  %}
  ins_pipe(pipe_serial);
%}

// long += register, result discarded.
instruct get_and_addL_no_res(indirect mem, Universe dummy, iRegL incr) %{
  predicate(n->as_LoadStore()->result_not_used());
  match(Set dummy (GetAndAddL mem incr));
  ins_cost(2 * VOLATILE_REF_COST);
  format %{ "get_and_addL [$mem], $incr" %}
  ins_encode %{
    __ atomic_add(noreg, $incr$$Register, as_Register($mem$$base));
  %}
  ins_pipe(pipe_serial);
%}

// long += immediate, old value in $newval.
instruct get_and_addLi(indirect mem, iRegLNoSp newval, immLAddSub incr) %{
  match(Set newval (GetAndAddL mem incr));
  ins_cost(2 * VOLATILE_REF_COST + 1);
  format %{ "get_and_addL $newval, [$mem], $incr" %}
  ins_encode %{
    __ atomic_add($newval$$Register, $incr$$constant, as_Register($mem$$base));
  %}
  ins_pipe(pipe_serial);
%}

// long += immediate, result discarded.
instruct get_and_addLi_no_res(indirect mem, Universe dummy, immLAddSub incr) %{
  predicate(n->as_LoadStore()->result_not_used());
  match(Set dummy (GetAndAddL mem incr));
  ins_cost(2 * VOLATILE_REF_COST);
  format %{ "get_and_addL [$mem], $incr" %}
  ins_encode %{
    __ atomic_add(noreg, $incr$$constant, as_Register($mem$$base));
  %}
  ins_pipe(pipe_serial);
%}

// int += register, old value in $newval.
instruct get_and_addI(indirect mem, iRegINoSp newval, iRegIorL2I incr) %{
  match(Set newval (GetAndAddI mem incr));
  ins_cost(2 * VOLATILE_REF_COST + 1);
  format %{ "get_and_addI $newval, [$mem], $incr" %}
  ins_encode %{
    __ atomic_addw($newval$$Register, $incr$$Register, as_Register($mem$$base));
  %}
  ins_pipe(pipe_serial);
%}

// int += register, result discarded.
instruct get_and_addI_no_res(indirect mem, Universe dummy, iRegIorL2I incr) %{
  predicate(n->as_LoadStore()->result_not_used());
  match(Set dummy (GetAndAddI mem incr));
  ins_cost(2 * VOLATILE_REF_COST);
  format %{ "get_and_addI [$mem], $incr" %}
  ins_encode %{
    __ atomic_addw(noreg, $incr$$Register, as_Register($mem$$base));
  %}
  ins_pipe(pipe_serial);
%}

// int += immediate, old value in $newval.
instruct get_and_addIi(indirect mem, iRegINoSp newval, immIAddSub incr) %{
  match(Set newval (GetAndAddI mem incr));
  ins_cost(2 * VOLATILE_REF_COST + 1);
  format %{ "get_and_addI $newval, [$mem], $incr" %}
  ins_encode %{
    __ atomic_addw($newval$$Register, $incr$$constant, as_Register($mem$$base));
  %}
  ins_pipe(pipe_serial);
%}

// int += immediate, result discarded.
instruct get_and_addIi_no_res(indirect mem, Universe dummy, immIAddSub incr) %{
  predicate(n->as_LoadStore()->result_not_used());
  match(Set dummy (GetAndAddI mem incr));
  ins_cost(2 * VOLATILE_REF_COST);
  format %{ "get_and_addI [$mem], $incr" %}
  ins_encode %{
    __ atomic_addw(noreg, $incr$$constant, as_Register($mem$$base));
  %}
  ins_pipe(pipe_serial);
%}
8348 
// GetAndAdd{L,I} acquiring forms: same matrix as the relaxed rules above
// (register/immediate addend x result-used/result-unused), selected when
// needs_acquiring_load_exclusive(n) holds. These use the *al
// (acquire/release) atomic adds and lower costs so they outrank the
// relaxed rules.

// Acquiring long += register, old value in $newval.
instruct get_and_addLAcq(indirect mem, iRegLNoSp newval, iRegL incr) %{
  predicate(needs_acquiring_load_exclusive(n));
  match(Set newval (GetAndAddL mem incr));
  ins_cost(VOLATILE_REF_COST + 1);
  format %{ "get_and_addL_acq $newval, [$mem], $incr" %}
  ins_encode %{
    __ atomic_addal($newval$$Register, $incr$$Register, as_Register($mem$$base));
  %}
  ins_pipe(pipe_serial);
%}

// Acquiring long += register, result discarded.
instruct get_and_addL_no_resAcq(indirect mem, Universe dummy, iRegL incr) %{
  predicate(n->as_LoadStore()->result_not_used() && needs_acquiring_load_exclusive(n));
  match(Set dummy (GetAndAddL mem incr));
  ins_cost(VOLATILE_REF_COST);
  format %{ "get_and_addL_acq [$mem], $incr" %}
  ins_encode %{
    __ atomic_addal(noreg, $incr$$Register, as_Register($mem$$base));
  %}
  ins_pipe(pipe_serial);
%}

// Acquiring long += immediate, old value in $newval.
instruct get_and_addLiAcq(indirect mem, iRegLNoSp newval, immLAddSub incr) %{
  predicate(needs_acquiring_load_exclusive(n));
  match(Set newval (GetAndAddL mem incr));
  ins_cost(VOLATILE_REF_COST + 1);
  format %{ "get_and_addL_acq $newval, [$mem], $incr" %}
  ins_encode %{
    __ atomic_addal($newval$$Register, $incr$$constant, as_Register($mem$$base));
  %}
  ins_pipe(pipe_serial);
%}

// Acquiring long += immediate, result discarded.
instruct get_and_addLi_no_resAcq(indirect mem, Universe dummy, immLAddSub incr) %{
  predicate(n->as_LoadStore()->result_not_used() && needs_acquiring_load_exclusive(n));
  match(Set dummy (GetAndAddL mem incr));
  ins_cost(VOLATILE_REF_COST);
  format %{ "get_and_addL_acq [$mem], $incr" %}
  ins_encode %{
    __ atomic_addal(noreg, $incr$$constant, as_Register($mem$$base));
  %}
  ins_pipe(pipe_serial);
%}

// Acquiring int += register, old value in $newval.
instruct get_and_addIAcq(indirect mem, iRegINoSp newval, iRegIorL2I incr) %{
  predicate(needs_acquiring_load_exclusive(n));
  match(Set newval (GetAndAddI mem incr));
  ins_cost(VOLATILE_REF_COST + 1);
  format %{ "get_and_addI_acq $newval, [$mem], $incr" %}
  ins_encode %{
    __ atomic_addalw($newval$$Register, $incr$$Register, as_Register($mem$$base));
  %}
  ins_pipe(pipe_serial);
%}

// Acquiring int += register, result discarded.
instruct get_and_addI_no_resAcq(indirect mem, Universe dummy, iRegIorL2I incr) %{
  predicate(n->as_LoadStore()->result_not_used() && needs_acquiring_load_exclusive(n));
  match(Set dummy (GetAndAddI mem incr));
  ins_cost(VOLATILE_REF_COST);
  format %{ "get_and_addI_acq [$mem], $incr" %}
  ins_encode %{
    __ atomic_addalw(noreg, $incr$$Register, as_Register($mem$$base));
  %}
  ins_pipe(pipe_serial);
%}

// Acquiring int += immediate, old value in $newval.
instruct get_and_addIiAcq(indirect mem, iRegINoSp newval, immIAddSub incr) %{
  predicate(needs_acquiring_load_exclusive(n));
  match(Set newval (GetAndAddI mem incr));
  ins_cost(VOLATILE_REF_COST + 1);
  format %{ "get_and_addI_acq $newval, [$mem], $incr" %}
  ins_encode %{
    __ atomic_addalw($newval$$Register, $incr$$constant, as_Register($mem$$base));
  %}
  ins_pipe(pipe_serial);
%}

// Acquiring int += immediate, result discarded.
instruct get_and_addIi_no_resAcq(indirect mem, Universe dummy, immIAddSub incr) %{
  predicate(n->as_LoadStore()->result_not_used() && needs_acquiring_load_exclusive(n));
  match(Set dummy (GetAndAddI mem incr));
  ins_cost(VOLATILE_REF_COST);
  format %{ "get_and_addI_acq [$mem], $incr" %}
  ins_encode %{
    __ atomic_addalw(noreg, $incr$$constant, as_Register($mem$$base));
  %}
  ins_pipe(pipe_serial);
%}
8436 
8437 // ============================================================================
8438 // Conditional Move Instructions
8439 
// n.b. we have identical rules for both a signed compare op (cmpOp)
// and an unsigned compare op (cmpOpU). it would be nice if we could
// define an op class which merged both inputs and use it to type the
// argument to a single rule. unfortunately this fails because the
// opclass does not live up to the COND_INTER interface of its
// component operands. When the generic code tries to negate the
// operand it ends up running the generic MachOper::negate method
// which throws a ShouldNotHappen. So, we have to provide two flavours
// of each rule, one for a cmpOp and a second for a cmpOpU (sigh).
8449 
// CMoveI via csel. Note the operand order in the encoding: cselw selects
// its second operand when the condition holds, so $src2 is passed first --
// $dst <- $cmp ? $src2 : $src1, matching the ideal CMove semantics.

// Signed-compare flavour.
instruct cmovI_reg_reg(cmpOp cmp, rFlagsReg cr, iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2) %{
  match(Set dst (CMoveI (Binary cmp cr) (Binary src1 src2)));

  ins_cost(INSN_COST * 2);
  format %{ "cselw $dst, $src2, $src1 $cmp\t# signed, int"  %}

  ins_encode %{
    __ cselw(as_Register($dst$$reg),
             as_Register($src2$$reg),
             as_Register($src1$$reg),
             (Assembler::Condition)$cmp$$cmpcode);
  %}

  ins_pipe(icond_reg_reg);
%}

// Unsigned-compare flavour (see comment above the section on why signed
// and unsigned need separate rules).
instruct cmovUI_reg_reg(cmpOpU cmp, rFlagsRegU cr, iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2) %{
  match(Set dst (CMoveI (Binary cmp cr) (Binary src1 src2)));

  ins_cost(INSN_COST * 2);
  format %{ "cselw $dst, $src2, $src1 $cmp\t# unsigned, int"  %}

  ins_encode %{
    __ cselw(as_Register($dst$$reg),
             as_Register($src2$$reg),
             as_Register($src1$$reg),
             (Assembler::Condition)$cmp$$cmpcode);
  %}

  ins_pipe(icond_reg_reg);
%}
8481 
8482 // special cases where one arg is zero
8483 
8484 // n.b. this is selected in preference to the rule above because it
8485 // avoids loading constant 0 into a source register
8486 
8487 // TODO
8488 // we ought only to be able to cull one of these variants as the ideal
8489 // transforms ought always to order the zero consistently (to left/right?)
8490 
// CMoveI where one arm is the constant zero: use the zero register zr
// instead of materialising 0 (see comments above). _zero_reg has zero as
// the first (not-taken) arm, _reg_zero as the second (taken) arm.

// dst <- cmp ? src : 0, signed compare.
instruct cmovI_zero_reg(cmpOp cmp, rFlagsReg cr, iRegINoSp dst, immI0 zero, iRegIorL2I src) %{
  match(Set dst (CMoveI (Binary cmp cr) (Binary zero src)));

  ins_cost(INSN_COST * 2);
  format %{ "cselw $dst, $src, zr $cmp\t# signed, int"  %}

  ins_encode %{
    __ cselw(as_Register($dst$$reg),
             as_Register($src$$reg),
             zr,
             (Assembler::Condition)$cmp$$cmpcode);
  %}

  ins_pipe(icond_reg);
%}

// dst <- cmp ? src : 0, unsigned compare.
instruct cmovUI_zero_reg(cmpOpU cmp, rFlagsRegU cr, iRegINoSp dst, immI0 zero, iRegIorL2I src) %{
  match(Set dst (CMoveI (Binary cmp cr) (Binary zero src)));

  ins_cost(INSN_COST * 2);
  format %{ "cselw $dst, $src, zr $cmp\t# unsigned, int"  %}

  ins_encode %{
    __ cselw(as_Register($dst$$reg),
             as_Register($src$$reg),
             zr,
             (Assembler::Condition)$cmp$$cmpcode);
  %}

  ins_pipe(icond_reg);
%}

// dst <- cmp ? 0 : src, signed compare.
instruct cmovI_reg_zero(cmpOp cmp, rFlagsReg cr, iRegINoSp dst, iRegIorL2I src, immI0 zero) %{
  match(Set dst (CMoveI (Binary cmp cr) (Binary src zero)));

  ins_cost(INSN_COST * 2);
  format %{ "cselw $dst, zr, $src $cmp\t# signed, int"  %}

  ins_encode %{
    __ cselw(as_Register($dst$$reg),
             zr,
             as_Register($src$$reg),
             (Assembler::Condition)$cmp$$cmpcode);
  %}

  ins_pipe(icond_reg);
%}

// dst <- cmp ? 0 : src, unsigned compare.
instruct cmovUI_reg_zero(cmpOpU cmp, rFlagsRegU cr, iRegINoSp dst, iRegIorL2I src, immI0 zero) %{
  match(Set dst (CMoveI (Binary cmp cr) (Binary src zero)));

  ins_cost(INSN_COST * 2);
  format %{ "cselw $dst, zr, $src $cmp\t# unsigned, int"  %}

  ins_encode %{
    __ cselw(as_Register($dst$$reg),
             zr,
             as_Register($src$$reg),
             (Assembler::Condition)$cmp$$cmpcode);
  %}

  ins_pipe(icond_reg);
%}
8554 
8555 // special case for creating a boolean 0 or 1
8556 
8557 // n.b. this is selected in preference to the rule above because it
8558 // avoids loading constants 0 and 1 into a source register
8559 
// CMoveI selecting between constants 1 and 0: a single csincw with both
// sources zr yields dst = cmp ? zr : zr + 1, i.e. the 0/1 boolean without
// loading either constant into a register.

// Signed-compare flavour.
instruct cmovI_reg_zero_one(cmpOp cmp, rFlagsReg cr, iRegINoSp dst, immI0 zero, immI_1 one) %{
  match(Set dst (CMoveI (Binary cmp cr) (Binary one zero)));

  ins_cost(INSN_COST * 2);
  format %{ "csincw $dst, zr, zr $cmp\t# signed, int"  %}

  ins_encode %{
    // equivalently
    // cset(as_Register($dst$$reg),
    //      negate_condition((Assembler::Condition)$cmp$$cmpcode));
    __ csincw(as_Register($dst$$reg),
             zr,
             zr,
             (Assembler::Condition)$cmp$$cmpcode);
  %}

  ins_pipe(icond_none);
%}

// Unsigned-compare flavour.
instruct cmovUI_reg_zero_one(cmpOpU cmp, rFlagsRegU cr, iRegINoSp dst, immI0 zero, immI_1 one) %{
  match(Set dst (CMoveI (Binary cmp cr) (Binary one zero)));

  ins_cost(INSN_COST * 2);
  format %{ "csincw $dst, zr, zr $cmp\t# unsigned, int"  %}

  ins_encode %{
    // equivalently
    // cset(as_Register($dst$$reg),
    //      negate_condition((Assembler::Condition)$cmp$$cmpcode));
    __ csincw(as_Register($dst$$reg),
             zr,
             zr,
             (Assembler::Condition)$cmp$$cmpcode);
  %}

  ins_pipe(icond_none);
%}
8597 
// CMoveL via 64-bit csel; $dst <- $cmp ? $src2 : $src1 (operand order as
// in cmovI_reg_reg above).

// Signed-compare flavour.
instruct cmovL_reg_reg(cmpOp cmp, rFlagsReg cr, iRegLNoSp dst, iRegL src1, iRegL src2) %{
  match(Set dst (CMoveL (Binary cmp cr) (Binary src1 src2)));

  ins_cost(INSN_COST * 2);
  format %{ "csel $dst, $src2, $src1 $cmp\t# signed, long"  %}

  ins_encode %{
    __ csel(as_Register($dst$$reg),
            as_Register($src2$$reg),
            as_Register($src1$$reg),
            (Assembler::Condition)$cmp$$cmpcode);
  %}

  ins_pipe(icond_reg_reg);
%}

// Unsigned-compare flavour.
instruct cmovUL_reg_reg(cmpOpU cmp, rFlagsRegU cr, iRegLNoSp dst, iRegL src1, iRegL src2) %{
  match(Set dst (CMoveL (Binary cmp cr) (Binary src1 src2)));

  ins_cost(INSN_COST * 2);
  format %{ "csel $dst, $src2, $src1 $cmp\t# unsigned, long"  %}

  ins_encode %{
    __ csel(as_Register($dst$$reg),
            as_Register($src2$$reg),
            as_Register($src1$$reg),
            (Assembler::Condition)$cmp$$cmpcode);
  %}

  ins_pipe(icond_reg_reg);
%}
8629 
8630 // special cases where one arg is zero
8631 
// CMoveL with one zero arm: zr replaces the constant (cf. the int rules
// above). _reg_zero: dst <- cmp ? 0 : src; _zero_reg: dst <- cmp ? src : 0.

// dst <- cmp ? 0 : src, signed compare.
instruct cmovL_reg_zero(cmpOp cmp, rFlagsReg cr, iRegLNoSp dst, iRegL src, immL0 zero) %{
  match(Set dst (CMoveL (Binary cmp cr) (Binary src zero)));

  ins_cost(INSN_COST * 2);
  format %{ "csel $dst, zr, $src $cmp\t# signed, long"  %}

  ins_encode %{
    __ csel(as_Register($dst$$reg),
            zr,
            as_Register($src$$reg),
            (Assembler::Condition)$cmp$$cmpcode);
  %}

  ins_pipe(icond_reg);
%}

// dst <- cmp ? 0 : src, unsigned compare.
instruct cmovUL_reg_zero(cmpOpU cmp, rFlagsRegU cr, iRegLNoSp dst, iRegL src, immL0 zero) %{
  match(Set dst (CMoveL (Binary cmp cr) (Binary src zero)));

  ins_cost(INSN_COST * 2);
  format %{ "csel $dst, zr, $src $cmp\t# unsigned, long"  %}

  ins_encode %{
    __ csel(as_Register($dst$$reg),
            zr,
            as_Register($src$$reg),
            (Assembler::Condition)$cmp$$cmpcode);
  %}

  ins_pipe(icond_reg);
%}

// dst <- cmp ? src : 0, signed compare.
instruct cmovL_zero_reg(cmpOp cmp, rFlagsReg cr, iRegLNoSp dst, immL0 zero, iRegL src) %{
  match(Set dst (CMoveL (Binary cmp cr) (Binary zero src)));

  ins_cost(INSN_COST * 2);
  format %{ "csel $dst, $src, zr $cmp\t# signed, long"  %}

  ins_encode %{
    __ csel(as_Register($dst$$reg),
            as_Register($src$$reg),
            zr,
            (Assembler::Condition)$cmp$$cmpcode);
  %}

  ins_pipe(icond_reg);
%}

// dst <- cmp ? src : 0, unsigned compare.
instruct cmovUL_zero_reg(cmpOpU cmp, rFlagsRegU cr, iRegLNoSp dst, immL0 zero, iRegL src) %{
  match(Set dst (CMoveL (Binary cmp cr) (Binary zero src)));

  ins_cost(INSN_COST * 2);
  format %{ "csel $dst, $src, zr $cmp\t# unsigned, long"  %}

  ins_encode %{
    __ csel(as_Register($dst$$reg),
            as_Register($src$$reg),
            zr,
            (Assembler::Condition)$cmp$$cmpcode);
  %}

  ins_pipe(icond_reg);
%}
8695 
// CMoveP via 64-bit csel; same operand ordering as cmovI/cmovL above.

// Signed-compare flavour.
instruct cmovP_reg_reg(cmpOp cmp, rFlagsReg cr, iRegPNoSp dst, iRegP src1, iRegP src2) %{
  match(Set dst (CMoveP (Binary cmp cr) (Binary src1 src2)));

  ins_cost(INSN_COST * 2);
  format %{ "csel $dst, $src2, $src1 $cmp\t# signed, ptr"  %}

  ins_encode %{
    __ csel(as_Register($dst$$reg),
            as_Register($src2$$reg),
            as_Register($src1$$reg),
            (Assembler::Condition)$cmp$$cmpcode);
  %}

  ins_pipe(icond_reg_reg);
%}

// Unsigned-compare flavour.
instruct cmovUP_reg_reg(cmpOpU cmp, rFlagsRegU cr, iRegPNoSp dst, iRegP src1, iRegP src2) %{
  match(Set dst (CMoveP (Binary cmp cr) (Binary src1 src2)));

  ins_cost(INSN_COST * 2);
  format %{ "csel $dst, $src2, $src1 $cmp\t# unsigned, ptr"  %}

  ins_encode %{
    __ csel(as_Register($dst$$reg),
            as_Register($src2$$reg),
            as_Register($src1$$reg),
            (Assembler::Condition)$cmp$$cmpcode);
  %}

  ins_pipe(icond_reg_reg);
%}
8727 
8728 // special cases where one arg is zero
8729 
// CMoveP with one NULL arm: zr stands in for the null pointer constant.
// _reg_zero: dst <- cmp ? NULL : src; _zero_reg: dst <- cmp ? src : NULL.

// dst <- cmp ? NULL : src, signed compare.
instruct cmovP_reg_zero(cmpOp cmp, rFlagsReg cr, iRegPNoSp dst, iRegP src, immP0 zero) %{
  match(Set dst (CMoveP (Binary cmp cr) (Binary src zero)));

  ins_cost(INSN_COST * 2);
  format %{ "csel $dst, zr, $src $cmp\t# signed, ptr"  %}

  ins_encode %{
    __ csel(as_Register($dst$$reg),
            zr,
            as_Register($src$$reg),
            (Assembler::Condition)$cmp$$cmpcode);
  %}

  ins_pipe(icond_reg);
%}

// dst <- cmp ? NULL : src, unsigned compare.
instruct cmovUP_reg_zero(cmpOpU cmp, rFlagsRegU cr, iRegPNoSp dst, iRegP src, immP0 zero) %{
  match(Set dst (CMoveP (Binary cmp cr) (Binary src zero)));

  ins_cost(INSN_COST * 2);
  format %{ "csel $dst, zr, $src $cmp\t# unsigned, ptr"  %}

  ins_encode %{
    __ csel(as_Register($dst$$reg),
            zr,
            as_Register($src$$reg),
            (Assembler::Condition)$cmp$$cmpcode);
  %}

  ins_pipe(icond_reg);
%}

// dst <- cmp ? src : NULL, signed compare.
instruct cmovP_zero_reg(cmpOp cmp, rFlagsReg cr, iRegPNoSp dst, immP0 zero, iRegP src) %{
  match(Set dst (CMoveP (Binary cmp cr) (Binary zero src)));

  ins_cost(INSN_COST * 2);
  format %{ "csel $dst, $src, zr $cmp\t# signed, ptr"  %}

  ins_encode %{
    __ csel(as_Register($dst$$reg),
            as_Register($src$$reg),
            zr,
            (Assembler::Condition)$cmp$$cmpcode);
  %}

  ins_pipe(icond_reg);
%}

// dst <- cmp ? src : NULL, unsigned compare.
instruct cmovUP_zero_reg(cmpOpU cmp, rFlagsRegU cr, iRegPNoSp dst, immP0 zero, iRegP src) %{
  match(Set dst (CMoveP (Binary cmp cr) (Binary zero src)));

  ins_cost(INSN_COST * 2);
  format %{ "csel $dst, $src, zr $cmp\t# unsigned, ptr"  %}

  ins_encode %{
    __ csel(as_Register($dst$$reg),
            as_Register($src$$reg),
            zr,
            (Assembler::Condition)$cmp$$cmpcode);
  %}

  ins_pipe(icond_reg);
%}
8793 
// CMoveN (compressed oop) via 32-bit cselw; signed-compare flavour.
// $dst <- $cmp ? $src2 : $src1, as for the other cmov rules.
instruct cmovN_reg_reg(cmpOp cmp, rFlagsReg cr, iRegNNoSp dst, iRegN src1, iRegN src2) %{
  match(Set dst (CMoveN (Binary cmp cr) (Binary src1 src2)));

  ins_cost(INSN_COST * 2);
  format %{ "cselw $dst, $src2, $src1 $cmp\t# signed, compressed ptr"  %}

  ins_encode %{
    __ cselw(as_Register($dst$$reg),
             as_Register($src2$$reg),
             as_Register($src1$$reg),
             (Assembler::Condition)$cmp$$cmpcode);
  %}

  ins_pipe(icond_reg_reg);
%}
8809 
// Conditional move of compressed (narrow) oop registers, unsigned condition:
// dst = cond ? src2 : src1, via the 32-bit cselw.
instruct cmovUN_reg_reg(cmpOpU cmp, rFlagsRegU cr, iRegNNoSp dst, iRegN src1, iRegN src2) %{
  match(Set dst (CMoveN (Binary cmp cr) (Binary src1 src2)));

  ins_cost(INSN_COST * 2);
  // Debug string fixed: this is the unsigned (cmpOpU) variant; it previously
  // said "signed", copied from cmovN_reg_reg.
  format %{ "cselw $dst, $src2, $src1 $cmp\t# unsigned, compressed ptr"  %}

  ins_encode %{
    __ cselw(as_Register($dst$$reg),
             as_Register($src2$$reg),
             as_Register($src1$$reg),
             (Assembler::Condition)$cmp$$cmpcode);
  %}

  ins_pipe(icond_reg_reg);
%}
8825 
8826 // special cases where one arg is zero
8827 
// Conditional move, compressed oop, zero as the true value (signed condition):
// dst = cond ? 0 : src, emitted as cselw with zr as the first source.
instruct cmovN_reg_zero(cmpOp cmp, rFlagsReg cr, iRegNNoSp dst, iRegN src, immN0 zero) %{
  match(Set dst (CMoveN (Binary cmp cr) (Binary src zero)));

  ins_cost(INSN_COST * 2);
  format %{ "cselw $dst, zr, $src $cmp\t# signed, compressed ptr"  %}

  ins_encode %{
    __ cselw(as_Register($dst$$reg),
             zr,
             as_Register($src$$reg),
             (Assembler::Condition)$cmp$$cmpcode);
  %}

  ins_pipe(icond_reg);
%}
8843 
// Conditional move, compressed oop, zero as the true value (unsigned condition).
instruct cmovUN_reg_zero(cmpOpU cmp, rFlagsRegU cr, iRegNNoSp dst, iRegN src, immN0 zero) %{
  match(Set dst (CMoveN (Binary cmp cr) (Binary src zero)));

  ins_cost(INSN_COST * 2);
  format %{ "cselw $dst, zr, $src $cmp\t# unsigned, compressed ptr"  %}

  ins_encode %{
    __ cselw(as_Register($dst$$reg),
             zr,
             as_Register($src$$reg),
             (Assembler::Condition)$cmp$$cmpcode);
  %}

  ins_pipe(icond_reg);
%}
8859 
// Conditional move, compressed oop, zero as the false value (signed condition):
// dst = cond ? src : 0.
instruct cmovN_zero_reg(cmpOp cmp, rFlagsReg cr, iRegNNoSp dst, immN0 zero, iRegN src) %{
  match(Set dst (CMoveN (Binary cmp cr) (Binary zero src)));

  ins_cost(INSN_COST * 2);
  format %{ "cselw $dst, $src, zr $cmp\t# signed, compressed ptr"  %}

  ins_encode %{
    __ cselw(as_Register($dst$$reg),
             as_Register($src$$reg),
             zr,
             (Assembler::Condition)$cmp$$cmpcode);
  %}

  ins_pipe(icond_reg);
%}
8875 
// Conditional move, compressed oop, zero as the false value (unsigned condition).
instruct cmovUN_zero_reg(cmpOpU cmp, rFlagsRegU cr, iRegNNoSp dst, immN0 zero, iRegN src) %{
  match(Set dst (CMoveN (Binary cmp cr) (Binary zero src)));

  ins_cost(INSN_COST * 2);
  format %{ "cselw $dst, $src, zr $cmp\t# unsigned, compressed ptr"  %}

  ins_encode %{
    __ cselw(as_Register($dst$$reg),
             as_Register($src$$reg),
             zr,
             (Assembler::Condition)$cmp$$cmpcode);
  %}

  ins_pipe(icond_reg);
%}
8891 
// Conditional move of single-precision float registers (signed condition).
// The encoding selects src2 when the condition holds, src1 otherwise.
// NOTE(review): the debug format prints $src1, $src2 while the emitted
// fcsel operand order is src2, src1 — confirm the format string is intended.
instruct cmovF_reg(cmpOp cmp, rFlagsReg cr, vRegF dst, vRegF src1,  vRegF src2)
%{
  match(Set dst (CMoveF (Binary cmp cr) (Binary src1 src2)));

  ins_cost(INSN_COST * 3);

  format %{ "fcsels $dst, $src1, $src2, $cmp\t# signed cmove float\n\t" %}
  ins_encode %{
    Assembler::Condition cond = (Assembler::Condition)$cmp$$cmpcode;
    __ fcsels(as_FloatRegister($dst$$reg),
              as_FloatRegister($src2$$reg),
              as_FloatRegister($src1$$reg),
              cond);
  %}

  ins_pipe(fp_cond_reg_reg_s);
%}
8909 
// Conditional move of single-precision float registers (unsigned condition).
instruct cmovUF_reg(cmpOpU cmp, rFlagsRegU cr, vRegF dst, vRegF src1,  vRegF src2)
%{
  match(Set dst (CMoveF (Binary cmp cr) (Binary src1 src2)));

  ins_cost(INSN_COST * 3);

  format %{ "fcsels $dst, $src1, $src2, $cmp\t# unsigned cmove float\n\t" %}
  ins_encode %{
    Assembler::Condition cond = (Assembler::Condition)$cmp$$cmpcode;
    __ fcsels(as_FloatRegister($dst$$reg),
              as_FloatRegister($src2$$reg),
              as_FloatRegister($src1$$reg),
              cond);
  %}

  ins_pipe(fp_cond_reg_reg_s);
%}
8927 
// Conditional move of double-precision float registers (signed condition).
// The encoding selects src2 when the condition holds, src1 otherwise.
instruct cmovD_reg(cmpOp cmp, rFlagsReg cr, vRegD dst, vRegD src1,  vRegD src2)
%{
  match(Set dst (CMoveD (Binary cmp cr) (Binary src1 src2)));

  ins_cost(INSN_COST * 3);

  // Debug string fixed: this is the CMoveD/fcseld (double) rule; it previously
  // said "cmove float", copied from the single-precision rule.
  format %{ "fcseld $dst, $src1, $src2, $cmp\t# signed cmove double\n\t" %}
  ins_encode %{
    Assembler::Condition cond = (Assembler::Condition)$cmp$$cmpcode;
    __ fcseld(as_FloatRegister($dst$$reg),
              as_FloatRegister($src2$$reg),
              as_FloatRegister($src1$$reg),
              cond);
  %}

  ins_pipe(fp_cond_reg_reg_d);
%}
8945 
// Conditional move of double-precision float registers (unsigned condition).
instruct cmovUD_reg(cmpOpU cmp, rFlagsRegU cr, vRegD dst, vRegD src1,  vRegD src2)
%{
  match(Set dst (CMoveD (Binary cmp cr) (Binary src1 src2)));

  ins_cost(INSN_COST * 3);

  // Debug string fixed: "cmove double", not "cmove float" (this is CMoveD).
  format %{ "fcseld $dst, $src1, $src2, $cmp\t# unsigned cmove double\n\t" %}
  ins_encode %{
    Assembler::Condition cond = (Assembler::Condition)$cmp$$cmpcode;
    __ fcseld(as_FloatRegister($dst$$reg),
              as_FloatRegister($src2$$reg),
              as_FloatRegister($src1$$reg),
              cond);
  %}

  ins_pipe(fp_cond_reg_reg_d);
%}
8963 
8964 // ============================================================================
8965 // Arithmetic Instructions
8966 //
8967 
8968 // Integer Addition
8969 
8970 // TODO
8971 // these currently employ operations which do not set CR and hence are
8972 // not flagged as killing CR but we would like to isolate the cases
8973 // where we want to set flags from those where we don't. need to work
8974 // out how to do that.
8975 
// 32-bit integer add, register + register -> addw.
instruct addI_reg_reg(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2) %{
  match(Set dst (AddI src1 src2));

  ins_cost(INSN_COST);
  format %{ "addw  $dst, $src1, $src2" %}

  ins_encode %{
    __ addw(as_Register($dst$$reg),
            as_Register($src1$$reg),
            as_Register($src2$$reg));
  %}

  ins_pipe(ialu_reg_reg);
%}
8990 
// 32-bit integer add, register + add/sub-encodable immediate.
instruct addI_reg_imm(iRegINoSp dst, iRegIorL2I src1, immIAddSub src2) %{
  match(Set dst (AddI src1 src2));

  ins_cost(INSN_COST);
  format %{ "addw $dst, $src1, $src2" %}

  // use opcode to indicate that this is an add not a sub
  opcode(0x0);

  ins_encode(aarch64_enc_addsubw_imm(dst, src1, src2));

  ins_pipe(ialu_reg_imm);
%}
9004 
// 32-bit add of an immediate to the low word of a long (ConvL2I folded in):
// the w-form addw reads only the low 32 bits of src1, so no explicit
// truncation is needed.
instruct addI_reg_imm_i2l(iRegINoSp dst, iRegL src1, immIAddSub src2) %{
  match(Set dst (AddI (ConvL2I src1) src2));

  ins_cost(INSN_COST);
  format %{ "addw $dst, $src1, $src2" %}

  // use opcode to indicate that this is an add not a sub
  opcode(0x0);

  ins_encode(aarch64_enc_addsubw_imm(dst, src1, src2));

  ins_pipe(ialu_reg_imm);
%}
9018 
9019 // Pointer Addition
// Pointer + long offset -> 64-bit add.
instruct addP_reg_reg(iRegPNoSp dst, iRegP src1, iRegL src2) %{
  match(Set dst (AddP src1 src2));

  ins_cost(INSN_COST);
  format %{ "add $dst, $src1, $src2\t# ptr" %}

  ins_encode %{
    __ add(as_Register($dst$$reg),
           as_Register($src1$$reg),
           as_Register($src2$$reg));
  %}

  ins_pipe(ialu_reg_reg);
%}
9034 
// Pointer + sign-extended int offset: folds the ConvI2L into the add's
// sxtw extended-register form, saving a separate extend instruction.
instruct addP_reg_reg_ext(iRegPNoSp dst, iRegP src1, iRegIorL2I src2) %{
  match(Set dst (AddP src1 (ConvI2L src2)));

  ins_cost(1.9 * INSN_COST);
  format %{ "add $dst, $src1, $src2, sxtw\t# ptr" %}

  ins_encode %{
    __ add(as_Register($dst$$reg),
           as_Register($src1$$reg),
           as_Register($src2$$reg), ext::sxtw);
  %}

  ins_pipe(ialu_reg_reg);
%}
9049 
// Pointer + (long << scale): folded into a single scaled-register address
// computation via lea.
instruct addP_reg_reg_lsl(iRegPNoSp dst, iRegP src1, iRegL src2, immIScale scale) %{
  match(Set dst (AddP src1 (LShiftL src2 scale)));

  ins_cost(1.9 * INSN_COST);
  format %{ "add $dst, $src1, $src2, LShiftL $scale\t# ptr" %}

  ins_encode %{
    __ lea(as_Register($dst$$reg),
           Address(as_Register($src1$$reg), as_Register($src2$$reg),
                   Address::lsl($scale$$constant)));
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
9064 
// Pointer + ((long)int << scale): folds both the sign-extend and the shift
// into one sxtw-scaled lea.
instruct addP_reg_reg_ext_shift(iRegPNoSp dst, iRegP src1, iRegIorL2I src2, immIScale scale) %{
  match(Set dst (AddP src1 (LShiftL (ConvI2L src2) scale)));

  ins_cost(1.9 * INSN_COST);
  format %{ "add $dst, $src1, $src2, I2L $scale\t# ptr" %}

  ins_encode %{
    __ lea(as_Register($dst$$reg),
           Address(as_Register($src1$$reg), as_Register($src2$$reg),
                   Address::sxtw($scale$$constant)));
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
9079 
// (long)(int src) << scale in one instruction: sbfiz sign-extends and
// shifts together. Width is capped at 32 (MIN) since only the low 32 bits
// of src are significant.
instruct lshift_ext(iRegLNoSp dst, iRegIorL2I src, immI scale, rFlagsReg cr) %{
  match(Set dst (LShiftL (ConvI2L src) scale));

  ins_cost(INSN_COST);
  format %{ "sbfiz $dst, $src, $scale & 63, -$scale & 63\t" %}

  ins_encode %{
    __ sbfiz(as_Register($dst$$reg),
          as_Register($src$$reg),
          $scale$$constant & 63, MIN(32, (-$scale$$constant) & 63));
  %}

  ins_pipe(ialu_reg_shift);
%}
9094 
9095 // Pointer Immediate Addition
9096 // n.b. this needs to be more expensive than using an indirect memory
9097 // operand
// Pointer + add/sub-encodable immediate.
instruct addP_reg_imm(iRegPNoSp dst, iRegP src1, immLAddSub src2) %{
  match(Set dst (AddP src1 src2));

  ins_cost(INSN_COST);
  format %{ "add $dst, $src1, $src2\t# ptr" %}

  // use opcode to indicate that this is an add not a sub
  opcode(0x0);

  ins_encode( aarch64_enc_addsub_imm(dst, src1, src2) );

  ins_pipe(ialu_reg_imm);
%}
9111 
9112 // Long Addition
// 64-bit integer add, register + register.
instruct addL_reg_reg(iRegLNoSp dst, iRegL src1, iRegL src2) %{

  match(Set dst (AddL src1 src2));

  ins_cost(INSN_COST);
  format %{ "add  $dst, $src1, $src2" %}

  ins_encode %{
    __ add(as_Register($dst$$reg),
           as_Register($src1$$reg),
           as_Register($src2$$reg));
  %}

  ins_pipe(ialu_reg_reg);
%}
9128 
// Long Immediate Addition. No constant pool entries required.
// 64-bit integer add, register + add/sub-encodable immediate.
instruct addL_reg_imm(iRegLNoSp dst, iRegL src1, immLAddSub src2) %{
  match(Set dst (AddL src1 src2));

  ins_cost(INSN_COST);
  format %{ "add $dst, $src1, $src2" %}

  // use opcode to indicate that this is an add not a sub
  opcode(0x0);

  ins_encode( aarch64_enc_addsub_imm(dst, src1, src2) );

  ins_pipe(ialu_reg_imm);
%}
9143 
9144 // Integer Subtraction
// 32-bit integer subtract, register - register -> subw.
instruct subI_reg_reg(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2) %{
  match(Set dst (SubI src1 src2));

  ins_cost(INSN_COST);
  format %{ "subw  $dst, $src1, $src2" %}

  ins_encode %{
    __ subw(as_Register($dst$$reg),
            as_Register($src1$$reg),
            as_Register($src2$$reg));
  %}

  ins_pipe(ialu_reg_reg);
%}
9159 
9160 // Immediate Subtraction
// 32-bit integer subtract, register - add/sub-encodable immediate.
instruct subI_reg_imm(iRegINoSp dst, iRegIorL2I src1, immIAddSub src2) %{
  match(Set dst (SubI src1 src2));

  ins_cost(INSN_COST);
  format %{ "subw $dst, $src1, $src2" %}

  // use opcode to indicate that this is a sub not an add
  opcode(0x1);

  ins_encode(aarch64_enc_addsubw_imm(dst, src1, src2));

  ins_pipe(ialu_reg_imm);
%}
9174 
9175 // Long Subtraction
// 64-bit integer subtract, register - register.
instruct subL_reg_reg(iRegLNoSp dst, iRegL src1, iRegL src2) %{

  match(Set dst (SubL src1 src2));

  ins_cost(INSN_COST);
  format %{ "sub  $dst, $src1, $src2" %}

  ins_encode %{
    __ sub(as_Register($dst$$reg),
           as_Register($src1$$reg),
           as_Register($src2$$reg));
  %}

  ins_pipe(ialu_reg_reg);
%}
9191 
// Long Immediate Subtraction. No constant pool entries required.
// 64-bit integer subtract, register - add/sub-encodable immediate.
instruct subL_reg_imm(iRegLNoSp dst, iRegL src1, immLAddSub src2) %{
  match(Set dst (SubL src1 src2));

  ins_cost(INSN_COST);
  // Debug string fixed: was "sub$dst, ..." (missing separator after the
  // mnemonic); now matches the other add/sub immediate rules.
  format %{ "sub $dst, $src1, $src2" %}

  // use opcode to indicate that this is a sub not an add
  opcode(0x1);

  ins_encode( aarch64_enc_addsub_imm(dst, src1, src2) );

  ins_pipe(ialu_reg_imm);
%}
9206 
9207 // Integer Negation (special case for sub)
9208 
// Integer negate: 0 - src matched to a single negw.
instruct negI_reg(iRegINoSp dst, iRegIorL2I src, immI0 zero, rFlagsReg cr) %{
  match(Set dst (SubI zero src));

  ins_cost(INSN_COST);
  format %{ "negw $dst, $src\t# int" %}

  ins_encode %{
    __ negw(as_Register($dst$$reg),
            as_Register($src$$reg));
  %}

  ins_pipe(ialu_reg);
%}
9222 
9223 // Long Negation
9224 
// Long negate: 0 - src matched to a single neg.
// NOTE(review): src is declared iRegIorL2I although SubL consumes a long
// value; iRegL looks like the intended operand class — confirm against the
// matcher/upstream before relying on this.
instruct negL_reg(iRegLNoSp dst, iRegIorL2I src, immL0 zero, rFlagsReg cr) %{
  match(Set dst (SubL zero src));

  ins_cost(INSN_COST);
  format %{ "neg $dst, $src\t# long" %}

  ins_encode %{
    __ neg(as_Register($dst$$reg),
           as_Register($src$$reg));
  %}

  ins_pipe(ialu_reg);
%}
9238 
9239 // Integer Multiply
9240 
// 32-bit integer multiply -> mulw.
instruct mulI(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2) %{
  match(Set dst (MulI src1 src2));

  ins_cost(INSN_COST * 3);
  format %{ "mulw  $dst, $src1, $src2" %}

  ins_encode %{
    __ mulw(as_Register($dst$$reg),
            as_Register($src1$$reg),
            as_Register($src2$$reg));
  %}

  ins_pipe(imul_reg_reg);
%}
9255 
// Widening multiply: (long)int * (long)int done directly with smull,
// avoiding two explicit sign-extends plus a 64-bit mul.
instruct smulI(iRegLNoSp dst, iRegIorL2I src1, iRegIorL2I src2) %{
  match(Set dst (MulL (ConvI2L src1) (ConvI2L src2)));

  ins_cost(INSN_COST * 3);
  format %{ "smull  $dst, $src1, $src2" %}

  ins_encode %{
    __ smull(as_Register($dst$$reg),
             as_Register($src1$$reg),
             as_Register($src2$$reg));
  %}

  ins_pipe(imul_reg_reg);
%}
9270 
9271 // Long Multiply
9272 
// 64-bit integer multiply.
instruct mulL(iRegLNoSp dst, iRegL src1, iRegL src2) %{
  match(Set dst (MulL src1 src2));

  ins_cost(INSN_COST * 5);
  format %{ "mul  $dst, $src1, $src2" %}

  ins_encode %{
    __ mul(as_Register($dst$$reg),
           as_Register($src1$$reg),
           as_Register($src2$$reg));
  %}

  ins_pipe(lmul_reg_reg);
%}
9287 
// High 64 bits of a signed 64x64 multiply (MulHiL) -> smulh.
instruct mulHiL_rReg(iRegLNoSp dst, iRegL src1, iRegL src2, rFlagsReg cr)
%{
  match(Set dst (MulHiL src1 src2));

  ins_cost(INSN_COST * 7);
  // Debug string fixed: dropped the stray comma before the comment tab.
  format %{ "smulh   $dst, $src1, $src2\t# mulhi" %}

  ins_encode %{
    __ smulh(as_Register($dst$$reg),
             as_Register($src1$$reg),
             as_Register($src2$$reg));
  %}

  ins_pipe(lmul_reg_reg);
%}
9303 
9304 // Combined Integer Multiply & Add/Sub
9305 
// Fused 32-bit multiply-add: src3 + src1*src2 -> maddw.
instruct maddI(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2, iRegIorL2I src3) %{
  match(Set dst (AddI src3 (MulI src1 src2)));

  ins_cost(INSN_COST * 3);
  // Debug string fixed: the encoding emits the 32-bit form maddw, not madd.
  format %{ "maddw  $dst, $src1, $src2, $src3" %}

  ins_encode %{
    __ maddw(as_Register($dst$$reg),
             as_Register($src1$$reg),
             as_Register($src2$$reg),
             as_Register($src3$$reg));
  %}

  ins_pipe(imac_reg_reg);
%}
9321 
// Fused 32-bit multiply-subtract: src3 - src1*src2 -> msubw.
instruct msubI(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2, iRegIorL2I src3) %{
  match(Set dst (SubI src3 (MulI src1 src2)));

  ins_cost(INSN_COST * 3);
  // Debug string fixed: the encoding emits the 32-bit form msubw, not msub.
  format %{ "msubw  $dst, $src1, $src2, $src3" %}

  ins_encode %{
    __ msubw(as_Register($dst$$reg),
             as_Register($src1$$reg),
             as_Register($src2$$reg),
             as_Register($src3$$reg));
  %}

  ins_pipe(imac_reg_reg);
%}
9337 
9338 // Combined Long Multiply & Add/Sub
9339 
// Fused 64-bit multiply-add: src3 + src1*src2 -> madd.
instruct maddL(iRegLNoSp dst, iRegL src1, iRegL src2, iRegL src3) %{
  match(Set dst (AddL src3 (MulL src1 src2)));

  ins_cost(INSN_COST * 5);
  format %{ "madd  $dst, $src1, $src2, $src3" %}

  ins_encode %{
    __ madd(as_Register($dst$$reg),
            as_Register($src1$$reg),
            as_Register($src2$$reg),
            as_Register($src3$$reg));
  %}

  ins_pipe(lmac_reg_reg);
%}
9355 
// Fused 64-bit multiply-subtract: src3 - src1*src2 -> msub.
instruct msubL(iRegLNoSp dst, iRegL src1, iRegL src2, iRegL src3) %{
  match(Set dst (SubL src3 (MulL src1 src2)));

  ins_cost(INSN_COST * 5);
  format %{ "msub  $dst, $src1, $src2, $src3" %}

  ins_encode %{
    __ msub(as_Register($dst$$reg),
            as_Register($src1$$reg),
            as_Register($src2$$reg),
            as_Register($src3$$reg));
  %}

  ins_pipe(lmac_reg_reg);
%}
9371 
9372 // Integer Divide
9373 
// 32-bit signed integer divide -> sdivw (via shared encoding block).
instruct divI(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2) %{
  match(Set dst (DivI src1 src2));

  ins_cost(INSN_COST * 19);
  format %{ "sdivw  $dst, $src1, $src2" %}

  ins_encode(aarch64_enc_divw(dst, src1, src2));
  ins_pipe(idiv_reg_reg);
%}
9383 
// (src >> 31) >>> 31 extracts the int sign bit; matched to a single lsrw #31.
instruct signExtract(iRegINoSp dst, iRegIorL2I src1, immI_31 div1, immI_31 div2) %{
  match(Set dst (URShiftI (RShiftI src1 div1) div2));
  ins_cost(INSN_COST);
  format %{ "lsrw $dst, $src1, $div1" %}
  ins_encode %{
    __ lsrw(as_Register($dst$$reg), as_Register($src1$$reg), 31);
  %}
  ins_pipe(ialu_reg_shift);
%}
9393 
// src + ((src >> 31) >>> 31), i.e. src plus its sign bit — the rounding
// adjustment used before an arithmetic shift divides by 2. Folded into a
// single addw with an LSR #31 shifted operand.
instruct div2Round(iRegINoSp dst, iRegIorL2I src, immI_31 div1, immI_31 div2) %{
  match(Set dst (AddI src (URShiftI (RShiftI src div1) div2)));
  ins_cost(INSN_COST);
  format %{ "addw $dst, $src, LSR $div1" %}

  ins_encode %{
    __ addw(as_Register($dst$$reg),
              as_Register($src$$reg),
              as_Register($src$$reg),
              Assembler::LSR, 31);
  %}
  ins_pipe(ialu_reg);
%}
9407 
9408 // Long Divide
9409 
// 64-bit signed integer divide -> sdiv (via shared encoding block).
instruct divL(iRegLNoSp dst, iRegL src1, iRegL src2) %{
  match(Set dst (DivL src1 src2));

  ins_cost(INSN_COST * 35);
  format %{ "sdiv   $dst, $src1, $src2" %}

  ins_encode(aarch64_enc_div(dst, src1, src2));
  ins_pipe(ldiv_reg_reg);
%}
9419 
// (src >> 63) >>> 63 extracts the long sign bit; matched to a single lsr #63.
instruct signExtractL(iRegLNoSp dst, iRegL src1, immL_63 div1, immL_63 div2) %{
  match(Set dst (URShiftL (RShiftL src1 div1) div2));
  ins_cost(INSN_COST);
  format %{ "lsr $dst, $src1, $div1" %}
  ins_encode %{
    __ lsr(as_Register($dst$$reg), as_Register($src1$$reg), 63);
  %}
  ins_pipe(ialu_reg_shift);
%}
9429 
// src + ((src >> 63) >>> 63): long divide-by-2 rounding adjustment, folded
// into one add with an LSR #63 shifted operand.
instruct div2RoundL(iRegLNoSp dst, iRegL src, immL_63 div1, immL_63 div2) %{
  match(Set dst (AddL src (URShiftL (RShiftL src div1) div2)));
  ins_cost(INSN_COST);
  format %{ "add $dst, $src, $div1" %}

  ins_encode %{
    __ add(as_Register($dst$$reg),
              as_Register($src$$reg),
              as_Register($src$$reg),
              Assembler::LSR, 63);
  %}
  ins_pipe(ialu_reg);
%}
9443 
9444 // Integer Remainder
9445 
// 32-bit signed remainder: no hardware rem, so sdivw then msubw
// (dst = src1 - (src1/src2)*src2), via the shared encoding block.
instruct modI(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2) %{
  match(Set dst (ModI src1 src2));

  ins_cost(INSN_COST * 22);
  // Debug string fixed: removed the stray "(" after msubw.
  format %{ "sdivw  rscratch1, $src1, $src2\n\t"
            "msubw  $dst, rscratch1, $src2, $src1" %}

  ins_encode(aarch64_enc_modw(dst, src1, src2));
  ins_pipe(idiv_reg_reg);
%}
9456 
9457 // Long Remainder
9458 
// 64-bit signed remainder: sdiv then msub
// (dst = src1 - (src1/src2)*src2), via the shared encoding block.
instruct modL(iRegLNoSp dst, iRegL src1, iRegL src2) %{
  match(Set dst (ModL src1 src2));

  ins_cost(INSN_COST * 38);
  // Debug string fixed: removed the stray "(" after msub and added the
  // missing "\t" after "\n" for consistency with modI.
  format %{ "sdiv   rscratch1, $src1, $src2\n\t"
            "msub   $dst, rscratch1, $src2, $src1" %}

  ins_encode(aarch64_enc_mod(dst, src1, src2));
  ins_pipe(ldiv_reg_reg);
%}
9469 
9470 // Integer Shifts
9471 
9472 // Shift Left Register
// 32-bit shift left by a register amount -> lslvw.
instruct lShiftI_reg_reg(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2) %{
  match(Set dst (LShiftI src1 src2));

  ins_cost(INSN_COST * 2);
  format %{ "lslvw  $dst, $src1, $src2" %}

  ins_encode %{
    __ lslvw(as_Register($dst$$reg),
             as_Register($src1$$reg),
             as_Register($src2$$reg));
  %}

  ins_pipe(ialu_reg_reg_vshift);
%}
9487 
9488 // Shift Left Immediate
// 32-bit shift left by an immediate; the shift amount is masked to 5 bits
// to match Java shift semantics.
instruct lShiftI_reg_imm(iRegINoSp dst, iRegIorL2I src1, immI src2) %{
  match(Set dst (LShiftI src1 src2));

  ins_cost(INSN_COST);
  format %{ "lslw $dst, $src1, ($src2 & 0x1f)" %}

  ins_encode %{
    __ lslw(as_Register($dst$$reg),
            as_Register($src1$$reg),
            $src2$$constant & 0x1f);
  %}

  ins_pipe(ialu_reg_shift);
%}
9503 
9504 // Shift Right Logical Register
// 32-bit unsigned (logical) shift right by a register amount -> lsrvw.
instruct urShiftI_reg_reg(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2) %{
  match(Set dst (URShiftI src1 src2));

  ins_cost(INSN_COST * 2);
  format %{ "lsrvw  $dst, $src1, $src2" %}

  ins_encode %{
    __ lsrvw(as_Register($dst$$reg),
             as_Register($src1$$reg),
             as_Register($src2$$reg));
  %}

  ins_pipe(ialu_reg_reg_vshift);
%}
9519 
9520 // Shift Right Logical Immediate
// 32-bit logical shift right by an immediate (amount masked to 5 bits).
instruct urShiftI_reg_imm(iRegINoSp dst, iRegIorL2I src1, immI src2) %{
  match(Set dst (URShiftI src1 src2));

  ins_cost(INSN_COST);
  format %{ "lsrw $dst, $src1, ($src2 & 0x1f)" %}

  ins_encode %{
    __ lsrw(as_Register($dst$$reg),
            as_Register($src1$$reg),
            $src2$$constant & 0x1f);
  %}

  ins_pipe(ialu_reg_shift);
%}
9535 
9536 // Shift Right Arithmetic Register
// 32-bit arithmetic shift right by a register amount -> asrvw.
instruct rShiftI_reg_reg(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2) %{
  match(Set dst (RShiftI src1 src2));

  ins_cost(INSN_COST * 2);
  format %{ "asrvw  $dst, $src1, $src2" %}

  ins_encode %{
    __ asrvw(as_Register($dst$$reg),
             as_Register($src1$$reg),
             as_Register($src2$$reg));
  %}

  ins_pipe(ialu_reg_reg_vshift);
%}
9551 
9552 // Shift Right Arithmetic Immediate
// 32-bit arithmetic shift right by an immediate (amount masked to 5 bits).
instruct rShiftI_reg_imm(iRegINoSp dst, iRegIorL2I src1, immI src2) %{
  match(Set dst (RShiftI src1 src2));

  ins_cost(INSN_COST);
  format %{ "asrw $dst, $src1, ($src2 & 0x1f)" %}

  ins_encode %{
    __ asrw(as_Register($dst$$reg),
            as_Register($src1$$reg),
            $src2$$constant & 0x1f);
  %}

  ins_pipe(ialu_reg_shift);
%}
9567 
9568 // Combined Int Mask and Right Shift (using UBFM)
9569 // TODO
9570 
9571 // Long Shifts
9572 
9573 // Shift Left Register
// 64-bit shift left by a register amount -> lslv.
instruct lShiftL_reg_reg(iRegLNoSp dst, iRegL src1, iRegIorL2I src2) %{
  match(Set dst (LShiftL src1 src2));

  ins_cost(INSN_COST * 2);
  format %{ "lslv  $dst, $src1, $src2" %}

  ins_encode %{
    __ lslv(as_Register($dst$$reg),
            as_Register($src1$$reg),
            as_Register($src2$$reg));
  %}

  ins_pipe(ialu_reg_reg_vshift);
%}
9588 
9589 // Shift Left Immediate
// 64-bit shift left by an immediate; amount masked to 6 bits per Java
// long-shift semantics.
instruct lShiftL_reg_imm(iRegLNoSp dst, iRegL src1, immI src2) %{
  match(Set dst (LShiftL src1 src2));

  ins_cost(INSN_COST);
  format %{ "lsl $dst, $src1, ($src2 & 0x3f)" %}

  ins_encode %{
    __ lsl(as_Register($dst$$reg),
            as_Register($src1$$reg),
            $src2$$constant & 0x3f);
  %}

  ins_pipe(ialu_reg_shift);
%}
9604 
9605 // Shift Right Logical Register
// 64-bit logical shift right by a register amount -> lsrv.
instruct urShiftL_reg_reg(iRegLNoSp dst, iRegL src1, iRegIorL2I src2) %{
  match(Set dst (URShiftL src1 src2));

  ins_cost(INSN_COST * 2);
  format %{ "lsrv  $dst, $src1, $src2" %}

  ins_encode %{
    __ lsrv(as_Register($dst$$reg),
            as_Register($src1$$reg),
            as_Register($src2$$reg));
  %}

  ins_pipe(ialu_reg_reg_vshift);
%}
9620 
9621 // Shift Right Logical Immediate
// 64-bit logical shift right by an immediate (amount masked to 6 bits).
instruct urShiftL_reg_imm(iRegLNoSp dst, iRegL src1, immI src2) %{
  match(Set dst (URShiftL src1 src2));

  ins_cost(INSN_COST);
  format %{ "lsr $dst, $src1, ($src2 & 0x3f)" %}

  ins_encode %{
    __ lsr(as_Register($dst$$reg),
           as_Register($src1$$reg),
           $src2$$constant & 0x3f);
  %}

  ins_pipe(ialu_reg_shift);
%}
9636 
9637 // A special-case pattern for card table stores.
// A special-case pattern for card table stores.
// Logical right shift of a pointer reinterpreted as bits (CastP2X): the
// cast is free, so this is a plain lsr on the pointer register.
instruct urShiftP_reg_imm(iRegLNoSp dst, iRegP src1, immI src2) %{
  match(Set dst (URShiftL (CastP2X src1) src2));

  ins_cost(INSN_COST);
  format %{ "lsr $dst, p2x($src1), ($src2 & 0x3f)" %}

  ins_encode %{
    __ lsr(as_Register($dst$$reg),
           as_Register($src1$$reg),
           $src2$$constant & 0x3f);
  %}

  ins_pipe(ialu_reg_shift);
%}
9652 
9653 // Shift Right Arithmetic Register
// 64-bit arithmetic shift right by a register amount -> asrv.
instruct rShiftL_reg_reg(iRegLNoSp dst, iRegL src1, iRegIorL2I src2) %{
  match(Set dst (RShiftL src1 src2));

  ins_cost(INSN_COST * 2);
  format %{ "asrv  $dst, $src1, $src2" %}

  ins_encode %{
    __ asrv(as_Register($dst$$reg),
            as_Register($src1$$reg),
            as_Register($src2$$reg));
  %}

  ins_pipe(ialu_reg_reg_vshift);
%}
9668 
9669 // Shift Right Arithmetic Immediate
// 64-bit arithmetic shift right by an immediate (amount masked to 6 bits).
instruct rShiftL_reg_imm(iRegLNoSp dst, iRegL src1, immI src2) %{
  match(Set dst (RShiftL src1 src2));

  ins_cost(INSN_COST);
  format %{ "asr $dst, $src1, ($src2 & 0x3f)" %}

  ins_encode %{
    __ asr(as_Register($dst$$reg),
           as_Register($src1$$reg),
           $src2$$constant & 0x3f);
  %}

  ins_pipe(ialu_reg_shift);
%}
9684 
9685 // BEGIN This section of the file is automatically generated. Do not edit --------------
9686 
// XorL with -1 is bitwise NOT: eon dst, src1, zr computes ~src1.
// (Part of the auto-generated section — see the BEGIN marker above; comments
// only, code untouched.)
instruct regL_not_reg(iRegLNoSp dst,
                         iRegL src1, immL_M1 m1,
                         rFlagsReg cr) %{
  match(Set dst (XorL src1 m1));
  ins_cost(INSN_COST);
  format %{ "eon  $dst, $src1, zr" %}

  ins_encode %{
    __ eon(as_Register($dst$$reg),
              as_Register($src1$$reg),
              zr,
              Assembler::LSL, 0);
  %}

  ins_pipe(ialu_reg);
%}
// XorI with -1 is bitwise NOT: eonw dst, src1, zr computes ~src1 (32-bit).
instruct regI_not_reg(iRegINoSp dst,
                         iRegIorL2I src1, immI_M1 m1,
                         rFlagsReg cr) %{
  match(Set dst (XorI src1 m1));
  ins_cost(INSN_COST);
  format %{ "eonw  $dst, $src1, zr" %}

  ins_encode %{
    __ eonw(as_Register($dst$$reg),
              as_Register($src1$$reg),
              zr,
              Assembler::LSL, 0);
  %}

  ins_pipe(ialu_reg);
%}
9719 
// src1 & ~src2 (XorI with -1 folded) -> single bicw.
instruct AndI_reg_not_reg(iRegINoSp dst,
                         iRegIorL2I src1, iRegIorL2I src2, immI_M1 m1,
                         rFlagsReg cr) %{
  match(Set dst (AndI src1 (XorI src2 m1)));
  ins_cost(INSN_COST);
  format %{ "bicw  $dst, $src1, $src2" %}

  ins_encode %{
    __ bicw(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSL, 0);
  %}

  ins_pipe(ialu_reg_reg);
%}
9736 
// src1 & ~src2 (XorL with -1 folded) -> single bic.
instruct AndL_reg_not_reg(iRegLNoSp dst,
                         iRegL src1, iRegL src2, immL_M1 m1,
                         rFlagsReg cr) %{
  match(Set dst (AndL src1 (XorL src2 m1)));
  ins_cost(INSN_COST);
  format %{ "bic  $dst, $src1, $src2" %}

  ins_encode %{
    __ bic(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSL, 0);
  %}

  ins_pipe(ialu_reg_reg);
%}
9753 
// src1 | ~src2 (XorI with -1 folded) -> single ornw.
instruct OrI_reg_not_reg(iRegINoSp dst,
                         iRegIorL2I src1, iRegIorL2I src2, immI_M1 m1,
                         rFlagsReg cr) %{
  match(Set dst (OrI src1 (XorI src2 m1)));
  ins_cost(INSN_COST);
  format %{ "ornw  $dst, $src1, $src2" %}

  ins_encode %{
    __ ornw(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSL, 0);
  %}

  ins_pipe(ialu_reg_reg);
%}
9770 
// src1 | ~src2 (XorL with -1 folded) -> single orn.
instruct OrL_reg_not_reg(iRegLNoSp dst,
                         iRegL src1, iRegL src2, immL_M1 m1,
                         rFlagsReg cr) %{
  match(Set dst (OrL src1 (XorL src2 m1)));
  ins_cost(INSN_COST);
  format %{ "orn  $dst, $src1, $src2" %}

  ins_encode %{
    __ orn(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSL, 0);
  %}

  ins_pipe(ialu_reg_reg);
%}
9787 
// ~(src1 ^ src2), matched as -1 ^ (src2 ^ src1) -> single eonw.
instruct XorI_reg_not_reg(iRegINoSp dst,
                         iRegIorL2I src1, iRegIorL2I src2, immI_M1 m1,
                         rFlagsReg cr) %{
  match(Set dst (XorI m1 (XorI src2 src1)));
  ins_cost(INSN_COST);
  format %{ "eonw  $dst, $src1, $src2" %}

  ins_encode %{
    __ eonw(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSL, 0);
  %}

  ins_pipe(ialu_reg_reg);
%}
9804 
// ~(src1 ^ src2), matched as -1 ^ (src2 ^ src1) -> single eon.
instruct XorL_reg_not_reg(iRegLNoSp dst,
                         iRegL src1, iRegL src2, immL_M1 m1,
                         rFlagsReg cr) %{
  match(Set dst (XorL m1 (XorL src2 src1)));
  ins_cost(INSN_COST);
  format %{ "eon  $dst, $src1, $src2" %}

  ins_encode %{
    __ eon(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSL, 0);
  %}

  ins_pipe(ialu_reg_reg);
%}
9821 
// src1 & ~(src2 >>> src3): shifted-operand bicw, folding the logical
// right shift and the complement into one instruction.
instruct AndI_reg_URShift_not_reg(iRegINoSp dst,
                         iRegIorL2I src1, iRegIorL2I src2,
                         immI src3, immI_M1 src4, rFlagsReg cr) %{
  match(Set dst (AndI src1 (XorI(URShiftI src2 src3) src4)));
  ins_cost(1.9 * INSN_COST);
  format %{ "bicw  $dst, $src1, $src2, LSR $src3" %}

  ins_encode %{
    __ bicw(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSR,
              $src3$$constant & 0x1f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
9839 
// src1 & ~(src2 >>> src3), 64-bit -> shifted-operand bic (LSR).
instruct AndL_reg_URShift_not_reg(iRegLNoSp dst,
                         iRegL src1, iRegL src2,
                         immI src3, immL_M1 src4, rFlagsReg cr) %{
  match(Set dst (AndL src1 (XorL(URShiftL src2 src3) src4)));
  ins_cost(1.9 * INSN_COST);
  format %{ "bic  $dst, $src1, $src2, LSR $src3" %}

  ins_encode %{
    __ bic(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSR,
              $src3$$constant & 0x3f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
9857 
// src1 & ~(src2 >> src3), 32-bit -> shifted-operand bicw (ASR).
instruct AndI_reg_RShift_not_reg(iRegINoSp dst,
                         iRegIorL2I src1, iRegIorL2I src2,
                         immI src3, immI_M1 src4, rFlagsReg cr) %{
  match(Set dst (AndI src1 (XorI(RShiftI src2 src3) src4)));
  ins_cost(1.9 * INSN_COST);
  format %{ "bicw  $dst, $src1, $src2, ASR $src3" %}

  ins_encode %{
    __ bicw(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::ASR,
              $src3$$constant & 0x1f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
9875 
// 64-bit variant: fuse "src1 & ~(src2 >> src3)" (arithmetic shift)
// into a single BIC with an ASR shifted-register operand.
instruct AndL_reg_RShift_not_reg(iRegLNoSp dst,
                         iRegL src1, iRegL src2,
                         immI src3, immL_M1 src4, rFlagsReg cr) %{
  match(Set dst (AndL src1 (XorL(RShiftL src2 src3) src4)));
  ins_cost(1.9 * INSN_COST);
  format %{ "bic  $dst, $src1, $src2, ASR $src3" %}

  ins_encode %{
    __ bic(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::ASR,
              $src3$$constant & 0x3f); // 64-bit shift amount is mod 64
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
9893 
// Fuse "src1 & ~(src2 << src3)" into a single BICW with an LSL
// shifted-register operand.  src4 is immI_M1 (-1), expressing the NOT
// as (x ^ -1).
instruct AndI_reg_LShift_not_reg(iRegINoSp dst,
                         iRegIorL2I src1, iRegIorL2I src2,
                         immI src3, immI_M1 src4, rFlagsReg cr) %{
  match(Set dst (AndI src1 (XorI(LShiftI src2 src3) src4)));
  ins_cost(1.9 * INSN_COST);
  format %{ "bicw  $dst, $src1, $src2, LSL $src3" %}

  ins_encode %{
    __ bicw(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSL,
              $src3$$constant & 0x1f); // 32-bit shift amount is mod 32
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
9911 
// 64-bit variant: fuse "src1 & ~(src2 << src3)" into a single BIC with
// an LSL shifted-register operand.
instruct AndL_reg_LShift_not_reg(iRegLNoSp dst,
                         iRegL src1, iRegL src2,
                         immI src3, immL_M1 src4, rFlagsReg cr) %{
  match(Set dst (AndL src1 (XorL(LShiftL src2 src3) src4)));
  ins_cost(1.9 * INSN_COST);
  format %{ "bic  $dst, $src1, $src2, LSL $src3" %}

  ins_encode %{
    __ bic(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSL,
              $src3$$constant & 0x3f); // 64-bit shift amount is mod 64
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
9929 
// Fuse "~(src1 ^ (src2 >>> src3))" into a single EONW (XOR-NOT) with
// an LSR shifted-register operand.  src4 is immI_M1 (-1): the outer
// XOR with -1 is the NOT.
instruct XorI_reg_URShift_not_reg(iRegINoSp dst,
                         iRegIorL2I src1, iRegIorL2I src2,
                         immI src3, immI_M1 src4, rFlagsReg cr) %{
  match(Set dst (XorI src4 (XorI(URShiftI src2 src3) src1)));
  ins_cost(1.9 * INSN_COST);
  format %{ "eonw  $dst, $src1, $src2, LSR $src3" %}

  ins_encode %{
    __ eonw(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSR,
              $src3$$constant & 0x1f); // 32-bit shift amount is mod 32
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
9947 
// 64-bit variant: fuse "~(src1 ^ (src2 >>> src3))" into a single EON
// with an LSR shifted-register operand.  src4 is immL_M1 (-1).
instruct XorL_reg_URShift_not_reg(iRegLNoSp dst,
                         iRegL src1, iRegL src2,
                         immI src3, immL_M1 src4, rFlagsReg cr) %{
  match(Set dst (XorL src4 (XorL(URShiftL src2 src3) src1)));
  ins_cost(1.9 * INSN_COST);
  format %{ "eon  $dst, $src1, $src2, LSR $src3" %}

  ins_encode %{
    __ eon(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSR,
              $src3$$constant & 0x3f); // 64-bit shift amount is mod 64
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
9965 
// Fuse "~(src1 ^ (src2 >> src3))" (arithmetic shift) into a single
// EONW with an ASR shifted-register operand.  src4 is immI_M1 (-1).
instruct XorI_reg_RShift_not_reg(iRegINoSp dst,
                         iRegIorL2I src1, iRegIorL2I src2,
                         immI src3, immI_M1 src4, rFlagsReg cr) %{
  match(Set dst (XorI src4 (XorI(RShiftI src2 src3) src1)));
  ins_cost(1.9 * INSN_COST);
  format %{ "eonw  $dst, $src1, $src2, ASR $src3" %}

  ins_encode %{
    __ eonw(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::ASR,
              $src3$$constant & 0x1f); // 32-bit shift amount is mod 32
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
9983 
// 64-bit variant: fuse "~(src1 ^ (src2 >> src3))" (arithmetic shift)
// into a single EON with an ASR shifted-register operand.
instruct XorL_reg_RShift_not_reg(iRegLNoSp dst,
                         iRegL src1, iRegL src2,
                         immI src3, immL_M1 src4, rFlagsReg cr) %{
  match(Set dst (XorL src4 (XorL(RShiftL src2 src3) src1)));
  ins_cost(1.9 * INSN_COST);
  format %{ "eon  $dst, $src1, $src2, ASR $src3" %}

  ins_encode %{
    __ eon(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::ASR,
              $src3$$constant & 0x3f); // 64-bit shift amount is mod 64
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
10001 
// Fuse "~(src1 ^ (src2 << src3))" into a single EONW with an LSL
// shifted-register operand.  src4 is immI_M1 (-1).
instruct XorI_reg_LShift_not_reg(iRegINoSp dst,
                         iRegIorL2I src1, iRegIorL2I src2,
                         immI src3, immI_M1 src4, rFlagsReg cr) %{
  match(Set dst (XorI src4 (XorI(LShiftI src2 src3) src1)));
  ins_cost(1.9 * INSN_COST);
  format %{ "eonw  $dst, $src1, $src2, LSL $src3" %}

  ins_encode %{
    __ eonw(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSL,
              $src3$$constant & 0x1f); // 32-bit shift amount is mod 32
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
10019 
// 64-bit variant: fuse "~(src1 ^ (src2 << src3))" into a single EON
// with an LSL shifted-register operand.
instruct XorL_reg_LShift_not_reg(iRegLNoSp dst,
                         iRegL src1, iRegL src2,
                         immI src3, immL_M1 src4, rFlagsReg cr) %{
  match(Set dst (XorL src4 (XorL(LShiftL src2 src3) src1)));
  ins_cost(1.9 * INSN_COST);
  format %{ "eon  $dst, $src1, $src2, LSL $src3" %}

  ins_encode %{
    __ eon(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSL,
              $src3$$constant & 0x3f); // 64-bit shift amount is mod 64
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
10037 
// Fuse "src1 | ~(src2 >>> src3)" into a single ORNW (OR-NOT) with an
// LSR shifted-register operand.  src4 is immI_M1 (-1): C2 has no NOT
// node, so ~x is canonicalized as (x ^ -1).
instruct OrI_reg_URShift_not_reg(iRegINoSp dst,
                         iRegIorL2I src1, iRegIorL2I src2,
                         immI src3, immI_M1 src4, rFlagsReg cr) %{
  match(Set dst (OrI src1 (XorI(URShiftI src2 src3) src4)));
  ins_cost(1.9 * INSN_COST);
  format %{ "ornw  $dst, $src1, $src2, LSR $src3" %}

  ins_encode %{
    __ ornw(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSR,
              $src3$$constant & 0x1f); // 32-bit shift amount is mod 32
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
10055 
// 64-bit variant: fuse "src1 | ~(src2 >>> src3)" into a single ORN
// with an LSR shifted-register operand.  src4 is immL_M1 (-1).
instruct OrL_reg_URShift_not_reg(iRegLNoSp dst,
                         iRegL src1, iRegL src2,
                         immI src3, immL_M1 src4, rFlagsReg cr) %{
  match(Set dst (OrL src1 (XorL(URShiftL src2 src3) src4)));
  ins_cost(1.9 * INSN_COST);
  format %{ "orn  $dst, $src1, $src2, LSR $src3" %}

  ins_encode %{
    __ orn(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSR,
              $src3$$constant & 0x3f); // 64-bit shift amount is mod 64
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
10073 
// Fuse "src1 | ~(src2 >> src3)" (arithmetic shift) into a single ORNW
// with an ASR shifted-register operand.  src4 is immI_M1 (-1).
instruct OrI_reg_RShift_not_reg(iRegINoSp dst,
                         iRegIorL2I src1, iRegIorL2I src2,
                         immI src3, immI_M1 src4, rFlagsReg cr) %{
  match(Set dst (OrI src1 (XorI(RShiftI src2 src3) src4)));
  ins_cost(1.9 * INSN_COST);
  format %{ "ornw  $dst, $src1, $src2, ASR $src3" %}

  ins_encode %{
    __ ornw(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::ASR,
              $src3$$constant & 0x1f); // 32-bit shift amount is mod 32
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
10091 
// 64-bit variant: fuse "src1 | ~(src2 >> src3)" (arithmetic shift)
// into a single ORN with an ASR shifted-register operand.
instruct OrL_reg_RShift_not_reg(iRegLNoSp dst,
                         iRegL src1, iRegL src2,
                         immI src3, immL_M1 src4, rFlagsReg cr) %{
  match(Set dst (OrL src1 (XorL(RShiftL src2 src3) src4)));
  ins_cost(1.9 * INSN_COST);
  format %{ "orn  $dst, $src1, $src2, ASR $src3" %}

  ins_encode %{
    __ orn(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::ASR,
              $src3$$constant & 0x3f); // 64-bit shift amount is mod 64
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
10109 
// Fuse "src1 | ~(src2 << src3)" into a single ORNW with an LSL
// shifted-register operand.  src4 is immI_M1 (-1).
instruct OrI_reg_LShift_not_reg(iRegINoSp dst,
                         iRegIorL2I src1, iRegIorL2I src2,
                         immI src3, immI_M1 src4, rFlagsReg cr) %{
  match(Set dst (OrI src1 (XorI(LShiftI src2 src3) src4)));
  ins_cost(1.9 * INSN_COST);
  format %{ "ornw  $dst, $src1, $src2, LSL $src3" %}

  ins_encode %{
    __ ornw(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSL,
              $src3$$constant & 0x1f); // 32-bit shift amount is mod 32
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
10127 
// 64-bit variant: fuse "src1 | ~(src2 << src3)" into a single ORN with
// an LSL shifted-register operand.
instruct OrL_reg_LShift_not_reg(iRegLNoSp dst,
                         iRegL src1, iRegL src2,
                         immI src3, immL_M1 src4, rFlagsReg cr) %{
  match(Set dst (OrL src1 (XorL(LShiftL src2 src3) src4)));
  ins_cost(1.9 * INSN_COST);
  format %{ "orn  $dst, $src1, $src2, LSL $src3" %}

  ins_encode %{
    __ orn(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSL,
              $src3$$constant & 0x3f); // 64-bit shift amount is mod 64
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
10145 
// Fuse "src1 & (src2 >>> src3)" into a single ANDW with an LSR
// shifted-register operand, saving the separate shift instruction.
// NOTE(review): cr is not referenced by match/encode/format — looks
// unused; confirm before removing.
instruct AndI_reg_URShift_reg(iRegINoSp dst,
                         iRegIorL2I src1, iRegIorL2I src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (AndI src1 (URShiftI src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "andw  $dst, $src1, $src2, LSR $src3" %}

  ins_encode %{
    __ andw(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSR,
              $src3$$constant & 0x1f); // 32-bit shift amount is mod 32
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
10164 
// 64-bit variant: fuse "src1 & (src2 >>> src3)" into a single AND
// (assembler name "andr") with an LSR shifted-register operand.
instruct AndL_reg_URShift_reg(iRegLNoSp dst,
                         iRegL src1, iRegL src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (AndL src1 (URShiftL src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "andr  $dst, $src1, $src2, LSR $src3" %}

  ins_encode %{
    __ andr(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSR,
              $src3$$constant & 0x3f); // 64-bit shift amount is mod 64
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
10183 
// Fuse "src1 & (src2 >> src3)" (arithmetic shift) into a single ANDW
// with an ASR shifted-register operand.
instruct AndI_reg_RShift_reg(iRegINoSp dst,
                         iRegIorL2I src1, iRegIorL2I src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (AndI src1 (RShiftI src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "andw  $dst, $src1, $src2, ASR $src3" %}

  ins_encode %{
    __ andw(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::ASR,
              $src3$$constant & 0x1f); // 32-bit shift amount is mod 32
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
10202 
// 64-bit variant: fuse "src1 & (src2 >> src3)" (arithmetic shift)
// into a single AND with an ASR shifted-register operand.
instruct AndL_reg_RShift_reg(iRegLNoSp dst,
                         iRegL src1, iRegL src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (AndL src1 (RShiftL src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "andr  $dst, $src1, $src2, ASR $src3" %}

  ins_encode %{
    __ andr(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::ASR,
              $src3$$constant & 0x3f); // 64-bit shift amount is mod 64
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
10221 
// Fuse "src1 & (src2 << src3)" into a single ANDW with an LSL
// shifted-register operand.
instruct AndI_reg_LShift_reg(iRegINoSp dst,
                         iRegIorL2I src1, iRegIorL2I src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (AndI src1 (LShiftI src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "andw  $dst, $src1, $src2, LSL $src3" %}

  ins_encode %{
    __ andw(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSL,
              $src3$$constant & 0x1f); // 32-bit shift amount is mod 32
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
10240 
// 64-bit variant: fuse "src1 & (src2 << src3)" into a single AND with
// an LSL shifted-register operand.
instruct AndL_reg_LShift_reg(iRegLNoSp dst,
                         iRegL src1, iRegL src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (AndL src1 (LShiftL src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "andr  $dst, $src1, $src2, LSL $src3" %}

  ins_encode %{
    __ andr(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSL,
              $src3$$constant & 0x3f); // 64-bit shift amount is mod 64
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
10259 
// Fuse "src1 ^ (src2 >>> src3)" into a single EORW with an LSR
// shifted-register operand.
instruct XorI_reg_URShift_reg(iRegINoSp dst,
                         iRegIorL2I src1, iRegIorL2I src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (XorI src1 (URShiftI src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "eorw  $dst, $src1, $src2, LSR $src3" %}

  ins_encode %{
    __ eorw(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSR,
              $src3$$constant & 0x1f); // 32-bit shift amount is mod 32
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
10278 
// 64-bit variant: fuse "src1 ^ (src2 >>> src3)" into a single EOR with
// an LSR shifted-register operand.
instruct XorL_reg_URShift_reg(iRegLNoSp dst,
                         iRegL src1, iRegL src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (XorL src1 (URShiftL src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "eor  $dst, $src1, $src2, LSR $src3" %}

  ins_encode %{
    __ eor(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSR,
              $src3$$constant & 0x3f); // 64-bit shift amount is mod 64
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
10297 
// Fuse "src1 ^ (src2 >> src3)" (arithmetic shift) into a single EORW
// with an ASR shifted-register operand.
instruct XorI_reg_RShift_reg(iRegINoSp dst,
                         iRegIorL2I src1, iRegIorL2I src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (XorI src1 (RShiftI src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "eorw  $dst, $src1, $src2, ASR $src3" %}

  ins_encode %{
    __ eorw(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::ASR,
              $src3$$constant & 0x1f); // 32-bit shift amount is mod 32
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
10316 
// 64-bit variant: fuse "src1 ^ (src2 >> src3)" (arithmetic shift)
// into a single EOR with an ASR shifted-register operand.
instruct XorL_reg_RShift_reg(iRegLNoSp dst,
                         iRegL src1, iRegL src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (XorL src1 (RShiftL src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "eor  $dst, $src1, $src2, ASR $src3" %}

  ins_encode %{
    __ eor(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::ASR,
              $src3$$constant & 0x3f); // 64-bit shift amount is mod 64
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
10335 
// Fuse "src1 ^ (src2 << src3)" into a single EORW with an LSL
// shifted-register operand.
instruct XorI_reg_LShift_reg(iRegINoSp dst,
                         iRegIorL2I src1, iRegIorL2I src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (XorI src1 (LShiftI src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "eorw  $dst, $src1, $src2, LSL $src3" %}

  ins_encode %{
    __ eorw(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSL,
              $src3$$constant & 0x1f); // 32-bit shift amount is mod 32
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
10354 
// 64-bit variant: fuse "src1 ^ (src2 << src3)" into a single EOR with
// an LSL shifted-register operand.
instruct XorL_reg_LShift_reg(iRegLNoSp dst,
                         iRegL src1, iRegL src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (XorL src1 (LShiftL src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "eor  $dst, $src1, $src2, LSL $src3" %}

  ins_encode %{
    __ eor(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSL,
              $src3$$constant & 0x3f); // 64-bit shift amount is mod 64
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
10373 
// Fuse "src1 | (src2 >>> src3)" into a single ORRW with an LSR
// shifted-register operand.
instruct OrI_reg_URShift_reg(iRegINoSp dst,
                         iRegIorL2I src1, iRegIorL2I src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (OrI src1 (URShiftI src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "orrw  $dst, $src1, $src2, LSR $src3" %}

  ins_encode %{
    __ orrw(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSR,
              $src3$$constant & 0x1f); // 32-bit shift amount is mod 32
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
10392 
// 64-bit variant: fuse "src1 | (src2 >>> src3)" into a single ORR with
// an LSR shifted-register operand.
instruct OrL_reg_URShift_reg(iRegLNoSp dst,
                         iRegL src1, iRegL src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (OrL src1 (URShiftL src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "orr  $dst, $src1, $src2, LSR $src3" %}

  ins_encode %{
    __ orr(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSR,
              $src3$$constant & 0x3f); // 64-bit shift amount is mod 64
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
10411 
// Fuse "src1 | (src2 >> src3)" (arithmetic shift) into a single ORRW
// with an ASR shifted-register operand.
instruct OrI_reg_RShift_reg(iRegINoSp dst,
                         iRegIorL2I src1, iRegIorL2I src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (OrI src1 (RShiftI src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "orrw  $dst, $src1, $src2, ASR $src3" %}

  ins_encode %{
    __ orrw(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::ASR,
              $src3$$constant & 0x1f); // 32-bit shift amount is mod 32
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
10430 
// 64-bit variant: fuse "src1 | (src2 >> src3)" (arithmetic shift)
// into a single ORR with an ASR shifted-register operand.
instruct OrL_reg_RShift_reg(iRegLNoSp dst,
                         iRegL src1, iRegL src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (OrL src1 (RShiftL src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "orr  $dst, $src1, $src2, ASR $src3" %}

  ins_encode %{
    __ orr(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::ASR,
              $src3$$constant & 0x3f); // 64-bit shift amount is mod 64
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
10449 
// Fuse "src1 | (src2 << src3)" into a single ORRW with an LSL
// shifted-register operand.
instruct OrI_reg_LShift_reg(iRegINoSp dst,
                         iRegIorL2I src1, iRegIorL2I src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (OrI src1 (LShiftI src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "orrw  $dst, $src1, $src2, LSL $src3" %}

  ins_encode %{
    __ orrw(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSL,
              $src3$$constant & 0x1f); // 32-bit shift amount is mod 32
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
10468 
// 64-bit variant: fuse "src1 | (src2 << src3)" into a single ORR with
// an LSL shifted-register operand.
instruct OrL_reg_LShift_reg(iRegLNoSp dst,
                         iRegL src1, iRegL src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (OrL src1 (LShiftL src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "orr  $dst, $src1, $src2, LSL $src3" %}

  ins_encode %{
    __ orr(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSL,
              $src3$$constant & 0x3f); // 64-bit shift amount is mod 64
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
10487 
// Fuse "src1 + (src2 >>> src3)" into a single ADDW with an LSR
// shifted-register operand.
instruct AddI_reg_URShift_reg(iRegINoSp dst,
                         iRegIorL2I src1, iRegIorL2I src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (AddI src1 (URShiftI src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "addw  $dst, $src1, $src2, LSR $src3" %}

  ins_encode %{
    __ addw(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSR,
              $src3$$constant & 0x1f); // 32-bit shift amount is mod 32
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
10506 
// 64-bit variant: fuse "src1 + (src2 >>> src3)" into a single ADD with
// an LSR shifted-register operand.
instruct AddL_reg_URShift_reg(iRegLNoSp dst,
                         iRegL src1, iRegL src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (AddL src1 (URShiftL src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "add  $dst, $src1, $src2, LSR $src3" %}

  ins_encode %{
    __ add(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSR,
              $src3$$constant & 0x3f); // 64-bit shift amount is mod 64
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
10525 
// Fuse "src1 + (src2 >> src3)" (arithmetic shift) into a single ADDW
// with an ASR shifted-register operand.
instruct AddI_reg_RShift_reg(iRegINoSp dst,
                         iRegIorL2I src1, iRegIorL2I src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (AddI src1 (RShiftI src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "addw  $dst, $src1, $src2, ASR $src3" %}

  ins_encode %{
    __ addw(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::ASR,
              $src3$$constant & 0x1f); // 32-bit shift amount is mod 32
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
10544 
// 64-bit variant: fuse "src1 + (src2 >> src3)" (arithmetic shift)
// into a single ADD with an ASR shifted-register operand.
instruct AddL_reg_RShift_reg(iRegLNoSp dst,
                         iRegL src1, iRegL src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (AddL src1 (RShiftL src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "add  $dst, $src1, $src2, ASR $src3" %}

  ins_encode %{
    __ add(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::ASR,
              $src3$$constant & 0x3f); // 64-bit shift amount is mod 64
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
10563 
// Fuse "src1 + (src2 << src3)" into a single ADDW with an LSL
// shifted-register operand.
instruct AddI_reg_LShift_reg(iRegINoSp dst,
                         iRegIorL2I src1, iRegIorL2I src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (AddI src1 (LShiftI src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "addw  $dst, $src1, $src2, LSL $src3" %}

  ins_encode %{
    __ addw(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSL,
              $src3$$constant & 0x1f); // 32-bit shift amount is mod 32
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
10582 
// 64-bit variant: fuse "src1 + (src2 << src3)" into a single ADD with
// an LSL shifted-register operand.
instruct AddL_reg_LShift_reg(iRegLNoSp dst,
                         iRegL src1, iRegL src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (AddL src1 (LShiftL src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "add  $dst, $src1, $src2, LSL $src3" %}

  ins_encode %{
    __ add(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSL,
              $src3$$constant & 0x3f); // 64-bit shift amount is mod 64
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
10601 
// Fuse "src1 - (src2 >>> src3)" into a single SUBW with an LSR
// shifted-register operand.
instruct SubI_reg_URShift_reg(iRegINoSp dst,
                         iRegIorL2I src1, iRegIorL2I src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (SubI src1 (URShiftI src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "subw  $dst, $src1, $src2, LSR $src3" %}

  ins_encode %{
    __ subw(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSR,
              $src3$$constant & 0x1f); // 32-bit shift amount is mod 32
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
10620 
// 64-bit variant: fuse "src1 - (src2 >>> src3)" into a single SUB with
// an LSR shifted-register operand.
instruct SubL_reg_URShift_reg(iRegLNoSp dst,
                         iRegL src1, iRegL src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (SubL src1 (URShiftL src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "sub  $dst, $src1, $src2, LSR $src3" %}

  ins_encode %{
    __ sub(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSR,
              $src3$$constant & 0x3f); // 64-bit shift amount is mod 64
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
10639 
// Fuse "src1 - (src2 >> src3)" (arithmetic shift) into a single SUBW
// with an ASR shifted-register operand.
instruct SubI_reg_RShift_reg(iRegINoSp dst,
                         iRegIorL2I src1, iRegIorL2I src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (SubI src1 (RShiftI src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "subw  $dst, $src1, $src2, ASR $src3" %}

  ins_encode %{
    __ subw(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::ASR,
              $src3$$constant & 0x1f); // 32-bit shift amount is mod 32
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
10658 
// 64-bit variant: fuse "src1 - (src2 >> src3)" (arithmetic shift)
// into a single SUB with an ASR shifted-register operand.
instruct SubL_reg_RShift_reg(iRegLNoSp dst,
                         iRegL src1, iRegL src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (SubL src1 (RShiftL src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "sub  $dst, $src1, $src2, ASR $src3" %}

  ins_encode %{
    __ sub(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::ASR,
              $src3$$constant & 0x3f); // 64-bit shift amount is mod 64
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
10677 
// Fuse "src1 - (src2 << src3)" into a single SUBW with an LSL
// shifted-register operand.
instruct SubI_reg_LShift_reg(iRegINoSp dst,
                         iRegIorL2I src1, iRegIorL2I src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (SubI src1 (LShiftI src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "subw  $dst, $src1, $src2, LSL $src3" %}

  ins_encode %{
    __ subw(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSL,
              $src3$$constant & 0x1f); // 32-bit shift amount is mod 32
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
10696 
// 64-bit variant: fuse "src1 - (src2 << src3)" into a single SUB with
// an LSL shifted-register operand.
instruct SubL_reg_LShift_reg(iRegLNoSp dst,
                         iRegL src1, iRegL src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (SubL src1 (LShiftL src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "sub  $dst, $src1, $src2, LSL $src3" %}

  ins_encode %{
    __ sub(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSL,
              $src3$$constant & 0x3f); // 64-bit shift amount is mod 64
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
10715 
10716 
10717 
10718 // Shift Left followed by Shift Right.
10719 // This idiom is used by the compiler for the i2b bytecode etc.
// dst = (src << lshift_count) >> rshift_count (arithmetic), both
// counts constant.  SBFM expresses this as a signed bitfield move:
// immr = (rshift - lshift) & 63 rotates the field into position,
// imms = 63 - lshift marks the top bit of the source field.
instruct sbfmL(iRegLNoSp dst, iRegL src, immI lshift_count, immI rshift_count)
%{
  match(Set dst (RShiftL (LShiftL src lshift_count) rshift_count));
  // Make sure we are not going to exceed what sbfm can do.
  predicate((unsigned int)n->in(2)->get_int() <= 63
            && (unsigned int)n->in(1)->in(2)->get_int() <= 63);

  ins_cost(INSN_COST * 2);
  format %{ "sbfm  $dst, $src, $rshift_count - $lshift_count, #63 - $lshift_count" %}
  ins_encode %{
    int lshift = $lshift_count$$constant, rshift = $rshift_count$$constant;
    int s = 63 - lshift;              // imms: index of the field's top bit
    int r = (rshift - lshift) & 63;   // immr: rotate placing the field at bit 0
    __ sbfm(as_Register($dst$$reg),
            as_Register($src$$reg),
            r, s);
  %}

  ins_pipe(ialu_reg_shift);
%}
10740 
10741 // Shift Left followed by Shift Right.
10742 // This idiom is used by the compiler for the i2b bytecode etc.
// 32-bit variant of sbfmL: dst = (src << lshift_count) >> rshift_count
// (arithmetic), both counts constant 0..31, encoded as SBFMW with
// immr = (rshift - lshift) & 31 and imms = 31 - lshift.
instruct sbfmwI(iRegINoSp dst, iRegIorL2I src, immI lshift_count, immI rshift_count)
%{
  match(Set dst (RShiftI (LShiftI src lshift_count) rshift_count));
  // Make sure we are not going to exceed what sbfmw can do.
  predicate((unsigned int)n->in(2)->get_int() <= 31
            && (unsigned int)n->in(1)->in(2)->get_int() <= 31);

  ins_cost(INSN_COST * 2);
  format %{ "sbfmw  $dst, $src, $rshift_count - $lshift_count, #31 - $lshift_count" %}
  ins_encode %{
    int lshift = $lshift_count$$constant, rshift = $rshift_count$$constant;
    int s = 31 - lshift;              // imms: index of the field's top bit
    int r = (rshift - lshift) & 31;   // immr: rotate placing the field at bit 0
    __ sbfmw(as_Register($dst$$reg),
            as_Register($src$$reg),
            r, s);
  %}

  ins_pipe(ialu_reg_shift);
%}
10763 
10764 // Shift Left followed by Shift Right.
10765 // This idiom is used by the compiler for the i2b bytecode etc.
// dst = (src << lshift_count) >>> rshift_count (logical), both counts
// constant.  UBFM expresses this as an unsigned bitfield move with the
// same immr/imms derivation as sbfmL, but zero-extending the result.
instruct ubfmL(iRegLNoSp dst, iRegL src, immI lshift_count, immI rshift_count)
%{
  match(Set dst (URShiftL (LShiftL src lshift_count) rshift_count));
  // Make sure we are not going to exceed what ubfm can do.
  predicate((unsigned int)n->in(2)->get_int() <= 63
            && (unsigned int)n->in(1)->in(2)->get_int() <= 63);

  ins_cost(INSN_COST * 2);
  format %{ "ubfm  $dst, $src, $rshift_count - $lshift_count, #63 - $lshift_count" %}
  ins_encode %{
    int lshift = $lshift_count$$constant, rshift = $rshift_count$$constant;
    int s = 63 - lshift;              // imms: index of the field's top bit
    int r = (rshift - lshift) & 63;   // immr: rotate placing the field at bit 0
    __ ubfm(as_Register($dst$$reg),
            as_Register($src$$reg),
            r, s);
  %}

  ins_pipe(ialu_reg_shift);
%}
10786 
10787 // Shift Left followed by Shift Right.
10788 // This idiom is used by the compiler for the i2b bytecode etc.
// Fold (src << lshift) >>> rshift (logical, 32-bit) into one ubfmw.
instruct ubfmwI(iRegINoSp dst, iRegIorL2I src, immI lshift_count, immI rshift_count)
%{
  match(Set dst (URShiftI (LShiftI src lshift_count) rshift_count));
  // Make sure we are not going to exceed what ubfmw can do.
  // Both shift counts must fit in a 32-bit bitfield position (0..31).
  predicate((unsigned int)n->in(2)->get_int() <= 31
            && (unsigned int)n->in(1)->in(2)->get_int() <= 31);

  ins_cost(INSN_COST * 2);
  format %{ "ubfmw  $dst, $src, $rshift_count - $lshift_count, #31 - $lshift_count" %}
  ins_encode %{
    int lshift = $lshift_count$$constant, rshift = $rshift_count$$constant;
    // ubfmw immr/imms encoding of the shift pair: r = (rshift - lshift) mod 32,
    // s = 31 - lshift.
    int s = 31 - lshift;
    int r = (rshift - lshift) & 31;
    __ ubfmw(as_Register($dst$$reg),
            as_Register($src$$reg),
            r, s);
  %}

  ins_pipe(ialu_reg_shift);
%}
10809 // Bitfield extract with shift & mask
10810 
// Fold (src >>> rshift) & mask (32-bit, mask = 2^k - 1) into one ubfxw.
instruct ubfxwI(iRegINoSp dst, iRegIorL2I src, immI rshift, immI_bitmask mask)
%{
  match(Set dst (AndI (URShiftI src rshift) mask));
  // Make sure we are not going to exceed what ubfxw can do.
  // Extracted field (shift + width) must lie inside the 32-bit register.
  predicate((exact_log2(n->in(2)->get_int() + 1) + (n->in(1)->in(2)->get_int() & 31)) <= (31 + 1));

  ins_cost(INSN_COST);
  format %{ "ubfxw $dst, $src, $mask" %}
  ins_encode %{
    int rshift = $rshift$$constant & 31;
    long mask = $mask$$constant;
    // immI_bitmask guarantees mask+1 is a power of two, so width is exact.
    int width = exact_log2(mask+1);
    __ ubfxw(as_Register($dst$$reg),
            as_Register($src$$reg), rshift, width);
  %}
  ins_pipe(ialu_reg_shift);
%}
// Fold (src >>> rshift) & mask (64-bit, mask = 2^k - 1) into one ubfx.
instruct ubfxL(iRegLNoSp dst, iRegL src, immI rshift, immL_bitmask mask)
%{
  match(Set dst (AndL (URShiftL src rshift) mask));
  // Make sure we are not going to exceed what ubfx can do.
  // Extracted field (shift + width) must lie inside the 64-bit register.
  predicate((exact_log2_long(n->in(2)->get_long() + 1) + (n->in(1)->in(2)->get_int() & 63)) <= (63 + 1));

  ins_cost(INSN_COST);
  format %{ "ubfx $dst, $src, $mask" %}
  ins_encode %{
    int rshift = $rshift$$constant & 63;
    long mask = $mask$$constant;
    // immL_bitmask guarantees mask+1 is a power of two, so width is exact.
    int width = exact_log2_long(mask+1);
    __ ubfx(as_Register($dst$$reg),
            as_Register($src$$reg), rshift, width);
  %}
  ins_pipe(ialu_reg_shift);
%}
10845 
10846 // We can use ubfx when extending an And with a mask when we know mask
10847 // is positive.  We know that because immI_bitmask guarantees it.
// Int shift-and-mask widened to long: the 64-bit ubfx zero-extends,
// subsuming the ConvI2L (mask is known non-negative via immI_bitmask).
instruct ubfxIConvI2L(iRegLNoSp dst, iRegIorL2I src, immI rshift, immI_bitmask mask)
%{
  match(Set dst (ConvI2L (AndI (URShiftI src rshift) mask)));
  // Make sure we are not going to exceed what ubfxw can do.
  // Extracted field (shift + width) must lie inside the 32-bit source.
  predicate((exact_log2(n->in(1)->in(2)->get_int() + 1) + (n->in(1)->in(1)->in(2)->get_int() & 31)) <= (31 + 1));

  ins_cost(INSN_COST * 2);
  format %{ "ubfx $dst, $src, $mask" %}
  ins_encode %{
    int rshift = $rshift$$constant & 31;
    long mask = $mask$$constant;
    // immI_bitmask guarantees mask+1 is a power of two, so width is exact.
    int width = exact_log2(mask+1);
    __ ubfx(as_Register($dst$$reg),
            as_Register($src$$reg), rshift, width);
  %}
  ins_pipe(ialu_reg_shift);
%}
10865 
10866 // Rotations
10867 
// (src1 << lshift) | (src2 >>> rshift) with lshift + rshift == 64: one extr.
instruct extrOrL(iRegLNoSp dst, iRegL src1, iRegL src2, immI lshift, immI rshift, rFlagsReg cr)
%{
  match(Set dst (OrL (LShiftL src1 lshift) (URShiftL src2 rshift)));
  // Shift amounts must be complementary modulo the 64-bit register width.
  predicate(0 == ((n->in(1)->in(2)->get_int() + n->in(2)->in(2)->get_int()) & 63));

  ins_cost(INSN_COST);
  format %{ "extr $dst, $src1, $src2, #$rshift" %}

  ins_encode %{
    __ extr(as_Register($dst$$reg), as_Register($src1$$reg), as_Register($src2$$reg),
            $rshift$$constant & 63);
  %}
  ins_pipe(ialu_reg_reg_extr);
%}
10882 
// (src1 << lshift) | (src2 >>> rshift) with lshift + rshift == 32: one extrw.
instruct extrOrI(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2, immI lshift, immI rshift, rFlagsReg cr)
%{
  match(Set dst (OrI (LShiftI src1 lshift) (URShiftI src2 rshift)));
  // Shift amounts must be complementary modulo the 32-bit register width.
  predicate(0 == ((n->in(1)->in(2)->get_int() + n->in(2)->in(2)->get_int()) & 31));

  ins_cost(INSN_COST);
  format %{ "extr $dst, $src1, $src2, #$rshift" %}

  ins_encode %{
    __ extrw(as_Register($dst$$reg), as_Register($src1$$reg), as_Register($src2$$reg),
            $rshift$$constant & 31);
  %}
  ins_pipe(ialu_reg_reg_extr);
%}
10897 
// As extrOrL, but matching Add: with disjoint bit ranges the add of the two
// complementary shifts produces the same bits as the or, so extr applies.
instruct extrAddL(iRegLNoSp dst, iRegL src1, iRegL src2, immI lshift, immI rshift, rFlagsReg cr)
%{
  match(Set dst (AddL (LShiftL src1 lshift) (URShiftL src2 rshift)));
  // Shift amounts must be complementary modulo the 64-bit register width.
  predicate(0 == ((n->in(1)->in(2)->get_int() + n->in(2)->in(2)->get_int()) & 63));

  ins_cost(INSN_COST);
  format %{ "extr $dst, $src1, $src2, #$rshift" %}

  ins_encode %{
    __ extr(as_Register($dst$$reg), as_Register($src1$$reg), as_Register($src2$$reg),
            $rshift$$constant & 63);
  %}
  ins_pipe(ialu_reg_reg_extr);
%}
10912 
// As extrOrI, but matching Add of the complementary shift pair (32-bit).
instruct extrAddI(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2, immI lshift, immI rshift, rFlagsReg cr)
%{
  match(Set dst (AddI (LShiftI src1 lshift) (URShiftI src2 rshift)));
  // Shift amounts must be complementary modulo the 32-bit register width.
  predicate(0 == ((n->in(1)->in(2)->get_int() + n->in(2)->in(2)->get_int()) & 31));

  ins_cost(INSN_COST);
  format %{ "extr $dst, $src1, $src2, #$rshift" %}

  ins_encode %{
    __ extrw(as_Register($dst$$reg), as_Register($src1$$reg), as_Register($src2$$reg),
            $rshift$$constant & 31);
  %}
  ins_pipe(ialu_reg_reg_extr);
%}
10927 
10928 
10929 // rol expander
10930 
// Variable rotate-left (64-bit): rol(x, s) == ror(x, -s), so negate the
// shift into rscratch1 and use rorv. AArch64 has no rolv instruction.
instruct rolL_rReg(iRegLNoSp dst, iRegL src, iRegI shift, rFlagsReg cr)
%{
  effect(DEF dst, USE src, USE shift);

  format %{ "rol    $dst, $src, $shift" %}
  ins_cost(INSN_COST * 3);
  ins_encode %{
    __ subw(rscratch1, zr, as_Register($shift$$reg));
    __ rorv(as_Register($dst$$reg), as_Register($src$$reg),
            rscratch1);
    %}
  ins_pipe(ialu_reg_reg_vshift);
%}
10944 
10945 // rol expander
10946 
// Variable rotate-left (32-bit): negate the shift and use rorvw.
instruct rolI_rReg(iRegINoSp dst, iRegI src, iRegI shift, rFlagsReg cr)
%{
  effect(DEF dst, USE src, USE shift);

  format %{ "rol    $dst, $src, $shift" %}
  ins_cost(INSN_COST * 3);
  ins_encode %{
    __ subw(rscratch1, zr, as_Register($shift$$reg));
    __ rorvw(as_Register($dst$$reg), as_Register($src$$reg),
            rscratch1);
    %}
  ins_pipe(ialu_reg_reg_vshift);
%}
10960 
// (src << shift) | (src >>> (64 - shift)): variable rotate-left idiom.
instruct rolL_rReg_Var_C_64(iRegLNoSp dst, iRegL src, iRegI shift, immI_64 c_64, rFlagsReg cr)
%{
  match(Set dst (OrL (LShiftL src shift) (URShiftL src (SubI c_64 shift))));

  expand %{
    rolL_rReg(dst, src, shift, cr);
  %}
%}
10969 
// Same rotate-left idiom with (0 - shift); only the low 6 bits of a long
// shift count matter, so this is equivalent to the (64 - shift) form.
instruct rolL_rReg_Var_C0(iRegLNoSp dst, iRegL src, iRegI shift, immI0 c0, rFlagsReg cr)
%{
  match(Set dst (OrL (LShiftL src shift) (URShiftL src (SubI c0 shift))));

  expand %{
    rolL_rReg(dst, src, shift, cr);
  %}
%}
10978 
// (src << shift) | (src >>> (32 - shift)): variable rotate-left idiom (int).
instruct rolI_rReg_Var_C_32(iRegINoSp dst, iRegI src, iRegI shift, immI_32 c_32, rFlagsReg cr)
%{
  match(Set dst (OrI (LShiftI src shift) (URShiftI src (SubI c_32 shift))));

  expand %{
    rolI_rReg(dst, src, shift, cr);
  %}
%}
10987 
// Same rotate-left idiom with (0 - shift); only the low 5 bits of an int
// shift count matter, so this is equivalent to the (32 - shift) form.
instruct rolI_rReg_Var_C0(iRegINoSp dst, iRegI src, iRegI shift, immI0 c0, rFlagsReg cr)
%{
  match(Set dst (OrI (LShiftI src shift) (URShiftI src (SubI c0 shift))));

  expand %{
    rolI_rReg(dst, src, shift, cr);
  %}
%}
10996 
10997 // ror expander
10998 
// Variable rotate-right (64-bit): rorv maps directly, no shift fix-up needed.
instruct rorL_rReg(iRegLNoSp dst, iRegL src, iRegI shift, rFlagsReg cr)
%{
  effect(DEF dst, USE src, USE shift);

  format %{ "ror    $dst, $src, $shift" %}
  ins_cost(INSN_COST);
  ins_encode %{
    __ rorv(as_Register($dst$$reg), as_Register($src$$reg),
            as_Register($shift$$reg));
    %}
  ins_pipe(ialu_reg_reg_vshift);
%}
11011 
11012 // ror expander
11013 
// Variable rotate-right (32-bit): rorvw maps directly.
instruct rorI_rReg(iRegINoSp dst, iRegI src, iRegI shift, rFlagsReg cr)
%{
  effect(DEF dst, USE src, USE shift);

  format %{ "ror    $dst, $src, $shift" %}
  ins_cost(INSN_COST);
  ins_encode %{
    __ rorvw(as_Register($dst$$reg), as_Register($src$$reg),
            as_Register($shift$$reg));
    %}
  ins_pipe(ialu_reg_reg_vshift);
%}
11026 
// (src >>> shift) | (src << (64 - shift)): variable rotate-right idiom.
instruct rorL_rReg_Var_C_64(iRegLNoSp dst, iRegL src, iRegI shift, immI_64 c_64, rFlagsReg cr)
%{
  match(Set dst (OrL (URShiftL src shift) (LShiftL src (SubI c_64 shift))));

  expand %{
    rorL_rReg(dst, src, shift, cr);
  %}
%}
11035 
// Same rotate-right idiom with (0 - shift); equivalent modulo 64.
instruct rorL_rReg_Var_C0(iRegLNoSp dst, iRegL src, iRegI shift, immI0 c0, rFlagsReg cr)
%{
  match(Set dst (OrL (URShiftL src shift) (LShiftL src (SubI c0 shift))));

  expand %{
    rorL_rReg(dst, src, shift, cr);
  %}
%}
11044 
// (src >>> shift) | (src << (32 - shift)): variable rotate-right idiom (int).
instruct rorI_rReg_Var_C_32(iRegINoSp dst, iRegI src, iRegI shift, immI_32 c_32, rFlagsReg cr)
%{
  match(Set dst (OrI (URShiftI src shift) (LShiftI src (SubI c_32 shift))));

  expand %{
    rorI_rReg(dst, src, shift, cr);
  %}
%}
11053 
// Same rotate-right idiom with (0 - shift); equivalent modulo 32.
instruct rorI_rReg_Var_C0(iRegINoSp dst, iRegI src, iRegI shift, immI0 c0, rFlagsReg cr)
%{
  match(Set dst (OrI (URShiftI src shift) (LShiftI src (SubI c0 shift))));

  expand %{
    rorI_rReg(dst, src, shift, cr);
  %}
%}
11062 
11063 // Add/subtract (extended)
11064 
// Long add with sign-extended int operand: fold the ConvI2L into add's
// sxtw extended-register form.
instruct AddExtI(iRegLNoSp dst, iRegL src1, iRegIorL2I src2, rFlagsReg cr)
%{
  match(Set dst (AddL src1 (ConvI2L src2)));
  ins_cost(INSN_COST);
  format %{ "add  $dst, $src1, sxtw $src2" %}

   ins_encode %{
     __ add(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::sxtw);
   %}
  ins_pipe(ialu_reg_reg);
%};
11077 
// Long subtract with sign-extended int operand: fold ConvI2L into sub sxtw.
instruct SubExtI(iRegLNoSp dst, iRegL src1, iRegIorL2I src2, rFlagsReg cr)
%{
  match(Set dst (SubL src1 (ConvI2L src2)));
  ins_cost(INSN_COST);
  format %{ "sub  $dst, $src1, sxtw $src2" %}

   ins_encode %{
     __ sub(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::sxtw);
   %}
  ins_pipe(ialu_reg_reg);
%};
11090 
11091 
// src1 + ((src2 << 16) >> 16): the shift pair is a sign-extend from 16 bits,
// so fold it into add's sxth extended-register form.
instruct AddExtI_sxth(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2, immI_16 lshift, immI_16 rshift, rFlagsReg cr)
%{
  match(Set dst (AddI src1 (RShiftI (LShiftI src2 lshift) rshift)));
  ins_cost(INSN_COST);
  format %{ "add  $dst, $src1, sxth $src2" %}

   ins_encode %{
     __ add(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::sxth);
   %}
  ins_pipe(ialu_reg_reg);
%}
11104 
// src1 + ((src2 << 24) >> 24): sign-extend from 8 bits folded into add sxtb.
instruct AddExtI_sxtb(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2, immI_24 lshift, immI_24 rshift, rFlagsReg cr)
%{
  match(Set dst (AddI src1 (RShiftI (LShiftI src2 lshift) rshift)));
  ins_cost(INSN_COST);
  format %{ "add  $dst, $src1, sxtb $src2" %}

   ins_encode %{
     __ add(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::sxtb);
   %}
  ins_pipe(ialu_reg_reg);
%}
11117 
// src1 + ((src2 << 24) >>> 24): zero-extend from 8 bits folded into add uxtb.
instruct AddExtI_uxtb(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2, immI_24 lshift, immI_24 rshift, rFlagsReg cr)
%{
  match(Set dst (AddI src1 (URShiftI (LShiftI src2 lshift) rshift)));
  ins_cost(INSN_COST);
  format %{ "add  $dst, $src1, uxtb $src2" %}

   ins_encode %{
     __ add(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::uxtb);
   %}
  ins_pipe(ialu_reg_reg);
%}
11130 
// Long form: src1 + ((src2 << 48) >> 48), sign-extend from 16 bits via sxth.
instruct AddExtL_sxth(iRegLNoSp dst, iRegL src1, iRegL src2, immI_48 lshift, immI_48 rshift, rFlagsReg cr)
%{
  match(Set dst (AddL src1 (RShiftL (LShiftL src2 lshift) rshift)));
  ins_cost(INSN_COST);
  format %{ "add  $dst, $src1, sxth $src2" %}

   ins_encode %{
     __ add(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::sxth);
   %}
  ins_pipe(ialu_reg_reg);
%}
11143 
// Long form: src1 + ((src2 << 32) >> 32), sign-extend from 32 bits via sxtw.
instruct AddExtL_sxtw(iRegLNoSp dst, iRegL src1, iRegL src2, immI_32 lshift, immI_32 rshift, rFlagsReg cr)
%{
  match(Set dst (AddL src1 (RShiftL (LShiftL src2 lshift) rshift)));
  ins_cost(INSN_COST);
  format %{ "add  $dst, $src1, sxtw $src2" %}

   ins_encode %{
     __ add(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::sxtw);
   %}
  ins_pipe(ialu_reg_reg);
%}
11156 
// Long form: src1 + ((src2 << 56) >> 56), sign-extend from 8 bits via sxtb.
instruct AddExtL_sxtb(iRegLNoSp dst, iRegL src1, iRegL src2, immI_56 lshift, immI_56 rshift, rFlagsReg cr)
%{
  match(Set dst (AddL src1 (RShiftL (LShiftL src2 lshift) rshift)));
  ins_cost(INSN_COST);
  format %{ "add  $dst, $src1, sxtb $src2" %}

   ins_encode %{
     __ add(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::sxtb);
   %}
  ins_pipe(ialu_reg_reg);
%}
11169 
// Long form: src1 + ((src2 << 56) >>> 56), zero-extend from 8 bits via uxtb.
instruct AddExtL_uxtb(iRegLNoSp dst, iRegL src1, iRegL src2, immI_56 lshift, immI_56 rshift, rFlagsReg cr)
%{
  match(Set dst (AddL src1 (URShiftL (LShiftL src2 lshift) rshift)));
  ins_cost(INSN_COST);
  format %{ "add  $dst, $src1, uxtb $src2" %}

   ins_encode %{
     __ add(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::uxtb);
   %}
  ins_pipe(ialu_reg_reg);
%}
11182 
11183 
// src1 + (src2 & 0xff): the mask is a zero-extend from 8 bits, folded into
// addw's uxtb extended-register form.
instruct AddExtI_uxtb_and(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2, immI_255 mask, rFlagsReg cr)
%{
  match(Set dst (AddI src1 (AndI src2 mask)));
  ins_cost(INSN_COST);
  format %{ "addw  $dst, $src1, $src2, uxtb" %}

   ins_encode %{
     __ addw(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::uxtb);
   %}
  ins_pipe(ialu_reg_reg);
%}
11196 
// src1 + (src2 & 0xffff): zero-extend from 16 bits folded into addw uxth.
instruct AddExtI_uxth_and(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2, immI_65535 mask, rFlagsReg cr)
%{
  match(Set dst (AddI src1 (AndI src2 mask)));
  ins_cost(INSN_COST);
  format %{ "addw  $dst, $src1, $src2, uxth" %}

   ins_encode %{
     __ addw(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::uxth);
   %}
  ins_pipe(ialu_reg_reg);
%}
11209 
// Long form: src1 + (src2 & 0xff), zero-extend from 8 bits via add uxtb.
instruct AddExtL_uxtb_and(iRegLNoSp dst, iRegL src1, iRegL src2, immL_255 mask, rFlagsReg cr)
%{
  match(Set dst (AddL src1 (AndL src2 mask)));
  ins_cost(INSN_COST);
  format %{ "add  $dst, $src1, $src2, uxtb" %}

   ins_encode %{
     __ add(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::uxtb);
   %}
  ins_pipe(ialu_reg_reg);
%}
11222 
// Long form: src1 + (src2 & 0xffff), zero-extend from 16 bits via add uxth.
instruct AddExtL_uxth_and(iRegLNoSp dst, iRegL src1, iRegL src2, immL_65535 mask, rFlagsReg cr)
%{
  match(Set dst (AddL src1 (AndL src2 mask)));
  ins_cost(INSN_COST);
  format %{ "add  $dst, $src1, $src2, uxth" %}

   ins_encode %{
     __ add(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::uxth);
   %}
  ins_pipe(ialu_reg_reg);
%}
11235 
// Long form: src1 + (src2 & 0xffffffff), zero-extend from 32 bits via add uxtw.
instruct AddExtL_uxtw_and(iRegLNoSp dst, iRegL src1, iRegL src2, immL_4294967295 mask, rFlagsReg cr)
%{
  match(Set dst (AddL src1 (AndL src2 mask)));
  ins_cost(INSN_COST);
  format %{ "add  $dst, $src1, $src2, uxtw" %}

   ins_encode %{
     __ add(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::uxtw);
   %}
  ins_pipe(ialu_reg_reg);
%}
11248 
// src1 - (src2 & 0xff): zero-extend from 8 bits folded into subw uxtb.
instruct SubExtI_uxtb_and(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2, immI_255 mask, rFlagsReg cr)
%{
  match(Set dst (SubI src1 (AndI src2 mask)));
  ins_cost(INSN_COST);
  format %{ "subw  $dst, $src1, $src2, uxtb" %}

   ins_encode %{
     __ subw(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::uxtb);
   %}
  ins_pipe(ialu_reg_reg);
%}
11261 
// src1 - (src2 & 0xffff): zero-extend from 16 bits folded into subw uxth.
instruct SubExtI_uxth_and(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2, immI_65535 mask, rFlagsReg cr)
%{
  match(Set dst (SubI src1 (AndI src2 mask)));
  ins_cost(INSN_COST);
  format %{ "subw  $dst, $src1, $src2, uxth" %}

   ins_encode %{
     __ subw(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::uxth);
   %}
  ins_pipe(ialu_reg_reg);
%}
11274 
// Long form: src1 - (src2 & 0xff), zero-extend from 8 bits via sub uxtb.
instruct SubExtL_uxtb_and(iRegLNoSp dst, iRegL src1, iRegL src2, immL_255 mask, rFlagsReg cr)
%{
  match(Set dst (SubL src1 (AndL src2 mask)));
  ins_cost(INSN_COST);
  format %{ "sub  $dst, $src1, $src2, uxtb" %}

   ins_encode %{
     __ sub(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::uxtb);
   %}
  ins_pipe(ialu_reg_reg);
%}
11287 
// Long form: src1 - (src2 & 0xffff), zero-extend from 16 bits via sub uxth.
instruct SubExtL_uxth_and(iRegLNoSp dst, iRegL src1, iRegL src2, immL_65535 mask, rFlagsReg cr)
%{
  match(Set dst (SubL src1 (AndL src2 mask)));
  ins_cost(INSN_COST);
  format %{ "sub  $dst, $src1, $src2, uxth" %}

   ins_encode %{
     __ sub(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::uxth);
   %}
  ins_pipe(ialu_reg_reg);
%}
11300 
// Long form: src1 - (src2 & 0xffffffff), zero-extend from 32 bits via sub uxtw.
instruct SubExtL_uxtw_and(iRegLNoSp dst, iRegL src1, iRegL src2, immL_4294967295 mask, rFlagsReg cr)
%{
  match(Set dst (SubL src1 (AndL src2 mask)));
  ins_cost(INSN_COST);
  format %{ "sub  $dst, $src1, $src2, uxtw" %}

   ins_encode %{
     __ sub(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::uxtw);
   %}
  ins_pipe(ialu_reg_reg);
%}
11313 
11314 // END This section of the file is automatically generated. Do not edit --------------
11315 
11316 // ============================================================================
11317 // Floating Point Arithmetic Instructions
11318 
// Single-precision floating add: fadds.
instruct addF_reg_reg(vRegF dst, vRegF src1, vRegF src2) %{
  match(Set dst (AddF src1 src2));

  ins_cost(INSN_COST * 5);
  format %{ "fadds   $dst, $src1, $src2" %}

  ins_encode %{
    __ fadds(as_FloatRegister($dst$$reg),
             as_FloatRegister($src1$$reg),
             as_FloatRegister($src2$$reg));
  %}

  ins_pipe(fp_dop_reg_reg_s);
%}
11333 
// Double-precision floating add: faddd.
instruct addD_reg_reg(vRegD dst, vRegD src1, vRegD src2) %{
  match(Set dst (AddD src1 src2));

  ins_cost(INSN_COST * 5);
  format %{ "faddd   $dst, $src1, $src2" %}

  ins_encode %{
    __ faddd(as_FloatRegister($dst$$reg),
             as_FloatRegister($src1$$reg),
             as_FloatRegister($src2$$reg));
  %}

  ins_pipe(fp_dop_reg_reg_d);
%}
11348 
// Single-precision floating subtract: fsubs.
instruct subF_reg_reg(vRegF dst, vRegF src1, vRegF src2) %{
  match(Set dst (SubF src1 src2));

  ins_cost(INSN_COST * 5);
  format %{ "fsubs   $dst, $src1, $src2" %}

  ins_encode %{
    __ fsubs(as_FloatRegister($dst$$reg),
             as_FloatRegister($src1$$reg),
             as_FloatRegister($src2$$reg));
  %}

  ins_pipe(fp_dop_reg_reg_s);
%}
11363 
// Double-precision floating subtract: fsubd.
instruct subD_reg_reg(vRegD dst, vRegD src1, vRegD src2) %{
  match(Set dst (SubD src1 src2));

  ins_cost(INSN_COST * 5);
  format %{ "fsubd   $dst, $src1, $src2" %}

  ins_encode %{
    __ fsubd(as_FloatRegister($dst$$reg),
             as_FloatRegister($src1$$reg),
             as_FloatRegister($src2$$reg));
  %}

  ins_pipe(fp_dop_reg_reg_d);
%}
11378 
// Single-precision floating multiply: fmuls.
instruct mulF_reg_reg(vRegF dst, vRegF src1, vRegF src2) %{
  match(Set dst (MulF src1 src2));

  ins_cost(INSN_COST * 6);
  format %{ "fmuls   $dst, $src1, $src2" %}

  ins_encode %{
    __ fmuls(as_FloatRegister($dst$$reg),
             as_FloatRegister($src1$$reg),
             as_FloatRegister($src2$$reg));
  %}

  ins_pipe(fp_dop_reg_reg_s);
%}
11393 
// Double-precision floating multiply: fmuld.
instruct mulD_reg_reg(vRegD dst, vRegD src1, vRegD src2) %{
  match(Set dst (MulD src1 src2));

  ins_cost(INSN_COST * 6);
  format %{ "fmuld   $dst, $src1, $src2" %}

  ins_encode %{
    __ fmuld(as_FloatRegister($dst$$reg),
             as_FloatRegister($src1$$reg),
             as_FloatRegister($src2$$reg));
  %}

  ins_pipe(fp_dop_reg_reg_d);
%}
11408 
// We cannot use these fused mul w add/sub ops because they don't
// produce the same result as the equivalent separated ops
// (essentially they don't round the intermediate result). That's a
// shame. Leaving them here in case we can identify cases where it is
// legitimate to use them.
11414 
11415 
11416 // instruct maddF_reg_reg(vRegF dst, vRegF src1, vRegF src2, vRegF src3) %{
11417 //   match(Set dst (AddF (MulF src1 src2) src3));
11418 
11419 //   format %{ "fmadds   $dst, $src1, $src2, $src3" %}
11420 
11421 //   ins_encode %{
11422 //     __ fmadds(as_FloatRegister($dst$$reg),
11423 //              as_FloatRegister($src1$$reg),
11424 //              as_FloatRegister($src2$$reg),
11425 //              as_FloatRegister($src3$$reg));
11426 //   %}
11427 
11428 //   ins_pipe(pipe_class_default);
11429 // %}
11430 
11431 // instruct maddD_reg_reg(vRegD dst, vRegD src1, vRegD src2, vRegD src3) %{
11432 //   match(Set dst (AddD (MulD src1 src2) src3));
11433 
11434 //   format %{ "fmaddd   $dst, $src1, $src2, $src3" %}
11435 
11436 //   ins_encode %{
11437 //     __ fmaddd(as_FloatRegister($dst$$reg),
11438 //              as_FloatRegister($src1$$reg),
11439 //              as_FloatRegister($src2$$reg),
11440 //              as_FloatRegister($src3$$reg));
11441 //   %}
11442 
11443 //   ins_pipe(pipe_class_default);
11444 // %}
11445 
11446 // instruct msubF_reg_reg(vRegF dst, vRegF src1, vRegF src2, vRegF src3) %{
11447 //   match(Set dst (AddF (MulF (NegF src1) src2) src3));
11448 //   match(Set dst (AddF (NegF (MulF src1 src2)) src3));
11449 
11450 //   format %{ "fmsubs   $dst, $src1, $src2, $src3" %}
11451 
11452 //   ins_encode %{
11453 //     __ fmsubs(as_FloatRegister($dst$$reg),
11454 //               as_FloatRegister($src1$$reg),
11455 //               as_FloatRegister($src2$$reg),
11456 //              as_FloatRegister($src3$$reg));
11457 //   %}
11458 
11459 //   ins_pipe(pipe_class_default);
11460 // %}
11461 
11462 // instruct msubD_reg_reg(vRegD dst, vRegD src1, vRegD src2, vRegD src3) %{
11463 //   match(Set dst (AddD (MulD (NegD src1) src2) src3));
11464 //   match(Set dst (AddD (NegD (MulD src1 src2)) src3));
11465 
11466 //   format %{ "fmsubd   $dst, $src1, $src2, $src3" %}
11467 
11468 //   ins_encode %{
11469 //     __ fmsubd(as_FloatRegister($dst$$reg),
11470 //               as_FloatRegister($src1$$reg),
11471 //               as_FloatRegister($src2$$reg),
11472 //               as_FloatRegister($src3$$reg));
11473 //   %}
11474 
11475 //   ins_pipe(pipe_class_default);
11476 // %}
11477 
11478 // instruct mnaddF_reg_reg(vRegF dst, vRegF src1, vRegF src2, vRegF src3) %{
11479 //   match(Set dst (SubF (MulF (NegF src1) src2) src3));
11480 //   match(Set dst (SubF (NegF (MulF src1 src2)) src3));
11481 
11482 //   format %{ "fnmadds  $dst, $src1, $src2, $src3" %}
11483 
11484 //   ins_encode %{
11485 //     __ fnmadds(as_FloatRegister($dst$$reg),
11486 //                as_FloatRegister($src1$$reg),
11487 //                as_FloatRegister($src2$$reg),
11488 //                as_FloatRegister($src3$$reg));
11489 //   %}
11490 
11491 //   ins_pipe(pipe_class_default);
11492 // %}
11493 
11494 // instruct mnaddD_reg_reg(vRegD dst, vRegD src1, vRegD src2, vRegD src3) %{
11495 //   match(Set dst (SubD (MulD (NegD src1) src2) src3));
11496 //   match(Set dst (SubD (NegD (MulD src1 src2)) src3));
11497 
11498 //   format %{ "fnmaddd   $dst, $src1, $src2, $src3" %}
11499 
11500 //   ins_encode %{
11501 //     __ fnmaddd(as_FloatRegister($dst$$reg),
11502 //                as_FloatRegister($src1$$reg),
11503 //                as_FloatRegister($src2$$reg),
11504 //                as_FloatRegister($src3$$reg));
11505 //   %}
11506 
11507 //   ins_pipe(pipe_class_default);
11508 // %}
11509 
11510 // instruct mnsubF_reg_reg(vRegF dst, vRegF src1, vRegF src2, vRegF src3, immF0 zero) %{
11511 //   match(Set dst (SubF (MulF src1 src2) src3));
11512 
11513 //   format %{ "fnmsubs  $dst, $src1, $src2, $src3" %}
11514 
11515 //   ins_encode %{
11516 //     __ fnmsubs(as_FloatRegister($dst$$reg),
11517 //                as_FloatRegister($src1$$reg),
11518 //                as_FloatRegister($src2$$reg),
11519 //                as_FloatRegister($src3$$reg));
11520 //   %}
11521 
11522 //   ins_pipe(pipe_class_default);
11523 // %}
11524 
11525 // instruct mnsubD_reg_reg(vRegD dst, vRegD src1, vRegD src2, vRegD src3, immD0 zero) %{
11526 //   match(Set dst (SubD (MulD src1 src2) src3));
11527 
11528 //   format %{ "fnmsubd   $dst, $src1, $src2, $src3" %}
11529 
11530 //   ins_encode %{
11531 //   // n.b. insn name should be fnmsubd
11532 //     __ fnmsub(as_FloatRegister($dst$$reg),
11533 //                as_FloatRegister($src1$$reg),
11534 //                as_FloatRegister($src2$$reg),
11535 //                as_FloatRegister($src3$$reg));
11536 //   %}
11537 
11538 //   ins_pipe(pipe_class_default);
11539 // %}
11540 
11541 
// Single-precision floating divide: fdivs.
instruct divF_reg_reg(vRegF dst, vRegF src1, vRegF src2) %{
  match(Set dst (DivF src1  src2));

  ins_cost(INSN_COST * 18);
  format %{ "fdivs   $dst, $src1, $src2" %}

  ins_encode %{
    __ fdivs(as_FloatRegister($dst$$reg),
             as_FloatRegister($src1$$reg),
             as_FloatRegister($src2$$reg));
  %}

  ins_pipe(fp_div_s);
%}
11556 
// Double-precision floating divide: fdivd.
instruct divD_reg_reg(vRegD dst, vRegD src1, vRegD src2) %{
  match(Set dst (DivD src1  src2));

  ins_cost(INSN_COST * 32);
  format %{ "fdivd   $dst, $src1, $src2" %}

  ins_encode %{
    __ fdivd(as_FloatRegister($dst$$reg),
             as_FloatRegister($src1$$reg),
             as_FloatRegister($src2$$reg));
  %}

  ins_pipe(fp_div_d);
%}
11571 
// Single-precision floating negate: fnegs.
instruct negF_reg_reg(vRegF dst, vRegF src) %{
  match(Set dst (NegF src));

  ins_cost(INSN_COST * 3);
  // Format mnemonic fixed from "fneg" to "fnegs": the encoding emits fnegs,
  // and this matches negD_reg_reg's "fnegd" convention.
  format %{ "fnegs   $dst, $src" %}

  ins_encode %{
    __ fnegs(as_FloatRegister($dst$$reg),
             as_FloatRegister($src$$reg));
  %}

  ins_pipe(fp_uop_s);
%}
11585 
// Double-precision floating negate: fnegd.
instruct negD_reg_reg(vRegD dst, vRegD src) %{
  match(Set dst (NegD src));

  ins_cost(INSN_COST * 3);
  format %{ "fnegd   $dst, $src" %}

  ins_encode %{
    __ fnegd(as_FloatRegister($dst$$reg),
             as_FloatRegister($src$$reg));
  %}

  ins_pipe(fp_uop_d);
%}
11599 
// Single-precision floating absolute value: fabss.
instruct absF_reg(vRegF dst, vRegF src) %{
  match(Set dst (AbsF src));

  ins_cost(INSN_COST * 3);
  format %{ "fabss   $dst, $src" %}
  ins_encode %{
    __ fabss(as_FloatRegister($dst$$reg),
             as_FloatRegister($src$$reg));
  %}

  ins_pipe(fp_uop_s);
%}
11612 
// Double-precision floating absolute value: fabsd.
instruct absD_reg(vRegD dst, vRegD src) %{
  match(Set dst (AbsD src));

  ins_cost(INSN_COST * 3);
  format %{ "fabsd   $dst, $src" %}
  ins_encode %{
    __ fabsd(as_FloatRegister($dst$$reg),
             as_FloatRegister($src$$reg));
  %}

  ins_pipe(fp_uop_d);
%}
11625 
// Double-precision square root: fsqrtd.
instruct sqrtD_reg(vRegD dst, vRegD src) %{
  match(Set dst (SqrtD src));

  ins_cost(INSN_COST * 50);
  format %{ "fsqrtd  $dst, $src" %}
  ins_encode %{
    __ fsqrtd(as_FloatRegister($dst$$reg),
             as_FloatRegister($src$$reg));
  %}

  // Scheduling-class fix: was fp_div_s; a double-precision sqrt belongs in
  // the double-precision divide/sqrt pipeline (it was swapped with sqrtF_reg).
  // Pipeline classes affect scheduling heuristics only, not correctness.
  ins_pipe(fp_div_d);
%}
11638 
// Single-precision square root expressed as the float->double->float idiom;
// fsqrts gives the same correctly-rounded result in one instruction.
instruct sqrtF_reg(vRegF dst, vRegF src) %{
  match(Set dst (ConvD2F (SqrtD (ConvF2D src))));

  ins_cost(INSN_COST * 50);
  format %{ "fsqrts  $dst, $src" %}
  ins_encode %{
    __ fsqrts(as_FloatRegister($dst$$reg),
             as_FloatRegister($src$$reg));
  %}

  // Scheduling-class fix: was fp_div_d; a single-precision sqrt belongs in
  // the single-precision divide/sqrt pipeline (it was swapped with sqrtD_reg).
  ins_pipe(fp_div_s);
%}
11651 
11652 // ============================================================================
11653 // Logical Instructions
11654 
11655 // Integer Logical Instructions
11656 
11657 // And Instructions
11658 
11659 
// 32-bit bitwise and, register-register: andw.
instruct andI_reg_reg(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2, rFlagsReg cr) %{
  match(Set dst (AndI src1 src2));

  format %{ "andw  $dst, $src1, $src2\t# int" %}

  ins_cost(INSN_COST);
  ins_encode %{
    __ andw(as_Register($dst$$reg),
            as_Register($src1$$reg),
            as_Register($src2$$reg));
  %}

  ins_pipe(ialu_reg_reg);
%}
11674 
// 32-bit bitwise and with a logical-immediate operand: andw.
instruct andI_reg_imm(iRegINoSp dst, iRegIorL2I src1, immILog src2, rFlagsReg cr) %{
  match(Set dst (AndI src1 src2));

  // Format fixed from "andsw" to "andw": the encoding emits the
  // non-flag-setting andw, so the printed mnemonic was misleading.
  format %{ "andw  $dst, $src1, $src2\t# int" %}

  ins_cost(INSN_COST);
  ins_encode %{
    __ andw(as_Register($dst$$reg),
            as_Register($src1$$reg),
            (unsigned long)($src2$$constant));
  %}

  ins_pipe(ialu_reg_imm);
%}
11689 
11690 // Or Instructions
11691 
// 32-bit bitwise or, register-register: orrw.
instruct orI_reg_reg(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2) %{
  match(Set dst (OrI src1 src2));

  format %{ "orrw  $dst, $src1, $src2\t# int" %}

  ins_cost(INSN_COST);
  ins_encode %{
    __ orrw(as_Register($dst$$reg),
            as_Register($src1$$reg),
            as_Register($src2$$reg));
  %}

  ins_pipe(ialu_reg_reg);
%}
11706 
// 32-bit bitwise or with a logical-immediate operand: orrw.
instruct orI_reg_imm(iRegINoSp dst, iRegIorL2I src1, immILog src2) %{
  match(Set dst (OrI src1 src2));

  format %{ "orrw  $dst, $src1, $src2\t# int" %}

  ins_cost(INSN_COST);
  ins_encode %{
    __ orrw(as_Register($dst$$reg),
            as_Register($src1$$reg),
            (unsigned long)($src2$$constant));
  %}

  ins_pipe(ialu_reg_imm);
%}
11721 
11722 // Xor Instructions
11723 
11724 instruct xorI_reg_reg(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2) %{
11725   match(Set dst (XorI src1 src2));
11726 
11727   format %{ "eorw  $dst, $src1, $src2\t# int" %}
11728 
11729   ins_cost(INSN_COST);
11730   ins_encode %{
11731     __ eorw(as_Register($dst$$reg),
11732             as_Register($src1$$reg),
11733             as_Register($src2$$reg));
11734   %}
11735 
11736   ins_pipe(ialu_reg_reg);
11737 %}
11738 
11739 instruct xorI_reg_imm(iRegINoSp dst, iRegIorL2I src1, immILog src2) %{
11740   match(Set dst (XorI src1 src2));
11741 
11742   format %{ "eorw  $dst, $src1, $src2\t# int" %}
11743 
11744   ins_cost(INSN_COST);
11745   ins_encode %{
11746     __ eorw(as_Register($dst$$reg),
11747             as_Register($src1$$reg),
11748             (unsigned long)($src2$$constant));
11749   %}
11750 
11751   ins_pipe(ialu_reg_imm);
11752 %}
11753 
// Long Logical Instructions (64-bit counterparts of the int forms above)
11756 
// dst = src1 & src2 (64-bit register-register AND).
// NOTE(review): the rFlagsReg cr operand has no effect() clause and andr
// does not set flags -- presumably vestigial; kept for interface stability.
instruct andL_reg_reg(iRegLNoSp dst, iRegL src1, iRegL src2, rFlagsReg cr) %{
  match(Set dst (AndL src1 src2));

  // Fixed: format comment said "# int" for a 64-bit (long) operation.
  format %{ "and  $dst, $src1, $src2\t# long" %}

  ins_cost(INSN_COST);
  ins_encode %{
    __ andr(as_Register($dst$$reg),
            as_Register($src1$$reg),
            as_Register($src2$$reg));
  %}

  ins_pipe(ialu_reg_reg);
%}
11771 
// dst = src1 & imm (64-bit AND with a logical-immediate src2).
instruct andL_reg_imm(iRegLNoSp dst, iRegL src1, immLLog src2, rFlagsReg cr) %{
  match(Set dst (AndL src1 src2));

  // Fixed: format comment said "# int" for a 64-bit (long) operation.
  format %{ "and  $dst, $src1, $src2\t# long" %}

  ins_cost(INSN_COST);
  ins_encode %{
    __ andr(as_Register($dst$$reg),
            as_Register($src1$$reg),
            (unsigned long)($src2$$constant));
  %}

  ins_pipe(ialu_reg_imm);
%}
11786 
11787 // Or Instructions
11788 
// dst = src1 | src2 (64-bit register-register OR).
instruct orL_reg_reg(iRegLNoSp dst, iRegL src1, iRegL src2) %{
  match(Set dst (OrL src1 src2));

  // Fixed: format comment said "# int" for a 64-bit (long) operation.
  format %{ "orr  $dst, $src1, $src2\t# long" %}

  ins_cost(INSN_COST);
  ins_encode %{
    __ orr(as_Register($dst$$reg),
           as_Register($src1$$reg),
           as_Register($src2$$reg));
  %}

  ins_pipe(ialu_reg_reg);
%}
11803 
// dst = src1 | imm (64-bit OR with a logical-immediate src2).
instruct orL_reg_imm(iRegLNoSp dst, iRegL src1, immLLog src2) %{
  match(Set dst (OrL src1 src2));

  // Fixed: format comment said "# int" for a 64-bit (long) operation.
  format %{ "orr  $dst, $src1, $src2\t# long" %}

  ins_cost(INSN_COST);
  ins_encode %{
    __ orr(as_Register($dst$$reg),
           as_Register($src1$$reg),
           (unsigned long)($src2$$constant));
  %}

  ins_pipe(ialu_reg_imm);
%}
11818 
11819 // Xor Instructions
11820 
// dst = src1 ^ src2 (64-bit register-register XOR).
instruct xorL_reg_reg(iRegLNoSp dst, iRegL src1, iRegL src2) %{
  match(Set dst (XorL src1 src2));

  // Fixed: format comment said "# int" for a 64-bit (long) operation.
  format %{ "eor  $dst, $src1, $src2\t# long" %}

  ins_cost(INSN_COST);
  ins_encode %{
    __ eor(as_Register($dst$$reg),
           as_Register($src1$$reg),
           as_Register($src2$$reg));
  %}

  ins_pipe(ialu_reg_reg);
%}
11835 
// dst = src1 ^ imm (64-bit XOR with a logical-immediate src2).
instruct xorL_reg_imm(iRegLNoSp dst, iRegL src1, immLLog src2) %{
  match(Set dst (XorL src1 src2));

  // Fixed: format comment said "# int" for a long operation; also moved
  // format above ins_cost to match every sibling logical instruct.
  format %{ "eor  $dst, $src1, $src2\t# long" %}

  ins_cost(INSN_COST);
  ins_encode %{
    __ eor(as_Register($dst$$reg),
           as_Register($src1$$reg),
           (unsigned long)($src2$$constant));
  %}

  ins_pipe(ialu_reg_imm);
%}
11850 
// Sign-extend int to long; sbfm with immr=0, imms=31 is the sxtw alias.
instruct convI2L_reg_reg(iRegLNoSp dst, iRegIorL2I src)
%{
  match(Set dst (ConvI2L src));

  ins_cost(INSN_COST);
  format %{ "sxtw  $dst, $src\t# i2l" %}
  ins_encode %{
    __ sbfm($dst$$Register, $src$$Register, 0, 31);
  %}
  ins_pipe(ialu_reg_shift);
%}

// this pattern occurs in bigmath arithmetic
// Zero-extend int to long: folds (ConvI2L src) & 0xFFFFFFFF into one ubfm.
instruct convUI2L_reg_reg(iRegLNoSp dst, iRegIorL2I src, immL_32bits mask)
%{
  match(Set dst (AndL (ConvI2L src) mask));

  ins_cost(INSN_COST);
  format %{ "ubfm  $dst, $src, 0, 31\t# ui2l" %}
  ins_encode %{
    __ ubfm($dst$$Register, $src$$Register, 0, 31);
  %}

  ins_pipe(ialu_reg_shift);
%}

// Truncate long to int: a 32-bit register move discards the high word.
instruct convL2I_reg(iRegINoSp dst, iRegL src) %{
  match(Set dst (ConvL2I src));

  ins_cost(INSN_COST);
  format %{ "movw  $dst, $src \t// l2i" %}

  ins_encode %{
    __ movw(as_Register($dst$$reg), as_Register($src$$reg));
  %}

  ins_pipe(ialu_reg);
%}
11889 
// int -> boolean: dst = (src != 0) ? 1 : 0, via compare-with-zero + cset.
instruct convI2B(iRegINoSp dst, iRegIorL2I src, rFlagsReg cr)
%{
  match(Set dst (Conv2B src));
  effect(KILL cr);

  format %{
    "cmpw $src, zr\n\t"
    "cset $dst, ne"
  %}

  ins_encode %{
    __ cmpw(as_Register($src$$reg), zr);
    __ cset(as_Register($dst$$reg), Assembler::NE);
  %}

  ins_pipe(ialu_reg);
%}

// pointer -> boolean: dst = (src != NULL) ? 1 : 0 (64-bit compare).
instruct convP2B(iRegINoSp dst, iRegP src, rFlagsReg cr)
%{
  match(Set dst (Conv2B src));
  effect(KILL cr);

  format %{
    "cmp  $src, zr\n\t"
    "cset $dst, ne"
  %}

  ins_encode %{
    __ cmp(as_Register($src$$reg), zr);
    __ cset(as_Register($dst$$reg), Assembler::NE);
  %}

  ins_pipe(ialu_reg);
%}
11925 
// Narrow double to float.
instruct convD2F_reg(vRegF dst, vRegD src) %{
  match(Set dst (ConvD2F src));

  ins_cost(INSN_COST * 5);
  format %{ "fcvtd  $dst, $src \t// d2f" %}

  ins_encode %{
    __ fcvtd(as_FloatRegister($dst$$reg), as_FloatRegister($src$$reg));
  %}

  ins_pipe(fp_d2f);
%}

// Widen float to double.
instruct convF2D_reg(vRegD dst, vRegF src) %{
  match(Set dst (ConvF2D src));

  ins_cost(INSN_COST * 5);
  format %{ "fcvts  $dst, $src \t// f2d" %}

  ins_encode %{
    __ fcvts(as_FloatRegister($dst$$reg), as_FloatRegister($src$$reg));
  %}

  ins_pipe(fp_f2d);
%}

// float -> int, round toward zero (fcvtzs, 32-bit destination).
instruct convF2I_reg_reg(iRegINoSp dst, vRegF src) %{
  match(Set dst (ConvF2I src));

  ins_cost(INSN_COST * 5);
  format %{ "fcvtzsw  $dst, $src \t// f2i" %}

  ins_encode %{
    __ fcvtzsw(as_Register($dst$$reg), as_FloatRegister($src$$reg));
  %}

  ins_pipe(fp_f2i);
%}

// float -> long, round toward zero.
instruct convF2L_reg_reg(iRegLNoSp dst, vRegF src) %{
  match(Set dst (ConvF2L src));

  ins_cost(INSN_COST * 5);
  format %{ "fcvtzs  $dst, $src \t// f2l" %}

  ins_encode %{
    __ fcvtzs(as_Register($dst$$reg), as_FloatRegister($src$$reg));
  %}

  ins_pipe(fp_f2l);
%}

// int -> float (signed convert).
instruct convI2F_reg_reg(vRegF dst, iRegIorL2I src) %{
  match(Set dst (ConvI2F src));

  ins_cost(INSN_COST * 5);
  format %{ "scvtfws  $dst, $src \t// i2f" %}

  ins_encode %{
    __ scvtfws(as_FloatRegister($dst$$reg), as_Register($src$$reg));
  %}

  ins_pipe(fp_i2f);
%}

// long -> float (signed convert).
instruct convL2F_reg_reg(vRegF dst, iRegL src) %{
  match(Set dst (ConvL2F src));

  ins_cost(INSN_COST * 5);
  format %{ "scvtfs  $dst, $src \t// l2f" %}

  ins_encode %{
    __ scvtfs(as_FloatRegister($dst$$reg), as_Register($src$$reg));
  %}

  ins_pipe(fp_l2f);
%}

// double -> int, round toward zero.
instruct convD2I_reg_reg(iRegINoSp dst, vRegD src) %{
  match(Set dst (ConvD2I src));

  ins_cost(INSN_COST * 5);
  format %{ "fcvtzdw  $dst, $src \t// d2i" %}

  ins_encode %{
    __ fcvtzdw(as_Register($dst$$reg), as_FloatRegister($src$$reg));
  %}

  ins_pipe(fp_d2i);
%}

// double -> long, round toward zero.
instruct convD2L_reg_reg(iRegLNoSp dst, vRegD src) %{
  match(Set dst (ConvD2L src));

  ins_cost(INSN_COST * 5);
  format %{ "fcvtzd  $dst, $src \t// d2l" %}

  ins_encode %{
    __ fcvtzd(as_Register($dst$$reg), as_FloatRegister($src$$reg));
  %}

  ins_pipe(fp_d2l);
%}

// int -> double (signed convert).
instruct convI2D_reg_reg(vRegD dst, iRegIorL2I src) %{
  match(Set dst (ConvI2D src));

  ins_cost(INSN_COST * 5);
  format %{ "scvtfwd  $dst, $src \t// i2d" %}

  ins_encode %{
    __ scvtfwd(as_FloatRegister($dst$$reg), as_Register($src$$reg));
  %}

  ins_pipe(fp_i2d);
%}

// long -> double (signed convert).
instruct convL2D_reg_reg(vRegD dst, iRegL src) %{
  match(Set dst (ConvL2D src));

  ins_cost(INSN_COST * 5);
  format %{ "scvtfd  $dst, $src \t// l2d" %}

  ins_encode %{
    __ scvtfd(as_FloatRegister($dst$$reg), as_Register($src$$reg));
  %}

  ins_pipe(fp_l2d);
%}
12055 
12056 // stack <-> reg and reg <-> reg shuffles with no conversion
12057 
// Reinterpret a float stack slot as an int: plain 32-bit load, no conversion.
instruct MoveF2I_stack_reg(iRegINoSp dst, stackSlotF src) %{

  match(Set dst (MoveF2I src));

  effect(DEF dst, USE src);

  ins_cost(4 * INSN_COST);

  format %{ "ldrw $dst, $src\t# MoveF2I_stack_reg" %}

  ins_encode %{
    __ ldrw($dst$$Register, Address(sp, $src$$disp));
  %}

  ins_pipe(iload_reg_reg);

%}

// Reinterpret an int stack slot as a float: 32-bit FP load, no conversion.
instruct MoveI2F_stack_reg(vRegF dst, stackSlotI src) %{

  match(Set dst (MoveI2F src));

  effect(DEF dst, USE src);

  ins_cost(4 * INSN_COST);

  format %{ "ldrs $dst, $src\t# MoveI2F_stack_reg" %}

  ins_encode %{
    __ ldrs(as_FloatRegister($dst$$reg), Address(sp, $src$$disp));
  %}

  ins_pipe(pipe_class_memory);

%}

// Reinterpret a double stack slot as a long: plain 64-bit load.
instruct MoveD2L_stack_reg(iRegLNoSp dst, stackSlotD src) %{

  match(Set dst (MoveD2L src));

  effect(DEF dst, USE src);

  ins_cost(4 * INSN_COST);

  format %{ "ldr $dst, $src\t# MoveD2L_stack_reg" %}

  ins_encode %{
    __ ldr($dst$$Register, Address(sp, $src$$disp));
  %}

  ins_pipe(iload_reg_reg);

%}

// Reinterpret a long stack slot as a double: 64-bit FP load.
instruct MoveL2D_stack_reg(vRegD dst, stackSlotL src) %{

  match(Set dst (MoveL2D src));

  effect(DEF dst, USE src);

  ins_cost(4 * INSN_COST);

  format %{ "ldrd $dst, $src\t# MoveL2D_stack_reg" %}

  ins_encode %{
    __ ldrd(as_FloatRegister($dst$$reg), Address(sp, $src$$disp));
  %}

  ins_pipe(pipe_class_memory);

%}

// Store a float register to an int stack slot (bit-preserving).
instruct MoveF2I_reg_stack(stackSlotI dst, vRegF src) %{

  match(Set dst (MoveF2I src));

  effect(DEF dst, USE src);

  ins_cost(INSN_COST);

  format %{ "strs $src, $dst\t# MoveF2I_reg_stack" %}

  ins_encode %{
    __ strs(as_FloatRegister($src$$reg), Address(sp, $dst$$disp));
  %}

  ins_pipe(pipe_class_memory);

%}

// Store an int register to a float stack slot (bit-preserving).
instruct MoveI2F_reg_stack(stackSlotF dst, iRegI src) %{

  match(Set dst (MoveI2F src));

  effect(DEF dst, USE src);

  ins_cost(INSN_COST);

  format %{ "strw $src, $dst\t# MoveI2F_reg_stack" %}

  ins_encode %{
    __ strw($src$$Register, Address(sp, $dst$$disp));
  %}

  ins_pipe(istore_reg_reg);

%}
12165 
// Store a double register to a long stack slot (bit-preserving).
instruct MoveD2L_reg_stack(stackSlotL dst, vRegD src) %{

  match(Set dst (MoveD2L src));

  effect(DEF dst, USE src);

  ins_cost(INSN_COST);

  // Fixed: format operands were reversed ("$dst, $src"); the encoding stores
  // $src to the stack slot $dst, matching the sibling *_reg_stack rules.
  format %{ "strd $src, $dst\t# MoveD2L_reg_stack" %}

  ins_encode %{
    __ strd(as_FloatRegister($src$$reg), Address(sp, $dst$$disp));
  %}

  ins_pipe(pipe_class_memory);

%}
12183 
// Store a long register to a double stack slot (bit-preserving).
instruct MoveL2D_reg_stack(stackSlotD dst, iRegL src) %{

  match(Set dst (MoveL2D src));

  effect(DEF dst, USE src);

  ins_cost(INSN_COST);

  format %{ "str $src, $dst\t# MoveL2D_reg_stack" %}

  ins_encode %{
    __ str($src$$Register, Address(sp, $dst$$disp));
  %}

  ins_pipe(istore_reg_reg);

%}

// Bit-copy float reg -> int reg via fmov, no memory round trip.
instruct MoveF2I_reg_reg(iRegINoSp dst, vRegF src) %{

  match(Set dst (MoveF2I src));

  effect(DEF dst, USE src);

  ins_cost(INSN_COST);

  format %{ "fmovs $dst, $src\t# MoveF2I_reg_reg" %}

  ins_encode %{
    __ fmovs($dst$$Register, as_FloatRegister($src$$reg));
  %}

  ins_pipe(fp_f2i);

%}

// Bit-copy int reg -> float reg via fmov.
instruct MoveI2F_reg_reg(vRegF dst, iRegI src) %{

  match(Set dst (MoveI2F src));

  effect(DEF dst, USE src);

  ins_cost(INSN_COST);

  format %{ "fmovs $dst, $src\t# MoveI2F_reg_reg" %}

  ins_encode %{
    __ fmovs(as_FloatRegister($dst$$reg), $src$$Register);
  %}

  ins_pipe(fp_i2f);

%}

// Bit-copy double reg -> long reg via fmov.
instruct MoveD2L_reg_reg(iRegLNoSp dst, vRegD src) %{

  match(Set dst (MoveD2L src));

  effect(DEF dst, USE src);

  ins_cost(INSN_COST);

  format %{ "fmovd $dst, $src\t# MoveD2L_reg_reg" %}

  ins_encode %{
    __ fmovd($dst$$Register, as_FloatRegister($src$$reg));
  %}

  ins_pipe(fp_d2l);

%}

// Bit-copy long reg -> double reg via fmov.
instruct MoveL2D_reg_reg(vRegD dst, iRegL src) %{

  match(Set dst (MoveL2D src));

  effect(DEF dst, USE src);

  ins_cost(INSN_COST);

  format %{ "fmovd $dst, $src\t# MoveL2D_reg_reg" %}

  ins_encode %{
    __ fmovd(as_FloatRegister($dst$$reg), $src$$Register);
  %}

  ins_pipe(fp_l2d);

%}
12273 
12274 // ============================================================================
12275 // clearing of an array
12276 
// Zero cnt words starting at base; cnt is a runtime register value.
// Pinned to R10/R11 and both are clobbered by zero_words.
instruct clearArray_reg_reg(iRegL_R11 cnt, iRegP_R10 base, Universe dummy, rFlagsReg cr)
%{
  match(Set dummy (ClearArray cnt base));
  effect(USE_KILL cnt, USE_KILL base, KILL cr);

  ins_cost(4 * INSN_COST);
  format %{ "ClearArray $cnt, $base" %}

  ins_encode %{
    __ zero_words($base$$Register, $cnt$$Register);
  %}

  ins_pipe(pipe_class_memory);
%}

// Zero a compile-time-constant number of words starting at base.
// tmp (R11) is a scratch register for the zero_words expansion.
instruct clearArray_imm_reg(immL cnt, iRegP_R10 base, iRegL_R11 tmp, Universe dummy, rFlagsReg cr)
%{
  match(Set dummy (ClearArray cnt base));
  effect(USE_KILL base, TEMP tmp, KILL cr);

  ins_cost(4 * INSN_COST);
  format %{ "ClearArray $cnt, $base" %}

  ins_encode %{
    __ zero_words($base$$Register, (u_int64_t)$cnt$$constant);
  %}

  ins_pipe(pipe_class_memory);
%}
12306 
12307 // ============================================================================
12308 // Overflow Math Instructions
12309 
// Add/sub overflow checks: set flags (including V) without producing the
// arithmetic result -- cmn is add-and-discard, cmp is sub-and-discard.

instruct overflowAddI_reg_reg(rFlagsReg cr, iRegIorL2I op1, iRegIorL2I op2)
%{
  match(Set cr (OverflowAddI op1 op2));

  format %{ "cmnw  $op1, $op2\t# overflow check int" %}
  ins_cost(INSN_COST);
  ins_encode %{
    __ cmnw($op1$$Register, $op2$$Register);
  %}

  ins_pipe(icmp_reg_reg);
%}

instruct overflowAddI_reg_imm(rFlagsReg cr, iRegIorL2I op1, immIAddSub op2)
%{
  match(Set cr (OverflowAddI op1 op2));

  format %{ "cmnw  $op1, $op2\t# overflow check int" %}
  ins_cost(INSN_COST);
  ins_encode %{
    __ cmnw($op1$$Register, $op2$$constant);
  %}

  ins_pipe(icmp_reg_imm);
%}

instruct overflowAddL_reg_reg(rFlagsReg cr, iRegL op1, iRegL op2)
%{
  match(Set cr (OverflowAddL op1 op2));

  format %{ "cmn   $op1, $op2\t# overflow check long" %}
  ins_cost(INSN_COST);
  ins_encode %{
    __ cmn($op1$$Register, $op2$$Register);
  %}

  ins_pipe(icmp_reg_reg);
%}

instruct overflowAddL_reg_imm(rFlagsReg cr, iRegL op1, immLAddSub op2)
%{
  match(Set cr (OverflowAddL op1 op2));

  format %{ "cmn   $op1, $op2\t# overflow check long" %}
  ins_cost(INSN_COST);
  ins_encode %{
    __ cmn($op1$$Register, $op2$$constant);
  %}

  ins_pipe(icmp_reg_imm);
%}

instruct overflowSubI_reg_reg(rFlagsReg cr, iRegIorL2I op1, iRegIorL2I op2)
%{
  match(Set cr (OverflowSubI op1 op2));

  format %{ "cmpw  $op1, $op2\t# overflow check int" %}
  ins_cost(INSN_COST);
  ins_encode %{
    __ cmpw($op1$$Register, $op2$$Register);
  %}

  ins_pipe(icmp_reg_reg);
%}

instruct overflowSubI_reg_imm(rFlagsReg cr, iRegIorL2I op1, immIAddSub op2)
%{
  match(Set cr (OverflowSubI op1 op2));

  format %{ "cmpw  $op1, $op2\t# overflow check int" %}
  ins_cost(INSN_COST);
  ins_encode %{
    __ cmpw($op1$$Register, $op2$$constant);
  %}

  ins_pipe(icmp_reg_imm);
%}

instruct overflowSubL_reg_reg(rFlagsReg cr, iRegL op1, iRegL op2)
%{
  match(Set cr (OverflowSubL op1 op2));

  format %{ "cmp   $op1, $op2\t# overflow check long" %}
  ins_cost(INSN_COST);
  ins_encode %{
    __ cmp($op1$$Register, $op2$$Register);
  %}

  ins_pipe(icmp_reg_reg);
%}

instruct overflowSubL_reg_imm(rFlagsReg cr, iRegL op1, immLAddSub op2)
%{
  match(Set cr (OverflowSubL op1 op2));

  format %{ "cmp   $op1, $op2\t# overflow check long" %}
  ins_cost(INSN_COST);
  ins_encode %{
    __ cmp($op1$$Register, $op2$$constant);
  %}

  ins_pipe(icmp_reg_imm);
%}

// Negation overflow: 0 - op1, flags only.
instruct overflowNegI_reg(rFlagsReg cr, immI0 zero, iRegIorL2I op1)
%{
  match(Set cr (OverflowSubI zero op1));

  format %{ "cmpw  zr, $op1\t# overflow check int" %}
  ins_cost(INSN_COST);
  ins_encode %{
    __ cmpw(zr, $op1$$Register);
  %}

  ins_pipe(icmp_reg_imm);
%}

// NOTE(review): the zero operand is immI0 although the matched node is the
// long OverflowSubL -- verify an immL0 operand was not intended here.
instruct overflowNegL_reg(rFlagsReg cr, immI0 zero, iRegL op1)
%{
  match(Set cr (OverflowSubL zero op1));

  format %{ "cmp   zr, $op1\t# overflow check long" %}
  ins_cost(INSN_COST);
  ins_encode %{
    __ cmp(zr, $op1$$Register);
  %}

  ins_pipe(icmp_reg_imm);
%}
12439 
// Multiply overflow check, int: compute the full 64-bit product, then test
// whether it equals its own 32-bit sign extension; finally synthesize the V
// flag (the only flag C2's overflow test reads) via the 0x80000000 - 1 trick.
instruct overflowMulI_reg(rFlagsReg cr, iRegIorL2I op1, iRegIorL2I op2)
%{
  match(Set cr (OverflowMulI op1 op2));

  format %{ "smull rscratch1, $op1, $op2\t# overflow check int\n\t"
            "cmp   rscratch1, rscratch1, sxtw\n\t"
            "movw  rscratch1, #0x80000000\n\t"
            "cselw rscratch1, rscratch1, zr, NE\n\t"
            "cmpw  rscratch1, #1" %}
  ins_cost(5 * INSN_COST);
  ins_encode %{
    __ smull(rscratch1, $op1$$Register, $op2$$Register);
    __ subs(zr, rscratch1, rscratch1, ext::sxtw);      // NE => overflow
    __ movw(rscratch1, 0x80000000);                    // Develop 0 (EQ),
    __ cselw(rscratch1, rscratch1, zr, Assembler::NE); // or 0x80000000 (NE)
    __ cmpw(rscratch1, 1);                             // 0x80000000 - 1 => VS
  %}

  ins_pipe(pipe_slow);
%}

// Fused form when the overflow test directly feeds a branch: skip the V-flag
// synthesis and branch on NE/EQ instead (predicate restricts to overflow /
// no_overflow tests, for which that mapping is valid).
instruct overflowMulI_reg_branch(cmpOp cmp, iRegIorL2I op1, iRegIorL2I op2, label labl, rFlagsReg cr)
%{
  match(If cmp (OverflowMulI op1 op2));
  predicate(n->in(1)->as_Bool()->_test._test == BoolTest::overflow
            || n->in(1)->as_Bool()->_test._test == BoolTest::no_overflow);
  effect(USE labl, KILL cr);

  format %{ "smull rscratch1, $op1, $op2\t# overflow check int\n\t"
            "cmp   rscratch1, rscratch1, sxtw\n\t"
            "b$cmp   $labl" %}
  ins_cost(3 * INSN_COST); // Branch is rare so treat as INSN_COST
  ins_encode %{
    Label* L = $labl$$label;
    Assembler::Condition cond = (Assembler::Condition)$cmp$$cmpcode;
    __ smull(rscratch1, $op1$$Register, $op2$$Register);
    __ subs(zr, rscratch1, rscratch1, ext::sxtw);      // NE => overflow
    __ br(cond == Assembler::VS ? Assembler::NE : Assembler::EQ, *L);
  %}

  ins_pipe(pipe_serial);
%}

// Multiply overflow check, long: the 128-bit product overflows iff its high
// half (smulh) is not the sign extension of its low half (mul).
instruct overflowMulL_reg(rFlagsReg cr, iRegL op1, iRegL op2)
%{
  match(Set cr (OverflowMulL op1 op2));

  format %{ "mul   rscratch1, $op1, $op2\t#overflow check long\n\t"
            "smulh rscratch2, $op1, $op2\n\t"
            "cmp   rscratch2, rscratch1, ASR #63\n\t"
            "movw  rscratch1, #0x80000000\n\t"
            "cselw rscratch1, rscratch1, zr, NE\n\t"
            "cmpw  rscratch1, #1" %}
  ins_cost(6 * INSN_COST);
  ins_encode %{
    __ mul(rscratch1, $op1$$Register, $op2$$Register);   // Result bits 0..63
    __ smulh(rscratch2, $op1$$Register, $op2$$Register); // Result bits 64..127
    __ cmp(rscratch2, rscratch1, Assembler::ASR, 63);    // Top is pure sign ext
    __ movw(rscratch1, 0x80000000);                    // Develop 0 (EQ),
    __ cselw(rscratch1, rscratch1, zr, Assembler::NE); // or 0x80000000 (NE)
    __ cmpw(rscratch1, 1);                             // 0x80000000 - 1 => VS
  %}

  ins_pipe(pipe_slow);
%}

// Fused long-multiply overflow test + branch (see int variant above).
instruct overflowMulL_reg_branch(cmpOp cmp, iRegL op1, iRegL op2, label labl, rFlagsReg cr)
%{
  match(If cmp (OverflowMulL op1 op2));
  predicate(n->in(1)->as_Bool()->_test._test == BoolTest::overflow
            || n->in(1)->as_Bool()->_test._test == BoolTest::no_overflow);
  effect(USE labl, KILL cr);

  format %{ "mul   rscratch1, $op1, $op2\t#overflow check long\n\t"
            "smulh rscratch2, $op1, $op2\n\t"
            "cmp   rscratch2, rscratch1, ASR #63\n\t"
            "b$cmp $labl" %}
  ins_cost(4 * INSN_COST); // Branch is rare so treat as INSN_COST
  ins_encode %{
    Label* L = $labl$$label;
    Assembler::Condition cond = (Assembler::Condition)$cmp$$cmpcode;
    __ mul(rscratch1, $op1$$Register, $op2$$Register);   // Result bits 0..63
    __ smulh(rscratch2, $op1$$Register, $op2$$Register); // Result bits 64..127
    __ cmp(rscratch2, rscratch1, Assembler::ASR, 63);    // Top is pure sign ext
    __ br(cond == Assembler::VS ? Assembler::NE : Assembler::EQ, *L);
  %}

  ins_pipe(pipe_serial);
%}
12529 
12530 // ============================================================================
12531 // Compare Instructions
12532 
// Signed int compares: reg-reg, reg-zero, add/sub-encodable immediate, and
// (costlier) arbitrary immediate which needs the immediate materialized.

instruct compI_reg_reg(rFlagsReg cr, iRegI op1, iRegI op2)
%{
  match(Set cr (CmpI op1 op2));

  effect(DEF cr, USE op1, USE op2);

  ins_cost(INSN_COST);
  format %{ "cmpw  $op1, $op2" %}

  ins_encode(aarch64_enc_cmpw(op1, op2));

  ins_pipe(icmp_reg_reg);
%}

instruct compI_reg_immI0(rFlagsReg cr, iRegI op1, immI0 zero)
%{
  match(Set cr (CmpI op1 zero));

  effect(DEF cr, USE op1);

  ins_cost(INSN_COST);
  format %{ "cmpw $op1, 0" %}

  ins_encode(aarch64_enc_cmpw_imm_addsub(op1, zero));

  ins_pipe(icmp_reg_imm);
%}

instruct compI_reg_immIAddSub(rFlagsReg cr, iRegI op1, immIAddSub op2)
%{
  match(Set cr (CmpI op1 op2));

  effect(DEF cr, USE op1);

  ins_cost(INSN_COST);
  format %{ "cmpw  $op1, $op2" %}

  ins_encode(aarch64_enc_cmpw_imm_addsub(op1, op2));

  ins_pipe(icmp_reg_imm);
%}

instruct compI_reg_immI(rFlagsReg cr, iRegI op1, immI op2)
%{
  match(Set cr (CmpI op1 op2));

  effect(DEF cr, USE op1);

  ins_cost(INSN_COST * 2);
  format %{ "cmpw  $op1, $op2" %}

  ins_encode(aarch64_enc_cmpw_imm(op1, op2));

  ins_pipe(icmp_reg_imm);
%}
12588 
12589 // Unsigned compare Instructions; really, same as signed compare
12590 // except it should only be used to feed an If or a CMovI which takes a
12591 // cmpOpU.
12592 
// Unsigned int compares: same cmpw encodings as the signed forms, but
// produce rFlagsRegU so only unsigned condition codes are consumed.

instruct compU_reg_reg(rFlagsRegU cr, iRegI op1, iRegI op2)
%{
  match(Set cr (CmpU op1 op2));

  effect(DEF cr, USE op1, USE op2);

  ins_cost(INSN_COST);
  format %{ "cmpw  $op1, $op2\t# unsigned" %}

  ins_encode(aarch64_enc_cmpw(op1, op2));

  ins_pipe(icmp_reg_reg);
%}

instruct compU_reg_immI0(rFlagsRegU cr, iRegI op1, immI0 zero)
%{
  match(Set cr (CmpU op1 zero));

  effect(DEF cr, USE op1);

  ins_cost(INSN_COST);
  format %{ "cmpw $op1, #0\t# unsigned" %}

  ins_encode(aarch64_enc_cmpw_imm_addsub(op1, zero));

  ins_pipe(icmp_reg_imm);
%}

instruct compU_reg_immIAddSub(rFlagsRegU cr, iRegI op1, immIAddSub op2)
%{
  match(Set cr (CmpU op1 op2));

  effect(DEF cr, USE op1);

  ins_cost(INSN_COST);
  format %{ "cmpw  $op1, $op2\t# unsigned" %}

  ins_encode(aarch64_enc_cmpw_imm_addsub(op1, op2));

  ins_pipe(icmp_reg_imm);
%}

instruct compU_reg_immI(rFlagsRegU cr, iRegI op1, immI op2)
%{
  match(Set cr (CmpU op1 op2));

  effect(DEF cr, USE op1);

  ins_cost(INSN_COST * 2);
  format %{ "cmpw  $op1, $op2\t# unsigned" %}

  ins_encode(aarch64_enc_cmpw_imm(op1, op2));

  ins_pipe(icmp_reg_imm);
%}
12648 
// Signed long compares (64-bit cmp).

instruct compL_reg_reg(rFlagsReg cr, iRegL op1, iRegL op2)
%{
  match(Set cr (CmpL op1 op2));

  effect(DEF cr, USE op1, USE op2);

  ins_cost(INSN_COST);
  format %{ "cmp  $op1, $op2" %}

  ins_encode(aarch64_enc_cmp(op1, op2));

  ins_pipe(icmp_reg_reg);
%}

// NOTE(review): format prints "tst" though the encoder used is the
// cmp-with-addsub-immediate one -- confirm the intended printed mnemonic.
instruct compL_reg_immL0(rFlagsReg cr, iRegL op1, immL0 zero)
%{
  match(Set cr (CmpL op1 zero));

  effect(DEF cr, USE op1);

  ins_cost(INSN_COST);
  format %{ "tst  $op1" %}

  ins_encode(aarch64_enc_cmp_imm_addsub(op1, zero));

  ins_pipe(icmp_reg_imm);
%}

instruct compL_reg_immLAddSub(rFlagsReg cr, iRegL op1, immLAddSub op2)
%{
  match(Set cr (CmpL op1 op2));

  effect(DEF cr, USE op1);

  ins_cost(INSN_COST);
  format %{ "cmp  $op1, $op2" %}

  ins_encode(aarch64_enc_cmp_imm_addsub(op1, op2));

  ins_pipe(icmp_reg_imm);
%}

instruct compL_reg_immL(rFlagsReg cr, iRegL op1, immL op2)
%{
  match(Set cr (CmpL op1 op2));

  effect(DEF cr, USE op1);

  ins_cost(INSN_COST * 2);
  format %{ "cmp  $op1, $op2" %}

  ins_encode(aarch64_enc_cmp_imm(op1, op2));

  ins_pipe(icmp_reg_imm);
%}
12704 
// Unsigned long compares: same 64-bit cmp encodings, rFlagsRegU result.

instruct compUL_reg_reg(rFlagsRegU cr, iRegL op1, iRegL op2)
%{
  match(Set cr (CmpUL op1 op2));

  effect(DEF cr, USE op1, USE op2);

  ins_cost(INSN_COST);
  format %{ "cmp  $op1, $op2" %}

  ins_encode(aarch64_enc_cmp(op1, op2));

  ins_pipe(icmp_reg_reg);
%}

// NOTE(review): format prints "tst" though the encoder used is the
// cmp-with-addsub-immediate one -- see compL_reg_immL0.
instruct compUL_reg_immL0(rFlagsRegU cr, iRegL op1, immL0 zero)
%{
  match(Set cr (CmpUL op1 zero));

  effect(DEF cr, USE op1);

  ins_cost(INSN_COST);
  format %{ "tst  $op1" %}

  ins_encode(aarch64_enc_cmp_imm_addsub(op1, zero));

  ins_pipe(icmp_reg_imm);
%}

instruct compUL_reg_immLAddSub(rFlagsRegU cr, iRegL op1, immLAddSub op2)
%{
  match(Set cr (CmpUL op1 op2));

  effect(DEF cr, USE op1);

  ins_cost(INSN_COST);
  format %{ "cmp  $op1, $op2" %}

  ins_encode(aarch64_enc_cmp_imm_addsub(op1, op2));

  ins_pipe(icmp_reg_imm);
%}

instruct compUL_reg_immL(rFlagsRegU cr, iRegL op1, immL op2)
%{
  match(Set cr (CmpUL op1 op2));

  effect(DEF cr, USE op1);

  ins_cost(INSN_COST * 2);
  format %{ "cmp  $op1, $op2" %}

  ins_encode(aarch64_enc_cmp_imm(op1, op2));

  ins_pipe(icmp_reg_imm);
%}
12760 
// Pointer and compressed-pointer compares, plus null tests.

instruct compP_reg_reg(rFlagsRegU cr, iRegP op1, iRegP op2)
%{
  match(Set cr (CmpP op1 op2));

  effect(DEF cr, USE op1, USE op2);

  ins_cost(INSN_COST);
  format %{ "cmp  $op1, $op2\t // ptr" %}

  ins_encode(aarch64_enc_cmpp(op1, op2));

  ins_pipe(icmp_reg_reg);
%}

instruct compN_reg_reg(rFlagsRegU cr, iRegN op1, iRegN op2)
%{
  match(Set cr (CmpN op1 op2));

  effect(DEF cr, USE op1, USE op2);

  ins_cost(INSN_COST);
  format %{ "cmp  $op1, $op2\t // compressed ptr" %}

  ins_encode(aarch64_enc_cmpn(op1, op2));

  ins_pipe(icmp_reg_reg);
%}

// Null check of a pointer register.
instruct testP_reg(rFlagsRegU cr, iRegP op1, immP0 zero)
%{
  match(Set cr (CmpP op1 zero));

  effect(DEF cr, USE op1, USE zero);

  ins_cost(INSN_COST);
  format %{ "cmp  $op1, 0\t // ptr" %}

  ins_encode(aarch64_enc_testp(op1));

  ins_pipe(icmp_reg_imm);
%}

// Null check of a compressed pointer register.
instruct testN_reg(rFlagsRegU cr, iRegN op1, immN0 zero)
%{
  match(Set cr (CmpN op1 zero));

  effect(DEF cr, USE op1, USE zero);

  ins_cost(INSN_COST);
  format %{ "cmp  $op1, 0\t // compressed ptr" %}

  ins_encode(aarch64_enc_testn(op1));

  ins_pipe(icmp_reg_imm);
%}
12816 
12817 // FP comparisons
12818 //
12819 // n.b. CmpF/CmpD set a normal flags reg which then gets compared
12820 // using normal cmpOp. See declaration of rFlagsReg for details.
12821 
// Float compare: sets the normal flags register (see CmpF/CmpD note above).
instruct compF_reg_reg(rFlagsReg cr, vRegF src1, vRegF src2)
%{
  match(Set cr (CmpF src1 src2));

  ins_cost(3 * INSN_COST);
  format %{ "fcmps $src1, $src2" %}

  ins_encode %{
    __ fcmps(as_FloatRegister($src1$$reg), as_FloatRegister($src2$$reg));
  %}

  ins_pipe(pipe_class_compare);
%}

// Float compare against literal 0.0 (fcmps zero form).
instruct compF_reg_zero(rFlagsReg cr, vRegF src1, immF0 src2)
%{
  match(Set cr (CmpF src1 src2));

  ins_cost(3 * INSN_COST);
  format %{ "fcmps $src1, 0.0" %}

  ins_encode %{
    __ fcmps(as_FloatRegister($src1$$reg), 0.0);
  %}

  ins_pipe(pipe_class_compare);
%}
// FROM HERE

// Double compare.
instruct compD_reg_reg(rFlagsReg cr, vRegD src1, vRegD src2)
%{
  match(Set cr (CmpD src1 src2));

  ins_cost(3 * INSN_COST);
  format %{ "fcmpd $src1, $src2" %}

  ins_encode %{
    __ fcmpd(as_FloatRegister($src1$$reg), as_FloatRegister($src2$$reg));
  %}

  ins_pipe(pipe_class_compare);
%}

// Double compare against literal 0.0.
instruct compD_reg_zero(rFlagsReg cr, vRegD src1, immD0 src2)
%{
  match(Set cr (CmpD src1 src2));

  ins_cost(3 * INSN_COST);
  format %{ "fcmpd $src1, 0.0" %}

  ins_encode %{
    __ fcmpd(as_FloatRegister($src1$$reg), 0.0);
  %}

  ins_pipe(pipe_class_compare);
%}
12878 
// Manifest a CmpF3 result in a general register:
//   dst = -1 if src1 < src2 or unordered, 0 if equal, +1 if greater.
instruct compF3_reg_reg(iRegINoSp dst, vRegF src1, vRegF src2, rFlagsReg cr)
%{
  match(Set dst (CmpF3 src1 src2));
  effect(KILL cr);

  ins_cost(5 * INSN_COST);
  format %{ "fcmps $src1, $src2\n\t"
            "csinvw($dst, zr, zr, eq)\n\t"
            "csnegw($dst, $dst, $dst, lt)"
  %}

  ins_encode %{
    FloatRegister s1 = as_FloatRegister($src1$$reg);
    FloatRegister s2 = as_FloatRegister($src2$$reg);
    Register d = as_Register($dst$$reg);
    __ fcmps(s1, s2);
    // installs 0 if EQ else -1
    __ csinvw(d, zr, zr, Assembler::EQ);
    // keeps -1 if less or unordered else installs 1
    __ csnegw(d, d, d, Assembler::LT);
  %}

  ins_pipe(pipe_class_default);

%}
12906 
// Manifest a CmpD3 result in a general register:
//   dst = -1 if src1 < src2 or unordered, 0 if equal, +1 if greater.
instruct compD3_reg_reg(iRegINoSp dst, vRegD src1, vRegD src2, rFlagsReg cr)
%{
  match(Set dst (CmpD3 src1 src2));
  effect(KILL cr);

  ins_cost(5 * INSN_COST);
  format %{ "fcmpd $src1, $src2\n\t"
            "csinvw($dst, zr, zr, eq)\n\t"
            "csnegw($dst, $dst, $dst, lt)"
  %}

  ins_encode %{
    FloatRegister s1 = as_FloatRegister($src1$$reg);
    FloatRegister s2 = as_FloatRegister($src2$$reg);
    Register d = as_Register($dst$$reg);
    __ fcmpd(s1, s2);
    // installs 0 if EQ else -1
    __ csinvw(d, zr, zr, Assembler::EQ);
    // keeps -1 if less or unordered else installs 1
    __ csnegw(d, d, d, Assembler::LT);
  %}
  ins_pipe(pipe_class_default);

%}
12933 
// Manifest a CmpF3 result against the constant 0.0:
//   dst = -1 if src1 < 0.0 or unordered, 0 if equal, +1 if greater.
instruct compF3_reg_immF0(iRegINoSp dst, vRegF src1, immF0 zero, rFlagsReg cr)
%{
  match(Set dst (CmpF3 src1 zero));
  effect(KILL cr);

  ins_cost(5 * INSN_COST);
  format %{ "fcmps $src1, 0.0\n\t"
            "csinvw($dst, zr, zr, eq)\n\t"
            "csnegw($dst, $dst, $dst, lt)"
  %}

  ins_encode %{
    FloatRegister s1 = as_FloatRegister($src1$$reg);
    Register d = as_Register($dst$$reg);
    __ fcmps(s1, 0.0);
    // installs 0 if EQ else -1
    __ csinvw(d, zr, zr, Assembler::EQ);
    // keeps -1 if less or unordered else installs 1
    __ csnegw(d, d, d, Assembler::LT);
  %}

  ins_pipe(pipe_class_default);

%}
12960 
// Manifest a CmpD3 result against the constant 0.0:
//   dst = -1 if src1 < 0.0 or unordered, 0 if equal, +1 if greater.
instruct compD3_reg_immD0(iRegINoSp dst, vRegD src1, immD0 zero, rFlagsReg cr)
%{
  match(Set dst (CmpD3 src1 zero));
  effect(KILL cr);

  ins_cost(5 * INSN_COST);
  format %{ "fcmpd $src1, 0.0\n\t"
            "csinvw($dst, zr, zr, eq)\n\t"
            "csnegw($dst, $dst, $dst, lt)"
  %}

  ins_encode %{
    FloatRegister s1 = as_FloatRegister($src1$$reg);
    Register d = as_Register($dst$$reg);
    __ fcmpd(s1, 0.0);
    // installs 0 if EQ else -1
    __ csinvw(d, zr, zr, Assembler::EQ);
    // keeps -1 if less or unordered else installs 1
    __ csnegw(d, d, d, Assembler::LT);
  %}
  ins_pipe(pipe_class_default);

%}
12986 
// Manifest a CmpL result in an integer register.
// (src1 < src2) ? -1 : ((src1 > src2) ? 1 : 0)
instruct cmpL3_reg_reg(iRegINoSp dst, iRegL src1, iRegL src2, rFlagsReg flags)
%{
  match(Set dst (CmpL3 src1 src2));
  effect(KILL flags);

  ins_cost(INSN_COST * 6);
  format %{
      "cmp $src1, $src2\n\t"
      "csetw $dst, ne\n\t"
      "cnegw $dst, $dst, lt"
  %}
  ins_encode %{
    __ cmp($src1$$Register, $src2$$Register);
    // dst = (src1 != src2) ? 1 : 0
    __ csetw($dst$$Register, Assembler::NE);
    // dst = (src1 < src2) ? -dst : dst, yielding -1/0/+1
    __ cnegw($dst$$Register, $dst$$Register, Assembler::LT);
  %}

  ins_pipe(ialu_reg_reg);
%}
13009 
// CmpLTMask: dst = (p < q, signed) ? -1 : 0, materialized as
// csetw (0/1 on LT) followed by negation (subw from zr).
instruct cmpLTMask_reg_reg(iRegINoSp dst, iRegIorL2I p, iRegIorL2I q, rFlagsReg cr)
%{
  match(Set dst (CmpLTMask p q));
  effect(KILL cr);

  ins_cost(3 * INSN_COST);

  format %{ "cmpw $p, $q\t# cmpLTMask\n\t"
            "csetw $dst, lt\n\t"
            "subw $dst, zr, $dst"
  %}

  ins_encode %{
    __ cmpw(as_Register($p$$reg), as_Register($q$$reg));
    // dst = (p < q) ? 1 : 0
    __ csetw(as_Register($dst$$reg), Assembler::LT);
    // dst = -dst, i.e. -1 or 0
    __ subw(as_Register($dst$$reg), zr, as_Register($dst$$reg));
  %}

  ins_pipe(ialu_reg_reg);
%}

// CmpLTMask against zero: the sign mask is just an arithmetic shift
// right by 31, spreading the sign bit across the whole word.
instruct cmpLTMask_reg_zero(iRegINoSp dst, iRegIorL2I src, immI0 zero, rFlagsReg cr)
%{
  match(Set dst (CmpLTMask src zero));
  effect(KILL cr);

  ins_cost(INSN_COST);

  format %{ "asrw $dst, $src, #31\t# cmpLTMask0" %}

  ins_encode %{
    __ asrw(as_Register($dst$$reg), as_Register($src$$reg), 31);
  %}

  ins_pipe(ialu_reg_shift);
%}
13046 
13047 // ============================================================================
13048 // Max and Min
13049 
13050 instruct minI_rReg(iRegINoSp dst, iRegI src1, iRegI src2, rFlagsReg cr)
13051 %{
13052   match(Set dst (MinI src1 src2));
13053 
13054   effect(DEF dst, USE src1, USE src2, KILL cr);
13055   size(8);
13056 
13057   ins_cost(INSN_COST * 3);
13058   format %{
13059     "cmpw $src1 $src2\t signed int\n\t"
13060     "cselw $dst, $src1, $src2 lt\t"
13061   %}
13062 
13063   ins_encode %{
13064     __ cmpw(as_Register($src1$$reg),
13065             as_Register($src2$$reg));
13066     __ cselw(as_Register($dst$$reg),
13067              as_Register($src1$$reg),
13068              as_Register($src2$$reg),
13069              Assembler::LT);
13070   %}
13071 
13072   ins_pipe(ialu_reg_reg);
13073 %}
13074 // FROM HERE
13075 
13076 instruct maxI_rReg(iRegINoSp dst, iRegI src1, iRegI src2, rFlagsReg cr)
13077 %{
13078   match(Set dst (MaxI src1 src2));
13079 
13080   effect(DEF dst, USE src1, USE src2, KILL cr);
13081   size(8);
13082 
13083   ins_cost(INSN_COST * 3);
13084   format %{
13085     "cmpw $src1 $src2\t signed int\n\t"
13086     "cselw $dst, $src1, $src2 gt\t"
13087   %}
13088 
13089   ins_encode %{
13090     __ cmpw(as_Register($src1$$reg),
13091             as_Register($src2$$reg));
13092     __ cselw(as_Register($dst$$reg),
13093              as_Register($src1$$reg),
13094              as_Register($src2$$reg),
13095              Assembler::GT);
13096   %}
13097 
13098   ins_pipe(ialu_reg_reg);
13099 %}
13100 
13101 // ============================================================================
13102 // Branch Instructions
13103 
// Direct Branch.  Unconditional PC-relative branch to $lbl.
instruct branch(label lbl)
%{
  match(Goto);

  effect(USE lbl);

  ins_cost(BRANCH_COST);
  format %{ "b  $lbl" %}

  ins_encode(aarch64_enc_b(lbl));

  ins_pipe(pipe_branch);
%}

// Conditional Near Branch, signed conditions (cmpOp/rFlagsReg).
instruct branchCon(cmpOp cmp, rFlagsReg cr, label lbl)
%{
  // Same match rule as `branchConFar'.
  match(If cmp cr);

  effect(USE lbl);

  ins_cost(BRANCH_COST);
  // If set to 1 this indicates that the current instruction is a
  // short variant of a long branch. This avoids using this
  // instruction in first-pass matching. It will then only be used in
  // the `Shorten_branches' pass.
  // ins_short_branch(1);
  format %{ "b$cmp  $lbl" %}

  ins_encode(aarch64_enc_br_con(cmp, lbl));

  ins_pipe(pipe_branch_cond);
%}

// Conditional Near Branch Unsigned, unsigned conditions
// (cmpOpU/rFlagsRegU).
instruct branchConU(cmpOpU cmp, rFlagsRegU cr, label lbl)
%{
  // Same match rule as `branchConFar'.
  match(If cmp cr);

  effect(USE lbl);

  ins_cost(BRANCH_COST);
  // If set to 1 this indicates that the current instruction is a
  // short variant of a long branch. This avoids using this
  // instruction in first-pass matching. It will then only be used in
  // the `Shorten_branches' pass.
  // ins_short_branch(1);
  format %{ "b$cmp  $lbl\t# unsigned" %}

  ins_encode(aarch64_enc_br_conU(cmp, lbl));

  ins_pipe(pipe_branch_cond);
%}
13160 
// Make use of CBZ and CBNZ.  These instructions, as well as being
// shorter than (cmp; branch), have the additional benefit of not
// killing the flags.

// Compare a 32-bit int against zero and branch; only eq/ne tests are
// matched, encoded as cbzw/cbnzw.
instruct cmpI_imm0_branch(cmpOp cmp, iRegIorL2I op1, immI0 op2, label labl, rFlagsReg cr) %{
  match(If cmp (CmpI op1 op2));
  predicate(n->in(1)->as_Bool()->_test._test == BoolTest::ne
            || n->in(1)->as_Bool()->_test._test == BoolTest::eq);
  effect(USE labl);

  ins_cost(BRANCH_COST);
  format %{ "cbw$cmp   $op1, $labl" %}
  ins_encode %{
    Label* L = $labl$$label;
    Assembler::Condition cond = (Assembler::Condition)$cmp$$cmpcode;
    if (cond == Assembler::EQ)
      __ cbzw($op1$$Register, *L);
    else
      __ cbnzw($op1$$Register, *L);
  %}
  ins_pipe(pipe_cmp_branch);
%}

// Compare a long against zero and branch; eq/ne only, encoded as the
// 64-bit cbz/cbnz.
instruct cmpL_imm0_branch(cmpOp cmp, iRegL op1, immL0 op2, label labl, rFlagsReg cr) %{
  match(If cmp (CmpL op1 op2));
  predicate(n->in(1)->as_Bool()->_test._test == BoolTest::ne
            || n->in(1)->as_Bool()->_test._test == BoolTest::eq);
  effect(USE labl);

  ins_cost(BRANCH_COST);
  format %{ "cb$cmp   $op1, $labl" %}
  ins_encode %{
    Label* L = $labl$$label;
    Assembler::Condition cond = (Assembler::Condition)$cmp$$cmpcode;
    if (cond == Assembler::EQ)
      __ cbz($op1$$Register, *L);
    else
      __ cbnz($op1$$Register, *L);
  %}
  ins_pipe(pipe_cmp_branch);
%}

// Compare a pointer against NULL and branch; eq/ne only, encoded as
// the 64-bit cbz/cbnz (pointer null check).
instruct cmpP_imm0_branch(cmpOp cmp, iRegP op1, immP0 op2, label labl, rFlagsReg cr) %{
  match(If cmp (CmpP op1 op2));
  predicate(n->in(1)->as_Bool()->_test._test == BoolTest::ne
            || n->in(1)->as_Bool()->_test._test == BoolTest::eq);
  effect(USE labl);

  ins_cost(BRANCH_COST);
  format %{ "cb$cmp   $op1, $labl" %}
  ins_encode %{
    Label* L = $labl$$label;
    Assembler::Condition cond = (Assembler::Condition)$cmp$$cmpcode;
    if (cond == Assembler::EQ)
      __ cbz($op1$$Register, *L);
    else
      __ cbnz($op1$$Register, *L);
  %}
  ins_pipe(pipe_cmp_branch);
%}

// Compare a compressed oop against zero and branch; eq/ne only,
// encoded as the 32-bit cbzw/cbnzw.
instruct cmpN_imm0_branch(cmpOp cmp, iRegN op1, immN0 op2, label labl, rFlagsReg cr) %{
  match(If cmp (CmpN op1 op2));
  predicate(n->in(1)->as_Bool()->_test._test == BoolTest::ne
            || n->in(1)->as_Bool()->_test._test == BoolTest::eq);
  effect(USE labl);

  ins_cost(BRANCH_COST);
  format %{ "cbw$cmp   $op1, $labl" %}
  ins_encode %{
    Label* L = $labl$$label;
    Assembler::Condition cond = (Assembler::Condition)$cmp$$cmpcode;
    if (cond == Assembler::EQ)
      __ cbzw($op1$$Register, *L);
    else
      __ cbnzw($op1$$Register, *L);
  %}
  ins_pipe(pipe_cmp_branch);
%}

// Null check of a narrow oop being decoded: test the still-encoded
// register directly (a narrow oop is zero iff its decoded form is
// NULL), avoiding the decode.
instruct cmpP_narrowOop_imm0_branch(cmpOp cmp, iRegN oop, immP0 zero, label labl, rFlagsReg cr) %{
  match(If cmp (CmpP (DecodeN oop) zero));
  predicate(n->in(1)->as_Bool()->_test._test == BoolTest::ne
            || n->in(1)->as_Bool()->_test._test == BoolTest::eq);
  effect(USE labl);

  ins_cost(BRANCH_COST);
  format %{ "cb$cmp   $oop, $labl" %}
  ins_encode %{
    Label* L = $labl$$label;
    Assembler::Condition cond = (Assembler::Condition)$cmp$$cmpcode;
    if (cond == Assembler::EQ)
      __ cbzw($oop$$Register, *L);
    else
      __ cbnzw($oop$$Register, *L);
  %}
  ins_pipe(pipe_cmp_branch);
%}
13259 
// Unsigned compare of a 32-bit int against zero, branching with
// cbzw/cbnzw.  Unsigned 'le 0' is equivalent to '== 0' and unsigned
// 'gt 0' to '!= 0', so gt/le are accepted alongside eq/ne.
instruct cmpUI_imm0_branch(cmpOpU cmp, iRegIorL2I op1, immI0 op2, label labl, rFlagsRegU cr) %{
  match(If cmp (CmpU op1 op2));
  predicate(n->in(1)->as_Bool()->_test._test == BoolTest::ne
            || n->in(1)->as_Bool()->_test._test == BoolTest::eq
            || n->in(1)->as_Bool()->_test._test == BoolTest::gt
            || n->in(1)->as_Bool()->_test._test == BoolTest::le);
  effect(USE labl);

  ins_cost(BRANCH_COST);
  format %{ "cbw$cmp   $op1, $labl" %}
  ins_encode %{
    Label* L = $labl$$label;
    Assembler::Condition cond = (Assembler::Condition)$cmp$$cmpcode;
    // EQ and LS (unsigned <=) against zero both mean op1 == 0.
    if (cond == Assembler::EQ || cond == Assembler::LS)
      __ cbzw($op1$$Register, *L);
    else
      __ cbnzw($op1$$Register, *L);
  %}
  ins_pipe(pipe_cmp_branch);
%}
13280 
// Unsigned compare of a long against zero, branching with cbz/cbnz.
// As with cmpUI_imm0_branch, unsigned gt/le against zero reduce to
// ne/eq respectively.
instruct cmpUL_imm0_branch(cmpOpU cmp, iRegL op1, immL0 op2, label labl, rFlagsRegU cr) %{
  match(If cmp (CmpUL op1 op2));
  predicate(n->in(1)->as_Bool()->_test._test == BoolTest::ne
            || n->in(1)->as_Bool()->_test._test == BoolTest::eq
            || n->in(1)->as_Bool()->_test._test == BoolTest::gt
            || n->in(1)->as_Bool()->_test._test == BoolTest::le);
  effect(USE labl);

  ins_cost(BRANCH_COST);
  format %{ "cb$cmp   $op1, $labl" %}
  ins_encode %{
    Label* L = $labl$$label;
    Assembler::Condition cond = (Assembler::Condition)$cmp$$cmpcode;
    // EQ and LS (unsigned <=) against zero both mean op1 == 0.
    if (cond == Assembler::EQ || cond == Assembler::LS)
      __ cbz($op1$$Register, *L);
    else
      __ cbnz($op1$$Register, *L);
  %}
  ins_pipe(pipe_cmp_branch);
%}
13301 
// Test bit and Branch

// Patterns for short (< 32KiB) variants

// Signed lt/ge test of a long against zero is just a test of the sign
// bit (bit 63): lt maps to tbnz (bit set), ge to tbz (bit clear).
instruct cmpL_branch_sign(cmpOp cmp, iRegL op1, immL0 op2, label labl) %{
  match(If cmp (CmpL op1 op2));
  predicate(n->in(1)->as_Bool()->_test._test == BoolTest::lt
            || n->in(1)->as_Bool()->_test._test == BoolTest::ge);
  effect(USE labl);

  ins_cost(BRANCH_COST);
  format %{ "cb$cmp   $op1, $labl # long" %}
  ins_encode %{
    Label* L = $labl$$label;
    Assembler::Condition cond =
      ((Assembler::Condition)$cmp$$cmpcode == Assembler::LT) ? Assembler::NE : Assembler::EQ;
    __ tbr(cond, $op1$$Register, 63, *L);
  %}
  ins_pipe(pipe_cmp_branch);
  ins_short_branch(1);
%}

// Signed lt/ge test of an int against zero: test the sign bit
// (bit 31) with tbnz/tbz.
instruct cmpI_branch_sign(cmpOp cmp, iRegIorL2I op1, immI0 op2, label labl) %{
  match(If cmp (CmpI op1 op2));
  predicate(n->in(1)->as_Bool()->_test._test == BoolTest::lt
            || n->in(1)->as_Bool()->_test._test == BoolTest::ge);
  effect(USE labl);

  ins_cost(BRANCH_COST);
  format %{ "cb$cmp   $op1, $labl # int" %}
  ins_encode %{
    Label* L = $labl$$label;
    Assembler::Condition cond =
      ((Assembler::Condition)$cmp$$cmpcode == Assembler::LT) ? Assembler::NE : Assembler::EQ;
    __ tbr(cond, $op1$$Register, 31, *L);
  %}
  ins_pipe(pipe_cmp_branch);
  ins_short_branch(1);
%}

// (x & single-bit-mask) ==/!= 0 on a long becomes a single-bit test
// and branch; the predicate requires the AND mask to be a power of 2.
instruct cmpL_branch_bit(cmpOp cmp, iRegL op1, immL op2, immL0 op3, label labl) %{
  match(If cmp (CmpL (AndL op1 op2) op3));
  predicate((n->in(1)->as_Bool()->_test._test == BoolTest::ne
            || n->in(1)->as_Bool()->_test._test == BoolTest::eq)
            && is_power_of_2(n->in(2)->in(1)->in(2)->get_long()));
  effect(USE labl);

  ins_cost(BRANCH_COST);
  format %{ "tb$cmp   $op1, $op2, $labl" %}
  ins_encode %{
    Label* L = $labl$$label;
    Assembler::Condition cond = (Assembler::Condition)$cmp$$cmpcode;
    int bit = exact_log2($op2$$constant);
    __ tbr(cond, $op1$$Register, bit, *L);
  %}
  ins_pipe(pipe_cmp_branch);
  ins_short_branch(1);
%}

// Same as cmpL_branch_bit but for 32-bit ints.
instruct cmpI_branch_bit(cmpOp cmp, iRegIorL2I op1, immI op2, immI0 op3, label labl) %{
  match(If cmp (CmpI (AndI op1 op2) op3));
  predicate((n->in(1)->as_Bool()->_test._test == BoolTest::ne
            || n->in(1)->as_Bool()->_test._test == BoolTest::eq)
            && is_power_of_2(n->in(2)->in(1)->in(2)->get_int()));
  effect(USE labl);

  ins_cost(BRANCH_COST);
  format %{ "tb$cmp   $op1, $op2, $labl" %}
  ins_encode %{
    Label* L = $labl$$label;
    Assembler::Condition cond = (Assembler::Condition)$cmp$$cmpcode;
    int bit = exact_log2($op2$$constant);
    __ tbr(cond, $op1$$Register, bit, *L);
  %}
  ins_pipe(pipe_cmp_branch);
  ins_short_branch(1);
%}
13378 
// And far variants: same patterns as above, but tbr is told to emit a
// far branch (no ins_short_branch), for targets beyond tbz/tbnz range.

// Far variant of cmpL_branch_sign: sign-bit (bit 63) test of a long.
instruct far_cmpL_branch_sign(cmpOp cmp, iRegL op1, immL0 op2, label labl) %{
  match(If cmp (CmpL op1 op2));
  predicate(n->in(1)->as_Bool()->_test._test == BoolTest::lt
            || n->in(1)->as_Bool()->_test._test == BoolTest::ge);
  effect(USE labl);

  ins_cost(BRANCH_COST);
  format %{ "cb$cmp   $op1, $labl # long" %}
  ins_encode %{
    Label* L = $labl$$label;
    Assembler::Condition cond =
      ((Assembler::Condition)$cmp$$cmpcode == Assembler::LT) ? Assembler::NE : Assembler::EQ;
    __ tbr(cond, $op1$$Register, 63, *L, /*far*/true);
  %}
  ins_pipe(pipe_cmp_branch);
%}

// Far variant of cmpI_branch_sign: sign-bit (bit 31) test of an int.
instruct far_cmpI_branch_sign(cmpOp cmp, iRegIorL2I op1, immI0 op2, label labl) %{
  match(If cmp (CmpI op1 op2));
  predicate(n->in(1)->as_Bool()->_test._test == BoolTest::lt
            || n->in(1)->as_Bool()->_test._test == BoolTest::ge);
  effect(USE labl);

  ins_cost(BRANCH_COST);
  format %{ "cb$cmp   $op1, $labl # int" %}
  ins_encode %{
    Label* L = $labl$$label;
    Assembler::Condition cond =
      ((Assembler::Condition)$cmp$$cmpcode == Assembler::LT) ? Assembler::NE : Assembler::EQ;
    __ tbr(cond, $op1$$Register, 31, *L, /*far*/true);
  %}
  ins_pipe(pipe_cmp_branch);
%}

// Far variant of cmpL_branch_bit: single-bit test of a long.
instruct far_cmpL_branch_bit(cmpOp cmp, iRegL op1, immL op2, immL0 op3, label labl) %{
  match(If cmp (CmpL (AndL op1 op2) op3));
  predicate((n->in(1)->as_Bool()->_test._test == BoolTest::ne
            || n->in(1)->as_Bool()->_test._test == BoolTest::eq)
            && is_power_of_2(n->in(2)->in(1)->in(2)->get_long()));
  effect(USE labl);

  ins_cost(BRANCH_COST);
  format %{ "tb$cmp   $op1, $op2, $labl" %}
  ins_encode %{
    Label* L = $labl$$label;
    Assembler::Condition cond = (Assembler::Condition)$cmp$$cmpcode;
    int bit = exact_log2($op2$$constant);
    __ tbr(cond, $op1$$Register, bit, *L, /*far*/true);
  %}
  ins_pipe(pipe_cmp_branch);
%}

// Far variant of cmpI_branch_bit: single-bit test of an int.
instruct far_cmpI_branch_bit(cmpOp cmp, iRegIorL2I op1, immI op2, immI0 op3, label labl) %{
  match(If cmp (CmpI (AndI op1 op2) op3));
  predicate((n->in(1)->as_Bool()->_test._test == BoolTest::ne
            || n->in(1)->as_Bool()->_test._test == BoolTest::eq)
            && is_power_of_2(n->in(2)->in(1)->in(2)->get_int()));
  effect(USE labl);

  ins_cost(BRANCH_COST);
  format %{ "tb$cmp   $op1, $op2, $labl" %}
  ins_encode %{
    Label* L = $labl$$label;
    Assembler::Condition cond = (Assembler::Condition)$cmp$$cmpcode;
    int bit = exact_log2($op2$$constant);
    __ tbr(cond, $op1$$Register, bit, *L, /*far*/true);
  %}
  ins_pipe(pipe_cmp_branch);
%}
13449 
// Test bits

// (x & imm) compared with 0 on a long: emit a single tst with the
// immediate; the predicate requires the mask to be encodable as an
// AArch64 64-bit logical immediate.
instruct cmpL_and(cmpOp cmp, iRegL op1, immL op2, immL0 op3, rFlagsReg cr) %{
  match(Set cr (CmpL (AndL op1 op2) op3));
  predicate(Assembler::operand_valid_for_logical_immediate
            (/*is_32*/false, n->in(1)->in(2)->get_long()));

  ins_cost(INSN_COST);
  format %{ "tst $op1, $op2 # long" %}
  ins_encode %{
    __ tst($op1$$Register, $op2$$constant);
  %}
  ins_pipe(ialu_reg_reg);
%}
13464 
// (x & imm) compared with 0 on an int: emit a single 32-bit tstw with
// the immediate; the predicate requires the mask to be encodable as
// an AArch64 32-bit logical immediate.
instruct cmpI_and(cmpOp cmp, iRegIorL2I op1, immI op2, immI0 op3, rFlagsReg cr) %{
  match(Set cr (CmpI (AndI op1 op2) op3));
  predicate(Assembler::operand_valid_for_logical_immediate
            (/*is_32*/true, n->in(1)->in(2)->get_int()));

  ins_cost(INSN_COST);
  format %{ "tstw $op1, $op2 # int" %}
  ins_encode %{
    __ tstw($op1$$Register, $op2$$constant);
  %}
  ins_pipe(ialu_reg_reg);
%}
13477 
// (x & y) compared with 0 on a long, register-register form.
instruct cmpL_and_reg(cmpOp cmp, iRegL op1, iRegL op2, immL0 op3, rFlagsReg cr) %{
  match(Set cr (CmpL (AndL op1 op2) op3));

  ins_cost(INSN_COST);
  format %{ "tst $op1, $op2 # long" %}
  ins_encode %{
    __ tst($op1$$Register, $op2$$Register);
  %}
  ins_pipe(ialu_reg_reg);
%}

// (x & y) compared with 0 on an int, register-register form (32-bit
// tstw).
instruct cmpI_and_reg(cmpOp cmp, iRegIorL2I op1, iRegIorL2I op2, immI0 op3, rFlagsReg cr) %{
  match(Set cr (CmpI (AndI op1 op2) op3));

  ins_cost(INSN_COST);
  format %{ "tstw $op1, $op2 # int" %}
  ins_encode %{
    __ tstw($op1$$Register, $op2$$Register);
  %}
  ins_pipe(ialu_reg_reg);
%}
13499 
13500 
13501 // Conditional Far Branch
13502 // Conditional Far Branch Unsigned
13503 // TODO: fixme
13504 
// counted loop end branch near: conditional back-branch at the end of
// a counted loop, signed conditions.
instruct branchLoopEnd(cmpOp cmp, rFlagsReg cr, label lbl)
%{
  match(CountedLoopEnd cmp cr);

  effect(USE lbl);

  ins_cost(BRANCH_COST);
  // short variant.
  // ins_short_branch(1);
  format %{ "b$cmp $lbl \t// counted loop end" %}

  ins_encode(aarch64_enc_br_con(cmp, lbl));

  ins_pipe(pipe_branch);
%}

// counted loop end branch near Unsigned: same as above for unsigned
// conditions (cmpOpU/rFlagsRegU).
instruct branchLoopEndU(cmpOpU cmp, rFlagsRegU cr, label lbl)
%{
  match(CountedLoopEnd cmp cr);

  effect(USE lbl);

  ins_cost(BRANCH_COST);
  // short variant.
  // ins_short_branch(1);
  format %{ "b$cmp $lbl \t// counted loop end unsigned" %}

  ins_encode(aarch64_enc_br_conU(cmp, lbl));

  ins_pipe(pipe_branch);
%}
13538 
13539 // counted loop end branch far
13540 // counted loop end branch far unsigned
13541 // TODO: fixme
13542 
13543 // ============================================================================
13544 // inlined locking and unlocking
13545 
13546 instruct cmpFastLock(rFlagsReg cr, iRegP object, iRegP box, iRegPNoSp tmp, iRegPNoSp tmp2)
13547 %{
13548   match(Set cr (FastLock object box));
13549   effect(TEMP tmp, TEMP tmp2);
13550 
13551   // TODO
13552   // identify correct cost
13553   ins_cost(5 * INSN_COST);
13554   format %{ "fastlock $object,$box\t! kills $tmp,$tmp2" %}
13555 
13556   ins_encode(aarch64_enc_fast_lock(object, box, tmp, tmp2));
13557 
13558   ins_pipe(pipe_serial);
13559 %}
13560 
13561 instruct cmpFastUnlock(rFlagsReg cr, iRegP object, iRegP box, iRegPNoSp tmp, iRegPNoSp tmp2)
13562 %{
13563   match(Set cr (FastUnlock object box));
13564   effect(TEMP tmp, TEMP tmp2);
13565 
13566   ins_cost(5 * INSN_COST);
13567   format %{ "fastunlock $object,$box\t! kills $tmp, $tmp2" %}
13568 
13569   ins_encode(aarch64_enc_fast_unlock(object, box, tmp, tmp2));
13570 
13571   ins_pipe(pipe_serial);
13572 %}
13573 
13574 
13575 // ============================================================================
13576 // Safepoint Instructions
13577 
13578 // TODO
13579 // provide a near and far version of this code
13580 
// Safepoint poll: load from the polling page held in $poll; the VM
// protects that page to trap threads at a safepoint.  The load's
// result is discarded (zr).
instruct safePoint(rFlagsReg cr, iRegP poll)
%{
  match(SafePoint poll);
  effect(KILL cr);

  format %{
    "ldrw zr, [$poll]\t# Safepoint: poll for GC"
  %}
  ins_encode %{
    __ read_polling_page(as_Register($poll$$reg), relocInfo::poll_type);
  %}
  ins_pipe(pipe_serial); // ins_pipe(iload_reg_mem);
%}
13594 
13595 
13596 // ============================================================================
13597 // Procedure Call/Return Instructions
13598 
// Call Java Static Instruction

// Direct static Java call; excluded for method-handle invokes, which
// are handled by CallStaticJavaDirectHandle below.
instruct CallStaticJavaDirect(method meth)
%{
  match(CallStaticJava);

  effect(USE meth);

  predicate(!((CallStaticJavaNode*)n)->is_method_handle_invoke());

  ins_cost(CALL_COST);

  format %{ "call,static $meth \t// ==> " %}

  ins_encode( aarch64_enc_java_static_call(meth),
              aarch64_enc_call_epilog );

  ins_pipe(pipe_class_call);
%}
13618 
13619 // TO HERE
13620 
// Call Java Static Instruction (method handle version)

// Static Java call for method-handle invokes.  reg_mh_save is an
// FP-based register operand; presumably it is saved/restored around
// the call by aarch64_enc_java_handle_call — confirm against the
// encoding's definition.
instruct CallStaticJavaDirectHandle(method meth, iRegP_FP reg_mh_save)
%{
  match(CallStaticJava);

  effect(USE meth);

  predicate(((CallStaticJavaNode*)n)->is_method_handle_invoke());

  ins_cost(CALL_COST);

  format %{ "call,static $meth \t// (methodhandle) ==> " %}

  ins_encode( aarch64_enc_java_handle_call(meth),
              aarch64_enc_call_epilog );

  ins_pipe(pipe_class_call);
%}
13640 
// Call Java Dynamic Instruction: virtual/interface dispatch through
// the inline cache.
instruct CallDynamicJavaDirect(method meth)
%{
  match(CallDynamicJava);

  effect(USE meth);

  ins_cost(CALL_COST);

  format %{ "CALL,dynamic $meth \t// ==> " %}

  ins_encode( aarch64_enc_java_dynamic_call(meth),
               aarch64_enc_call_epilog );

  ins_pipe(pipe_class_call);
%}
13657 
// Call Runtime Instruction

// Call into the VM runtime (not a Java method).
instruct CallRuntimeDirect(method meth)
%{
  match(CallRuntime);

  effect(USE meth);

  ins_cost(CALL_COST);

  format %{ "CALL, runtime $meth" %}

  ins_encode( aarch64_enc_java_to_runtime(meth) );

  ins_pipe(pipe_class_call);
%}

// Call Runtime Leaf Instruction

// Leaf runtime call: no safepoint/state transition handling needed by
// the callee.  Same encoding as CallRuntimeDirect.
instruct CallLeafDirect(method meth)
%{
  match(CallLeaf);

  effect(USE meth);

  ins_cost(CALL_COST);

  format %{ "CALL, runtime leaf $meth" %}

  ins_encode( aarch64_enc_java_to_runtime(meth) );

  ins_pipe(pipe_class_call);
%}

// Call Runtime Leaf (no FP) Instruction

// Leaf runtime call that does not use floating-point state.  Same
// encoding as CallRuntimeDirect.
instruct CallLeafNoFPDirect(method meth)
%{
  match(CallLeafNoFP);

  effect(USE meth);

  ins_cost(CALL_COST);

  format %{ "CALL, runtime leaf nofp $meth" %}

  ins_encode( aarch64_enc_java_to_runtime(meth) );

  ins_pipe(pipe_class_call);
%}
13708 
// Tail Call; Jump from runtime stub to Java code.
// Also known as an 'interprocedural jump'.
// Target of jump will eventually return to caller.
// TailJump below removes the return address.
instruct TailCalljmpInd(iRegPNoSp jump_target, inline_cache_RegP method_oop)
%{
  match(TailCall jump_target method_oop);

  ins_cost(CALL_COST);

  format %{ "br $jump_target\t# $method_oop holds method oop" %}

  ins_encode(aarch64_enc_tail_call(jump_target));

  ins_pipe(pipe_class_call);
%}

// Tail Jump: like TailCall but discards the return address; the
// exception oop is carried in r0 for the handler being jumped to.
instruct TailjmpInd(iRegPNoSp jump_target, iRegP_R0 ex_oop)
%{
  match(TailJump jump_target ex_oop);

  ins_cost(CALL_COST);

  format %{ "br $jump_target\t# $ex_oop holds exception oop" %}

  ins_encode(aarch64_enc_tail_jmp(jump_target));

  ins_pipe(pipe_class_call);
%}
13738 
// Create exception oop: created by stack-crawling runtime code.
// Created exception is now available to this handler, and is setup
// just prior to jumping to this handler. No code emitted.
// TODO check
// should ex_oop be in r0? intel uses rax, ppc cannot use r0 so uses rarg1
instruct CreateException(iRegP_R0 ex_oop)
%{
  match(Set ex_oop (CreateEx));

  format %{ " -- \t// exception oop; no code emitted" %}

  // Zero-size: purely informs the register allocator that the
  // exception oop lives in r0 here.
  size(0);

  ins_encode( /*empty*/ );

  ins_pipe(pipe_class_empty);
%}
13756 
// Rethrow exception: The exception oop will come in the first
// argument position. Then JUMP (not call) to the rethrow stub code.
instruct RethrowException() %{
  match(Rethrow);
  ins_cost(CALL_COST);

  format %{ "b rethrow_stub" %}

  ins_encode( aarch64_enc_rethrow() );

  ins_pipe(pipe_class_call);
%}
13769 
13770 
// Return Instruction
// epilog node loads ret address into lr as part of frame pop
instruct Ret()
%{
  match(Return);

  format %{ "ret\t// return register" %}

  ins_encode( aarch64_enc_ret() );

  ins_pipe(pipe_branch);
%}
13783 
// Die now.  Emits a breakpoint (brk #999) for paths the compiler has
// proven unreachable; hitting it indicates a compiler bug.
instruct ShouldNotReachHere() %{
  match(Halt);

  ins_cost(CALL_COST);
  format %{ "ShouldNotReachHere" %}

  ins_encode %{
    // TODO
    // implement proper trap call here
    __ brk(999);
  %}

  ins_pipe(pipe_class_default);
%}
13799 
13800 // ============================================================================
// Partial Subtype Check
//
// Scan the subklass's secondary superklass array for an instance of
// the superklass.  Set a hidden internal cache on a hit (the cache is
// checked with exposed code in gen_subtype_check()).  Return NZ for a
// miss or zero for a hit.  The encoding ALSO sets flags.
13807 
// Partial subtype check producing a result register (opcode 0x1
// forces the result to zero on a hit); the register operands are
// pinned to the specific registers the stub/encoding expects.
instruct partialSubtypeCheck(iRegP_R4 sub, iRegP_R0 super, iRegP_R2 temp, iRegP_R5 result, rFlagsReg cr)
%{
  match(Set result (PartialSubtypeCheck sub super));
  effect(KILL cr, KILL temp);

  ins_cost(1100);  // slightly larger than the next version
  format %{ "partialSubtypeCheck $result, $sub, $super" %}

  ins_encode(aarch64_enc_partial_subtype_check(sub, super, temp, result));

  opcode(0x1); // Force zero of result reg on hit

  ins_pipe(pipe_class_memory);
%}

// Variant matched when the check's result is only compared against
// zero: the flags from the encoding are used directly and the result
// register need not be zeroed (opcode 0x0).
instruct partialSubtypeCheckVsZero(iRegP_R4 sub, iRegP_R0 super, iRegP_R2 temp, iRegP_R5 result, immP0 zero, rFlagsReg cr)
%{
  match(Set cr (CmpP (PartialSubtypeCheck sub super) zero));
  effect(KILL temp, KILL result);

  ins_cost(1100);  // slightly larger than the next version
  format %{ "partialSubtypeCheck $result, $sub, $super == 0" %}

  ins_encode(aarch64_enc_partial_subtype_check(sub, super, temp, result));

  opcode(0x0); // Don't zero result reg on hit

  ins_pipe(pipe_class_memory);
%}
13837 
// String.compareTo intrinsic: delegates to the string_compare stub;
// operands are pinned to the registers the stub expects and are
// clobbered (USE_KILL) along with tmp1 and the flags.
instruct string_compare(iRegP_R1 str1, iRegI_R2 cnt1, iRegP_R3 str2, iRegI_R4 cnt2,
                        iRegI_R0 result, iRegP_R10 tmp1, rFlagsReg cr)
%{
  match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
  effect(KILL tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);

  format %{ "String Compare $str1,$cnt1,$str2,$cnt2 -> $result   # KILL $tmp1" %}
  ins_encode %{
    __ string_compare($str1$$Register, $str2$$Register,
                      $cnt1$$Register, $cnt2$$Register, $result$$Register,
                      $tmp1$$Register);
  %}
  ins_pipe(pipe_class_memory);
%}
13852 
13853 instruct string_indexof(iRegP_R1 str1, iRegI_R4 cnt1, iRegP_R3 str2, iRegI_R2 cnt2,
13854        iRegI_R0 result, iRegINoSp tmp1, iRegINoSp tmp2, iRegINoSp tmp3, iRegINoSp tmp4, rFlagsReg cr)
13855 %{
13856   match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 cnt2)));
13857   effect(USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2,
13858          TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, KILL cr);
13859   format %{ "String IndexOf $str1,$cnt1,$str2,$cnt2 -> $result" %}
13860 
13861   ins_encode %{
13862     __ string_indexof($str1$$Register, $str2$$Register,
13863                       $cnt1$$Register, $cnt2$$Register,
13864                       $tmp1$$Register, $tmp2$$Register,
13865                       $tmp3$$Register, $tmp4$$Register,
13866                       -1, $result$$Register);
13867   %}
13868   ins_pipe(pipe_class_memory);
13869 %}
13870 
13871 instruct string_indexof_con(iRegP_R1 str1, iRegI_R4 cnt1, iRegP_R3 str2,
13872                  immI_le_4 int_cnt2, iRegI_R0 result, iRegINoSp tmp1, iRegINoSp tmp2,
13873                  iRegINoSp tmp3, iRegINoSp tmp4, rFlagsReg cr)
13874 %{
13875   match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 int_cnt2)));
13876   effect(USE_KILL str1, USE_KILL str2, USE_KILL cnt1,
13877          TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, KILL cr);
13878   format %{ "String IndexOf $str1,$cnt1,$str2,$int_cnt2 -> $result" %}
13879 
13880   ins_encode %{
13881     int icnt2 = (int)$int_cnt2$$constant;
13882     __ string_indexof($str1$$Register, $str2$$Register,
13883                       $cnt1$$Register, zr,
13884                       $tmp1$$Register, $tmp2$$Register,
13885                       $tmp3$$Register, $tmp4$$Register,
13886                       icnt2, $result$$Register);
13887   %}
13888   ins_pipe(pipe_class_memory);
13889 %}
13890 
13891 instruct string_equals(iRegP_R1 str1, iRegP_R3 str2, iRegI_R4 cnt,
13892                         iRegI_R0 result, iRegP_R10 tmp, rFlagsReg cr)
13893 %{
13894   match(Set result (StrEquals (Binary str1 str2) cnt));
13895   effect(KILL tmp, USE_KILL str1, USE_KILL str2, USE_KILL cnt, KILL cr);
13896 
13897   format %{ "String Equals $str1,$str2,$cnt -> $result    // KILL $tmp" %}
13898   ins_encode %{
13899     __ string_equals($str1$$Register, $str2$$Register,
13900                       $cnt$$Register, $result$$Register,
13901                       $tmp$$Register);
13902   %}
13903   ins_pipe(pipe_class_memory);
13904 %}
13905 
// Array equality intrinsic: delegates to MacroAssembler::char_arrays_equals.
// Inputs are pinned (r1/r2), clobbered via USE_KILL; tmp (r10) and the
// flags are killed as well.
instruct array_equals(iRegP_R1 ary1, iRegP_R2 ary2, iRegI_R0 result,
                      iRegP_R10 tmp, rFlagsReg cr)
%{
  match(Set result (AryEq ary1 ary2));
  effect(KILL tmp, USE_KILL ary1, USE_KILL ary2, KILL cr);

  // Fix: 'ary2' was missing its '$', so the second operand was never
  // substituted into the disassembly/debug format string.
  format %{ "Array Equals $ary1,$ary2 -> $result    // KILL $tmp" %}
  ins_encode %{
    __ char_arrays_equals($ary1$$Register, $ary2$$Register,
                          $result$$Register, $tmp$$Register);
  %}
  ins_pipe(pipe_class_memory);
%}
13919 
// encode char[] to byte[] in ISO_8859_1
// All three pointer/length inputs are pinned and clobbered (USE_KILL);
// the four vector temporaries V0-V3 and the flags are killed.  result
// is produced in r0.
instruct encode_iso_array(iRegP_R2 src, iRegP_R1 dst, iRegI_R3 len,
                          vRegD_V0 Vtmp1, vRegD_V1 Vtmp2,
                          vRegD_V2 Vtmp3, vRegD_V3 Vtmp4,
                          iRegI_R0 result, rFlagsReg cr)
%{
  match(Set result (EncodeISOArray src (Binary dst len)));
  effect(USE_KILL src, USE_KILL dst, USE_KILL len,
         KILL Vtmp1, KILL Vtmp2, KILL Vtmp3, KILL Vtmp4, KILL cr);

  format %{ "Encode array $src,$dst,$len -> $result" %}
  ins_encode %{
    __ encode_iso_array($src$$Register, $dst$$Register, $len$$Register,
         $result$$Register, $Vtmp1$$FloatRegister,  $Vtmp2$$FloatRegister,
         $Vtmp3$$FloatRegister,  $Vtmp4$$FloatRegister);
  %}
  ins_pipe( pipe_class_memory );
%}
13938 
13939 // ============================================================================
13940 // This name is KNOWN by the ADLC and cannot be changed.
13941 // The ADLC forces a 'TypeRawPtr::BOTTOM' output type
13942 // for this guy.
13943 instruct tlsLoadP(thread_RegP dst)
13944 %{
13945   match(Set dst (ThreadLocal));
13946 
13947   ins_cost(0);
13948 
13949   format %{ " -- \t// $dst=Thread::current(), empty" %}
13950 
13951   size(0);
13952 
13953   ins_encode( /*empty*/ );
13954 
13955   ins_pipe(pipe_class_empty);
13956 %}
13957 
// ====================VECTOR INSTRUCTIONS=====================================

// Vector load/store rules.  The memory_size() predicate selects the
// rule for the vector's byte width; each delegates to the matching
// ldrv*/strv* encoding class (S = 32-bit, D = 64-bit, Q = 128-bit).

// Load vector (32 bits)
instruct loadV4(vecD dst, vmem4 mem)
%{
  predicate(n->as_LoadVector()->memory_size() == 4);
  match(Set dst (LoadVector mem));
  ins_cost(4 * INSN_COST);
  format %{ "ldrs   $dst,$mem\t# vector (32 bits)" %}
  ins_encode( aarch64_enc_ldrvS(dst, mem) );
  ins_pipe(vload_reg_mem64);
%}

// Load vector (64 bits)
instruct loadV8(vecD dst, vmem8 mem)
%{
  predicate(n->as_LoadVector()->memory_size() == 8);
  match(Set dst (LoadVector mem));
  ins_cost(4 * INSN_COST);
  format %{ "ldrd   $dst,$mem\t# vector (64 bits)" %}
  ins_encode( aarch64_enc_ldrvD(dst, mem) );
  ins_pipe(vload_reg_mem64);
%}

// Load Vector (128 bits)
instruct loadV16(vecX dst, vmem16 mem)
%{
  predicate(n->as_LoadVector()->memory_size() == 16);
  match(Set dst (LoadVector mem));
  ins_cost(4 * INSN_COST);
  format %{ "ldrq   $dst,$mem\t# vector (128 bits)" %}
  ins_encode( aarch64_enc_ldrvQ(dst, mem) );
  ins_pipe(vload_reg_mem128);
%}

// Store Vector (32 bits)
instruct storeV4(vecD src, vmem4 mem)
%{
  predicate(n->as_StoreVector()->memory_size() == 4);
  match(Set mem (StoreVector mem src));
  ins_cost(4 * INSN_COST);
  format %{ "strs   $mem,$src\t# vector (32 bits)" %}
  ins_encode( aarch64_enc_strvS(src, mem) );
  ins_pipe(vstore_reg_mem64);
%}

// Store Vector (64 bits)
instruct storeV8(vecD src, vmem8 mem)
%{
  predicate(n->as_StoreVector()->memory_size() == 8);
  match(Set mem (StoreVector mem src));
  ins_cost(4 * INSN_COST);
  format %{ "strd   $mem,$src\t# vector (64 bits)" %}
  ins_encode( aarch64_enc_strvD(src, mem) );
  ins_pipe(vstore_reg_mem64);
%}

// Store Vector (128 bits)
instruct storeV16(vecX src, vmem16 mem)
%{
  predicate(n->as_StoreVector()->memory_size() == 16);
  match(Set mem (StoreVector mem src));
  ins_cost(4 * INSN_COST);
  format %{ "strq   $mem,$src\t# vector (128 bits)" %}
  ins_encode( aarch64_enc_strvQ(src, mem) );
  ins_pipe(vstore_reg_mem128);
%}
14025 
// Replicate (broadcast) a scalar or small immediate into every lane of
// a vector register.  Rules on vecD with a two-alternative length
// predicate (e.g. 4 || 8) also serve the shorter vector that occupies
// only part of the 64-bit register.

instruct replicate8B(vecD dst, iRegIorL2I src)
%{
  predicate(n->as_Vector()->length() == 4 ||
            n->as_Vector()->length() == 8);
  match(Set dst (ReplicateB src));
  ins_cost(INSN_COST);
  format %{ "dup  $dst, $src\t# vector (8B)" %}
  ins_encode %{
    __ dup(as_FloatRegister($dst$$reg), __ T8B, as_Register($src$$reg));
  %}
  ins_pipe(vdup_reg_reg64);
%}

instruct replicate16B(vecX dst, iRegIorL2I src)
%{
  predicate(n->as_Vector()->length() == 16);
  match(Set dst (ReplicateB src));
  ins_cost(INSN_COST);
  format %{ "dup  $dst, $src\t# vector (16B)" %}
  ins_encode %{
    __ dup(as_FloatRegister($dst$$reg), __ T16B, as_Register($src$$reg));
  %}
  ins_pipe(vdup_reg_reg128);
%}

// Immediate variants mask the constant down to the element width
// (0xff for bytes, 0xffff for halfwords) before handing it to mov.
instruct replicate8B_imm(vecD dst, immI con)
%{
  predicate(n->as_Vector()->length() == 4 ||
            n->as_Vector()->length() == 8);
  match(Set dst (ReplicateB con));
  ins_cost(INSN_COST);
  format %{ "movi  $dst, $con\t# vector(8B)" %}
  ins_encode %{
    __ mov(as_FloatRegister($dst$$reg), __ T8B, $con$$constant & 0xff);
  %}
  ins_pipe(vmovi_reg_imm64);
%}

instruct replicate16B_imm(vecX dst, immI con)
%{
  predicate(n->as_Vector()->length() == 16);
  match(Set dst (ReplicateB con));
  ins_cost(INSN_COST);
  format %{ "movi  $dst, $con\t# vector(16B)" %}
  ins_encode %{
    __ mov(as_FloatRegister($dst$$reg), __ T16B, $con$$constant & 0xff);
  %}
  ins_pipe(vmovi_reg_imm128);
%}

instruct replicate4S(vecD dst, iRegIorL2I src)
%{
  predicate(n->as_Vector()->length() == 2 ||
            n->as_Vector()->length() == 4);
  match(Set dst (ReplicateS src));
  ins_cost(INSN_COST);
  format %{ "dup  $dst, $src\t# vector (4S)" %}
  ins_encode %{
    __ dup(as_FloatRegister($dst$$reg), __ T4H, as_Register($src$$reg));
  %}
  ins_pipe(vdup_reg_reg64);
%}

instruct replicate8S(vecX dst, iRegIorL2I src)
%{
  predicate(n->as_Vector()->length() == 8);
  match(Set dst (ReplicateS src));
  ins_cost(INSN_COST);
  format %{ "dup  $dst, $src\t# vector (8S)" %}
  ins_encode %{
    __ dup(as_FloatRegister($dst$$reg), __ T8H, as_Register($src$$reg));
  %}
  ins_pipe(vdup_reg_reg128);
%}

instruct replicate4S_imm(vecD dst, immI con)
%{
  predicate(n->as_Vector()->length() == 2 ||
            n->as_Vector()->length() == 4);
  match(Set dst (ReplicateS con));
  ins_cost(INSN_COST);
  format %{ "movi  $dst, $con\t# vector(4H)" %}
  ins_encode %{
    __ mov(as_FloatRegister($dst$$reg), __ T4H, $con$$constant & 0xffff);
  %}
  ins_pipe(vmovi_reg_imm64);
%}

instruct replicate8S_imm(vecX dst, immI con)
%{
  predicate(n->as_Vector()->length() == 8);
  match(Set dst (ReplicateS con));
  ins_cost(INSN_COST);
  format %{ "movi  $dst, $con\t# vector(8H)" %}
  ins_encode %{
    __ mov(as_FloatRegister($dst$$reg), __ T8H, $con$$constant & 0xffff);
  %}
  ins_pipe(vmovi_reg_imm128);
%}

instruct replicate2I(vecD dst, iRegIorL2I src)
%{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (ReplicateI src));
  ins_cost(INSN_COST);
  format %{ "dup  $dst, $src\t# vector (2I)" %}
  ins_encode %{
    __ dup(as_FloatRegister($dst$$reg), __ T2S, as_Register($src$$reg));
  %}
  ins_pipe(vdup_reg_reg64);
%}

instruct replicate4I(vecX dst, iRegIorL2I src)
%{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (ReplicateI src));
  ins_cost(INSN_COST);
  format %{ "dup  $dst, $src\t# vector (4I)" %}
  ins_encode %{
    __ dup(as_FloatRegister($dst$$reg), __ T4S, as_Register($src$$reg));
  %}
  ins_pipe(vdup_reg_reg128);
%}

instruct replicate2I_imm(vecD dst, immI con)
%{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (ReplicateI con));
  ins_cost(INSN_COST);
  format %{ "movi  $dst, $con\t# vector(2I)" %}
  ins_encode %{
    __ mov(as_FloatRegister($dst$$reg), __ T2S, $con$$constant);
  %}
  ins_pipe(vmovi_reg_imm64);
%}

instruct replicate4I_imm(vecX dst, immI con)
%{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (ReplicateI con));
  ins_cost(INSN_COST);
  format %{ "movi  $dst, $con\t# vector(4I)" %}
  ins_encode %{
    __ mov(as_FloatRegister($dst$$reg), __ T4S, $con$$constant);
  %}
  ins_pipe(vmovi_reg_imm128);
%}

instruct replicate2L(vecX dst, iRegL src)
%{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (ReplicateL src));
  ins_cost(INSN_COST);
  format %{ "dup  $dst, $src\t# vector (2L)" %}
  ins_encode %{
    __ dup(as_FloatRegister($dst$$reg), __ T2D, as_Register($src$$reg));
  %}
  ins_pipe(vdup_reg_reg128);
%}

// All-zero broadcast: zeroes the full 128-bit register with a
// self-EOR rather than a movi.
// NOTE(review): the rule name says 2L but the match is (ReplicateI
// zero) with an immI0 operand -- the all-zero bit pattern is the same
// for either element size, but confirm the match is intentional.
instruct replicate2L_zero(vecX dst, immI0 zero)
%{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (ReplicateI zero));
  ins_cost(INSN_COST);
  format %{ "movi  $dst, $zero\t# vector(4I)" %}
  ins_encode %{
    __ eor(as_FloatRegister($dst$$reg), __ T16B,
           as_FloatRegister($dst$$reg),
           as_FloatRegister($dst$$reg));
  %}
  ins_pipe(vmovi_reg_imm128);
%}

instruct replicate2F(vecD dst, vRegF src)
%{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (ReplicateF src));
  ins_cost(INSN_COST);
  format %{ "dup  $dst, $src\t# vector (2F)" %}
  ins_encode %{
    __ dup(as_FloatRegister($dst$$reg), __ T2S,
           as_FloatRegister($src$$reg));
  %}
  ins_pipe(vdup_reg_freg64);
%}

instruct replicate4F(vecX dst, vRegF src)
%{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (ReplicateF src));
  ins_cost(INSN_COST);
  format %{ "dup  $dst, $src\t# vector (4F)" %}
  ins_encode %{
    __ dup(as_FloatRegister($dst$$reg), __ T4S,
           as_FloatRegister($src$$reg));
  %}
  ins_pipe(vdup_reg_freg128);
%}

instruct replicate2D(vecX dst, vRegD src)
%{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (ReplicateD src));
  ins_cost(INSN_COST);
  format %{ "dup  $dst, $src\t# vector (2D)" %}
  ins_encode %{
    __ dup(as_FloatRegister($dst$$reg), __ T2D,
           as_FloatRegister($src$$reg));
  %}
  ins_pipe(vdup_reg_dreg128);
%}
14238 
// ====================VECTOR ARITHMETIC=======================================

// --------------------------------- ADD --------------------------------------

// Lane-wise vector add.  Integer forms use addv, float forms fadd.
// vecD rules with dual-length predicates also cover the half-filled
// 64-bit register cases (e.g. the 8B rule serves 4B vectors too).

instruct vadd8B(vecD dst, vecD src1, vecD src2)
%{
  predicate(n->as_Vector()->length() == 4 ||
            n->as_Vector()->length() == 8);
  match(Set dst (AddVB src1 src2));
  ins_cost(INSN_COST);
  format %{ "addv  $dst,$src1,$src2\t# vector (8B)" %}
  ins_encode %{
    __ addv(as_FloatRegister($dst$$reg), __ T8B,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vdop64);
%}

instruct vadd16B(vecX dst, vecX src1, vecX src2)
%{
  predicate(n->as_Vector()->length() == 16);
  match(Set dst (AddVB src1 src2));
  ins_cost(INSN_COST);
  format %{ "addv  $dst,$src1,$src2\t# vector (16B)" %}
  ins_encode %{
    __ addv(as_FloatRegister($dst$$reg), __ T16B,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vdop128);
%}

instruct vadd4S(vecD dst, vecD src1, vecD src2)
%{
  predicate(n->as_Vector()->length() == 2 ||
            n->as_Vector()->length() == 4);
  match(Set dst (AddVS src1 src2));
  ins_cost(INSN_COST);
  format %{ "addv  $dst,$src1,$src2\t# vector (4H)" %}
  ins_encode %{
    __ addv(as_FloatRegister($dst$$reg), __ T4H,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vdop64);
%}

instruct vadd8S(vecX dst, vecX src1, vecX src2)
%{
  predicate(n->as_Vector()->length() == 8);
  match(Set dst (AddVS src1 src2));
  ins_cost(INSN_COST);
  format %{ "addv  $dst,$src1,$src2\t# vector (8H)" %}
  ins_encode %{
    __ addv(as_FloatRegister($dst$$reg), __ T8H,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vdop128);
%}

instruct vadd2I(vecD dst, vecD src1, vecD src2)
%{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (AddVI src1 src2));
  ins_cost(INSN_COST);
  format %{ "addv  $dst,$src1,$src2\t# vector (2S)" %}
  ins_encode %{
    __ addv(as_FloatRegister($dst$$reg), __ T2S,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vdop64);
%}

instruct vadd4I(vecX dst, vecX src1, vecX src2)
%{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (AddVI src1 src2));
  ins_cost(INSN_COST);
  format %{ "addv  $dst,$src1,$src2\t# vector (4S)" %}
  ins_encode %{
    __ addv(as_FloatRegister($dst$$reg), __ T4S,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vdop128);
%}

instruct vadd2L(vecX dst, vecX src1, vecX src2)
%{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (AddVL src1 src2));
  ins_cost(INSN_COST);
  format %{ "addv  $dst,$src1,$src2\t# vector (2L)" %}
  ins_encode %{
    __ addv(as_FloatRegister($dst$$reg), __ T2D,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vdop128);
%}

instruct vadd2F(vecD dst, vecD src1, vecD src2)
%{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (AddVF src1 src2));
  ins_cost(INSN_COST);
  format %{ "fadd  $dst,$src1,$src2\t# vector (2S)" %}
  ins_encode %{
    __ fadd(as_FloatRegister($dst$$reg), __ T2S,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vdop_fp64);
%}

instruct vadd4F(vecX dst, vecX src1, vecX src2)
%{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (AddVF src1 src2));
  ins_cost(INSN_COST);
  format %{ "fadd  $dst,$src1,$src2\t# vector (4S)" %}
  ins_encode %{
    __ fadd(as_FloatRegister($dst$$reg), __ T4S,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vdop_fp128);
%}
14370 
// Double-precision vector add (2D).  The explicit length predicate was
// missing here; it is added for consistency with the sibling 2D rules
// (vsub2D/vmul2D/vdiv2D all guard on length() == 2) and is trivially
// satisfied by any AddVD the matcher presents to a vecX operand.
instruct vadd2D(vecX dst, vecX src1, vecX src2)
%{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (AddVD src1 src2));
  ins_cost(INSN_COST);
  format %{ "fadd  $dst,$src1,$src2\t# vector (2D)" %}
  ins_encode %{
    __ fadd(as_FloatRegister($dst$$reg), __ T2D,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vdop_fp128);
%}
14383 
// --------------------------------- SUB --------------------------------------

// Lane-wise vector subtract (dst = src1 - src2).  Integer forms use
// subv, float forms fsub.  Same dual-length predicate convention as
// the ADD rules above.

instruct vsub8B(vecD dst, vecD src1, vecD src2)
%{
  predicate(n->as_Vector()->length() == 4 ||
            n->as_Vector()->length() == 8);
  match(Set dst (SubVB src1 src2));
  ins_cost(INSN_COST);
  format %{ "subv  $dst,$src1,$src2\t# vector (8B)" %}
  ins_encode %{
    __ subv(as_FloatRegister($dst$$reg), __ T8B,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vdop64);
%}

instruct vsub16B(vecX dst, vecX src1, vecX src2)
%{
  predicate(n->as_Vector()->length() == 16);
  match(Set dst (SubVB src1 src2));
  ins_cost(INSN_COST);
  format %{ "subv  $dst,$src1,$src2\t# vector (16B)" %}
  ins_encode %{
    __ subv(as_FloatRegister($dst$$reg), __ T16B,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vdop128);
%}

instruct vsub4S(vecD dst, vecD src1, vecD src2)
%{
  predicate(n->as_Vector()->length() == 2 ||
            n->as_Vector()->length() == 4);
  match(Set dst (SubVS src1 src2));
  ins_cost(INSN_COST);
  format %{ "subv  $dst,$src1,$src2\t# vector (4H)" %}
  ins_encode %{
    __ subv(as_FloatRegister($dst$$reg), __ T4H,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vdop64);
%}

instruct vsub8S(vecX dst, vecX src1, vecX src2)
%{
  predicate(n->as_Vector()->length() == 8);
  match(Set dst (SubVS src1 src2));
  ins_cost(INSN_COST);
  format %{ "subv  $dst,$src1,$src2\t# vector (8H)" %}
  ins_encode %{
    __ subv(as_FloatRegister($dst$$reg), __ T8H,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vdop128);
%}

instruct vsub2I(vecD dst, vecD src1, vecD src2)
%{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (SubVI src1 src2));
  ins_cost(INSN_COST);
  format %{ "subv  $dst,$src1,$src2\t# vector (2S)" %}
  ins_encode %{
    __ subv(as_FloatRegister($dst$$reg), __ T2S,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vdop64);
%}

instruct vsub4I(vecX dst, vecX src1, vecX src2)
%{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (SubVI src1 src2));
  ins_cost(INSN_COST);
  format %{ "subv  $dst,$src1,$src2\t# vector (4S)" %}
  ins_encode %{
    __ subv(as_FloatRegister($dst$$reg), __ T4S,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vdop128);
%}

instruct vsub2L(vecX dst, vecX src1, vecX src2)
%{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (SubVL src1 src2));
  ins_cost(INSN_COST);
  format %{ "subv  $dst,$src1,$src2\t# vector (2L)" %}
  ins_encode %{
    __ subv(as_FloatRegister($dst$$reg), __ T2D,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vdop128);
%}

instruct vsub2F(vecD dst, vecD src1, vecD src2)
%{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (SubVF src1 src2));
  ins_cost(INSN_COST);
  format %{ "fsub  $dst,$src1,$src2\t# vector (2S)" %}
  ins_encode %{
    __ fsub(as_FloatRegister($dst$$reg), __ T2S,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vdop_fp64);
%}

instruct vsub4F(vecX dst, vecX src1, vecX src2)
%{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (SubVF src1 src2));
  ins_cost(INSN_COST);
  format %{ "fsub  $dst,$src1,$src2\t# vector (4S)" %}
  ins_encode %{
    __ fsub(as_FloatRegister($dst$$reg), __ T4S,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vdop_fp128);
%}

instruct vsub2D(vecX dst, vecX src1, vecX src2)
%{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (SubVD src1 src2));
  ins_cost(INSN_COST);
  format %{ "fsub  $dst,$src1,$src2\t# vector (2D)" %}
  ins_encode %{
    __ fsub(as_FloatRegister($dst$$reg), __ T2D,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vdop_fp128);
%}
14527 
// --------------------------------- MUL --------------------------------------

// Lane-wise vector multiply.  Integer forms use mulv, float forms fmul.
// No byte (B) or long (L) integer multiply rules are defined here.

instruct vmul4S(vecD dst, vecD src1, vecD src2)
%{
  predicate(n->as_Vector()->length() == 2 ||
            n->as_Vector()->length() == 4);
  match(Set dst (MulVS src1 src2));
  ins_cost(INSN_COST);
  format %{ "mulv  $dst,$src1,$src2\t# vector (4H)" %}
  ins_encode %{
    __ mulv(as_FloatRegister($dst$$reg), __ T4H,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vmul64);
%}

instruct vmul8S(vecX dst, vecX src1, vecX src2)
%{
  predicate(n->as_Vector()->length() == 8);
  match(Set dst (MulVS src1 src2));
  ins_cost(INSN_COST);
  format %{ "mulv  $dst,$src1,$src2\t# vector (8H)" %}
  ins_encode %{
    __ mulv(as_FloatRegister($dst$$reg), __ T8H,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vmul128);
%}

instruct vmul2I(vecD dst, vecD src1, vecD src2)
%{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (MulVI src1 src2));
  ins_cost(INSN_COST);
  format %{ "mulv  $dst,$src1,$src2\t# vector (2S)" %}
  ins_encode %{
    __ mulv(as_FloatRegister($dst$$reg), __ T2S,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vmul64);
%}

instruct vmul4I(vecX dst, vecX src1, vecX src2)
%{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (MulVI src1 src2));
  ins_cost(INSN_COST);
  format %{ "mulv  $dst,$src1,$src2\t# vector (4S)" %}
  ins_encode %{
    __ mulv(as_FloatRegister($dst$$reg), __ T4S,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vmul128);
%}

instruct vmul2F(vecD dst, vecD src1, vecD src2)
%{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (MulVF src1 src2));
  ins_cost(INSN_COST);
  format %{ "fmul  $dst,$src1,$src2\t# vector (2S)" %}
  ins_encode %{
    __ fmul(as_FloatRegister($dst$$reg), __ T2S,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vmuldiv_fp64);
%}

instruct vmul4F(vecX dst, vecX src1, vecX src2)
%{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (MulVF src1 src2));
  ins_cost(INSN_COST);
  format %{ "fmul  $dst,$src1,$src2\t# vector (4S)" %}
  ins_encode %{
    __ fmul(as_FloatRegister($dst$$reg), __ T4S,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vmuldiv_fp128);
%}

instruct vmul2D(vecX dst, vecX src1, vecX src2)
%{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (MulVD src1 src2));
  ins_cost(INSN_COST);
  format %{ "fmul  $dst,$src1,$src2\t# vector (2D)" %}
  ins_encode %{
    __ fmul(as_FloatRegister($dst$$reg), __ T2D,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vmuldiv_fp128);
%}
14628 
// --------------------------------- MLA --------------------------------------

// Fused multiply-accumulate: matches (AddV dst (MulV src1 src2)), so
// dst is both the accumulator input and the result register --
// mla computes dst += src1 * src2 lane-wise.

instruct vmla4S(vecD dst, vecD src1, vecD src2)
%{
  predicate(n->as_Vector()->length() == 2 ||
            n->as_Vector()->length() == 4);
  match(Set dst (AddVS dst (MulVS src1 src2)));
  ins_cost(INSN_COST);
  format %{ "mlav  $dst,$src1,$src2\t# vector (4H)" %}
  ins_encode %{
    __ mlav(as_FloatRegister($dst$$reg), __ T4H,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vmla64);
%}

instruct vmla8S(vecX dst, vecX src1, vecX src2)
%{
  predicate(n->as_Vector()->length() == 8);
  match(Set dst (AddVS dst (MulVS src1 src2)));
  ins_cost(INSN_COST);
  format %{ "mlav  $dst,$src1,$src2\t# vector (8H)" %}
  ins_encode %{
    __ mlav(as_FloatRegister($dst$$reg), __ T8H,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vmla128);
%}

instruct vmla2I(vecD dst, vecD src1, vecD src2)
%{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (AddVI dst (MulVI src1 src2)));
  ins_cost(INSN_COST);
  format %{ "mlav  $dst,$src1,$src2\t# vector (2S)" %}
  ins_encode %{
    __ mlav(as_FloatRegister($dst$$reg), __ T2S,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vmla64);
%}

instruct vmla4I(vecX dst, vecX src1, vecX src2)
%{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (AddVI dst (MulVI src1 src2)));
  ins_cost(INSN_COST);
  format %{ "mlav  $dst,$src1,$src2\t# vector (4S)" %}
  ins_encode %{
    __ mlav(as_FloatRegister($dst$$reg), __ T4S,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vmla128);
%}
14687 
// --------------------------------- MLS --------------------------------------

// Fused multiply-subtract: matches (SubV dst (MulV src1 src2)), so
// dst is both the accumulator input and the result register --
// mls computes dst -= src1 * src2 lane-wise.

instruct vmls4S(vecD dst, vecD src1, vecD src2)
%{
  predicate(n->as_Vector()->length() == 2 ||
            n->as_Vector()->length() == 4);
  match(Set dst (SubVS dst (MulVS src1 src2)));
  ins_cost(INSN_COST);
  format %{ "mlsv  $dst,$src1,$src2\t# vector (4H)" %}
  ins_encode %{
    __ mlsv(as_FloatRegister($dst$$reg), __ T4H,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vmla64);
%}

instruct vmls8S(vecX dst, vecX src1, vecX src2)
%{
  predicate(n->as_Vector()->length() == 8);
  match(Set dst (SubVS dst (MulVS src1 src2)));
  ins_cost(INSN_COST);
  format %{ "mlsv  $dst,$src1,$src2\t# vector (8H)" %}
  ins_encode %{
    __ mlsv(as_FloatRegister($dst$$reg), __ T8H,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vmla128);
%}

instruct vmls2I(vecD dst, vecD src1, vecD src2)
%{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (SubVI dst (MulVI src1 src2)));
  ins_cost(INSN_COST);
  format %{ "mlsv  $dst,$src1,$src2\t# vector (2S)" %}
  ins_encode %{
    __ mlsv(as_FloatRegister($dst$$reg), __ T2S,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vmla64);
%}

instruct vmls4I(vecX dst, vecX src1, vecX src2)
%{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (SubVI dst (MulVI src1 src2)));
  ins_cost(INSN_COST);
  format %{ "mlsv  $dst,$src1,$src2\t# vector (4S)" %}
  ins_encode %{
    __ mlsv(as_FloatRegister($dst$$reg), __ T4S,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vmla128);
%}
14746 
14747 // --------------------------------- DIV --------------------------------------
14748 
// vdiv2F: element-wise float division, two 32-bit float lanes (64-bit, 2S).
instruct vdiv2F(vecD dst, vecD src1, vecD src2)
%{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (DivVF src1 src2));
  ins_cost(INSN_COST);
  format %{ "fdiv  $dst,$src1,$src2\t# vector (2S)" %}
  ins_encode %{
    __ fdiv(as_FloatRegister($dst$$reg), __ T2S,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vmuldiv_fp64);
%}

// vdiv4F: element-wise float division, four 32-bit float lanes (128-bit, 4S).
instruct vdiv4F(vecX dst, vecX src1, vecX src2)
%{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (DivVF src1 src2));
  ins_cost(INSN_COST);
  format %{ "fdiv  $dst,$src1,$src2\t# vector (4S)" %}
  ins_encode %{
    __ fdiv(as_FloatRegister($dst$$reg), __ T4S,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vmuldiv_fp128);
%}

// vdiv2D: element-wise double division, two 64-bit double lanes (128-bit, 2D).
instruct vdiv2D(vecX dst, vecX src1, vecX src2)
%{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (DivVD src1 src2));
  ins_cost(INSN_COST);
  format %{ "fdiv  $dst,$src1,$src2\t# vector (2D)" %}
  ins_encode %{
    __ fdiv(as_FloatRegister($dst$$reg), __ T2D,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vmuldiv_fp128);
%}
14790 
14791 // --------------------------------- AND --------------------------------------
14792 
// vand8B: bitwise AND of two vectors of up to 8 bytes (64-bit, 8B).
// Element type is irrelevant for bitwise ops, so the predicate tests total
// byte length; 4-byte vectors occupy the low half of the D register.
instruct vand8B(vecD dst, vecD src1, vecD src2)
%{
  predicate(n->as_Vector()->length_in_bytes() == 4 ||
            n->as_Vector()->length_in_bytes() == 8);
  match(Set dst (AndV src1 src2));
  ins_cost(INSN_COST);
  format %{ "and  $dst,$src1,$src2\t# vector (8B)" %}
  ins_encode %{
    __ andr(as_FloatRegister($dst$$reg), __ T8B,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vlogical64);
%}

// vand16B: bitwise AND of two 16-byte vectors (128-bit, 16B).
instruct vand16B(vecX dst, vecX src1, vecX src2)
%{
  predicate(n->as_Vector()->length_in_bytes() == 16);
  match(Set dst (AndV src1 src2));
  ins_cost(INSN_COST);
  format %{ "and  $dst,$src1,$src2\t# vector (16B)" %}
  ins_encode %{
    __ andr(as_FloatRegister($dst$$reg), __ T16B,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vlogical128);
%}
14821 
14822 // --------------------------------- OR ---------------------------------------
14823 
// vor8B: bitwise OR of two vectors of up to 8 bytes (64-bit, 8B).
// The predicate tests total byte length; 4-byte vectors occupy the low
// half of the D register.
instruct vor8B(vecD dst, vecD src1, vecD src2)
%{
  predicate(n->as_Vector()->length_in_bytes() == 4 ||
            n->as_Vector()->length_in_bytes() == 8);
  match(Set dst (OrV src1 src2));
  ins_cost(INSN_COST);
  // Format previously said "and" (copy-paste from vand8B) although the
  // emitted instruction is orr; corrected to match the encoding and the
  // sibling vor16B rule. Debug/PrintOptoAssembly output only.
  format %{ "orr  $dst,$src1,$src2\t# vector (8B)" %}
  ins_encode %{
    __ orr(as_FloatRegister($dst$$reg), __ T8B,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vlogical64);
%}
14838 
// vor16B: bitwise OR of two 16-byte vectors (128-bit, 16B).
instruct vor16B(vecX dst, vecX src1, vecX src2)
%{
  predicate(n->as_Vector()->length_in_bytes() == 16);
  match(Set dst (OrV src1 src2));
  ins_cost(INSN_COST);
  format %{ "orr  $dst,$src1,$src2\t# vector (16B)" %}
  ins_encode %{
    __ orr(as_FloatRegister($dst$$reg), __ T16B,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vlogical128);
%}
14852 
14853 // --------------------------------- XOR --------------------------------------
14854 
// vxor8B: bitwise XOR of two vectors of up to 8 bytes (64-bit, 8B),
// emitted as NEON EOR. Predicate tests total byte length, as for AND/OR.
instruct vxor8B(vecD dst, vecD src1, vecD src2)
%{
  predicate(n->as_Vector()->length_in_bytes() == 4 ||
            n->as_Vector()->length_in_bytes() == 8);
  match(Set dst (XorV src1 src2));
  ins_cost(INSN_COST);
  format %{ "xor  $dst,$src1,$src2\t# vector (8B)" %}
  ins_encode %{
    __ eor(as_FloatRegister($dst$$reg), __ T8B,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vlogical64);
%}

// vxor16B: bitwise XOR (EOR) of two 16-byte vectors (128-bit, 16B).
instruct vxor16B(vecX dst, vecX src1, vecX src2)
%{
  predicate(n->as_Vector()->length_in_bytes() == 16);
  match(Set dst (XorV src1 src2));
  ins_cost(INSN_COST);
  format %{ "xor  $dst,$src1,$src2\t# vector (16B)" %}
  ins_encode %{
    __ eor(as_FloatRegister($dst$$reg), __ T16B,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vlogical128);
%}
14883 
14884 // ------------------------------ Shift ---------------------------------------
// vshiftcnt8B: materialize a vector shift count by duplicating the scalar
// count register into every byte lane of a 64-bit vector. Used by the
// variable-shift rules below (SSHL/USHL read each lane's count from its
// low byte, so a per-byte broadcast serves every element size).
instruct vshiftcnt8B(vecD dst, iRegIorL2I cnt) %{
  predicate(n->as_Vector()->length_in_bytes() == 8);
  match(Set dst (LShiftCntV cnt));
  match(Set dst (RShiftCntV cnt));
  format %{ "dup  $dst, $cnt\t# shift count vector (8B)" %}
  ins_encode %{
    __ dup(as_FloatRegister($dst$$reg), __ T8B, as_Register($cnt$$reg));
  %}
  ins_pipe(vdup_reg_reg64);
%}

// vshiftcnt16B: as vshiftcnt8B, but broadcast into a 128-bit vector.
instruct vshiftcnt16B(vecX dst, iRegIorL2I cnt) %{
  predicate(n->as_Vector()->length_in_bytes() == 16);
  match(Set dst (LShiftCntV cnt));
  match(Set dst (RShiftCntV cnt));
  format %{ "dup  $dst, $cnt\t# shift count vector (16B)" %}
  ins_encode %{
    __ dup(as_FloatRegister($dst$$reg), __ T16B, as_Register($cnt$$reg));
  %}
  ins_pipe(vdup_reg_reg128);
%}
14906 
// vsll8B: variable-count left shift, byte lanes (64-bit, 8B). SSHL shifts
// left for positive per-lane counts, which is what LShiftCntV produces.
instruct vsll8B(vecD dst, vecD src, vecD shift) %{
  predicate(n->as_Vector()->length() == 4 ||
            n->as_Vector()->length() == 8);
  match(Set dst (LShiftVB src shift));
  ins_cost(INSN_COST);
  format %{ "sshl  $dst,$src,$shift\t# vector (8B)" %}
  ins_encode %{
    __ sshl(as_FloatRegister($dst$$reg), __ T8B,
            as_FloatRegister($src$$reg),
            as_FloatRegister($shift$$reg));
  %}
  ins_pipe(vshift64);
%}

// vsll16B: as vsll8B, but sixteen byte lanes (128-bit, 16B).
instruct vsll16B(vecX dst, vecX src, vecX shift) %{
  predicate(n->as_Vector()->length() == 16);
  match(Set dst (LShiftVB src shift));
  ins_cost(INSN_COST);
  format %{ "sshl  $dst,$src,$shift\t# vector (16B)" %}
  ins_encode %{
    __ sshl(as_FloatRegister($dst$$reg), __ T16B,
            as_FloatRegister($src$$reg),
            as_FloatRegister($shift$$reg));
  %}
  ins_pipe(vshift128);
%}
14933 
14934 // Right shifts with vector shift count on aarch64 SIMD are implemented
14935 // as left shift by negative shift count.
14936 // There are two cases for vector shift count.
14937 //
14938 // Case 1: The vector shift count is from replication.
14939 //        |            |
14940 //    LoadVector  RShiftCntV
14941 //        |       /
14942 //     RShiftVI
14943 // Note: In inner loop, multiple neg instructions are used, which can be
14944 // moved to outer loop and merge into one neg instruction.
14945 //
14946 // Case 2: The vector shift count is from loading.
14947 // This case isn't supported by middle-end now. But it's supported by
14948 // panama/vectorIntrinsics(JEP 338: Vector API).
14949 //        |            |
14950 //    LoadVector  LoadVector
14951 //        |       /
14952 //     RShiftVI
14953 //
14954 
// vsra8B: variable-count arithmetic right shift, byte lanes (64-bit, 8B).
// Per the block comment above: AArch64 has no right-shift-by-vector, so
// negr negates every byte of the count vector and sshl shifts by the
// (now negative) counts, i.e. shifts right.
instruct vsra8B(vecD dst, vecD src, vecD shift, vecD tmp) %{
  predicate(n->as_Vector()->length() == 4 ||
            n->as_Vector()->length() == 8);
  match(Set dst (RShiftVB src shift));
  ins_cost(INSN_COST);
  effect(TEMP tmp);
  format %{ "negr  $tmp,$shift\t"
            "sshl  $dst,$src,$tmp\t# vector (8B)" %}
  ins_encode %{
    __ negr(as_FloatRegister($tmp$$reg), __ T8B,
            as_FloatRegister($shift$$reg));
    __ sshl(as_FloatRegister($dst$$reg), __ T8B,
            as_FloatRegister($src$$reg),
            as_FloatRegister($tmp$$reg));
  %}
  ins_pipe(vshift64);
%}

// vsra16B: as vsra8B, sixteen byte lanes (128-bit, 16B).
instruct vsra16B(vecX dst, vecX src, vecX shift, vecX tmp) %{
  predicate(n->as_Vector()->length() == 16);
  match(Set dst (RShiftVB src shift));
  ins_cost(INSN_COST);
  effect(TEMP tmp);
  format %{ "negr  $tmp,$shift\t"
            "sshl  $dst,$src,$tmp\t# vector (16B)" %}
  ins_encode %{
    __ negr(as_FloatRegister($tmp$$reg), __ T16B,
            as_FloatRegister($shift$$reg));
    __ sshl(as_FloatRegister($dst$$reg), __ T16B,
            as_FloatRegister($src$$reg),
            as_FloatRegister($tmp$$reg));
  %}
  ins_pipe(vshift128);
%}

// vsrl8B: variable-count logical right shift, byte lanes (64-bit, 8B).
// Same negate-then-shift idiom, with ushl for an unsigned (zero-fill) shift.
instruct vsrl8B(vecD dst, vecD src, vecD shift, vecD tmp) %{
  predicate(n->as_Vector()->length() == 4 ||
            n->as_Vector()->length() == 8);
  match(Set dst (URShiftVB src shift));
  ins_cost(INSN_COST);
  effect(TEMP tmp);
  format %{ "negr  $tmp,$shift\t"
            "ushl  $dst,$src,$tmp\t# vector (8B)" %}
  ins_encode %{
    __ negr(as_FloatRegister($tmp$$reg), __ T8B,
            as_FloatRegister($shift$$reg));
    __ ushl(as_FloatRegister($dst$$reg), __ T8B,
            as_FloatRegister($src$$reg),
            as_FloatRegister($tmp$$reg));
  %}
  ins_pipe(vshift64);
%}

// vsrl16B: as vsrl8B, sixteen byte lanes (128-bit, 16B).
instruct vsrl16B(vecX dst, vecX src, vecX shift, vecX tmp) %{
  predicate(n->as_Vector()->length() == 16);
  match(Set dst (URShiftVB src shift));
  ins_cost(INSN_COST);
  effect(TEMP tmp);
  format %{ "negr  $tmp,$shift\t"
            "ushl  $dst,$src,$tmp\t# vector (16B)" %}
  ins_encode %{
    __ negr(as_FloatRegister($tmp$$reg), __ T16B,
            as_FloatRegister($shift$$reg));
    __ ushl(as_FloatRegister($dst$$reg), __ T16B,
            as_FloatRegister($src$$reg),
            as_FloatRegister($tmp$$reg));
  %}
  ins_pipe(vshift128);
%}
15024 
// vsll8B_imm: immediate left shift, byte lanes (64-bit, 8B). The count is
// masked to 5 bits; a count >= 8 (the lane width) would shift out every
// bit, so the result is zeroed with eor src,src instead of emitting an
// out-of-range shl immediate.
instruct vsll8B_imm(vecD dst, vecD src, immI shift) %{
  predicate(n->as_Vector()->length() == 4 ||
            n->as_Vector()->length() == 8);
  match(Set dst (LShiftVB src shift));
  ins_cost(INSN_COST);
  format %{ "shl    $dst, $src, $shift\t# vector (8B)" %}
  ins_encode %{
    int sh = (int)$shift$$constant & 31;
    if (sh >= 8) {
      __ eor(as_FloatRegister($dst$$reg), __ T8B,
             as_FloatRegister($src$$reg),
             as_FloatRegister($src$$reg));
    } else {
      __ shl(as_FloatRegister($dst$$reg), __ T8B,
             as_FloatRegister($src$$reg), sh);
    }
  %}
  ins_pipe(vshift64_imm);
%}

// vsll16B_imm: as vsll8B_imm, sixteen byte lanes (128-bit, 16B).
instruct vsll16B_imm(vecX dst, vecX src, immI shift) %{
  predicate(n->as_Vector()->length() == 16);
  match(Set dst (LShiftVB src shift));
  ins_cost(INSN_COST);
  format %{ "shl    $dst, $src, $shift\t# vector (16B)" %}
  ins_encode %{
    int sh = (int)$shift$$constant & 31;
    if (sh >= 8) {
      __ eor(as_FloatRegister($dst$$reg), __ T16B,
             as_FloatRegister($src$$reg),
             as_FloatRegister($src$$reg));
    } else {
      __ shl(as_FloatRegister($dst$$reg), __ T16B,
             as_FloatRegister($src$$reg), sh);
    }
  %}
  ins_pipe(vshift128_imm);
%}

// vsra8B_imm: immediate arithmetic right shift, byte lanes (64-bit, 8B).
// Counts >= 8 are clamped to 7 (shifting in sign bits saturates at lane
// width - 1). NOTE(review): the final "sh = -sh & 7" appears to produce the
// negated/masked value this file's Assembler::sshr expects for the SIMD
// shift-immediate (immh:immb) encoding — confirm against assembler_aarch64.
instruct vsra8B_imm(vecD dst, vecD src, immI shift) %{
  predicate(n->as_Vector()->length() == 4 ||
            n->as_Vector()->length() == 8);
  match(Set dst (RShiftVB src shift));
  ins_cost(INSN_COST);
  format %{ "sshr    $dst, $src, $shift\t# vector (8B)" %}
  ins_encode %{
    int sh = (int)$shift$$constant & 31;
    if (sh >= 8) sh = 7;
    sh = -sh & 7;
    __ sshr(as_FloatRegister($dst$$reg), __ T8B,
           as_FloatRegister($src$$reg), sh);
  %}
  ins_pipe(vshift64_imm);
%}

// vsra16B_imm: as vsra8B_imm, sixteen byte lanes (128-bit, 16B).
instruct vsra16B_imm(vecX dst, vecX src, immI shift) %{
  predicate(n->as_Vector()->length() == 16);
  match(Set dst (RShiftVB src shift));
  ins_cost(INSN_COST);
  format %{ "sshr    $dst, $src, $shift\t# vector (16B)" %}
  ins_encode %{
    int sh = (int)$shift$$constant & 31;
    if (sh >= 8) sh = 7;
    sh = -sh & 7;
    __ sshr(as_FloatRegister($dst$$reg), __ T16B,
           as_FloatRegister($src$$reg), sh);
  %}
  ins_pipe(vshift128_imm);
%}

// vsrl8B_imm: immediate logical right shift, byte lanes (64-bit, 8B).
// Counts >= 8 zero the destination (eor src,src); otherwise ushr with the
// same negate-and-mask convention as the sshr rules above.
instruct vsrl8B_imm(vecD dst, vecD src, immI shift) %{
  predicate(n->as_Vector()->length() == 4 ||
            n->as_Vector()->length() == 8);
  match(Set dst (URShiftVB src shift));
  ins_cost(INSN_COST);
  format %{ "ushr    $dst, $src, $shift\t# vector (8B)" %}
  ins_encode %{
    int sh = (int)$shift$$constant & 31;
    if (sh >= 8) {
      __ eor(as_FloatRegister($dst$$reg), __ T8B,
             as_FloatRegister($src$$reg),
             as_FloatRegister($src$$reg));
    } else {
      __ ushr(as_FloatRegister($dst$$reg), __ T8B,
             as_FloatRegister($src$$reg), -sh & 7);
    }
  %}
  ins_pipe(vshift64_imm);
%}

// vsrl16B_imm: as vsrl8B_imm, sixteen byte lanes (128-bit, 16B).
instruct vsrl16B_imm(vecX dst, vecX src, immI shift) %{
  predicate(n->as_Vector()->length() == 16);
  match(Set dst (URShiftVB src shift));
  ins_cost(INSN_COST);
  format %{ "ushr    $dst, $src, $shift\t# vector (16B)" %}
  ins_encode %{
    int sh = (int)$shift$$constant & 31;
    if (sh >= 8) {
      __ eor(as_FloatRegister($dst$$reg), __ T16B,
             as_FloatRegister($src$$reg),
             as_FloatRegister($src$$reg));
    } else {
      __ ushr(as_FloatRegister($dst$$reg), __ T16B,
             as_FloatRegister($src$$reg), -sh & 7);
    }
  %}
  ins_pipe(vshift128_imm);
%}
15133 
// vsll4S: variable-count left shift, 16-bit (short) lanes (64-bit, 4H).
instruct vsll4S(vecD dst, vecD src, vecD shift) %{
  predicate(n->as_Vector()->length() == 2 ||
            n->as_Vector()->length() == 4);
  match(Set dst (LShiftVS src shift));
  ins_cost(INSN_COST);
  format %{ "sshl  $dst,$src,$shift\t# vector (4H)" %}
  ins_encode %{
    __ sshl(as_FloatRegister($dst$$reg), __ T4H,
            as_FloatRegister($src$$reg),
            as_FloatRegister($shift$$reg));
  %}
  ins_pipe(vshift64);
%}

// vsll8S: as vsll4S, eight short lanes (128-bit, 8H).
instruct vsll8S(vecX dst, vecX src, vecX shift) %{
  predicate(n->as_Vector()->length() == 8);
  match(Set dst (LShiftVS src shift));
  ins_cost(INSN_COST);
  format %{ "sshl  $dst,$src,$shift\t# vector (8H)" %}
  ins_encode %{
    __ sshl(as_FloatRegister($dst$$reg), __ T8H,
            as_FloatRegister($src$$reg),
            as_FloatRegister($shift$$reg));
  %}
  ins_pipe(vshift128);
%}

// vsra4S: variable-count arithmetic right shift, short lanes (64-bit, 4H),
// via the negate-then-sshl idiom. negr runs on T8B: the count vector is a
// per-byte broadcast (see vshiftcnt8B), so byte-wise negation leaves the
// low byte of every H lane holding the negated count.
instruct vsra4S(vecD dst, vecD src, vecD shift, vecD tmp) %{
  predicate(n->as_Vector()->length() == 2 ||
            n->as_Vector()->length() == 4);
  match(Set dst (RShiftVS src shift));
  ins_cost(INSN_COST);
  effect(TEMP tmp);
  format %{ "negr  $tmp,$shift\t"
            "sshl  $dst,$src,$tmp\t# vector (4H)" %}
  ins_encode %{
    __ negr(as_FloatRegister($tmp$$reg), __ T8B,
            as_FloatRegister($shift$$reg));
    __ sshl(as_FloatRegister($dst$$reg), __ T4H,
            as_FloatRegister($src$$reg),
            as_FloatRegister($tmp$$reg));
  %}
  ins_pipe(vshift64);
%}

// vsra8S: as vsra4S, eight short lanes (128-bit, 8H / T16B negation).
instruct vsra8S(vecX dst, vecX src, vecX shift, vecX tmp) %{
  predicate(n->as_Vector()->length() == 8);
  match(Set dst (RShiftVS src shift));
  ins_cost(INSN_COST);
  effect(TEMP tmp);
  format %{ "negr  $tmp,$shift\t"
            "sshl  $dst,$src,$tmp\t# vector (8H)" %}
  ins_encode %{
    __ negr(as_FloatRegister($tmp$$reg), __ T16B,
            as_FloatRegister($shift$$reg));
    __ sshl(as_FloatRegister($dst$$reg), __ T8H,
            as_FloatRegister($src$$reg),
            as_FloatRegister($tmp$$reg));
  %}
  ins_pipe(vshift128);
%}

// vsrl4S: variable-count logical right shift, short lanes (64-bit, 4H),
// negate-then-ushl.
instruct vsrl4S(vecD dst, vecD src, vecD shift, vecD tmp) %{
  predicate(n->as_Vector()->length() == 2 ||
            n->as_Vector()->length() == 4);
  match(Set dst (URShiftVS src shift));
  ins_cost(INSN_COST);
  effect(TEMP tmp);
  format %{ "negr  $tmp,$shift\t"
            "ushl  $dst,$src,$tmp\t# vector (4H)" %}
  ins_encode %{
    __ negr(as_FloatRegister($tmp$$reg), __ T8B,
            as_FloatRegister($shift$$reg));
    __ ushl(as_FloatRegister($dst$$reg), __ T4H,
            as_FloatRegister($src$$reg),
            as_FloatRegister($tmp$$reg));
  %}
  ins_pipe(vshift64);
%}

// vsrl8S: as vsrl4S, eight short lanes (128-bit, 8H).
instruct vsrl8S(vecX dst, vecX src, vecX shift, vecX tmp) %{
  predicate(n->as_Vector()->length() == 8);
  match(Set dst (URShiftVS src shift));
  ins_cost(INSN_COST);
  effect(TEMP tmp);
  format %{ "negr  $tmp,$shift\t"
            "ushl  $dst,$src,$tmp\t# vector (8H)" %}
  ins_encode %{
    __ negr(as_FloatRegister($tmp$$reg), __ T16B,
            as_FloatRegister($shift$$reg));
    __ ushl(as_FloatRegister($dst$$reg), __ T8H,
            as_FloatRegister($src$$reg),
            as_FloatRegister($tmp$$reg));
  %}
  ins_pipe(vshift128);
%}
15230 
// vsll4S_imm: immediate left shift, short lanes (64-bit, 4H). Counts >= 16
// (the lane width) zero the result via eor src,src, mirroring the byte
// rules above.
instruct vsll4S_imm(vecD dst, vecD src, immI shift) %{
  predicate(n->as_Vector()->length() == 2 ||
            n->as_Vector()->length() == 4);
  match(Set dst (LShiftVS src shift));
  ins_cost(INSN_COST);
  format %{ "shl    $dst, $src, $shift\t# vector (4H)" %}
  ins_encode %{
    int sh = (int)$shift$$constant & 31;
    if (sh >= 16) {
      __ eor(as_FloatRegister($dst$$reg), __ T8B,
             as_FloatRegister($src$$reg),
             as_FloatRegister($src$$reg));
    } else {
      __ shl(as_FloatRegister($dst$$reg), __ T4H,
             as_FloatRegister($src$$reg), sh);
    }
  %}
  ins_pipe(vshift64_imm);
%}

// vsll8S_imm: as vsll4S_imm, eight short lanes (128-bit, 8H).
instruct vsll8S_imm(vecX dst, vecX src, immI shift) %{
  predicate(n->as_Vector()->length() == 8);
  match(Set dst (LShiftVS src shift));
  ins_cost(INSN_COST);
  format %{ "shl    $dst, $src, $shift\t# vector (8H)" %}
  ins_encode %{
    int sh = (int)$shift$$constant & 31;
    if (sh >= 16) {
      __ eor(as_FloatRegister($dst$$reg), __ T16B,
             as_FloatRegister($src$$reg),
             as_FloatRegister($src$$reg));
    } else {
      __ shl(as_FloatRegister($dst$$reg), __ T8H,
             as_FloatRegister($src$$reg), sh);
    }
  %}
  ins_pipe(vshift128_imm);
%}

// vsra4S_imm: immediate arithmetic right shift, short lanes (64-bit, 4H).
// Counts >= 16 clamp to 15; "-sh & 15" matches the negate-and-mask
// convention used throughout this file's immediate sshr/ushr rules.
instruct vsra4S_imm(vecD dst, vecD src, immI shift) %{
  predicate(n->as_Vector()->length() == 2 ||
            n->as_Vector()->length() == 4);
  match(Set dst (RShiftVS src shift));
  ins_cost(INSN_COST);
  format %{ "sshr    $dst, $src, $shift\t# vector (4H)" %}
  ins_encode %{
    int sh = (int)$shift$$constant & 31;
    if (sh >= 16) sh = 15;
    sh = -sh & 15;
    __ sshr(as_FloatRegister($dst$$reg), __ T4H,
           as_FloatRegister($src$$reg), sh);
  %}
  ins_pipe(vshift64_imm);
%}

// vsra8S_imm: as vsra4S_imm, eight short lanes (128-bit, 8H).
instruct vsra8S_imm(vecX dst, vecX src, immI shift) %{
  predicate(n->as_Vector()->length() == 8);
  match(Set dst (RShiftVS src shift));
  ins_cost(INSN_COST);
  format %{ "sshr    $dst, $src, $shift\t# vector (8H)" %}
  ins_encode %{
    int sh = (int)$shift$$constant & 31;
    if (sh >= 16) sh = 15;
    sh = -sh & 15;
    __ sshr(as_FloatRegister($dst$$reg), __ T8H,
           as_FloatRegister($src$$reg), sh);
  %}
  ins_pipe(vshift128_imm);
%}

// vsrl4S_imm: immediate logical right shift, short lanes (64-bit, 4H).
// Counts >= 16 zero the result; otherwise ushr with negate-and-mask.
instruct vsrl4S_imm(vecD dst, vecD src, immI shift) %{
  predicate(n->as_Vector()->length() == 2 ||
            n->as_Vector()->length() == 4);
  match(Set dst (URShiftVS src shift));
  ins_cost(INSN_COST);
  format %{ "ushr    $dst, $src, $shift\t# vector (4H)" %}
  ins_encode %{
    int sh = (int)$shift$$constant & 31;
    if (sh >= 16) {
      __ eor(as_FloatRegister($dst$$reg), __ T8B,
             as_FloatRegister($src$$reg),
             as_FloatRegister($src$$reg));
    } else {
      __ ushr(as_FloatRegister($dst$$reg), __ T4H,
             as_FloatRegister($src$$reg), -sh & 15);
    }
  %}
  ins_pipe(vshift64_imm);
%}

// vsrl8S_imm: as vsrl4S_imm, eight short lanes (128-bit, 8H).
instruct vsrl8S_imm(vecX dst, vecX src, immI shift) %{
  predicate(n->as_Vector()->length() == 8);
  match(Set dst (URShiftVS src shift));
  ins_cost(INSN_COST);
  format %{ "ushr    $dst, $src, $shift\t# vector (8H)" %}
  ins_encode %{
    int sh = (int)$shift$$constant & 31;
    if (sh >= 16) {
      __ eor(as_FloatRegister($dst$$reg), __ T16B,
             as_FloatRegister($src$$reg),
             as_FloatRegister($src$$reg));
    } else {
      __ ushr(as_FloatRegister($dst$$reg), __ T8H,
             as_FloatRegister($src$$reg), -sh & 15);
    }
  %}
  ins_pipe(vshift128_imm);
%}
15339 
// vsll2I: variable-count left shift, 32-bit (int) lanes (64-bit, 2S).
instruct vsll2I(vecD dst, vecD src, vecD shift) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (LShiftVI src shift));
  ins_cost(INSN_COST);
  format %{ "sshl  $dst,$src,$shift\t# vector (2S)" %}
  ins_encode %{
    __ sshl(as_FloatRegister($dst$$reg), __ T2S,
            as_FloatRegister($src$$reg),
            as_FloatRegister($shift$$reg));
  %}
  ins_pipe(vshift64);
%}

// vsll4I: as vsll2I, four int lanes (128-bit, 4S).
instruct vsll4I(vecX dst, vecX src, vecX shift) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (LShiftVI src shift));
  ins_cost(INSN_COST);
  format %{ "sshl  $dst,$src,$shift\t# vector (4S)" %}
  ins_encode %{
    __ sshl(as_FloatRegister($dst$$reg), __ T4S,
            as_FloatRegister($src$$reg),
            as_FloatRegister($shift$$reg));
  %}
  ins_pipe(vshift128);
%}

// vsra2I: variable-count arithmetic right shift, int lanes (64-bit, 2S),
// via byte-wise negation of the broadcast count then sshl.
instruct vsra2I(vecD dst, vecD src, vecD shift, vecD tmp) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (RShiftVI src shift));
  ins_cost(INSN_COST);
  effect(TEMP tmp);
  format %{ "negr  $tmp,$shift\t"
            "sshl  $dst,$src,$tmp\t# vector (2S)" %}
  ins_encode %{
    __ negr(as_FloatRegister($tmp$$reg), __ T8B,
            as_FloatRegister($shift$$reg));
    __ sshl(as_FloatRegister($dst$$reg), __ T2S,
            as_FloatRegister($src$$reg),
            as_FloatRegister($tmp$$reg));
  %}
  ins_pipe(vshift64);
%}

// vsra4I: as vsra2I, four int lanes (128-bit, 4S).
instruct vsra4I(vecX dst, vecX src, vecX shift, vecX tmp) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (RShiftVI src shift));
  ins_cost(INSN_COST);
  effect(TEMP tmp);
  format %{ "negr  $tmp,$shift\t"
            "sshl  $dst,$src,$tmp\t# vector (4S)" %}
  ins_encode %{
    __ negr(as_FloatRegister($tmp$$reg), __ T16B,
            as_FloatRegister($shift$$reg));
    __ sshl(as_FloatRegister($dst$$reg), __ T4S,
            as_FloatRegister($src$$reg),
            as_FloatRegister($tmp$$reg));
  %}
  ins_pipe(vshift128);
%}

// vsrl2I: variable-count logical right shift, int lanes (64-bit, 2S),
// negate-then-ushl.
instruct vsrl2I(vecD dst, vecD src, vecD shift, vecD tmp) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (URShiftVI src shift));
  ins_cost(INSN_COST);
  effect(TEMP tmp);
  format %{ "negr  $tmp,$shift\t"
            "ushl  $dst,$src,$tmp\t# vector (2S)" %}
  ins_encode %{
    __ negr(as_FloatRegister($tmp$$reg), __ T8B,
            as_FloatRegister($shift$$reg));
    __ ushl(as_FloatRegister($dst$$reg), __ T2S,
            as_FloatRegister($src$$reg),
            as_FloatRegister($tmp$$reg));
  %}
  ins_pipe(vshift64);
%}

// vsrl4I: as vsrl2I, four int lanes (128-bit, 4S).
instruct vsrl4I(vecX dst, vecX src, vecX shift, vecX tmp) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (URShiftVI src shift));
  ins_cost(INSN_COST);
  effect(TEMP tmp);
  format %{ "negr  $tmp,$shift\t"
            "ushl  $dst,$src,$tmp\t# vector (4S)" %}
  ins_encode %{
    __ negr(as_FloatRegister($tmp$$reg), __ T16B,
            as_FloatRegister($shift$$reg));
    __ ushl(as_FloatRegister($dst$$reg), __ T4S,
            as_FloatRegister($src$$reg),
            as_FloatRegister($tmp$$reg));
  %}
  ins_pipe(vshift128);
%}
15433 
// vsll2I_imm: immediate left shift, int lanes (64-bit, 2S). The 5-bit mask
// matches Java int shift semantics, and 31 never reaches the 32-bit lane
// width, so no zeroing special case is needed here.
instruct vsll2I_imm(vecD dst, vecD src, immI shift) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (LShiftVI src shift));
  ins_cost(INSN_COST);
  format %{ "shl    $dst, $src, $shift\t# vector (2S)" %}
  ins_encode %{
    __ shl(as_FloatRegister($dst$$reg), __ T2S,
           as_FloatRegister($src$$reg),
           (int)$shift$$constant & 31);
  %}
  ins_pipe(vshift64_imm);
%}

// vsll4I_imm: as vsll2I_imm, four int lanes (128-bit, 4S).
instruct vsll4I_imm(vecX dst, vecX src, immI shift) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (LShiftVI src shift));
  ins_cost(INSN_COST);
  format %{ "shl    $dst, $src, $shift\t# vector (4S)" %}
  ins_encode %{
    __ shl(as_FloatRegister($dst$$reg), __ T4S,
           as_FloatRegister($src$$reg),
           (int)$shift$$constant & 31);
  %}
  ins_pipe(vshift128_imm);
%}

// vsra2I_imm: immediate arithmetic right shift, int lanes (64-bit, 2S).
// The negate-and-mask of the count matches the convention used by all the
// immediate sshr/ushr rules in this file.
instruct vsra2I_imm(vecD dst, vecD src, immI shift) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (RShiftVI src shift));
  ins_cost(INSN_COST);
  format %{ "sshr    $dst, $src, $shift\t# vector (2S)" %}
  ins_encode %{
    __ sshr(as_FloatRegister($dst$$reg), __ T2S,
            as_FloatRegister($src$$reg),
            -(int)$shift$$constant & 31);
  %}
  ins_pipe(vshift64_imm);
%}

// vsra4I_imm: as vsra2I_imm, four int lanes (128-bit, 4S).
instruct vsra4I_imm(vecX dst, vecX src, immI shift) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (RShiftVI src shift));
  ins_cost(INSN_COST);
  format %{ "sshr    $dst, $src, $shift\t# vector (4S)" %}
  ins_encode %{
    __ sshr(as_FloatRegister($dst$$reg), __ T4S,
            as_FloatRegister($src$$reg),
            -(int)$shift$$constant & 31);
  %}
  ins_pipe(vshift128_imm);
%}

// vsrl2I_imm: immediate logical right shift, int lanes (64-bit, 2S).
instruct vsrl2I_imm(vecD dst, vecD src, immI shift) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (URShiftVI src shift));
  ins_cost(INSN_COST);
  format %{ "ushr    $dst, $src, $shift\t# vector (2S)" %}
  ins_encode %{
    __ ushr(as_FloatRegister($dst$$reg), __ T2S,
            as_FloatRegister($src$$reg),
            -(int)$shift$$constant & 31);
  %}
  ins_pipe(vshift64_imm);
%}

// vsrl4I_imm: as vsrl2I_imm, four int lanes (128-bit, 4S).
instruct vsrl4I_imm(vecX dst, vecX src, immI shift) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (URShiftVI src shift));
  ins_cost(INSN_COST);
  format %{ "ushr    $dst, $src, $shift\t# vector (4S)" %}
  ins_encode %{
    __ ushr(as_FloatRegister($dst$$reg), __ T4S,
            as_FloatRegister($src$$reg),
            -(int)$shift$$constant & 31);
  %}
  ins_pipe(vshift128_imm);
%}
15511 
// vsll2L: variable-count left shift, 64-bit (long) lanes (128-bit, 2D).
instruct vsll2L(vecX dst, vecX src, vecX shift) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (LShiftVL src shift));
  ins_cost(INSN_COST);
  format %{ "sshl  $dst,$src,$shift\t# vector (2D)" %}
  ins_encode %{
    __ sshl(as_FloatRegister($dst$$reg), __ T2D,
            as_FloatRegister($src$$reg),
            as_FloatRegister($shift$$reg));
  %}
  ins_pipe(vshift128);
%}

// vsra2L: variable-count arithmetic right shift, long lanes (128-bit, 2D),
// via byte-wise negation of the broadcast count then sshl.
instruct vsra2L(vecX dst, vecX src, vecX shift, vecX tmp) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (RShiftVL src shift));
  ins_cost(INSN_COST);
  effect(TEMP tmp);
  format %{ "negr  $tmp,$shift\t"
            "sshl  $dst,$src,$tmp\t# vector (2D)" %}
  ins_encode %{
    __ negr(as_FloatRegister($tmp$$reg), __ T16B,
            as_FloatRegister($shift$$reg));
    __ sshl(as_FloatRegister($dst$$reg), __ T2D,
            as_FloatRegister($src$$reg),
            as_FloatRegister($tmp$$reg));
  %}
  ins_pipe(vshift128);
%}

// vsrl2L: variable-count logical right shift, long lanes (128-bit, 2D),
// negate-then-ushl.
instruct vsrl2L(vecX dst, vecX src, vecX shift, vecX tmp) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (URShiftVL src shift));
  ins_cost(INSN_COST);
  effect(TEMP tmp);
  format %{ "negr  $tmp,$shift\t"
            "ushl  $dst,$src,$tmp\t# vector (2D)" %}
  ins_encode %{
    __ negr(as_FloatRegister($tmp$$reg), __ T16B,
            as_FloatRegister($shift$$reg));
    __ ushl(as_FloatRegister($dst$$reg), __ T2D,
            as_FloatRegister($src$$reg),
            as_FloatRegister($tmp$$reg));
  %}
  ins_pipe(vshift128);
%}
15558 
// vsll2L_imm: immediate left shift, long lanes (128-bit, 2D). The 6-bit
// mask matches Java long shift semantics; 63 never reaches the 64-bit lane
// width, so no zeroing case is needed.
instruct vsll2L_imm(vecX dst, vecX src, immI shift) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (LShiftVL src shift));
  ins_cost(INSN_COST);
  format %{ "shl    $dst, $src, $shift\t# vector (2D)" %}
  ins_encode %{
    __ shl(as_FloatRegister($dst$$reg), __ T2D,
           as_FloatRegister($src$$reg),
           (int)$shift$$constant & 63);
  %}
  ins_pipe(vshift128_imm);
%}

// vsra2L_imm: immediate arithmetic right shift, long lanes (128-bit, 2D),
// with the file-wide negate-and-mask count convention.
instruct vsra2L_imm(vecX dst, vecX src, immI shift) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (RShiftVL src shift));
  ins_cost(INSN_COST);
  format %{ "sshr    $dst, $src, $shift\t# vector (2D)" %}
  ins_encode %{
    __ sshr(as_FloatRegister($dst$$reg), __ T2D,
            as_FloatRegister($src$$reg),
            -(int)$shift$$constant & 63);
  %}
  ins_pipe(vshift128_imm);
%}

// vsrl2L_imm: immediate logical right shift, long lanes (128-bit, 2D).
instruct vsrl2L_imm(vecX dst, vecX src, immI shift) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (URShiftVL src shift));
  ins_cost(INSN_COST);
  format %{ "ushr    $dst, $src, $shift\t# vector (2D)" %}
  ins_encode %{
    __ ushr(as_FloatRegister($dst$$reg), __ T2D,
            as_FloatRegister($src$$reg),
            -(int)$shift$$constant & 63);
  %}
  ins_pipe(vshift128_imm);
%}
15597 
15598 //----------PEEPHOLE RULES-----------------------------------------------------
15599 // These must follow all instruction definitions as they use the names
15600 // defined in the instructions definitions.
15601 //
15602 // peepmatch ( root_instr_name [preceding_instruction]* );
15603 //
15604 // peepconstraint %{
15605 // (instruction_number.operand_name relational_op instruction_number.operand_name
15606 //  [, ...] );
15607 // // instruction numbers are zero-based using left to right order in peepmatch
15608 //
15609 // peepreplace ( instr_name  ( [instruction_number.operand_name]* ) );
15610 // // provide an instruction_number.operand_name for each operand that appears
15611 // // in the replacement instruction's match rule
15612 //
15613 // ---------VM FLAGS---------------------------------------------------------
15614 //
15615 // All peephole optimizations can be turned off using -XX:-OptoPeephole
15616 //
15617 // Each peephole rule is given an identifying number starting with zero and
15618 // increasing by one in the order seen by the parser.  An individual peephole
15619 // can be enabled, and all others disabled, by using -XX:OptoPeepholeAt=#
15620 // on the command-line.
15621 //
15622 // ---------CURRENT LIMITATIONS----------------------------------------------
15623 //
15624 // Only match adjacent instructions in same basic block
15625 // Only equality constraints
15626 // Only constraints between operands, not (0.dest_reg == RAX_enc)
15627 // Only one replacement instruction
15628 //
15629 // ---------EXAMPLE----------------------------------------------------------
15630 //
15631 // // pertinent parts of existing instructions in architecture description
15632 // instruct movI(iRegINoSp dst, iRegI src)
15633 // %{
15634 //   match(Set dst (CopyI src));
15635 // %}
15636 //
15637 // instruct incI_iReg(iRegINoSp dst, immI1 src, rFlagsReg cr)
15638 // %{
15639 //   match(Set dst (AddI dst src));
15640 //   effect(KILL cr);
15641 // %}
15642 //
15643 // // Change (inc mov) to lea
15644 // peephole %{
//   // increment preceded by register-register move
15646 //   peepmatch ( incI_iReg movI );
15647 //   // require that the destination register of the increment
15648 //   // match the destination register of the move
15649 //   peepconstraint ( 0.dst == 1.dst );
15650 //   // construct a replacement instruction that sets
15651 //   // the destination to ( move's source register + one )
15652 //   peepreplace ( leaI_iReg_immI( 0.dst 1.src 0.src ) );
15653 // %}
15654 //
15655 
15656 // Implementation no longer uses movX instructions since
15657 // machine-independent system no longer uses CopyX nodes.
15658 //
15659 // peephole
15660 // %{
15661 //   peepmatch (incI_iReg movI);
15662 //   peepconstraint (0.dst == 1.dst);
15663 //   peepreplace (leaI_iReg_immI(0.dst 1.src 0.src));
15664 // %}
15665 
15666 // peephole
15667 // %{
15668 //   peepmatch (decI_iReg movI);
15669 //   peepconstraint (0.dst == 1.dst);
15670 //   peepreplace (leaI_iReg_immI(0.dst 1.src 0.src));
15671 // %}
15672 
15673 // peephole
15674 // %{
15675 //   peepmatch (addI_iReg_imm movI);
15676 //   peepconstraint (0.dst == 1.dst);
15677 //   peepreplace (leaI_iReg_immI(0.dst 1.src 0.src));
15678 // %}
15679 
15680 // peephole
15681 // %{
15682 //   peepmatch (incL_iReg movL);
15683 //   peepconstraint (0.dst == 1.dst);
15684 //   peepreplace (leaL_iReg_immL(0.dst 1.src 0.src));
15685 // %}
15686 
15687 // peephole
15688 // %{
15689 //   peepmatch (decL_iReg movL);
15690 //   peepconstraint (0.dst == 1.dst);
15691 //   peepreplace (leaL_iReg_immL(0.dst 1.src 0.src));
15692 // %}
15693 
15694 // peephole
15695 // %{
15696 //   peepmatch (addL_iReg_imm movL);
15697 //   peepconstraint (0.dst == 1.dst);
15698 //   peepreplace (leaL_iReg_immL(0.dst 1.src 0.src));
15699 // %}
15700 
15701 // peephole
15702 // %{
15703 //   peepmatch (addP_iReg_imm movP);
15704 //   peepconstraint (0.dst == 1.dst);
15705 //   peepreplace (leaP_iReg_imm(0.dst 1.src 0.src));
15706 // %}
15707 
15708 // // Change load of spilled value to only a spill
15709 // instruct storeI(memory mem, iRegI src)
15710 // %{
15711 //   match(Set mem (StoreI mem src));
15712 // %}
15713 //
15714 // instruct loadI(iRegINoSp dst, memory mem)
15715 // %{
15716 //   match(Set dst (LoadI mem));
15717 // %}
15718 //
15719 
15720 //----------SMARTSPILL RULES---------------------------------------------------
15721 // These must follow all instruction definitions as they use the names
// defined in the instruction definitions.
15723 
15724 // Local Variables:
15725 // mode: c++
15726 // End: