1 //
   2 // Copyright (c) 2003, 2019, Oracle and/or its affiliates. All rights reserved.
   3 // Copyright (c) 2014, 2019, Red Hat, Inc. All rights reserved.
   4 // DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   5 //
   6 // This code is free software; you can redistribute it and/or modify it
   7 // under the terms of the GNU General Public License version 2 only, as
   8 // published by the Free Software Foundation.
   9 //
  10 // This code is distributed in the hope that it will be useful, but WITHOUT
  11 // ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  12 // FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  13 // version 2 for more details (a copy is included in the LICENSE file that
  14 // accompanied this code).
  15 //
  16 // You should have received a copy of the GNU General Public License version
  17 // 2 along with this work; if not, write to the Free Software Foundation,
  18 // Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  19 //
  20 // Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  21 // or visit www.oracle.com if you need additional information or have any
  22 // questions.
  23 //
  24 //
  25 
  26 // AArch64 Architecture Description File
  27 
  28 //----------REGISTER DEFINITION BLOCK------------------------------------------
  29 // This information is used by the matcher and the register allocator to
  30 // describe individual registers and classes of registers within the target
// architecture.
  32 
  33 register %{
  34 //----------Architecture Description Register Definitions----------------------
  35 // General Registers
  36 // "reg_def"  name ( register save type, C convention save type,
  37 //                   ideal register type, encoding );
  38 // Register Save Types:
  39 //
  40 // NS  = No-Save:       The register allocator assumes that these registers
  41 //                      can be used without saving upon entry to the method, &
  42 //                      that they do not need to be saved at call sites.
  43 //
  44 // SOC = Save-On-Call:  The register allocator assumes that these registers
  45 //                      can be used without saving upon entry to the method,
  46 //                      but that they must be saved at call sites.
  47 //
  48 // SOE = Save-On-Entry: The register allocator assumes that these registers
  49 //                      must be saved before using them upon entry to the
  50 //                      method, but they do not need to be saved at call
  51 //                      sites.
  52 //
  53 // AS  = Always-Save:   The register allocator assumes that these registers
  54 //                      must be saved before using them upon entry to the
  55 //                      method, & that they must be saved at call sites.
  56 //
  57 // Ideal Register Type is used to determine how to save & restore a
  58 // register.  Op_RegI will get spilled with LoadI/StoreI, Op_RegP will get
  59 // spilled with LoadP/StoreP.  If the register supports both, use Op_RegI.
  60 //
  61 // The encoding number is the actual bit-pattern placed into the opcodes.
  62 
  63 // We must define the 64 bit int registers in two 32 bit halves, the
  64 // real lower register and a virtual upper half register. upper halves
  65 // are used by the register allocator but are not actually supplied as
  66 // operands to memory ops.
  67 //
  68 // follow the C1 compiler in making registers
  69 //
//   r0-r7,r10-r26 volatile (caller save)
//   r27-r31 system (no save, no allocate)
//   r8-r9 invisible to the allocator (so we can use them as scratch regs)
//
// As regards Java usage, we don't use any callee save registers
// because this makes it difficult to de-optimise a frame (see comment
// in x86 implementation of Deoptimization::unwind_callee_save_values)
  77 //
  78 
  79 // General Registers
  80 
// r0-r7: Java and C argument registers (caller save)
reg_def R0      ( SOC, SOC, Op_RegI,  0, r0->as_VMReg()         );
reg_def R0_H    ( SOC, SOC, Op_RegI,  0, r0->as_VMReg()->next() );
reg_def R1      ( SOC, SOC, Op_RegI,  1, r1->as_VMReg()         );
reg_def R1_H    ( SOC, SOC, Op_RegI,  1, r1->as_VMReg()->next() );
reg_def R2      ( SOC, SOC, Op_RegI,  2, r2->as_VMReg()         );
reg_def R2_H    ( SOC, SOC, Op_RegI,  2, r2->as_VMReg()->next() );
reg_def R3      ( SOC, SOC, Op_RegI,  3, r3->as_VMReg()         );
reg_def R3_H    ( SOC, SOC, Op_RegI,  3, r3->as_VMReg()->next() );
reg_def R4      ( SOC, SOC, Op_RegI,  4, r4->as_VMReg()         );
reg_def R4_H    ( SOC, SOC, Op_RegI,  4, r4->as_VMReg()->next() );
reg_def R5      ( SOC, SOC, Op_RegI,  5, r5->as_VMReg()         );
reg_def R5_H    ( SOC, SOC, Op_RegI,  5, r5->as_VMReg()->next() );
reg_def R6      ( SOC, SOC, Op_RegI,  6, r6->as_VMReg()         );
reg_def R6_H    ( SOC, SOC, Op_RegI,  6, r6->as_VMReg()->next() );
reg_def R7      ( SOC, SOC, Op_RegI,  7, r7->as_VMReg()         );
reg_def R7_H    ( SOC, SOC, Op_RegI,  7, r7->as_VMReg()->next() );
// r10-r18: temporaries, caller save for both Java and C.
// (r8 and r9 are deliberately not defined here -- they are kept
// invisible to the allocator so they can be used as scratch registers,
// see the note above)
reg_def R10     ( SOC, SOC, Op_RegI, 10, r10->as_VMReg()        );
reg_def R10_H   ( SOC, SOC, Op_RegI, 10, r10->as_VMReg()->next());
reg_def R11     ( SOC, SOC, Op_RegI, 11, r11->as_VMReg()        );
reg_def R11_H   ( SOC, SOC, Op_RegI, 11, r11->as_VMReg()->next());
reg_def R12     ( SOC, SOC, Op_RegI, 12, r12->as_VMReg()        );
reg_def R12_H   ( SOC, SOC, Op_RegI, 12, r12->as_VMReg()->next());
reg_def R13     ( SOC, SOC, Op_RegI, 13, r13->as_VMReg()        );
reg_def R13_H   ( SOC, SOC, Op_RegI, 13, r13->as_VMReg()->next());
reg_def R14     ( SOC, SOC, Op_RegI, 14, r14->as_VMReg()        );
reg_def R14_H   ( SOC, SOC, Op_RegI, 14, r14->as_VMReg()->next());
reg_def R15     ( SOC, SOC, Op_RegI, 15, r15->as_VMReg()        );
reg_def R15_H   ( SOC, SOC, Op_RegI, 15, r15->as_VMReg()->next());
reg_def R16     ( SOC, SOC, Op_RegI, 16, r16->as_VMReg()        );
reg_def R16_H   ( SOC, SOC, Op_RegI, 16, r16->as_VMReg()->next());
reg_def R17     ( SOC, SOC, Op_RegI, 17, r17->as_VMReg()        );
reg_def R17_H   ( SOC, SOC, Op_RegI, 17, r17->as_VMReg()->next());
reg_def R18     ( SOC, SOC, Op_RegI, 18, r18->as_VMReg()        );
reg_def R18_H   ( SOC, SOC, Op_RegI, 18, r18->as_VMReg()->next());
// r19-r26: caller save (SOC) for Java use, callee save (SOE) under the
// C calling convention
reg_def R19     ( SOC, SOE, Op_RegI, 19, r19->as_VMReg()        );
reg_def R19_H   ( SOC, SOE, Op_RegI, 19, r19->as_VMReg()->next());
reg_def R20     ( SOC, SOE, Op_RegI, 20, r20->as_VMReg()        ); // caller esp
reg_def R20_H   ( SOC, SOE, Op_RegI, 20, r20->as_VMReg()->next());
reg_def R21     ( SOC, SOE, Op_RegI, 21, r21->as_VMReg()        );
reg_def R21_H   ( SOC, SOE, Op_RegI, 21, r21->as_VMReg()->next());
reg_def R22     ( SOC, SOE, Op_RegI, 22, r22->as_VMReg()        );
reg_def R22_H   ( SOC, SOE, Op_RegI, 22, r22->as_VMReg()->next());
reg_def R23     ( SOC, SOE, Op_RegI, 23, r23->as_VMReg()        );
reg_def R23_H   ( SOC, SOE, Op_RegI, 23, r23->as_VMReg()->next());
reg_def R24     ( SOC, SOE, Op_RegI, 24, r24->as_VMReg()        );
reg_def R24_H   ( SOC, SOE, Op_RegI, 24, r24->as_VMReg()->next());
reg_def R25     ( SOC, SOE, Op_RegI, 25, r25->as_VMReg()        );
reg_def R25_H   ( SOC, SOE, Op_RegI, 25, r25->as_VMReg()->next());
reg_def R26     ( SOC, SOE, Op_RegI, 26, r26->as_VMReg()        );
reg_def R26_H   ( SOC, SOE, Op_RegI, 26, r26->as_VMReg()->next());
// r27-r31: reserved/system registers, listed as non-allocatable in
// chunk0 below
reg_def R27     (  NS, SOE, Op_RegI, 27, r27->as_VMReg()        ); // heapbase
reg_def R27_H   (  NS, SOE, Op_RegI, 27, r27->as_VMReg()->next());
reg_def R28     (  NS, SOE, Op_RegI, 28, r28->as_VMReg()        ); // thread
reg_def R28_H   (  NS, SOE, Op_RegI, 28, r28->as_VMReg()->next());
reg_def R29     (  NS,  NS, Op_RegI, 29, r29->as_VMReg()        ); // fp
reg_def R29_H   (  NS,  NS, Op_RegI, 29, r29->as_VMReg()->next());
reg_def R30     (  NS,  NS, Op_RegI, 30, r30->as_VMReg()        ); // lr
reg_def R30_H   (  NS,  NS, Op_RegI, 30, r30->as_VMReg()->next());
reg_def R31     (  NS,  NS, Op_RegI, 31, r31_sp->as_VMReg()     ); // sp
reg_def R31_H   (  NS,  NS, Op_RegI, 31, r31_sp->as_VMReg()->next());
 141 
 142 // ----------------------------
 143 // Float/Double Registers
 144 // ----------------------------
 145 
 146 // Double Registers
 147 
 148 // The rules of ADL require that double registers be defined in pairs.
 149 // Each pair must be two 32-bit values, but not necessarily a pair of
 150 // single float registers. In each pair, ADLC-assigned register numbers
 151 // must be adjacent, with the lower number even. Finally, when the
 152 // CPU stores such a register pair to memory, the word associated with
 153 // the lower ADLC-assigned number must be stored to the lower address.
 154 
// AArch64 has 32 floating-point registers, each 128 bits wide.  Each
// can hold a vector of floating-point values: four 32-bit floats or
// two 64-bit doubles.  We currently only use the first float or
// double element of the vector.

// For Java use, float registers v0-v15 are always save-on-call,
// whereas the platform ABI treats v8-v15 as callee save.  Float
// registers v16-v31 are SOC as per the platform spec.
 163 
  // Each SIMD/FP register is described by four 32-bit slices: Vn is
  // the low word and Vn_H, Vn_J, Vn_K are ->next(), ->next(2) and
  // ->next(3), so the allocator can track a full 128-bit register.

  // v0-v7: argument registers (caller save)
  reg_def V0   ( SOC, SOC, Op_RegF,  0, v0->as_VMReg()          );
  reg_def V0_H ( SOC, SOC, Op_RegF,  0, v0->as_VMReg()->next()  );
  reg_def V0_J ( SOC, SOC, Op_RegF,  0, v0->as_VMReg()->next(2) );
  reg_def V0_K ( SOC, SOC, Op_RegF,  0, v0->as_VMReg()->next(3) );

  reg_def V1   ( SOC, SOC, Op_RegF,  1, v1->as_VMReg()          );
  reg_def V1_H ( SOC, SOC, Op_RegF,  1, v1->as_VMReg()->next()  );
  reg_def V1_J ( SOC, SOC, Op_RegF,  1, v1->as_VMReg()->next(2) );
  reg_def V1_K ( SOC, SOC, Op_RegF,  1, v1->as_VMReg()->next(3) );

  reg_def V2   ( SOC, SOC, Op_RegF,  2, v2->as_VMReg()          );
  reg_def V2_H ( SOC, SOC, Op_RegF,  2, v2->as_VMReg()->next()  );
  reg_def V2_J ( SOC, SOC, Op_RegF,  2, v2->as_VMReg()->next(2) );
  reg_def V2_K ( SOC, SOC, Op_RegF,  2, v2->as_VMReg()->next(3) );

  reg_def V3   ( SOC, SOC, Op_RegF,  3, v3->as_VMReg()          );
  reg_def V3_H ( SOC, SOC, Op_RegF,  3, v3->as_VMReg()->next()  );
  reg_def V3_J ( SOC, SOC, Op_RegF,  3, v3->as_VMReg()->next(2) );
  reg_def V3_K ( SOC, SOC, Op_RegF,  3, v3->as_VMReg()->next(3) );

  reg_def V4   ( SOC, SOC, Op_RegF,  4, v4->as_VMReg()          );
  reg_def V4_H ( SOC, SOC, Op_RegF,  4, v4->as_VMReg()->next()  );
  reg_def V4_J ( SOC, SOC, Op_RegF,  4, v4->as_VMReg()->next(2) );
  reg_def V4_K ( SOC, SOC, Op_RegF,  4, v4->as_VMReg()->next(3) );

  reg_def V5   ( SOC, SOC, Op_RegF,  5, v5->as_VMReg()          );
  reg_def V5_H ( SOC, SOC, Op_RegF,  5, v5->as_VMReg()->next()  );
  reg_def V5_J ( SOC, SOC, Op_RegF,  5, v5->as_VMReg()->next(2) );
  reg_def V5_K ( SOC, SOC, Op_RegF,  5, v5->as_VMReg()->next(3) );

  reg_def V6   ( SOC, SOC, Op_RegF,  6, v6->as_VMReg()          );
  reg_def V6_H ( SOC, SOC, Op_RegF,  6, v6->as_VMReg()->next()  );
  reg_def V6_J ( SOC, SOC, Op_RegF,  6, v6->as_VMReg()->next(2) );
  reg_def V6_K ( SOC, SOC, Op_RegF,  6, v6->as_VMReg()->next(3) );

  reg_def V7   ( SOC, SOC, Op_RegF,  7, v7->as_VMReg()          );
  reg_def V7_H ( SOC, SOC, Op_RegF,  7, v7->as_VMReg()->next()  );
  reg_def V7_J ( SOC, SOC, Op_RegF,  7, v7->as_VMReg()->next(2) );
  reg_def V7_K ( SOC, SOC, Op_RegF,  7, v7->as_VMReg()->next(3) );

  // v8-v15: callee save under the platform ABI, but treated as SOC for
  // Java use (see the note above)
  reg_def V8   ( SOC, SOC, Op_RegF,  8, v8->as_VMReg()          );
  reg_def V8_H ( SOC, SOC, Op_RegF,  8, v8->as_VMReg()->next()  );
  reg_def V8_J ( SOC, SOC, Op_RegF,  8, v8->as_VMReg()->next(2) );
  reg_def V8_K ( SOC, SOC, Op_RegF,  8, v8->as_VMReg()->next(3) );

  reg_def V9   ( SOC, SOC, Op_RegF,  9, v9->as_VMReg()          );
  reg_def V9_H ( SOC, SOC, Op_RegF,  9, v9->as_VMReg()->next()  );
  reg_def V9_J ( SOC, SOC, Op_RegF,  9, v9->as_VMReg()->next(2) );
  reg_def V9_K ( SOC, SOC, Op_RegF,  9, v9->as_VMReg()->next(3) );

  reg_def V10  ( SOC, SOC, Op_RegF, 10, v10->as_VMReg()         );
  reg_def V10_H( SOC, SOC, Op_RegF, 10, v10->as_VMReg()->next() );
  reg_def V10_J( SOC, SOC, Op_RegF, 10, v10->as_VMReg()->next(2));
  reg_def V10_K( SOC, SOC, Op_RegF, 10, v10->as_VMReg()->next(3));

  reg_def V11  ( SOC, SOC, Op_RegF, 11, v11->as_VMReg()         );
  reg_def V11_H( SOC, SOC, Op_RegF, 11, v11->as_VMReg()->next() );
  reg_def V11_J( SOC, SOC, Op_RegF, 11, v11->as_VMReg()->next(2));
  reg_def V11_K( SOC, SOC, Op_RegF, 11, v11->as_VMReg()->next(3));

  reg_def V12  ( SOC, SOC, Op_RegF, 12, v12->as_VMReg()         );
  reg_def V12_H( SOC, SOC, Op_RegF, 12, v12->as_VMReg()->next() );
  reg_def V12_J( SOC, SOC, Op_RegF, 12, v12->as_VMReg()->next(2));
  reg_def V12_K( SOC, SOC, Op_RegF, 12, v12->as_VMReg()->next(3));

  reg_def V13  ( SOC, SOC, Op_RegF, 13, v13->as_VMReg()         );
  reg_def V13_H( SOC, SOC, Op_RegF, 13, v13->as_VMReg()->next() );
  reg_def V13_J( SOC, SOC, Op_RegF, 13, v13->as_VMReg()->next(2));
  reg_def V13_K( SOC, SOC, Op_RegF, 13, v13->as_VMReg()->next(3));

  reg_def V14  ( SOC, SOC, Op_RegF, 14, v14->as_VMReg()         );
  reg_def V14_H( SOC, SOC, Op_RegF, 14, v14->as_VMReg()->next() );
  reg_def V14_J( SOC, SOC, Op_RegF, 14, v14->as_VMReg()->next(2));
  reg_def V14_K( SOC, SOC, Op_RegF, 14, v14->as_VMReg()->next(3));

  reg_def V15  ( SOC, SOC, Op_RegF, 15, v15->as_VMReg()         );
  reg_def V15_H( SOC, SOC, Op_RegF, 15, v15->as_VMReg()->next() );
  reg_def V15_J( SOC, SOC, Op_RegF, 15, v15->as_VMReg()->next(2));
  reg_def V15_K( SOC, SOC, Op_RegF, 15, v15->as_VMReg()->next(3));

  // v16-v31: caller save (SOC) per the platform spec
  reg_def V16  ( SOC, SOC, Op_RegF, 16, v16->as_VMReg()         );
  reg_def V16_H( SOC, SOC, Op_RegF, 16, v16->as_VMReg()->next() );
  reg_def V16_J( SOC, SOC, Op_RegF, 16, v16->as_VMReg()->next(2));
  reg_def V16_K( SOC, SOC, Op_RegF, 16, v16->as_VMReg()->next(3));

  reg_def V17  ( SOC, SOC, Op_RegF, 17, v17->as_VMReg()         );
  reg_def V17_H( SOC, SOC, Op_RegF, 17, v17->as_VMReg()->next() );
  reg_def V17_J( SOC, SOC, Op_RegF, 17, v17->as_VMReg()->next(2));
  reg_def V17_K( SOC, SOC, Op_RegF, 17, v17->as_VMReg()->next(3));

  reg_def V18  ( SOC, SOC, Op_RegF, 18, v18->as_VMReg()         );
  reg_def V18_H( SOC, SOC, Op_RegF, 18, v18->as_VMReg()->next() );
  reg_def V18_J( SOC, SOC, Op_RegF, 18, v18->as_VMReg()->next(2));
  reg_def V18_K( SOC, SOC, Op_RegF, 18, v18->as_VMReg()->next(3));

  reg_def V19  ( SOC, SOC, Op_RegF, 19, v19->as_VMReg()         );
  reg_def V19_H( SOC, SOC, Op_RegF, 19, v19->as_VMReg()->next() );
  reg_def V19_J( SOC, SOC, Op_RegF, 19, v19->as_VMReg()->next(2));
  reg_def V19_K( SOC, SOC, Op_RegF, 19, v19->as_VMReg()->next(3));

  reg_def V20  ( SOC, SOC, Op_RegF, 20, v20->as_VMReg()         );
  reg_def V20_H( SOC, SOC, Op_RegF, 20, v20->as_VMReg()->next() );
  reg_def V20_J( SOC, SOC, Op_RegF, 20, v20->as_VMReg()->next(2));
  reg_def V20_K( SOC, SOC, Op_RegF, 20, v20->as_VMReg()->next(3));

  reg_def V21  ( SOC, SOC, Op_RegF, 21, v21->as_VMReg()         );
  reg_def V21_H( SOC, SOC, Op_RegF, 21, v21->as_VMReg()->next() );
  reg_def V21_J( SOC, SOC, Op_RegF, 21, v21->as_VMReg()->next(2));
  reg_def V21_K( SOC, SOC, Op_RegF, 21, v21->as_VMReg()->next(3));

  reg_def V22  ( SOC, SOC, Op_RegF, 22, v22->as_VMReg()         );
  reg_def V22_H( SOC, SOC, Op_RegF, 22, v22->as_VMReg()->next() );
  reg_def V22_J( SOC, SOC, Op_RegF, 22, v22->as_VMReg()->next(2));
  reg_def V22_K( SOC, SOC, Op_RegF, 22, v22->as_VMReg()->next(3));

  reg_def V23  ( SOC, SOC, Op_RegF, 23, v23->as_VMReg()         );
  reg_def V23_H( SOC, SOC, Op_RegF, 23, v23->as_VMReg()->next() );
  reg_def V23_J( SOC, SOC, Op_RegF, 23, v23->as_VMReg()->next(2));
  reg_def V23_K( SOC, SOC, Op_RegF, 23, v23->as_VMReg()->next(3));

  reg_def V24  ( SOC, SOC, Op_RegF, 24, v24->as_VMReg()         );
  reg_def V24_H( SOC, SOC, Op_RegF, 24, v24->as_VMReg()->next() );
  reg_def V24_J( SOC, SOC, Op_RegF, 24, v24->as_VMReg()->next(2));
  reg_def V24_K( SOC, SOC, Op_RegF, 24, v24->as_VMReg()->next(3));

  reg_def V25  ( SOC, SOC, Op_RegF, 25, v25->as_VMReg()         );
  reg_def V25_H( SOC, SOC, Op_RegF, 25, v25->as_VMReg()->next() );
  reg_def V25_J( SOC, SOC, Op_RegF, 25, v25->as_VMReg()->next(2));
  reg_def V25_K( SOC, SOC, Op_RegF, 25, v25->as_VMReg()->next(3));

  reg_def V26  ( SOC, SOC, Op_RegF, 26, v26->as_VMReg()         );
  reg_def V26_H( SOC, SOC, Op_RegF, 26, v26->as_VMReg()->next() );
  reg_def V26_J( SOC, SOC, Op_RegF, 26, v26->as_VMReg()->next(2));
  reg_def V26_K( SOC, SOC, Op_RegF, 26, v26->as_VMReg()->next(3));

  reg_def V27  ( SOC, SOC, Op_RegF, 27, v27->as_VMReg()         );
  reg_def V27_H( SOC, SOC, Op_RegF, 27, v27->as_VMReg()->next() );
  reg_def V27_J( SOC, SOC, Op_RegF, 27, v27->as_VMReg()->next(2));
  reg_def V27_K( SOC, SOC, Op_RegF, 27, v27->as_VMReg()->next(3));

  reg_def V28  ( SOC, SOC, Op_RegF, 28, v28->as_VMReg()         );
  reg_def V28_H( SOC, SOC, Op_RegF, 28, v28->as_VMReg()->next() );
  reg_def V28_J( SOC, SOC, Op_RegF, 28, v28->as_VMReg()->next(2));
  reg_def V28_K( SOC, SOC, Op_RegF, 28, v28->as_VMReg()->next(3));

  reg_def V29  ( SOC, SOC, Op_RegF, 29, v29->as_VMReg()         );
  reg_def V29_H( SOC, SOC, Op_RegF, 29, v29->as_VMReg()->next() );
  reg_def V29_J( SOC, SOC, Op_RegF, 29, v29->as_VMReg()->next(2));
  reg_def V29_K( SOC, SOC, Op_RegF, 29, v29->as_VMReg()->next(3));

  reg_def V30  ( SOC, SOC, Op_RegF, 30, v30->as_VMReg()         );
  reg_def V30_H( SOC, SOC, Op_RegF, 30, v30->as_VMReg()->next() );
  reg_def V30_J( SOC, SOC, Op_RegF, 30, v30->as_VMReg()->next(2));
  reg_def V30_K( SOC, SOC, Op_RegF, 30, v30->as_VMReg()->next(3));

  reg_def V31  ( SOC, SOC, Op_RegF, 31, v31->as_VMReg()         );
  reg_def V31_H( SOC, SOC, Op_RegF, 31, v31->as_VMReg()->next() );
  reg_def V31_J( SOC, SOC, Op_RegF, 31, v31->as_VMReg()->next(2));
  reg_def V31_K( SOC, SOC, Op_RegF, 31, v31->as_VMReg()->next(3));
 323 
 324 // ----------------------------
 325 // Special Registers
 326 // ----------------------------
 327 
// the AArch64 CPSR status flag register is not directly accessible as
// an instruction operand. the FPSR status flag register is a system
// register which can be written/read using MSR/MRS but again does not
// appear as an operand (a code identifying the FPSR occurs as an
// immediate value in the instruction).
 333 
// Pseudo register modelling the condition flags.  It has no backing
// VMReg (VMRegImpl::Bad()); encoding 32 is one past the last GPR.
reg_def RFLAGS(SOC, SOC, 0, 32, VMRegImpl::Bad());
 335 
 336 
 337 // Specify priority of register selection within phases of register
 338 // allocation.  Highest priority is first.  A useful heuristic is to
 339 // give registers a low priority when they are required by machine
 340 // instructions, like EAX and EDX on I486, and choose no-save registers
 341 // before save-on-call, & save-on-call before save-on-entry.  Registers
 342 // which participate in fixed calling sequences should come last.
 343 // Registers which are used as pairs must fall on an even boundary.
 344 
// The declaration order within each chunk below is the allocator's
// selection priority: registers listed first are preferred (see the
// heuristic note above).
alloc_class chunk0(
    // volatiles
    R10, R10_H,
    R11, R11_H,
    R12, R12_H,
    R13, R13_H,
    R14, R14_H,
    R15, R15_H,
    R16, R16_H,
    R17, R17_H,
    R18, R18_H,

    // arg registers
    R0, R0_H,
    R1, R1_H,
    R2, R2_H,
    R3, R3_H,
    R4, R4_H,
    R5, R5_H,
    R6, R6_H,
    R7, R7_H,

    // non-volatiles
    R19, R19_H,
    R20, R20_H,
    R21, R21_H,
    R22, R22_H,
    R23, R23_H,
    R24, R24_H,
    R25, R25_H,
    R26, R26_H,

    // non-allocatable registers

    R27, R27_H, // heapbase
    R28, R28_H, // thread
    R29, R29_H, // fp
    R30, R30_H, // lr
    R31, R31_H, // sp
);

alloc_class chunk1(

    // no save
    V16, V16_H, V16_J, V16_K,
    V17, V17_H, V17_J, V17_K,
    V18, V18_H, V18_J, V18_K,
    V19, V19_H, V19_J, V19_K,
    V20, V20_H, V20_J, V20_K,
    V21, V21_H, V21_J, V21_K,
    V22, V22_H, V22_J, V22_K,
    V23, V23_H, V23_J, V23_K,
    V24, V24_H, V24_J, V24_K,
    V25, V25_H, V25_J, V25_K,
    V26, V26_H, V26_J, V26_K,
    V27, V27_H, V27_J, V27_K,
    V28, V28_H, V28_J, V28_K,
    V29, V29_H, V29_J, V29_K,
    V30, V30_H, V30_J, V30_K,
    V31, V31_H, V31_J, V31_K,

    // arg registers
    V0, V0_H, V0_J, V0_K,
    V1, V1_H, V1_J, V1_K,
    V2, V2_H, V2_J, V2_K,
    V3, V3_H, V3_J, V3_K,
    V4, V4_H, V4_J, V4_K,
    V5, V5_H, V5_J, V5_K,
    V6, V6_H, V6_J, V6_K,
    V7, V7_H, V7_J, V7_K,

    // non-volatiles
    V8, V8_H, V8_J, V8_K,
    V9, V9_H, V9_J, V9_K,
    V10, V10_H, V10_J, V10_K,
    V11, V11_H, V11_J, V11_K,
    V12, V12_H, V12_J, V12_K,
    V13, V13_H, V13_J, V13_K,
    V14, V14_H, V14_J, V14_K,
    V15, V15_H, V15_J, V15_K,
);

// The condition flags live in their own chunk.
alloc_class chunk2(RFLAGS);
 428 
 429 //----------Architecture Description Register Classes--------------------------
 430 // Several register classes are automatically defined based upon information in
 431 // this architecture description.
// 1) reg_class inline_cache_reg           ( /* as def'd in frame section */ )
// 2) reg_class compiler_method_oop_reg    ( /* as def'd in frame section */ )
// 3) reg_class interpreter_method_oop_reg ( /* as def'd in frame section */ )
// 4) reg_class stack_slots( /* one chunk of stack-based "registers" */ )
 436 //
 437 
// Class for all 32 bit integer registers -- excludes SP which will
// never be used as an integer register
reg_class any_reg32(
    R0,
    R1,
    R2,
    R3,
    R4,
    R5,
    R6,
    R7,
    R10,
    R11,
    R12,
    R13,
    R14,
    R15,
    R16,
    R17,
    R18,
    R19,
    R20,
    R21,
    R22,
    R23,
    R24,
    R25,
    R26,
    R27,
    R28,
    R29,
    R30
);

// Singleton classes: each pins an operand to one specific register.

// Singleton class for R0 int register
reg_class int_r0_reg(R0);

// Singleton class for R2 int register
reg_class int_r2_reg(R2);

// Singleton class for R3 int register
reg_class int_r3_reg(R3);

// Singleton class for R4 int register
reg_class int_r4_reg(R4);
 483 
// Class for all long integer registers, including R31, which here
// denotes the stack pointer (sp)
reg_class any_reg(
    R0, R0_H,
    R1, R1_H,
    R2, R2_H,
    R3, R3_H,
    R4, R4_H,
    R5, R5_H,
    R6, R6_H,
    R7, R7_H,
    R10, R10_H,
    R11, R11_H,
    R12, R12_H,
    R13, R13_H,
    R14, R14_H,
    R15, R15_H,
    R16, R16_H,
    R17, R17_H,
    R18, R18_H,
    R19, R19_H,
    R20, R20_H,
    R21, R21_H,
    R22, R22_H,
    R23, R23_H,
    R24, R24_H,
    R25, R25_H,
    R26, R26_H,
    R27, R27_H,
    R28, R28_H,
    R29, R29_H,
    R30, R30_H,
    R31, R31_H
);
 517 
 518 // Class for all non-special integer registers
 519 reg_class no_special_reg32_no_fp(
 520     R0,
 521     R1,
 522     R2,
 523     R3,
 524     R4,
 525     R5,
 526     R6,
 527     R7,
 528     R10,
 529     R11,
 530     R12,                        // rmethod
 531     R13,
 532     R14,
 533     R15,
 534     R16,
 535     R17,
 536     R18,
 537     R19,
 538     R20,
 539     R21,
 540     R22,
 541     R23,
 542     R24,
 543     R25,
 544     R26
 545  /* R27, */                     // heapbase
 546  /* R28, */                     // thread
 547  /* R29, */                     // fp
 548  /* R30, */                     // lr
 549  /* R31 */                      // sp
 550 );
 551 
 552 reg_class no_special_reg32_with_fp(
 553     R0,
 554     R1,
 555     R2,
 556     R3,
 557     R4,
 558     R5,
 559     R6,
 560     R7,
 561     R10,
 562     R11,
 563     R12,                        // rmethod
 564     R13,
 565     R14,
 566     R15,
 567     R16,
 568     R17,
 569     R18,
 570     R19,
 571     R20,
 572     R21,
 573     R22,
 574     R23,
 575     R24,
 576     R25,
 577     R26
 578  /* R27, */                     // heapbase
 579  /* R28, */                     // thread
 580     R29,                        // fp
 581  /* R30, */                     // lr
 582  /* R31 */                      // sp
 583 );
 584 
 585 reg_class_dynamic no_special_reg32(no_special_reg32_no_fp, no_special_reg32_with_fp, %{ PreserveFramePointer %});
 586 
// Class for all non-special long integer registers, with r29 (fp)
// excluded.  Paired with no_special_reg_with_fp below; one of the two
// is chosen via reg_class_dynamic based on PreserveFramePointer.
reg_class no_special_reg_no_fp(
    R0, R0_H,
    R1, R1_H,
    R2, R2_H,
    R3, R3_H,
    R4, R4_H,
    R5, R5_H,
    R6, R6_H,
    R7, R7_H,
    R10, R10_H,
    R11, R11_H,
    R12, R12_H,                 // rmethod
    R13, R13_H,
    R14, R14_H,
    R15, R15_H,
    R16, R16_H,
    R17, R17_H,
    R18, R18_H,
    R19, R19_H,
    R20, R20_H,
    R21, R21_H,
    R22, R22_H,
    R23, R23_H,
    R24, R24_H,
    R25, R25_H,
    R26, R26_H,
 /* R27, R27_H, */              // heapbase
 /* R28, R28_H, */              // thread
 /* R29, R29_H, */              // fp
 /* R30, R30_H, */              // lr
 /* R31, R31_H */               // sp
);

// As above, but with r29 (fp) available to the allocator.
reg_class no_special_reg_with_fp(
    R0, R0_H,
    R1, R1_H,
    R2, R2_H,
    R3, R3_H,
    R4, R4_H,
    R5, R5_H,
    R6, R6_H,
    R7, R7_H,
    R10, R10_H,
    R11, R11_H,
    R12, R12_H,                 // rmethod
    R13, R13_H,
    R14, R14_H,
    R15, R15_H,
    R16, R16_H,
    R17, R17_H,
    R18, R18_H,
    R19, R19_H,
    R20, R20_H,
    R21, R21_H,
    R22, R22_H,
    R23, R23_H,
    R24, R24_H,
    R25, R25_H,
    R26, R26_H,
 /* R27, R27_H, */              // heapbase
 /* R28, R28_H, */              // thread
    R29, R29_H,                 // fp
 /* R30, R30_H, */              // lr
 /* R31, R31_H */               // sp
);

// Selects one of the two variants above according to
// PreserveFramePointer.
reg_class_dynamic no_special_reg(no_special_reg_no_fp, no_special_reg_with_fp, %{ PreserveFramePointer %});
 655 
// Class for 64 bit register r0
reg_class r0_reg(
    R0, R0_H
);

// Class for 64 bit register r1
reg_class r1_reg(
    R1, R1_H
);

// Class for 64 bit register r2
reg_class r2_reg(
    R2, R2_H
);

// Class for 64 bit register r3
reg_class r3_reg(
    R3, R3_H
);

// Class for 64 bit register r4
reg_class r4_reg(
    R4, R4_H
);

// Class for 64 bit register r5
reg_class r5_reg(
    R5, R5_H
);

// Class for 64 bit register r10
reg_class r10_reg(
    R10, R10_H
);

// Class for 64 bit register r11
reg_class r11_reg(
    R11, R11_H
);

// Class for the method register (rmethod = r12)
reg_class method_reg(
    R12, R12_H
);

// Class for the heapbase register (r27)
reg_class heapbase_reg(
    R27, R27_H
);

// Class for the thread register (r28)
reg_class thread_reg(
    R28, R28_H
);

// Class for the frame pointer register (r29)
reg_class fp_reg(
    R29, R29_H
);

// Class for the link register (r30)
reg_class lr_reg(
    R30, R30_H
);

// Class for the long sp register (r31)
reg_class sp_reg(
  R31, R31_H
);
 725 
// Class for all pointer registers (including fp, lr and sp)
reg_class ptr_reg(
    R0, R0_H,
    R1, R1_H,
    R2, R2_H,
    R3, R3_H,
    R4, R4_H,
    R5, R5_H,
    R6, R6_H,
    R7, R7_H,
    R10, R10_H,
    R11, R11_H,
    R12, R12_H,
    R13, R13_H,
    R14, R14_H,
    R15, R15_H,
    R16, R16_H,
    R17, R17_H,
    R18, R18_H,
    R19, R19_H,
    R20, R20_H,
    R21, R21_H,
    R22, R22_H,
    R23, R23_H,
    R24, R24_H,
    R25, R25_H,
    R26, R26_H,
    R27, R27_H,
    R28, R28_H,
    R29, R29_H,
    R30, R30_H,
    R31, R31_H
);

// Class for all non_special pointer registers
// (heapbase, thread, fp, lr and sp are excluded)
reg_class no_special_ptr_reg(
    R0, R0_H,
    R1, R1_H,
    R2, R2_H,
    R3, R3_H,
    R4, R4_H,
    R5, R5_H,
    R6, R6_H,
    R7, R7_H,
    R10, R10_H,
    R11, R11_H,
    R12, R12_H,
    R13, R13_H,
    R14, R14_H,
    R15, R15_H,
    R16, R16_H,
    R17, R17_H,
    R18, R18_H,
    R19, R19_H,
    R20, R20_H,
    R21, R21_H,
    R22, R22_H,
    R23, R23_H,
    R24, R24_H,
    R25, R25_H,
    R26, R26_H,
 /* R27, R27_H, */              // heapbase
 /* R28, R28_H, */              // thread
 /* R29, R29_H, */              // fp
 /* R30, R30_H, */              // lr
 /* R31, R31_H */               // sp
);
 793 
// Class for all float registers.  Only the low 32-bit slice (Vn) of
// each SIMD/FP register is named, since a scalar float occupies only
// the first vector element.
reg_class float_reg(
    V0,
    V1,
    V2,
    V3,
    V4,
    V5,
    V6,
    V7,
    V8,
    V9,
    V10,
    V11,
    V12,
    V13,
    V14,
    V15,
    V16,
    V17,
    V18,
    V19,
    V20,
    V21,
    V22,
    V23,
    V24,
    V25,
    V26,
    V27,
    V28,
    V29,
    V30,
    V31
);
 829 
// Double precision float registers have virtual `high halves' that
// are needed by the allocator.
// Class for all double registers (each entry is Vn plus its virtual
// high half Vn_H)
reg_class double_reg(
    V0, V0_H,
    V1, V1_H,
    V2, V2_H,
    V3, V3_H,
    V4, V4_H,
    V5, V5_H,
    V6, V6_H,
    V7, V7_H,
    V8, V8_H,
    V9, V9_H,
    V10, V10_H,
    V11, V11_H,
    V12, V12_H,
    V13, V13_H,
    V14, V14_H,
    V15, V15_H,
    V16, V16_H,
    V17, V17_H,
    V18, V18_H,
    V19, V19_H,
    V20, V20_H,
    V21, V21_H,
    V22, V22_H,
    V23, V23_H,
    V24, V24_H,
    V25, V25_H,
    V26, V26_H,
    V27, V27_H,
    V28, V28_H,
    V29, V29_H,
    V30, V30_H,
    V31, V31_H
);
 867 
// Class for all 64bit vector registers
// (a D-sized vector occupies the same two slots per register as a
// double -- Vn and Vn_H)
reg_class vectord_reg(
    V0, V0_H,
    V1, V1_H,
    V2, V2_H,
    V3, V3_H,
    V4, V4_H,
    V5, V5_H,
    V6, V6_H,
    V7, V7_H,
    V8, V8_H,
    V9, V9_H,
    V10, V10_H,
    V11, V11_H,
    V12, V12_H,
    V13, V13_H,
    V14, V14_H,
    V15, V15_H,
    V16, V16_H,
    V17, V17_H,
    V18, V18_H,
    V19, V19_H,
    V20, V20_H,
    V21, V21_H,
    V22, V22_H,
    V23, V23_H,
    V24, V24_H,
    V25, V25_H,
    V26, V26_H,
    V27, V27_H,
    V28, V28_H,
    V29, V29_H,
    V30, V30_H,
    V31, V31_H
);
 903 
// Class for all 128bit vector registers
// (a Q-sized vector occupies four 32-bit slots per register:
// Vn, Vn_H, Vn_J, Vn_K)
reg_class vectorx_reg(
    V0, V0_H, V0_J, V0_K,
    V1, V1_H, V1_J, V1_K,
    V2, V2_H, V2_J, V2_K,
    V3, V3_H, V3_J, V3_K,
    V4, V4_H, V4_J, V4_K,
    V5, V5_H, V5_J, V5_K,
    V6, V6_H, V6_J, V6_K,
    V7, V7_H, V7_J, V7_K,
    V8, V8_H, V8_J, V8_K,
    V9, V9_H, V9_J, V9_K,
    V10, V10_H, V10_J, V10_K,
    V11, V11_H, V11_J, V11_K,
    V12, V12_H, V12_J, V12_K,
    V13, V13_H, V13_J, V13_K,
    V14, V14_H, V14_J, V14_K,
    V15, V15_H, V15_J, V15_K,
    V16, V16_H, V16_J, V16_K,
    V17, V17_H, V17_J, V17_K,
    V18, V18_H, V18_J, V18_K,
    V19, V19_H, V19_J, V19_K,
    V20, V20_H, V20_J, V20_K,
    V21, V21_H, V21_J, V21_K,
    V22, V22_H, V22_J, V22_K,
    V23, V23_H, V23_J, V23_K,
    V24, V24_H, V24_J, V24_K,
    V25, V25_H, V25_J, V25_K,
    V26, V26_H, V26_J, V26_K,
    V27, V27_H, V27_J, V27_K,
    V28, V28_H, V28_J, V28_K,
    V29, V29_H, V29_J, V29_K,
    V30, V30_H, V30_J, V30_K,
    V31, V31_H, V31_J, V31_K
);
 939 
// Class for 128 bit register v0
// NOTE(review): only the low 64-bit slot pair (V0, V0_H) is listed,
// unlike vectorx_reg which names four slots per register -- confirm
// the "128 bit" description is intentional.
reg_class v0_reg(
    V0, V0_H
);
 944 
// Class for 128 bit register v1
// NOTE(review): lists only the low slot pair (V1, V1_H); see the
// remark on v0_reg above.
reg_class v1_reg(
    V1, V1_H
);
 949 
// Class for 128 bit register v2
// NOTE(review): lists only the low slot pair (V2, V2_H); see the
// remark on v0_reg above.
reg_class v2_reg(
    V2, V2_H
);
 954 
// Class for 128 bit register v3
// NOTE(review): lists only the low slot pair (V3, V3_H); see the
// remark on v0_reg above.
reg_class v3_reg(
    V3, V3_H
);
 959 
// Singleton class for condition codes (the flags register)
reg_class int_flags(RFLAGS);
 962 
 963 %}
 964 
 965 //----------DEFINITION BLOCK---------------------------------------------------
 966 // Define name --> value mappings to inform the ADLC of an integer valued name
 967 // Current support includes integer values in the range [0, 0x7FFFFFFF]
 968 // Format:
 969 //        int_def  <name>         ( <int_value>, <expression>);
 970 // Generated Code in ad_<arch>.hpp
 971 //        #define  <name>   (<expression>)
 972 //        // value == <int_value>
 973 // Generated code in ad_<arch>.cpp adlc_verification()
 974 //        assert( <name> == <int_value>, "Expect (<expression>) to equal <int_value>");
 975 //
 976 
 977 // we follow the ppc-aix port in using a simple cost model which ranks
 978 // register operations as cheap, memory ops as more expensive and
 979 // branches as most expensive. the first two have a low as well as a
 980 // normal cost. huge cost appears to be a way of saying don't do
 981 // something
 982 
definitions %{
  // The default cost (of a register move instruction).
  int_def INSN_COST            (    100,     100);
  // Branches and calls are ranked twice as expensive as a plain insn.
  int_def BRANCH_COST          (    200,     2 * INSN_COST);
  int_def CALL_COST            (    200,     2 * INSN_COST);
  // Volatile references (loads/stores with barriers) are costed very
  // high so the matcher prefers the barrier-free forms where legal.
  int_def VOLATILE_REF_COST    (   1000,     10 * INSN_COST);
%}
 990 
 991 
 992 //----------SOURCE BLOCK-------------------------------------------------------
 993 // This is a block of C++ code which provides values, functions, and
 994 // definitions necessary in the rest of the architecture description
 995 
 996 source_hpp %{
 997 
 998 #include "asm/macroAssembler.hpp"
 999 #include "gc/shared/cardTable.hpp"
1000 #include "gc/shared/cardTableBarrierSet.hpp"
1001 #include "gc/shared/collectedHeap.hpp"
1002 #include "opto/addnode.hpp"
1003 
class CallStubImpl {

  //--------------------------------------------------------------
  //---<  Used for optimization in Compile::shorten_branches  >---
  //--------------------------------------------------------------

 public:
  // Size of call trampoline stub.
  // AArch64 emits no trampolines, so both queries report zero.
  static uint size_call_trampoline() {
    return 0; // no call trampolines on this platform
  }

  // number of relocations needed by a call trampoline stub
  static uint reloc_call_trampoline() {
    return 0; // no call trampolines on this platform
  }
};
1021 
// Sizes and emitters for the exception and deopt handler stubs
// appended to each compiled method.
class HandlerImpl {

 public:

  static int emit_exception_handler(CodeBuffer &cbuf);
  static int emit_deopt_handler(CodeBuffer& cbuf);

  // The exception handler is a single far branch.
  static uint size_exception_handler() {
    return MacroAssembler::far_branch_size();
  }

  static uint size_deopt_handler() {
    // count one adr and one far branch instruction
    // NOTE(review): total of 4 insn slots -- presumably 1 adr plus a
    // worst-case 3-insn far branch; confirm against far_branch_size().
    return 4 * NativeInstruction::instruction_size;
  }
};
1038 
  // opcode test shared by the predicates below: true iff the opcode
  // is one of the strong CompareAndSwapX forms handled specially
  bool is_CAS(int opcode);

  // predicates controlling emit of ldr<x>/ldar<x> and associated dmb

  bool unnecessary_acquire(const Node *barrier);
  bool needs_acquiring_load(const Node *load);

  // predicates controlling emit of str<x>/stlr<x> and associated dmbs

  bool unnecessary_release(const Node *barrier);
  bool unnecessary_volatile(const Node *barrier);
  bool needs_releasing_store(const Node *store);

  // predicate controlling translation of CompareAndSwapX
  bool needs_acquiring_load_exclusive(const Node *load);

  // predicate controlling translation of StoreCM
  bool unnecessary_storestore(const Node *storecm);

  // predicate controlling addressing modes
  bool size_fits_all_mem_uses(AddPNode* addp, int shift);
1060 %}
1061 
1062 source %{
1063 
1064   // Optimizaton of volatile gets and puts
1065   // -------------------------------------
1066   //
1067   // AArch64 has ldar<x> and stlr<x> instructions which we can safely
1068   // use to implement volatile reads and writes. For a volatile read
1069   // we simply need
1070   //
1071   //   ldar<x>
1072   //
1073   // and for a volatile write we need
1074   //
1075   //   stlr<x>
1076   //
1077   // Alternatively, we can implement them by pairing a normal
1078   // load/store with a memory barrier. For a volatile read we need
1079   //
1080   //   ldr<x>
1081   //   dmb ishld
1082   //
1083   // for a volatile write
1084   //
1085   //   dmb ish
1086   //   str<x>
1087   //   dmb ish
1088   //
1089   // We can also use ldaxr and stlxr to implement compare and swap CAS
1090   // sequences. These are normally translated to an instruction
1091   // sequence like the following
1092   //
1093   //   dmb      ish
1094   // retry:
1095   //   ldxr<x>   rval raddr
1096   //   cmp       rval rold
1097   //   b.ne done
1098   //   stlxr<x>  rval, rnew, rold
1099   //   cbnz      rval retry
1100   // done:
1101   //   cset      r0, eq
1102   //   dmb ishld
1103   //
1104   // Note that the exclusive store is already using an stlxr
1105   // instruction. That is required to ensure visibility to other
1106   // threads of the exclusive write (assuming it succeeds) before that
1107   // of any subsequent writes.
1108   //
1109   // The following instruction sequence is an improvement on the above
1110   //
1111   // retry:
1112   //   ldaxr<x>  rval raddr
1113   //   cmp       rval rold
1114   //   b.ne done
1115   //   stlxr<x>  rval, rnew, rold
1116   //   cbnz      rval retry
1117   // done:
1118   //   cset      r0, eq
1119   //
1120   // We don't need the leading dmb ish since the stlxr guarantees
1121   // visibility of prior writes in the case that the swap is
1122   // successful. Crucially we don't have to worry about the case where
1123   // the swap is not successful since no valid program should be
1124   // relying on visibility of prior changes by the attempting thread
1125   // in the case where the CAS fails.
1126   //
1127   // Similarly, we don't need the trailing dmb ishld if we substitute
1128   // an ldaxr instruction since that will provide all the guarantees we
1129   // require regarding observation of changes made by other threads
1130   // before any change to the CAS address observed by the load.
1131   //
1132   // In order to generate the desired instruction sequence we need to
1133   // be able to identify specific 'signature' ideal graph node
  // sequences which i) occur as a translation of volatile reads or
1135   // writes or CAS operations and ii) do not occur through any other
1136   // translation or graph transformation. We can then provide
  // alternative adlc matching rules which translate these node
1138   // sequences to the desired machine code sequences. Selection of the
1139   // alternative rules can be implemented by predicates which identify
1140   // the relevant node sequences.
1141   //
1142   // The ideal graph generator translates a volatile read to the node
1143   // sequence
1144   //
1145   //   LoadX[mo_acquire]
1146   //   MemBarAcquire
1147   //
1148   // As a special case when using the compressed oops optimization we
1149   // may also see this variant
1150   //
1151   //   LoadN[mo_acquire]
1152   //   DecodeN
1153   //   MemBarAcquire
1154   //
1155   // A volatile write is translated to the node sequence
1156   //
1157   //   MemBarRelease
1158   //   StoreX[mo_release] {CardMark}-optional
1159   //   MemBarVolatile
1160   //
1161   // n.b. the above node patterns are generated with a strict
1162   // 'signature' configuration of input and output dependencies (see
1163   // the predicates below for exact details). The card mark may be as
1164   // simple as a few extra nodes or, in a few GC configurations, may
1165   // include more complex control flow between the leading and
1166   // trailing memory barriers. However, whatever the card mark
1167   // configuration these signatures are unique to translated volatile
1168   // reads/stores -- they will not appear as a result of any other
1169   // bytecode translation or inlining nor as a consequence of
1170   // optimizing transforms.
1171   //
1172   // We also want to catch inlined unsafe volatile gets and puts and
1173   // be able to implement them using either ldar<x>/stlr<x> or some
1174   // combination of ldr<x>/stlr<x> and dmb instructions.
1175   //
1176   // Inlined unsafe volatiles puts manifest as a minor variant of the
1177   // normal volatile put node sequence containing an extra cpuorder
1178   // membar
1179   //
1180   //   MemBarRelease
1181   //   MemBarCPUOrder
1182   //   StoreX[mo_release] {CardMark}-optional
1183   //   MemBarCPUOrder
1184   //   MemBarVolatile
1185   //
1186   // n.b. as an aside, a cpuorder membar is not itself subject to
1187   // matching and translation by adlc rules.  However, the rule
1188   // predicates need to detect its presence in order to correctly
1189   // select the desired adlc rules.
1190   //
1191   // Inlined unsafe volatile gets manifest as a slightly different
1192   // node sequence to a normal volatile get because of the
1193   // introduction of some CPUOrder memory barriers to bracket the
  // Load. However, the same basic skeleton of a LoadX feeding a
  // MemBarAcquire, possibly through an optional DecodeN, is still
1196   // present
1197   //
1198   //   MemBarCPUOrder
1199   //        ||       \\
1200   //   MemBarCPUOrder LoadX[mo_acquire]
1201   //        ||            |
1202   //        ||       {DecodeN} optional
1203   //        ||       /
1204   //     MemBarAcquire
1205   //
1206   // In this case the acquire membar does not directly depend on the
1207   // load. However, we can be sure that the load is generated from an
1208   // inlined unsafe volatile get if we see it dependent on this unique
1209   // sequence of membar nodes. Similarly, given an acquire membar we
1210   // can know that it was added because of an inlined unsafe volatile
1211   // get if it is fed and feeds a cpuorder membar and if its feed
1212   // membar also feeds an acquiring load.
1213   //
1214   // Finally an inlined (Unsafe) CAS operation is translated to the
1215   // following ideal graph
1216   //
1217   //   MemBarRelease
1218   //   MemBarCPUOrder
1219   //   CompareAndSwapX {CardMark}-optional
1220   //   MemBarCPUOrder
1221   //   MemBarAcquire
1222   //
1223   // So, where we can identify these volatile read and write
1224   // signatures we can choose to plant either of the above two code
1225   // sequences. For a volatile read we can simply plant a normal
1226   // ldr<x> and translate the MemBarAcquire to a dmb. However, we can
1227   // also choose to inhibit translation of the MemBarAcquire and
1228   // inhibit planting of the ldr<x>, instead planting an ldar<x>.
1229   //
1230   // When we recognise a volatile store signature we can choose to
  // plant a dmb ish as a translation for the MemBarRelease, a
1232   // normal str<x> and then a dmb ish for the MemBarVolatile.
1233   // Alternatively, we can inhibit translation of the MemBarRelease
1234   // and MemBarVolatile and instead plant a simple stlr<x>
1235   // instruction.
1236   //
1237   // when we recognise a CAS signature we can choose to plant a dmb
1238   // ish as a translation for the MemBarRelease, the conventional
1239   // macro-instruction sequence for the CompareAndSwap node (which
1240   // uses ldxr<x>) and then a dmb ishld for the MemBarAcquire.
1241   // Alternatively, we can elide generation of the dmb instructions
1242   // and plant the alternative CompareAndSwap macro-instruction
1243   // sequence (which uses ldaxr<x>).
1244   //
1245   // Of course, the above only applies when we see these signature
1246   // configurations. We still want to plant dmb instructions in any
1247   // other cases where we may see a MemBarAcquire, MemBarRelease or
1248   // MemBarVolatile. For example, at the end of a constructor which
1249   // writes final/volatile fields we will see a MemBarRelease
1250   // instruction and this needs a 'dmb ish' lest we risk the
1251   // constructed object being visible without making the
1252   // final/volatile field writes visible.
1253   //
1254   // n.b. the translation rules below which rely on detection of the
1255   // volatile signatures and insert ldar<x> or stlr<x> are failsafe.
1256   // If we see anything other than the signature configurations we
1257   // always just translate the loads and stores to ldr<x> and str<x>
1258   // and translate acquire, release and volatile membars to the
1259   // relevant dmb instructions.
1260   //
1261 
1262   // is_CAS(int opcode)
1263   //
1264   // return true if opcode is one of the possible CompareAndSwapX
1265   // values otherwise false.
1266 
1267   bool is_CAS(int opcode)
1268   {
1269     switch(opcode) {
1270       // We handle these
1271     case Op_CompareAndSwapI:
1272     case Op_CompareAndSwapL:
1273     case Op_CompareAndSwapP:
1274     case Op_CompareAndSwapN:
1275  // case Op_CompareAndSwapB:
1276  // case Op_CompareAndSwapS:
1277       return true;
1278       // These are TBD
1279     case Op_WeakCompareAndSwapB:
1280     case Op_WeakCompareAndSwapS:
1281     case Op_WeakCompareAndSwapI:
1282     case Op_WeakCompareAndSwapL:
1283     case Op_WeakCompareAndSwapP:
1284     case Op_WeakCompareAndSwapN:
1285     case Op_CompareAndExchangeB:
1286     case Op_CompareAndExchangeS:
1287     case Op_CompareAndExchangeI:
1288     case Op_CompareAndExchangeL:
1289     case Op_CompareAndExchangeP:
1290     case Op_CompareAndExchangeN:
1291       return false;
1292     default:
1293       return false;
1294     }
1295   }
1296 
1297   // helper to determine the maximum number of Phi nodes we may need to
1298   // traverse when searching from a card mark membar for the merge mem
1299   // feeding a trailing membar or vice versa
1300 
1301 // predicates controlling emit of ldr<x>/ldar<x> and associated dmb
1302 
1303 bool unnecessary_acquire(const Node *barrier)
1304 {
1305   assert(barrier->is_MemBar(), "expecting a membar");
1306 
1307   if (UseBarriersForVolatile) {
1308     // we need to plant a dmb
1309     return false;
1310   }
1311 
1312   MemBarNode* mb = barrier->as_MemBar();
1313 
1314   if (mb->trailing_load()) {
1315     return true;
1316   }
1317 
1318   if (mb->trailing_load_store()) {
1319     Node* load_store = mb->in(MemBarNode::Precedent);
1320     assert(load_store->is_LoadStore(), "unexpected graph shape");
1321     return is_CAS(load_store->Opcode());
1322   }
1323 
1324   return false;
1325 }
1326 
1327 bool needs_acquiring_load(const Node *n)
1328 {
1329   assert(n->is_Load(), "expecting a load");
1330   if (UseBarriersForVolatile) {
1331     // we use a normal load and a dmb
1332     return false;
1333   }
1334 
1335   LoadNode *ld = n->as_Load();
1336 
1337   return ld->is_acquire();
1338 }
1339 
1340 bool unnecessary_release(const Node *n)
1341 {
1342   assert((n->is_MemBar() &&
1343           n->Opcode() == Op_MemBarRelease),
1344          "expecting a release membar");
1345 
1346   if (UseBarriersForVolatile) {
1347     // we need to plant a dmb
1348     return false;
1349   }
1350 
1351   MemBarNode *barrier = n->as_MemBar();
1352   if (!barrier->leading()) {
1353     return false;
1354   } else {
1355     Node* trailing = barrier->trailing_membar();
1356     MemBarNode* trailing_mb = trailing->as_MemBar();
1357     assert(trailing_mb->trailing(), "Not a trailing membar?");
1358     assert(trailing_mb->leading_membar() == n, "inconsistent leading/trailing membars");
1359 
1360     Node* mem = trailing_mb->in(MemBarNode::Precedent);
1361     if (mem->is_Store()) {
1362       assert(mem->as_Store()->is_release(), "");
1363       assert(trailing_mb->Opcode() == Op_MemBarVolatile, "");
1364       return true;
1365     } else {
1366       assert(mem->is_LoadStore(), "");
1367       assert(trailing_mb->Opcode() == Op_MemBarAcquire, "");
1368       return is_CAS(mem->Opcode());
1369     }
1370   }
1371   return false;
1372 }
1373 
1374 bool unnecessary_volatile(const Node *n)
1375 {
1376   // assert n->is_MemBar();
1377   if (UseBarriersForVolatile) {
1378     // we need to plant a dmb
1379     return false;
1380   }
1381 
1382   MemBarNode *mbvol = n->as_MemBar();
1383 
1384   bool release = mbvol->trailing_store();
1385   assert(!release || (mbvol->in(MemBarNode::Precedent)->is_Store() && mbvol->in(MemBarNode::Precedent)->as_Store()->is_release()), "");
1386 #ifdef ASSERT
1387   if (release) {
1388     Node* leading = mbvol->leading_membar();
1389     assert(leading->Opcode() == Op_MemBarRelease, "");
1390     assert(leading->as_MemBar()->leading_store(), "");
1391     assert(leading->as_MemBar()->trailing_membar() == mbvol, "");
1392   }
1393 #endif
1394 
1395   return release;
1396 }
1397 
1398 // predicates controlling emit of str<x>/stlr<x> and associated dmbs
1399 
1400 bool needs_releasing_store(const Node *n)
1401 {
1402   // assert n->is_Store();
1403   if (UseBarriersForVolatile) {
1404     // we use a normal store and dmb combination
1405     return false;
1406   }
1407 
1408   StoreNode *st = n->as_Store();
1409 
1410   return st->trailing_membar() != NULL;
1411 }
1412 
1413 // predicate controlling translation of CAS
1414 //
1415 // returns true if CAS needs to use an acquiring load otherwise false
1416 
1417 bool needs_acquiring_load_exclusive(const Node *n)
1418 {
1419   assert(is_CAS(n->Opcode()), "expecting a compare and swap");
1420   if (UseBarriersForVolatile) {
1421     return false;
1422   }
1423 
1424   LoadStoreNode* ldst = n->as_LoadStore();
1425   assert(ldst->trailing_membar() != NULL, "expected trailing membar");
1426 
1427   // so we can just return true here
1428   return true;
1429 }
1430 
1431 // predicate controlling translation of StoreCM
1432 //
1433 // returns true if a StoreStore must precede the card write otherwise
1434 // false
1435 
1436 bool unnecessary_storestore(const Node *storecm)
1437 {
1438   assert(storecm->Opcode()  == Op_StoreCM, "expecting a StoreCM");
1439 
1440   // we need to generate a dmb ishst between an object put and the
1441   // associated card mark when we are using CMS without conditional
1442   // card marking
1443 
1444   if (UseConcMarkSweepGC && !UseCondCardMark) {
1445     return false;
1446   }
1447 
1448   // a storestore is unnecesary in all other cases
1449 
1450   return true;
1451 }
1452 
1453 
1454 #define __ _masm.
1455 
1456 // advance declarations for helper functions to convert register
1457 // indices to register objects
1458 
1459 // the ad file has to provide implementations of certain methods
1460 // expected by the generic code
1461 //
1462 // REQUIRED FUNCTIONALITY
1463 
1464 //=============================================================================
1465 
1466 // !!!!! Special hack to get all types of calls to specify the byte offset
1467 //       from the start of the call to the point where the return address
1468 //       will point.
1469 
1470 int MachCallStaticJavaNode::ret_addr_offset()
1471 {
1472   // call should be a simple bl
1473   int off = 4;
1474   return off;
1475 }
1476 
int MachCallDynamicJavaNode::ret_addr_offset()
{
  // Four 4-byte instructions precede the return address.
  return 16; // movz, movk, movk, bl
}
1481 
1482 int MachCallRuntimeNode::ret_addr_offset() {
1483   // for generated stubs the call will be
1484   //   far_call(addr)
1485   // for real runtime callouts it will be six instructions
1486   // see aarch64_enc_java_to_runtime
1487   //   adr(rscratch2, retaddr)
1488   //   lea(rscratch1, RuntimeAddress(addr)
1489   //   stp(zr, rscratch2, Address(__ pre(sp, -2 * wordSize)))
1490   //   blr(rscratch1)
1491   CodeBlob *cb = CodeCache::find_blob(_entry_point);
1492   if (cb) {
1493     return MacroAssembler::far_branch_size();
1494   } else {
1495     return 6 * NativeInstruction::instruction_size;
1496   }
1497 }
1498 
1499 // Indicate if the safepoint node needs the polling page as an input
1500 
1501 // the shared code plants the oop data at the start of the generated
// code for the safepoint node and that needs to be at the load
1503 // instruction itself. so we cannot plant a mov of the safepoint poll
1504 // address followed by a load. setting this to true means the mov is
1505 // scheduled as a prior instruction. that's better for scheduling
1506 // anyway.
1507 
bool SafePointNode::needs_polling_address_input()
{
  // true: the poll address is materialized by a separate, earlier mov
  // so the load instruction itself can carry the oop map data (see
  // the comment above).
  return true;
}
1512 
1513 //=============================================================================
1514 
#ifndef PRODUCT
// Debug-only listing of the breakpoint pseudo-instruction.
void MachBreakpointNode::format(PhaseRegAlloc *ra_, outputStream *st) const {
  st->print("BREAKPOINT");
}
#endif

// Emit a brk #0 instruction to trap into the debugger.
void MachBreakpointNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
  MacroAssembler _masm(&cbuf);
  __ brk(0);
}

uint MachBreakpointNode::size(PhaseRegAlloc *ra_) const {
  // defer to the generic size computation
  return MachNode::size(ra_);
}
1529 
1530 //=============================================================================
1531 
#ifndef PRODUCT
  // Debug-only listing: one line describing the whole nop run.
  void MachNopNode::format(PhaseRegAlloc*, outputStream* st) const {
    st->print("nop \t# %d bytes pad for loops and calls", _count);
  }
#endif

  // Emit _count nop instructions as alignment padding.
  void MachNopNode::emit(CodeBuffer &cbuf, PhaseRegAlloc*) const {
    MacroAssembler _masm(&cbuf);
    for (int i = 0; i < _count; i++) {
      __ nop();
    }
  }

  // Size is exact: one fixed-width instruction per nop.
  uint MachNopNode::size(PhaseRegAlloc*) const {
    return _count * NativeInstruction::instruction_size;
  }
1548 
1549 //=============================================================================
1550 const RegMask& MachConstantBaseNode::_out_RegMask = RegMask::Empty;
1551 
1552 int Compile::ConstantTable::calculate_table_base_offset() const {
1553   return 0;  // absolute addressing, no offset
1554 }
1555 
1556 bool MachConstantBaseNode::requires_postalloc_expand() const { return false; }
1557 void MachConstantBaseNode::postalloc_expand(GrowableArray <Node *> *nodes, PhaseRegAlloc *ra_) {
1558   ShouldNotReachHere();
1559 }
1560 
1561 void MachConstantBaseNode::emit(CodeBuffer& cbuf, PhaseRegAlloc* ra_) const {
1562   // Empty encoding
1563 }
1564 
1565 uint MachConstantBaseNode::size(PhaseRegAlloc* ra_) const {
1566   return 0;
1567 }
1568 
1569 #ifndef PRODUCT
1570 void MachConstantBaseNode::format(PhaseRegAlloc* ra_, outputStream* st) const {
1571   st->print("-- \t// MachConstantBaseNode (empty encoding)");
1572 }
1573 #endif
1574 
#ifndef PRODUCT
// Debug-only listing of the method prolog; the printed sequence is
// meant to mirror what MachPrologNode::emit produces.
void MachPrologNode::format(PhaseRegAlloc *ra_, outputStream *st) const {
  Compile* C = ra_->C;

  int framesize = C->frame_slots() << LogBytesPerInt;

  if (C->need_stack_bang(framesize))
    st->print("# stack bang size=%d\n\t", framesize);

  // Small frames save fp/lr at an immediate stp offset; larger frames
  // materialize the remaining frame size in rscratch1 first.
  if (framesize < ((1 << 9) + 2 * wordSize)) {
    st->print("sub  sp, sp, #%d\n\t", framesize);
    st->print("stp  rfp, lr, [sp, #%d]", framesize - 2 * wordSize);
    if (PreserveFramePointer) st->print("\n\tadd  rfp, sp, #%d", framesize - 2 * wordSize);
  } else {
    st->print("stp  lr, rfp, [sp, #%d]!\n\t", -(2 * wordSize));
    if (PreserveFramePointer) st->print("mov  rfp, sp\n\t");
    st->print("mov  rscratch1, #%d\n\t", framesize - 2 * wordSize);
    st->print("sub  sp, sp, rscratch1");
  }
}
#endif
1596 
// Emit the method prolog: patchable nop, optional stack bang, frame
// construction, and constant-table base offset setup.
void MachPrologNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
  Compile* C = ra_->C;
  MacroAssembler _masm(&cbuf);

  // n.b. frame size includes space for return pc and rfp
  const long framesize = C->frame_size_in_bytes();
  assert(framesize%(2*wordSize) == 0, "must preserve 2*wordSize alignment");

  // insert a nop at the start of the prolog so we can patch in a
  // branch if we need to invalidate the method later
  __ nop();

  int bangsize = C->bang_size_in_bytes();
  if (C->need_stack_bang(bangsize) && UseStackBanging)
    __ generate_stack_overflow_check(bangsize);

  __ build_frame(framesize);

  if (VerifyStackAtCalls) {
    Unimplemented();
  }

  // record where the frame becomes walkable for stack traversal
  C->set_frame_complete(cbuf.insts_size());

  if (C->has_mach_constant_base_node()) {
    // NOTE: We set the table base offset here because users might be
    // emitted before MachConstantBaseNode.
    Compile::ConstantTable& constant_table = C->constant_table();
    constant_table.set_table_base_offset(constant_table.calculate_table_base_offset());
  }
}

uint MachPrologNode::size(PhaseRegAlloc* ra_) const
{
  return MachNode::size(ra_); // too many variables; just compute it
                              // the hard way
}

int MachPrologNode::reloc() const
{
  // no relocatable values in the prolog itself
  return 0;
}
1639 
1640 //=============================================================================
1641 
#ifndef PRODUCT
// Debug-only listing of the method epilog; mirrors the pop-frame and
// return-poll code emitted by MachEpilogNode::emit.
void MachEpilogNode::format(PhaseRegAlloc *ra_, outputStream *st) const {
  Compile* C = ra_->C;
  int framesize = C->frame_slots() << LogBytesPerInt;

  st->print("# pop frame %d\n\t",framesize);

  // Mirror image of the prolog: small frames restore fp/lr with an
  // immediate offset, larger frames go via rscratch1.
  if (framesize == 0) {
    st->print("ldp  lr, rfp, [sp],#%d\n\t", (2 * wordSize));
  } else if (framesize < ((1 << 9) + 2 * wordSize)) {
    st->print("ldp  lr, rfp, [sp,#%d]\n\t", framesize - 2 * wordSize);
    st->print("add  sp, sp, #%d\n\t", framesize);
  } else {
    st->print("mov  rscratch1, #%d\n\t", framesize - 2 * wordSize);
    st->print("add  sp, sp, rscratch1\n\t");
    st->print("ldp  lr, rfp, [sp],#%d\n\t", (2 * wordSize));
  }

  if (do_polling() && C->is_method_compilation()) {
    st->print("# touch polling page\n\t");
    st->print("mov  rscratch1, #0x%lx\n\t", p2i(os::get_polling_page()));
    st->print("ldr zr, [rscratch1]");
  }
}
#endif
1667 
// Emit the method epilog: tear down the frame, optional reserved
// stack check, and the return safepoint poll.
void MachEpilogNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
  Compile* C = ra_->C;
  MacroAssembler _masm(&cbuf);
  int framesize = C->frame_slots() << LogBytesPerInt;

  __ remove_frame(framesize);

  if (StackReservedPages > 0 && C->has_reserved_stack_access()) {
    __ reserved_stack_check();
  }

  if (do_polling() && C->is_method_compilation()) {
    // touch the polling page so a pending safepoint can stop us here
    __ read_polling_page(rscratch1, os::get_polling_page(), relocInfo::poll_return_type);
  }
}

uint MachEpilogNode::size(PhaseRegAlloc *ra_) const {
  // Variable size. Determine dynamically.
  return MachNode::size(ra_);
}

int MachEpilogNode::reloc() const {
  // Return number of relocatable values contained in this instruction.
  return 1; // 1 for polling page.
}

const Pipeline * MachEpilogNode::pipeline() const {
  return MachNode::pipeline_class();
}

// This method seems to be obsolete. It is declared in machnode.hpp
// and defined in all *.ad files, but it is never called. Should we
// get rid of it?
int MachEpilogNode::safepoint_offset() const {
  assert(do_polling(), "no return for this epilog node");
  return 4;
}
1705 
1706 //=============================================================================
1707 
1708 // Figure out which register class each belongs in: rc_int, rc_float or
1709 // rc_stack.
1710 enum RC { rc_bad, rc_int, rc_float, rc_stack };
1711 
1712 static enum RC rc_class(OptoReg::Name reg) {
1713 
1714   if (reg == OptoReg::Bad) {
1715     return rc_bad;
1716   }
1717 
1718   // we have 30 int registers * 2 halves
1719   // (rscratch1 and rscratch2 are omitted)
1720 
1721   if (reg < 60) {
1722     return rc_int;
1723   }
1724 
1725   // we have 32 float register * 2 halves
1726   if (reg < 60 + 128) {
1727     return rc_float;
1728   }
1729 
1730   // Between float regs & stack is the flags regs.
1731   assert(OptoReg::is_stack(reg), "blow up if spilling flags");
1732 
1733   return rc_stack;
1734 }
1735 
1736 uint MachSpillCopyNode::implementation(CodeBuffer *cbuf, PhaseRegAlloc *ra_, bool do_size, outputStream *st) const {
1737   Compile* C = ra_->C;
1738 
1739   // Get registers to move.
1740   OptoReg::Name src_hi = ra_->get_reg_second(in(1));
1741   OptoReg::Name src_lo = ra_->get_reg_first(in(1));
1742   OptoReg::Name dst_hi = ra_->get_reg_second(this);
1743   OptoReg::Name dst_lo = ra_->get_reg_first(this);
1744 
1745   enum RC src_hi_rc = rc_class(src_hi);
1746   enum RC src_lo_rc = rc_class(src_lo);
1747   enum RC dst_hi_rc = rc_class(dst_hi);
1748   enum RC dst_lo_rc = rc_class(dst_lo);
1749 
1750   assert(src_lo != OptoReg::Bad && dst_lo != OptoReg::Bad, "must move at least 1 register");
1751 
1752   if (src_hi != OptoReg::Bad) {
1753     assert((src_lo&1)==0 && src_lo+1==src_hi &&
1754            (dst_lo&1)==0 && dst_lo+1==dst_hi,
1755            "expected aligned-adjacent pairs");
1756   }
1757 
1758   if (src_lo == dst_lo && src_hi == dst_hi) {
1759     return 0;            // Self copy, no move.
1760   }
1761 
1762   bool is64 = (src_lo & 1) == 0 && src_lo + 1 == src_hi &&
1763               (dst_lo & 1) == 0 && dst_lo + 1 == dst_hi;
1764   int src_offset = ra_->reg2offset(src_lo);
1765   int dst_offset = ra_->reg2offset(dst_lo);
1766 
1767   if (bottom_type()->isa_vect() != NULL) {
1768     uint ireg = ideal_reg();
1769     assert(ireg == Op_VecD || ireg == Op_VecX, "must be 64 bit or 128 bit vector");
1770     if (cbuf) {
1771       MacroAssembler _masm(cbuf);
1772       assert((src_lo_rc != rc_int && dst_lo_rc != rc_int), "sanity");
1773       if (src_lo_rc == rc_stack && dst_lo_rc == rc_stack) {
1774         // stack->stack
1775         assert((src_offset & 7) == 0 && (dst_offset & 7) == 0, "unaligned stack offset");
1776         if (ireg == Op_VecD) {
1777           __ unspill(rscratch1, true, src_offset);
1778           __ spill(rscratch1, true, dst_offset);
1779         } else {
1780           __ spill_copy128(src_offset, dst_offset);
1781         }
1782       } else if (src_lo_rc == rc_float && dst_lo_rc == rc_float) {
1783         __ mov(as_FloatRegister(Matcher::_regEncode[dst_lo]),
1784                ireg == Op_VecD ? __ T8B : __ T16B,
1785                as_FloatRegister(Matcher::_regEncode[src_lo]));
1786       } else if (src_lo_rc == rc_float && dst_lo_rc == rc_stack) {
1787         __ spill(as_FloatRegister(Matcher::_regEncode[src_lo]),
1788                        ireg == Op_VecD ? __ D : __ Q,
1789                        ra_->reg2offset(dst_lo));
1790       } else if (src_lo_rc == rc_stack && dst_lo_rc == rc_float) {
1791         __ unspill(as_FloatRegister(Matcher::_regEncode[dst_lo]),
1792                        ireg == Op_VecD ? __ D : __ Q,
1793                        ra_->reg2offset(src_lo));
1794       } else {
1795         ShouldNotReachHere();
1796       }
1797     }
1798   } else if (cbuf) {
1799     MacroAssembler _masm(cbuf);
1800     switch (src_lo_rc) {
1801     case rc_int:
1802       if (dst_lo_rc == rc_int) {  // gpr --> gpr copy
1803         if (is64) {
1804             __ mov(as_Register(Matcher::_regEncode[dst_lo]),
1805                    as_Register(Matcher::_regEncode[src_lo]));
1806         } else {
1807             MacroAssembler _masm(cbuf);
1808             __ movw(as_Register(Matcher::_regEncode[dst_lo]),
1809                     as_Register(Matcher::_regEncode[src_lo]));
1810         }
1811       } else if (dst_lo_rc == rc_float) { // gpr --> fpr copy
1812         if (is64) {
1813             __ fmovd(as_FloatRegister(Matcher::_regEncode[dst_lo]),
1814                      as_Register(Matcher::_regEncode[src_lo]));
1815         } else {
1816             __ fmovs(as_FloatRegister(Matcher::_regEncode[dst_lo]),
1817                      as_Register(Matcher::_regEncode[src_lo]));
1818         }
1819       } else {                    // gpr --> stack spill
1820         assert(dst_lo_rc == rc_stack, "spill to bad register class");
1821         __ spill(as_Register(Matcher::_regEncode[src_lo]), is64, dst_offset);
1822       }
1823       break;
1824     case rc_float:
1825       if (dst_lo_rc == rc_int) {  // fpr --> gpr copy
1826         if (is64) {
1827             __ fmovd(as_Register(Matcher::_regEncode[dst_lo]),
1828                      as_FloatRegister(Matcher::_regEncode[src_lo]));
1829         } else {
1830             __ fmovs(as_Register(Matcher::_regEncode[dst_lo]),
1831                      as_FloatRegister(Matcher::_regEncode[src_lo]));
1832         }
1833       } else if (dst_lo_rc == rc_float) { // fpr --> fpr copy
1834           if (cbuf) {
1835             __ fmovd(as_FloatRegister(Matcher::_regEncode[dst_lo]),
1836                      as_FloatRegister(Matcher::_regEncode[src_lo]));
1837         } else {
1838             __ fmovs(as_FloatRegister(Matcher::_regEncode[dst_lo]),
1839                      as_FloatRegister(Matcher::_regEncode[src_lo]));
1840         }
1841       } else {                    // fpr --> stack spill
1842         assert(dst_lo_rc == rc_stack, "spill to bad register class");
1843         __ spill(as_FloatRegister(Matcher::_regEncode[src_lo]),
1844                  is64 ? __ D : __ S, dst_offset);
1845       }
1846       break;
1847     case rc_stack:
1848       if (dst_lo_rc == rc_int) {  // stack --> gpr load
1849         __ unspill(as_Register(Matcher::_regEncode[dst_lo]), is64, src_offset);
1850       } else if (dst_lo_rc == rc_float) { // stack --> fpr load
1851         __ unspill(as_FloatRegister(Matcher::_regEncode[dst_lo]),
1852                    is64 ? __ D : __ S, src_offset);
1853       } else {                    // stack --> stack copy
1854         assert(dst_lo_rc == rc_stack, "spill to bad register class");
1855         __ unspill(rscratch1, is64, src_offset);
1856         __ spill(rscratch1, is64, dst_offset);
1857       }
1858       break;
1859     default:
1860       assert(false, "bad rc_class for spill");
1861       ShouldNotReachHere();
1862     }
1863   }
1864 
1865   if (st) {
1866     st->print("spill ");
1867     if (src_lo_rc == rc_stack) {
1868       st->print("[sp, #%d] -> ", ra_->reg2offset(src_lo));
1869     } else {
1870       st->print("%s -> ", Matcher::regName[src_lo]);
1871     }
1872     if (dst_lo_rc == rc_stack) {
1873       st->print("[sp, #%d]", ra_->reg2offset(dst_lo));
1874     } else {
1875       st->print("%s", Matcher::regName[dst_lo]);
1876     }
1877     if (bottom_type()->isa_vect() != NULL) {
1878       st->print("\t# vector spill size = %d", ideal_reg()==Op_VecD ? 64:128);
1879     } else {
1880       st->print("\t# spill size = %d", is64 ? 64:32);
1881     }
1882   }
1883 
1884   return 0;
1885 
1886 }
1887 
#ifndef PRODUCT
// Pretty-print a spill copy.  Without a register allocator we can only
// name the node; otherwise delegate to implementation(), which formats
// without emitting code (cbuf == NULL).
void MachSpillCopyNode::format(PhaseRegAlloc *ra_, outputStream *st) const {
  if (!ra_)
    st->print("N%d = SpillCopy(N%d)", _idx, in(1)->_idx);
  else
    implementation(NULL, ra_, false, st);
}
#endif
1896 
// Emit the spill/copy code into the code buffer (no textual output).
void MachSpillCopyNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
  implementation(&cbuf, ra_, false, NULL);
}

// Size in bytes of the emitted code, computed generically by emitting
// into a scratch buffer.
uint MachSpillCopyNode::size(PhaseRegAlloc *ra_) const {
  return MachNode::size(ra_);
}
1904 
1905 //=============================================================================
1906 
1907 #ifndef PRODUCT
1908 void BoxLockNode::format(PhaseRegAlloc *ra_, outputStream *st) const {
1909   int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
1910   int reg = ra_->get_reg_first(this);
1911   st->print("add %s, rsp, #%d]\t# box lock",
1912             Matcher::regName[reg], offset);
1913 }
1914 #endif
1915 
1916 void BoxLockNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
1917   MacroAssembler _masm(&cbuf);
1918 
1919   int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
1920   int reg    = ra_->get_encode(this);
1921 
1922   if (Assembler::operand_valid_for_add_sub_immediate(offset)) {
1923     __ add(as_Register(reg), sp, offset);
1924   } else {
1925     ShouldNotReachHere();
1926   }
1927 }
1928 
uint BoxLockNode::size(PhaseRegAlloc *ra_) const {
  // BoxLockNode is not a MachNode, so we can't just call MachNode::size(ra_).
  // emit() above always produces exactly one 4-byte add instruction.
  return 4;
}
1933 
1934 //=============================================================================
1935 
1936 #ifndef PRODUCT
1937 void MachUEPNode::format(PhaseRegAlloc* ra_, outputStream* st) const
1938 {
1939   st->print_cr("# MachUEPNode");
1940   if (UseCompressedClassPointers) {
1941     st->print_cr("\tldrw rscratch1, j_rarg0 + oopDesc::klass_offset_in_bytes()]\t# compressed klass");
1942     if (Universe::narrow_klass_shift() != 0) {
1943       st->print_cr("\tdecode_klass_not_null rscratch1, rscratch1");
1944     }
1945   } else {
1946    st->print_cr("\tldr rscratch1, j_rarg0 + oopDesc::klass_offset_in_bytes()]\t# compressed klass");
1947   }
1948   st->print_cr("\tcmp r0, rscratch1\t # Inline cache check");
1949   st->print_cr("\tbne, SharedRuntime::_ic_miss_stub");
1950 }
1951 #endif
1952 
1953 void MachUEPNode::emit(CodeBuffer& cbuf, PhaseRegAlloc* ra_) const
1954 {
1955   // This is the unverified entry point.
1956   MacroAssembler _masm(&cbuf);
1957 
1958   __ cmp_klass(j_rarg0, rscratch2, rscratch1);
1959   Label skip;
1960   // TODO
1961   // can we avoid this skip and still use a reloc?
1962   __ br(Assembler::EQ, skip);
1963   __ far_jump(RuntimeAddress(SharedRuntime::get_ic_miss_stub()));
1964   __ bind(skip);
1965 }
1966 
// Code size of the unverified entry point, computed generically by
// emitting into a scratch buffer.
uint MachUEPNode::size(PhaseRegAlloc* ra_) const
{
  return MachNode::size(ra_);
}
1971 
1972 // REQUIRED EMIT CODE
1973 
1974 //=============================================================================
1975 
// Emit exception handler code.
// Generates a small stub that jumps to the shared exception blob; returns
// the offset of the stub within the stub section, or 0 on failure.
int HandlerImpl::emit_exception_handler(CodeBuffer& cbuf)
{
  // mov rscratch1 #exception_blob_entry_point
  // br rscratch1
  // Note that the code buffer's insts_mark is always relative to insts.
  // That's why we must use the macroassembler to generate a handler.
  MacroAssembler _masm(&cbuf);
  address base = __ start_a_stub(size_exception_handler());
  if (base == NULL) {
    ciEnv::current()->record_failure("CodeCache is full");
    return 0;  // CodeBuffer::expand failed
  }
  int offset = __ offset();
  __ far_jump(RuntimeAddress(OptoRuntime::exception_blob()->entry_point()));
  assert(__ offset() - offset <= (int) size_exception_handler(), "overflow");
  __ end_a_stub();
  return offset;
}
1995 
// Emit deopt handler code.
// Generates a small stub that records its own address in lr and jumps to
// the deopt blob's unpack entry; returns the offset of the stub within
// the stub section, or 0 on failure.
int HandlerImpl::emit_deopt_handler(CodeBuffer& cbuf)
{
  // Note that the code buffer's insts_mark is always relative to insts.
  // That's why we must use the macroassembler to generate a handler.
  MacroAssembler _masm(&cbuf);
  address base = __ start_a_stub(size_deopt_handler());
  if (base == NULL) {
    ciEnv::current()->record_failure("CodeCache is full");
    return 0;  // CodeBuffer::expand failed
  }
  int offset = __ offset();

  // Put the address of this deopt site into lr so the unpack blob can
  // identify the deoptimization point, then jump to it.
  __ adr(lr, __ pc());
  __ far_jump(RuntimeAddress(SharedRuntime::deopt_blob()->unpack()));

  assert(__ offset() - offset <= (int) size_deopt_handler(), "overflow");
  __ end_a_stub();
  return offset;
}
2016 
2017 // REQUIRED MATCHER CODE
2018 
2019 //=============================================================================
2020 
2021 const bool Matcher::match_rule_supported(int opcode) {
2022 
2023   switch (opcode) {
2024   default:
2025     break;
2026   }
2027 
2028   if (!has_match_rule(opcode)) {
2029     return false;
2030   }
2031 
2032   return true;  // Per default match rules are supported.
2033 }
2034 
2035 const bool Matcher::match_rule_supported_vector(int opcode, int vlen) {
2036 
2037   // TODO
2038   // identify extra cases that we might want to provide match rules for
2039   // e.g. Op_ vector nodes and other intrinsics while guarding with vlen
2040   bool ret_value = match_rule_supported(opcode);
2041   // Add rules here.
2042 
2043   return ret_value;  // Per default match rules are supported.
2044 }
2045 
// No predicated (masked) vector operations on this port.
const bool Matcher::has_predicated_vectors(void) {
  return false;
}

// Scaling of the FP register-pressure threshold; the generic default is
// used unchanged on aarch64.
const int Matcher::float_pressure(int default_pressure_threshold) {
  return default_pressure_threshold;
}

// Not needed on aarch64 (legacy of stack-based FPU registers); aborts if
// ever called.
int Matcher::regnum_to_fpu_offset(int regnum)
{
  Unimplemented();
  return 0;
}
2059 
// Is this branch offset short enough that a short branch can be used?
//
// NOTE: If the platform does not provide any short branch variants, then
//       this method should return false for offset 0.
bool Matcher::is_short_branch_offset(int rule, int br_size, int offset) {
  // The passed offset is relative to address of the branch.
  // Accept a signed 16-bit range (+/-32KB); presumably chosen to cover
  // the shortest-range conditional branch forms — TODO confirm against
  // the branch rules that use this predicate.
  return (-32768 <= offset && offset < 32768);
}
2069 
const bool Matcher::isSimpleConstant64(jlong value) {
  // Will one (StoreL ConL) be cheaper than two (StoreI ConI)?.
  // Probably always true, even if a temp register is required.
  return true;
}

// true just means we have fast l2f conversion
const bool Matcher::convL2FSupported(void) {
  return true;
}
2080 
2081 // Vector width in bytes.
2082 const int Matcher::vector_width_in_bytes(BasicType bt) {
2083   int size = MIN2(16,(int)MaxVectorSize);
2084   // Minimum 2 values in vector
2085   if (size < 2*type2aelembytes(bt)) size = 0;
2086   // But never < 4
2087   if (size < 4) size = 0;
2088   return size;
2089 }
2090 
// Limits on vector size (number of elements) loaded into vector.
// Maximum element count = available byte width / element size.
const int Matcher::max_vector_size(const BasicType bt) {
  return vector_width_in_bytes(bt)/type2aelembytes(bt);
}
2095 const int Matcher::min_vector_size(const BasicType bt) {
2096 //  For the moment limit the vector size to 8 bytes
2097     int size = 8 / type2aelembytes(bt);
2098     if (size < 2) size = 2;
2099     return size;
2100 }
2101 
2102 // Vector ideal reg.
2103 const uint Matcher::vector_ideal_reg(int len) {
2104   switch(len) {
2105     case  8: return Op_VecD;
2106     case 16: return Op_VecX;
2107   }
2108   ShouldNotReachHere();
2109   return 0;
2110 }
2111 
2112 const uint Matcher::vector_shift_count_ideal_reg(int size) {
2113   switch(size) {
2114     case  8: return Op_VecD;
2115     case 16: return Op_VecX;
2116   }
2117   ShouldNotReachHere();
2118   return 0;
2119 }
2120 
// AES support not yet implemented
const bool Matcher::pass_original_key_for_aes() {
  return false;
}

// aarch64 supports misaligned vectors store/load.
const bool Matcher::misaligned_vectors_ok() {
  return true;
}

// false => size gets scaled to BytesPerLong, ok.
const bool Matcher::init_array_count_is_in_bytes = false;

// Use conditional move (CMOVL)
const int Matcher::long_cmove_cost() {
  // long cmoves are no more expensive than int cmoves
  return 0;
}

// Use conditional move for floats as well.
const int Matcher::float_cmove_cost() {
  // float cmoves are no more expensive than int cmoves
  return 0;
}

// Does the CPU require late expand (see block.cpp for description of late expand)?
const bool Matcher::require_postalloc_expand = false;

// Do we need to mask the count passed to shift instructions or does
// the cpu only look at the lower 5/6 bits anyway?
const bool Matcher::need_masked_shift_count = false;
2151 
// This affects two different things:
//  - how Decode nodes are matched
//  - how ImplicitNullCheck opportunities are recognized
// If true, the matcher will try to remove all Decodes and match them
// (as operands) into nodes. NullChecks are not prepared to deal with
// Decodes by final_graph_reshaping().
// If false, final_graph_reshaping() forces the decode behind the Cmp
// for a NullCheck. The matcher matches the Decode node into a register.
// Implicit_null_check optimization moves the Decode along with the
// memory operation back up before the NullCheck.
bool Matcher::narrow_oop_use_complex_address() {
  // Only profitable when the narrow-oop shift is zero (unscaled mode).
  return Universe::narrow_oop_shift() == 0;
}

bool Matcher::narrow_klass_use_complex_address() {
// TODO
// decide whether we need to set this to true
  return false;
}

bool Matcher::const_oop_prefer_decode() {
  // Prefer ConN+DecodeN over ConP in simple compressed oops mode.
  return Universe::narrow_oop_base() == NULL;
}

bool Matcher::const_klass_prefer_decode() {
  // Prefer ConNKlass+DecodeNKlass over ConP in simple compressed klass mode.
  return Universe::narrow_klass_base() == NULL;
}
2181 
// Is it better to copy float constants, or load them directly from
// memory?  Intel can load a float constant from a direct address,
// requiring no extra registers.  Most RISCs will have to materialize
// an address into a register first, so they would do better to copy
// the constant from stack.
const bool Matcher::rematerialize_float_constants = false;

// If CPU can load and store mis-aligned doubles directly then no
// fixup is needed.  Else we split the double into 2 integer pieces
// and move it piece-by-piece.  Only happens when passing doubles into
// C code as the Java calling convention forces doubles to be aligned.
const bool Matcher::misaligned_doubles_ok = true;

// Not used on aarch64 (the old comment said "No-op on amd64", a
// copy-paste from the x86 port); aborts if ever invoked.
void Matcher::pd_implicit_null_fixup(MachNode *node, uint idx) {
  Unimplemented();
}

// Advertise here if the CPU requires explicit rounding operations to
// implement the UseStrictFP mode.
const bool Matcher::strict_fp_requires_explicit_rounding = false;

// Are floats converted to double when stored to stack during
// deoptimization?
bool Matcher::float_in_double() { return false; }

// Do ints take an entire long register or just half?
// The relevant question is how the int is callee-saved:
// the whole long is written but de-opt'ing will have to extract
// the relevant 32 bits.
const bool Matcher::int_in_long = true;
2213 
// Return whether or not this register is ever used as an argument.
// This function is used on startup to build the trampoline stubs in
// generateOptoStub.  Registers not mentioned will be killed by the VM
// call in the trampoline, and arguments in those registers not be
// available to the callee.
bool Matcher::can_be_java_arg(int reg)
{
  // The Java argument registers are r0-r7 and v0-v7 (both halves of
  // each pair).
  return
    reg ==  R0_num || reg == R0_H_num ||
    reg ==  R1_num || reg == R1_H_num ||
    reg ==  R2_num || reg == R2_H_num ||
    reg ==  R3_num || reg == R3_H_num ||
    reg ==  R4_num || reg == R4_H_num ||
    reg ==  R5_num || reg == R5_H_num ||
    reg ==  R6_num || reg == R6_H_num ||
    reg ==  R7_num || reg == R7_H_num ||
    reg ==  V0_num || reg == V0_H_num ||
    reg ==  V1_num || reg == V1_H_num ||
    reg ==  V2_num || reg == V2_H_num ||
    reg ==  V3_num || reg == V3_H_num ||
    reg ==  V4_num || reg == V4_H_num ||
    reg ==  V5_num || reg == V5_H_num ||
    reg ==  V6_num || reg == V6_H_num ||
    reg ==  V7_num || reg == V7_H_num;
}

// Argument registers are also the only registers that may be spilled.
bool Matcher::is_spillable_arg(int reg)
{
  return can_be_java_arg(reg);
}

// No assembly fast path for long division by constant on this port.
bool Matcher::use_asm_for_ldiv_by_con(jlong divisor) {
  return false;
}
2248 
// Register for DIVI projection of divmodI.
// Combined div/mod projections are never used on aarch64; all four of
// these hooks abort if called.
RegMask Matcher::divI_proj_mask() {
  ShouldNotReachHere();
  return RegMask();
}

// Register for MODI projection of divmodI.
RegMask Matcher::modI_proj_mask() {
  ShouldNotReachHere();
  return RegMask();
}

// Register for DIVL projection of divmodL.
RegMask Matcher::divL_proj_mask() {
  ShouldNotReachHere();
  return RegMask();
}

// Register for MODL projection of divmodL.
RegMask Matcher::modL_proj_mask() {
  ShouldNotReachHere();
  return RegMask();
}

// SP is saved in the frame pointer register across a method-handle invoke.
const RegMask Matcher::method_handle_invoke_SP_save_mask() {
  return FP_REG_mask();
}
2275 
2276 bool size_fits_all_mem_uses(AddPNode* addp, int shift) {
2277   for (DUIterator_Fast imax, i = addp->fast_outs(imax); i < imax; i++) {
2278     Node* u = addp->fast_out(i);
2279     if (u->is_Mem()) {
2280       int opsize = u->as_Mem()->memory_size();
2281       assert(opsize > 0, "unexpected memory operand size");
2282       if (u->as_Mem()->memory_size() != (1<<shift)) {
2283         return false;
2284       }
2285     }
2286   }
2287   return true;
2288 }
2289 
const bool Matcher::convi2l_type_required = false;

// Should the Matcher clone shifts on addressing modes, expecting them
// to be subsumed into complex addressing expressions or compute them
// into registers?
// Returns true when the AddP's operands were pushed for cloning into an
// address operand; the order of the address_visited/mstack operations
// below is significant and must not be rearranged.
bool Matcher::clone_address_expressions(AddPNode* m, Matcher::MStack& mstack, VectorSet& address_visited) {
  if (clone_base_plus_offset_address(m, mstack, address_visited)) {
    return true;
  }

  Node *off = m->in(AddPNode::Offset);
  // Case 1: offset is (LShiftL (ConvI2L x)? con) and every memory use
  // matches the scale -- fold the shift into a scaled-index address.
  if (off->Opcode() == Op_LShiftL && off->in(2)->is_Con() &&
      size_fits_all_mem_uses(m, off->in(2)->get_int()) &&
      // Are there other uses besides address expressions?
      !is_visited(off)) {
    address_visited.set(off->_idx); // Flag as address_visited
    mstack.push(off->in(2), Visit);
    Node *conv = off->in(1);
    if (conv->Opcode() == Op_ConvI2L &&
        // Are there other uses besides address expressions?
        !is_visited(conv)) {
      address_visited.set(conv->_idx); // Flag as address_visited
      mstack.push(conv->in(1), Pre_Visit);
    } else {
      mstack.push(conv, Pre_Visit);
    }
    address_visited.test_set(m->_idx); // Flag as address_visited
    mstack.push(m->in(AddPNode::Address), Pre_Visit);
    mstack.push(m->in(AddPNode::Base), Pre_Visit);
    return true;
  // Case 2: offset is a bare (ConvI2L x) -- fold the sign extension
  // into the addressing mode.
  } else if (off->Opcode() == Op_ConvI2L &&
             // Are there other uses besides address expressions?
             !is_visited(off)) {
    address_visited.test_set(m->_idx); // Flag as address_visited
    address_visited.set(off->_idx); // Flag as address_visited
    mstack.push(off->in(1), Pre_Visit);
    mstack.push(m->in(AddPNode::Address), Pre_Visit);
    mstack.push(m->in(AddPNode::Base), Pre_Visit);
    return true;
  }
  return false;
}
2332 
// No platform-specific address reshaping is needed on aarch64.
void Compile::reshape_address(AddPNode* addp) {
}
2335 
2336 
// Emit a volatile (acquire/release) load or store via INSN.  Volatile
// accesses here only support a plain base-register address, hence the
// guarantees on index/disp/scale.  (Comments cannot go inside the macro
// body without breaking the line continuations.)
#define MOV_VOLATILE(REG, BASE, INDEX, SCALE, DISP, SCRATCH, INSN)      \
  MacroAssembler _masm(&cbuf);                                          \
  {                                                                     \
    guarantee(INDEX == -1, "mode not permitted for volatile");          \
    guarantee(DISP == 0, "mode not permitted for volatile");            \
    guarantee(SCALE == 0, "mode not permitted for volatile");           \
    __ INSN(REG, as_Register(BASE));                                    \
  }

// Member-function pointer types for the shared loadStore helpers below:
// integer-register, float-register and SIMD-register memory instructions.
typedef void (MacroAssembler::* mem_insn)(Register Rt, const Address &adr);
typedef void (MacroAssembler::* mem_float_insn)(FloatRegister Rt, const Address &adr);
typedef void (MacroAssembler::* mem_vector_insn)(FloatRegister Rt,
                                  MacroAssembler::SIMD_RegVariant T, const Address &adr);
2350 
  // Used for all non-volatile memory accesses.  The use of
  // $mem->opcode() to discover whether this pattern uses sign-extended
  // offsets is something of a kludge.
  // Builds the Address from (base, index, size, disp) and invokes the
  // given MacroAssembler member function on it.
  static void loadStore(MacroAssembler masm, mem_insn insn,
                         Register reg, int opcode,
                         Register base, int index, int size, int disp)
  {
    Address::extend scale;

    // Hooboy, this is fugly.  We need a way to communicate to the
    // encoder that the index needs to be sign extended, so we have to
    // enumerate all the cases.
    switch (opcode) {
    case INDINDEXSCALEDI2L:
    case INDINDEXSCALEDI2LN:
    case INDINDEXI2L:
    case INDINDEXI2LN:
      scale = Address::sxtw(size);
      break;
    default:
      scale = Address::lsl(size);
    }

    if (index == -1) {
      // No index register: simple base + displacement.
      (masm.*insn)(reg, Address(base, disp));
    } else {
      // Register-indexed form: displacement must be zero.
      assert(disp == 0, "unsupported address mode: disp = %d", disp);
      (masm.*insn)(reg, Address(base, as_Register(index), scale));
    }
  }
2381 
2382   static void loadStore(MacroAssembler masm, mem_float_insn insn,
2383                          FloatRegister reg, int opcode,
2384                          Register base, int index, int size, int disp)
2385   {
2386     Address::extend scale;
2387 
2388     switch (opcode) {
2389     case INDINDEXSCALEDI2L:
2390     case INDINDEXSCALEDI2LN:
2391       scale = Address::sxtw(size);
2392       break;
2393     default:
2394       scale = Address::lsl(size);
2395     }
2396 
2397      if (index == -1) {
2398       (masm.*insn)(reg, Address(base, disp));
2399     } else {
2400       assert(disp == 0, "unsupported address mode: disp = %d", disp);
2401       (masm.*insn)(reg, Address(base, as_Register(index), scale));
2402     }
2403   }
2404 
  // SIMD/vector variant of loadStore: the register variant T selects the
  // access width; indexed addressing always uses an lsl-scaled index.
  static void loadStore(MacroAssembler masm, mem_vector_insn insn,
                         FloatRegister reg, MacroAssembler::SIMD_RegVariant T,
                         int opcode, Register base, int index, int size, int disp)
  {
    if (index == -1) {
      (masm.*insn)(reg, T, Address(base, disp));
    } else {
      // Register-indexed form: displacement must be zero.
      assert(disp == 0, "unsupported address mode");
      (masm.*insn)(reg, T, Address(base, as_Register(index), Address::lsl(size)));
    }
  }
2416 
2417 %}
2418 
2419 
2420 
2421 //----------ENCODING BLOCK-----------------------------------------------------
2422 // This block specifies the encoding classes used by the compiler to
2423 // output byte streams.  Encoding classes are parameterized macros
2424 // used by Machine Instruction Nodes in order to generate the bit
2425 // encoding of the instruction.  Operands specify their base encoding
2426 // interface with the interface keyword.  There are currently
2427 // supported four interfaces, REG_INTER, CONST_INTER, MEMORY_INTER, &
2428 // COND_INTER.  REG_INTER causes an operand to generate a function
2429 // which returns its register number when queried.  CONST_INTER causes
2430 // an operand to generate a function which returns the value of the
2431 // constant when queried.  MEMORY_INTER causes an operand to generate
2432 // four functions which return the Base Register, the Index Register,
2433 // the Scale Value, and the Offset Value of the operand when queried.
2434 // COND_INTER causes an operand to generate six functions which return
2435 // the encoding code (ie - encoding bits for the instruction)
2436 // associated with each basic boolean condition for a conditional
2437 // instruction.
2438 //
2439 // Instructions specify two basic values for encoding.  Again, a
2440 // function is available to check if the constant displacement is an
2441 // oop. They use the ins_encode keyword to specify their encoding
2442 // classes (which must be a sequence of enc_class names, and their
2443 // parameters, specified in the encoding block), and they use the
2444 // opcode keyword to specify, in order, their primary, secondary, and
2445 // tertiary opcode.  Only the opcode sections which a particular
2446 // instruction needs for encoding need to be specified.
2447 encode %{
2448   // Build emit functions for each basic byte or larger field in the
2449   // intel encoding scheme (opcode, rm, sib, immediate), and call them
2450   // from C++ code in the enc_class source block.  Emit functions will
2451   // live in the main source block for now.  In future, we can
2452   // generalize this by adding a syntax that specifies the sizes of
2453   // fields in an order, so that the adlc can build the emit functions
2454   // automagically
2455 
  // catch all for unimplemented encodings
  // Emits a call to MacroAssembler::unimplemented, which reports the
  // message and stops execution.
  enc_class enc_unimplemented %{
    MacroAssembler _masm(&cbuf);
    __ unimplemented("C2 catch all");
  %}
2461 
  // BEGIN Non-volatile memory access
  //
  // Each of the following enc_classes emits one load through the shared
  // loadStore helper, which selects the addressing mode from the memory
  // operand's opcode and components.

  // Load byte, sign-extended to int.
  enc_class aarch64_enc_ldrsbw(iRegI dst, memory mem) %{
    Register dst_reg = as_Register($dst$$reg);
    loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldrsbw, dst_reg, $mem->opcode(),
               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}

  // Load byte, sign-extended to long.
  enc_class aarch64_enc_ldrsb(iRegI dst, memory mem) %{
    Register dst_reg = as_Register($dst$$reg);
    loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldrsb, dst_reg, $mem->opcode(),
               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}

  // Load byte, zero-extended, into an int register.
  enc_class aarch64_enc_ldrb(iRegI dst, memory mem) %{
    Register dst_reg = as_Register($dst$$reg);
    loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldrb, dst_reg, $mem->opcode(),
               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}

  // Load byte, zero-extended, into a long register.
  enc_class aarch64_enc_ldrb(iRegL dst, memory mem) %{
    Register dst_reg = as_Register($dst$$reg);
    loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldrb, dst_reg, $mem->opcode(),
               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}

  // Load halfword, sign-extended to int.
  enc_class aarch64_enc_ldrshw(iRegI dst, memory mem) %{
    Register dst_reg = as_Register($dst$$reg);
    loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldrshw, dst_reg, $mem->opcode(),
               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}

  // Load halfword, sign-extended to long.
  enc_class aarch64_enc_ldrsh(iRegI dst, memory mem) %{
    Register dst_reg = as_Register($dst$$reg);
    loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldrsh, dst_reg, $mem->opcode(),
               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}

  // Load halfword, zero-extended, into an int register.
  enc_class aarch64_enc_ldrh(iRegI dst, memory mem) %{
    Register dst_reg = as_Register($dst$$reg);
    loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldrh, dst_reg, $mem->opcode(),
               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}

  // Load halfword, zero-extended, into a long register.
  enc_class aarch64_enc_ldrh(iRegL dst, memory mem) %{
    Register dst_reg = as_Register($dst$$reg);
    loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldrh, dst_reg, $mem->opcode(),
               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}

  // Load 32-bit word into an int register.
  enc_class aarch64_enc_ldrw(iRegI dst, memory mem) %{
    Register dst_reg = as_Register($dst$$reg);
    loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldrw, dst_reg, $mem->opcode(),
               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}

  // Load 32-bit word, zero-extended, into a long register.
  enc_class aarch64_enc_ldrw(iRegL dst, memory mem) %{
    Register dst_reg = as_Register($dst$$reg);
    loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldrw, dst_reg, $mem->opcode(),
               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}

  // Load 32-bit word, sign-extended, into a long register.
  enc_class aarch64_enc_ldrsw(iRegL dst, memory mem) %{
    Register dst_reg = as_Register($dst$$reg);
    loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldrsw, dst_reg, $mem->opcode(),
               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}

  // Load 64-bit doubleword into a long register.
  enc_class aarch64_enc_ldr(iRegL dst, memory mem) %{
    Register dst_reg = as_Register($dst$$reg);
    loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldr, dst_reg, $mem->opcode(),
               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}

  // Load 32-bit float.
  enc_class aarch64_enc_ldrs(vRegF dst, memory mem) %{
    FloatRegister dst_reg = as_FloatRegister($dst$$reg);
    loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldrs, dst_reg, $mem->opcode(),
               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}

  // Load 64-bit double.
  enc_class aarch64_enc_ldrd(vRegD dst, memory mem) %{
    FloatRegister dst_reg = as_FloatRegister($dst$$reg);
    loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldrd, dst_reg, $mem->opcode(),
               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}

  // Load 32-bit vector (S variant).
  enc_class aarch64_enc_ldrvS(vecD dst, memory mem) %{
    FloatRegister dst_reg = as_FloatRegister($dst$$reg);
    loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldr, dst_reg, MacroAssembler::S,
       $mem->opcode(), as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}

  // Load 64-bit vector (D variant).
  enc_class aarch64_enc_ldrvD(vecD dst, memory mem) %{
    FloatRegister dst_reg = as_FloatRegister($dst$$reg);
    loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldr, dst_reg, MacroAssembler::D,
       $mem->opcode(), as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}

  // Load 128-bit vector (Q variant).
  enc_class aarch64_enc_ldrvQ(vecX dst, memory mem) %{
    FloatRegister dst_reg = as_FloatRegister($dst$$reg);
    loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldr, dst_reg, MacroAssembler::Q,
       $mem->opcode(), as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}
2565 
  // Byte store: strb src, [mem].
  enc_class aarch64_enc_strb(iRegI src, memory mem) %{
    Register src_reg = as_Register($src$$reg);
    loadStore(MacroAssembler(&cbuf), &MacroAssembler::strb, src_reg, $mem->opcode(),
               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}
2571 
  // Byte store of constant zero: strb zr, [mem].
  enc_class aarch64_enc_strb0(memory mem) %{
    MacroAssembler _masm(&cbuf);
    loadStore(_masm, &MacroAssembler::strb, zr, $mem->opcode(),
               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}
2577 
  // Byte store of zero preceded by a StoreStore barrier, so earlier
  // stores are ordered before this one.
  enc_class aarch64_enc_strb0_ordered(memory mem) %{
    MacroAssembler _masm(&cbuf);
    __ membar(Assembler::StoreStore);
    loadStore(_masm, &MacroAssembler::strb, zr, $mem->opcode(),
               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}
2584 
  // Halfword store: strh src, [mem].
  enc_class aarch64_enc_strh(iRegI src, memory mem) %{
    Register src_reg = as_Register($src$$reg);
    loadStore(MacroAssembler(&cbuf), &MacroAssembler::strh, src_reg, $mem->opcode(),
               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}
2590 
  // Halfword store of constant zero: strh zr, [mem].
  enc_class aarch64_enc_strh0(memory mem) %{
    MacroAssembler _masm(&cbuf);
    loadStore(_masm, &MacroAssembler::strh, zr, $mem->opcode(),
               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}
2596 
  // 32-bit store: strw src, [mem].
  enc_class aarch64_enc_strw(iRegI src, memory mem) %{
    Register src_reg = as_Register($src$$reg);
    loadStore(MacroAssembler(&cbuf), &MacroAssembler::strw, src_reg, $mem->opcode(),
               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}
2602 
  // 32-bit store of constant zero: strw zr, [mem].
  enc_class aarch64_enc_strw0(memory mem) %{
    MacroAssembler _masm(&cbuf);
    loadStore(_masm, &MacroAssembler::strw, zr, $mem->opcode(),
               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}
2608 
  // 64-bit store: str src, [mem].  SP (r31_sp) cannot be a store source
  // on AArch64; the only expected case is saving sp into the current
  // thread (asserted below), so copy it through rscratch2 first.
  enc_class aarch64_enc_str(iRegL src, memory mem) %{
    Register src_reg = as_Register($src$$reg);
    // we sometimes get asked to store the stack pointer into the
    // current thread -- we cannot do that directly on AArch64
    if (src_reg == r31_sp) {
      MacroAssembler _masm(&cbuf);
      assert(as_Register($mem$$base) == rthread, "unexpected store for sp");
      __ mov(rscratch2, sp);
      src_reg = rscratch2;
    }
    loadStore(MacroAssembler(&cbuf), &MacroAssembler::str, src_reg, $mem->opcode(),
               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}
2622 
  // 64-bit store of constant zero: str zr, [mem].
  enc_class aarch64_enc_str0(memory mem) %{
    MacroAssembler _masm(&cbuf);
    loadStore(_masm, &MacroAssembler::str, zr, $mem->opcode(),
               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}
2628 
  // Single-precision FP store: strs src, [mem].
  enc_class aarch64_enc_strs(vRegF src, memory mem) %{
    FloatRegister src_reg = as_FloatRegister($src$$reg);
    loadStore(MacroAssembler(&cbuf), &MacroAssembler::strs, src_reg, $mem->opcode(),
               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}
2634 
  // Double-precision FP store: strd src, [mem].
  enc_class aarch64_enc_strd(vRegD src, memory mem) %{
    FloatRegister src_reg = as_FloatRegister($src$$reg);
    loadStore(MacroAssembler(&cbuf), &MacroAssembler::strd, src_reg, $mem->opcode(),
               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}
2640 
  // Vector store, 32-bit (S-sized) form.
  enc_class aarch64_enc_strvS(vecD src, memory mem) %{
    FloatRegister src_reg = as_FloatRegister($src$$reg);
    loadStore(MacroAssembler(&cbuf), &MacroAssembler::str, src_reg, MacroAssembler::S,
       $mem->opcode(), as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}
2646 
  // Vector store, 64-bit (D-sized) form.
  enc_class aarch64_enc_strvD(vecD src, memory mem) %{
    FloatRegister src_reg = as_FloatRegister($src$$reg);
    loadStore(MacroAssembler(&cbuf), &MacroAssembler::str, src_reg, MacroAssembler::D,
       $mem->opcode(), as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}
2652 
  // Vector store, 128-bit (Q-sized) form.
  enc_class aarch64_enc_strvQ(vecX src, memory mem) %{
    FloatRegister src_reg = as_FloatRegister($src$$reg);
    loadStore(MacroAssembler(&cbuf), &MacroAssembler::str, src_reg, MacroAssembler::Q,
       $mem->opcode(), as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}
2658 
2659   // END Non-volatile memory access
2660 
2661   // volatile loads and stores
2662 
  // Store-release byte (stlrb) via the MOV_VOLATILE macro; rscratch1 is
  // the address scratch register.
  enc_class aarch64_enc_stlrb(iRegI src, memory mem) %{
    MOV_VOLATILE(as_Register($src$$reg), $mem$$base, $mem$$index, $mem$$scale, $mem$$disp,
                 rscratch1, stlrb);
  %}
2667 
  // Store-release halfword (stlrh).
  enc_class aarch64_enc_stlrh(iRegI src, memory mem) %{
    MOV_VOLATILE(as_Register($src$$reg), $mem$$base, $mem$$index, $mem$$scale, $mem$$disp,
                 rscratch1, stlrh);
  %}
2672 
  // Store-release 32-bit word (stlrw).
  enc_class aarch64_enc_stlrw(iRegI src, memory mem) %{
    MOV_VOLATILE(as_Register($src$$reg), $mem$$base, $mem$$index, $mem$$scale, $mem$$disp,
                 rscratch1, stlrw);
  %}
2677 
2678 
  // Load-acquire byte then sign-extend to 32 bits.
  enc_class aarch64_enc_ldarsbw(iRegI dst, memory mem) %{
    Register dst_reg = as_Register($dst$$reg);
    MOV_VOLATILE(dst_reg, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp,
             rscratch1, ldarb);
    __ sxtbw(dst_reg, dst_reg);
  %}
2685 
  // Load-acquire byte then sign-extend to 64 bits.
  enc_class aarch64_enc_ldarsb(iRegL dst, memory mem) %{
    Register dst_reg = as_Register($dst$$reg);
    MOV_VOLATILE(dst_reg, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp,
             rscratch1, ldarb);
    __ sxtb(dst_reg, dst_reg);
  %}
2692 
  // Load-acquire byte, zero-extended (ldarb), 32-bit destination.
  enc_class aarch64_enc_ldarbw(iRegI dst, memory mem) %{
    MOV_VOLATILE(as_Register($dst$$reg), $mem$$base, $mem$$index, $mem$$scale, $mem$$disp,
             rscratch1, ldarb);
  %}
2697 
  // Load-acquire byte, zero-extended (ldarb), 64-bit destination.
  enc_class aarch64_enc_ldarb(iRegL dst, memory mem) %{
    MOV_VOLATILE(as_Register($dst$$reg), $mem$$base, $mem$$index, $mem$$scale, $mem$$disp,
             rscratch1, ldarb);
  %}
2702 
  // Load-acquire halfword then sign-extend to 32 bits.
  enc_class aarch64_enc_ldarshw(iRegI dst, memory mem) %{
    Register dst_reg = as_Register($dst$$reg);
    MOV_VOLATILE(dst_reg, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp,
             rscratch1, ldarh);
    __ sxthw(dst_reg, dst_reg);
  %}
2709 
  // Load-acquire halfword then sign-extend to 64 bits.
  enc_class aarch64_enc_ldarsh(iRegL dst, memory mem) %{
    Register dst_reg = as_Register($dst$$reg);
    MOV_VOLATILE(dst_reg, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp,
             rscratch1, ldarh);
    __ sxth(dst_reg, dst_reg);
  %}
2716 
  // Load-acquire halfword, zero-extended (ldarh), 32-bit destination.
  enc_class aarch64_enc_ldarhw(iRegI dst, memory mem) %{
    MOV_VOLATILE(as_Register($dst$$reg), $mem$$base, $mem$$index, $mem$$scale, $mem$$disp,
             rscratch1, ldarh);
  %}
2721 
  // Load-acquire halfword, zero-extended (ldarh), 64-bit destination.
  enc_class aarch64_enc_ldarh(iRegL dst, memory mem) %{
    MOV_VOLATILE(as_Register($dst$$reg), $mem$$base, $mem$$index, $mem$$scale, $mem$$disp,
             rscratch1, ldarh);
  %}
2726 
  // Load-acquire 32-bit word (ldarw), int destination.
  enc_class aarch64_enc_ldarw(iRegI dst, memory mem) %{
    MOV_VOLATILE(as_Register($dst$$reg), $mem$$base, $mem$$index, $mem$$scale, $mem$$disp,
             rscratch1, ldarw);
  %}
2731 
  // Load-acquire 32-bit word (ldarw), long destination (upper bits zeroed
  // by the 32-bit load).
  // NOTE(review): this enc_class has the same name as the iRegI variant
  // above, differing only in the dst operand type -- confirm ADLC resolves
  // the duplicate definition as intended.
  enc_class aarch64_enc_ldarw(iRegL dst, memory mem) %{
    MOV_VOLATILE(as_Register($dst$$reg), $mem$$base, $mem$$index, $mem$$scale, $mem$$disp,
             rscratch1, ldarw);
  %}
2736 
  // Load-acquire 64-bit (ldar).
  enc_class aarch64_enc_ldar(iRegL dst, memory mem) %{
    MOV_VOLATILE(as_Register($dst$$reg), $mem$$base, $mem$$index, $mem$$scale, $mem$$disp,
             rscratch1, ldar);
  %}
2741 
  // Volatile float load: load-acquire the 32-bit word into rscratch1,
  // then move the bits into the FP register with fmovs.
  enc_class aarch64_enc_fldars(vRegF dst, memory mem) %{
    MOV_VOLATILE(rscratch1, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp,
             rscratch1, ldarw);
    __ fmovs(as_FloatRegister($dst$$reg), rscratch1);
  %}
2747 
  // Volatile double load: load-acquire the 64 bits into rscratch1, then
  // move them into the FP register with fmovd.
  enc_class aarch64_enc_fldard(vRegD dst, memory mem) %{
    MOV_VOLATILE(rscratch1, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp,
             rscratch1, ldar);
    __ fmovd(as_FloatRegister($dst$$reg), rscratch1);
  %}
2753 
2754   enc_class aarch64_enc_stlr(iRegL src, memory mem) %{
2755     Register src_reg = as_Register($src$$reg);
2756     // we sometimes get asked to store the stack pointer into the
2757     // current thread -- we cannot do that directly on AArch64
2758     if (src_reg == r31_sp) {
2759         MacroAssembler _masm(&cbuf);
2760       assert(as_Register($mem$$base) == rthread, "unexpected store for sp");
2761       __ mov(rscratch2, sp);
2762       src_reg = rscratch2;
2763     }
2764     MOV_VOLATILE(src_reg, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp,
2765                  rscratch1, stlr);
2766   %}
2767 
  // Volatile float store: move the FP bits to rscratch2 (inner scope so
  // _masm is destroyed before MOV_VOLATILE declares its own), then
  // store-release the 32-bit word.
  enc_class aarch64_enc_fstlrs(vRegF src, memory mem) %{
    {
      MacroAssembler _masm(&cbuf);
      FloatRegister src_reg = as_FloatRegister($src$$reg);
      __ fmovs(rscratch2, src_reg);
    }
    MOV_VOLATILE(rscratch2, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp,
                 rscratch1, stlrw);
  %}
2777 
  // Volatile double store: move the FP bits to rscratch2, then
  // store-release the 64 bits.
  enc_class aarch64_enc_fstlrd(vRegD src, memory mem) %{
    {
      MacroAssembler _masm(&cbuf);
      FloatRegister src_reg = as_FloatRegister($src$$reg);
      __ fmovd(rscratch2, src_reg);
    }
    MOV_VOLATILE(rscratch2, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp,
                 rscratch1, stlr);
  %}
2787 
2788   // synchronized read/update encodings
2789 
  // Load-acquire-exclusive (ldaxr).  ldaxr only takes a base register,
  // so any index/displacement is folded into rscratch1 with lea first.
  enc_class aarch64_enc_ldaxr(iRegL dst, memory mem) %{
    MacroAssembler _masm(&cbuf);
    Register dst_reg = as_Register($dst$$reg);
    Register base = as_Register($mem$$base);
    int index = $mem$$index;
    int scale = $mem$$scale;
    int disp = $mem$$disp;
    if (index == -1) {
       if (disp != 0) {
        __ lea(rscratch1, Address(base, disp));
        __ ldaxr(dst_reg, rscratch1);
      } else {
        // TODO
        // should we ever get anything other than this case?
        __ ldaxr(dst_reg, base);
      }
    } else {
      Register index_reg = as_Register(index);
      if (disp == 0) {
        __ lea(rscratch1, Address(base, index_reg, Address::lsl(scale)));
        __ ldaxr(dst_reg, rscratch1);
      } else {
        // base + disp first, then add the scaled index
        __ lea(rscratch1, Address(base, disp));
        __ lea(rscratch1, Address(rscratch1, index_reg, Address::lsl(scale)));
        __ ldaxr(dst_reg, rscratch1);
      }
    }
  %}
2818 
  // Store-release-exclusive (stlxr).  The status result lands in
  // rscratch1 (0 = success); the final cmpw sets flags so EQ means the
  // exclusive store succeeded.  Address forms are folded via rscratch2.
  enc_class aarch64_enc_stlxr(iRegLNoSp src, memory mem) %{
    MacroAssembler _masm(&cbuf);
    Register src_reg = as_Register($src$$reg);
    Register base = as_Register($mem$$base);
    int index = $mem$$index;
    int scale = $mem$$scale;
    int disp = $mem$$disp;
    if (index == -1) {
       if (disp != 0) {
        __ lea(rscratch2, Address(base, disp));
        __ stlxr(rscratch1, src_reg, rscratch2);
      } else {
        // TODO
        // should we ever get anything other than this case?
        __ stlxr(rscratch1, src_reg, base);
      }
    } else {
      Register index_reg = as_Register(index);
      if (disp == 0) {
        __ lea(rscratch2, Address(base, index_reg, Address::lsl(scale)));
        __ stlxr(rscratch1, src_reg, rscratch2);
      } else {
        __ lea(rscratch2, Address(base, disp));
        __ lea(rscratch2, Address(rscratch2, index_reg, Address::lsl(scale)));
        __ stlxr(rscratch1, src_reg, rscratch2);
      }
    }
    __ cmpw(rscratch1, zr);
  %}
2848 
  // 64-bit compare-and-swap, release-only ordering; requires a plain
  // base-register address (no index/displacement).
  enc_class aarch64_enc_cmpxchg(memory mem, iRegLNoSp oldval, iRegLNoSp newval) %{
    MacroAssembler _masm(&cbuf);
    guarantee($mem$$index == -1 && $mem$$disp == 0, "impossible encoding");
    __ cmpxchg($mem$$base$$Register, $oldval$$Register, $newval$$Register,
               Assembler::xword, /*acquire*/ false, /*release*/ true,
               /*weak*/ false, noreg);
  %}
2856 
  // 32-bit compare-and-swap, release-only ordering.
  enc_class aarch64_enc_cmpxchgw(memory mem, iRegINoSp oldval, iRegINoSp newval) %{
    MacroAssembler _masm(&cbuf);
    guarantee($mem$$index == -1 && $mem$$disp == 0, "impossible encoding");
    __ cmpxchg($mem$$base$$Register, $oldval$$Register, $newval$$Register,
               Assembler::word, /*acquire*/ false, /*release*/ true,
               /*weak*/ false, noreg);
  %}
2864 
  // 16-bit (halfword) compare-and-swap, release-only ordering.
  enc_class aarch64_enc_cmpxchgs(memory mem, iRegINoSp oldval, iRegINoSp newval) %{
    MacroAssembler _masm(&cbuf);
    guarantee($mem$$index == -1 && $mem$$disp == 0, "impossible encoding");
    __ cmpxchg($mem$$base$$Register, $oldval$$Register, $newval$$Register,
               Assembler::halfword, /*acquire*/ false, /*release*/ true,
               /*weak*/ false, noreg);
  %}
2872 
  // 8-bit (byte) compare-and-swap, release-only ordering.
  enc_class aarch64_enc_cmpxchgb(memory mem, iRegINoSp oldval, iRegINoSp newval) %{
    MacroAssembler _masm(&cbuf);
    guarantee($mem$$index == -1 && $mem$$disp == 0, "impossible encoding");
    __ cmpxchg($mem$$base$$Register, $oldval$$Register, $newval$$Register,
               Assembler::byte, /*acquire*/ false, /*release*/ true,
               /*weak*/ false, noreg);
  %}
2880 
2881 
2882   // The only difference between aarch64_enc_cmpxchg and
2883   // aarch64_enc_cmpxchg_acq is that we use load-acquire in the
2884   // CompareAndSwap sequence to serve as a barrier on acquiring a
2885   // lock.
  // 64-bit compare-and-swap with acquire AND release semantics (see the
  // comment above: the acquire serves as the lock-acquisition barrier).
  enc_class aarch64_enc_cmpxchg_acq(memory mem, iRegLNoSp oldval, iRegLNoSp newval) %{
    MacroAssembler _masm(&cbuf);
    guarantee($mem$$index == -1 && $mem$$disp == 0, "impossible encoding");
    __ cmpxchg($mem$$base$$Register, $oldval$$Register, $newval$$Register,
               Assembler::xword, /*acquire*/ true, /*release*/ true,
               /*weak*/ false, noreg);
  %}
2893 
  // 32-bit compare-and-swap with acquire and release semantics.
  enc_class aarch64_enc_cmpxchgw_acq(memory mem, iRegINoSp oldval, iRegINoSp newval) %{
    MacroAssembler _masm(&cbuf);
    guarantee($mem$$index == -1 && $mem$$disp == 0, "impossible encoding");
    __ cmpxchg($mem$$base$$Register, $oldval$$Register, $newval$$Register,
               Assembler::word, /*acquire*/ true, /*release*/ true,
               /*weak*/ false, noreg);
  %}
2901 
2902 
2903   // auxiliary used for CompareAndSwapX to set result register
2904   enc_class aarch64_enc_cset_eq(iRegINoSp res) %{
2905     MacroAssembler _masm(&cbuf);
2906     Register res_reg = as_Register($res$$reg);
2907     __ cset(res_reg, Assembler::EQ);
2908   %}
2909 
2910   // prefetch encodings
2911 
  // Prefetch for write (prfm PSTL1KEEP).  prfm cannot combine a register
  // index with a displacement, so that case goes through lea/rscratch1.
  enc_class aarch64_enc_prefetchw(memory mem) %{
    MacroAssembler _masm(&cbuf);
    Register base = as_Register($mem$$base);
    int index = $mem$$index;
    int scale = $mem$$scale;
    int disp = $mem$$disp;
    if (index == -1) {
      __ prfm(Address(base, disp), PSTL1KEEP);
    } else {
      Register index_reg = as_Register(index);
      if (disp == 0) {
        __ prfm(Address(base, index_reg, Address::lsl(scale)), PSTL1KEEP);
      } else {
        __ lea(rscratch1, Address(base, disp));
        __ prfm(Address(rscratch1, index_reg, Address::lsl(scale)), PSTL1KEEP);
      }
    }
  %}
2930 
  /// mov encodings
2932 
2933   enc_class aarch64_enc_movw_imm(iRegI dst, immI src) %{
2934     MacroAssembler _masm(&cbuf);
2935     u_int32_t con = (u_int32_t)$src$$constant;
2936     Register dst_reg = as_Register($dst$$reg);
2937     if (con == 0) {
2938       __ movw(dst_reg, zr);
2939     } else {
2940       __ movw(dst_reg, con);
2941     }
2942   %}
2943 
  // Load a 64-bit immediate into a long register; a zero constant uses
  // the zero register as the source.
  enc_class aarch64_enc_mov_imm(iRegL dst, immL src) %{
    MacroAssembler _masm(&cbuf);
    Register dst_reg = as_Register($dst$$reg);
    u_int64_t con = (u_int64_t)$src$$constant;
    if (con == 0) {
      __ mov(dst_reg, zr);
    } else {
      __ mov(dst_reg, con);
    }
  %}
2954 
  // Load a pointer constant, dispatching on its relocation type: oops
  // and metadata get relocatable moves; unrelocated constants below the
  // VM page size are moved directly, larger ones via adrp+add.
  // NULL and 1 are handled by the dedicated mov_p0/mov_p1 encodings.
  enc_class aarch64_enc_mov_p(iRegP dst, immP src) %{
    MacroAssembler _masm(&cbuf);
    Register dst_reg = as_Register($dst$$reg);
    address con = (address)$src$$constant;
    if (con == NULL || con == (address)1) {
      ShouldNotReachHere();
    } else {
      relocInfo::relocType rtype = $src->constant_reloc();
      if (rtype == relocInfo::oop_type) {
        __ movoop(dst_reg, (jobject)con, /*immediate*/true);
      } else if (rtype == relocInfo::metadata_type) {
        __ mov_metadata(dst_reg, (Metadata*)con);
      } else {
        assert(rtype == relocInfo::none, "unexpected reloc type");
        if (con < (address)(uintptr_t)os::vm_page_size()) {
          __ mov(dst_reg, con);
        } else {
          unsigned long offset;
          __ adrp(dst_reg, con, offset);
          __ add(dst_reg, dst_reg, offset);
        }
      }
    }
  %}
2979 
2980   enc_class aarch64_enc_mov_p0(iRegP dst, immP0 src) %{
2981     MacroAssembler _masm(&cbuf);
2982     Register dst_reg = as_Register($dst$$reg);
2983     __ mov(dst_reg, zr);
2984   %}
2985 
2986   enc_class aarch64_enc_mov_p1(iRegP dst, immP_1 src) %{
2987     MacroAssembler _masm(&cbuf);
2988     Register dst_reg = as_Register($dst$$reg);
2989     __ mov(dst_reg, (u_int64_t)1);
2990   %}
2991 
  // Load the polling page address with a poll-type relocation; the page
  // is assumed page-aligned so adrp must yield a zero low offset.
  enc_class aarch64_enc_mov_poll_page(iRegP dst, immPollPage src) %{
    MacroAssembler _masm(&cbuf);
    address page = (address)$src$$constant;
    Register dst_reg = as_Register($dst$$reg);
    unsigned long off;
    __ adrp(dst_reg, Address(page, relocInfo::poll_type), off);
    assert(off == 0, "assumed offset == 0");
  %}
3000 
  // Load the card-table byte map base via the MacroAssembler helper.
  enc_class aarch64_enc_mov_byte_map_base(iRegP dst, immByteMapBase src) %{
    MacroAssembler _masm(&cbuf);
    __ load_byte_map_base($dst$$Register);
  %}
3005 
  // Load a narrow (compressed) oop constant; NULL is handled by the
  // dedicated mov_n0 encoding.
  enc_class aarch64_enc_mov_n(iRegN dst, immN src) %{
    MacroAssembler _masm(&cbuf);
    Register dst_reg = as_Register($dst$$reg);
    address con = (address)$src$$constant;
    if (con == NULL) {
      ShouldNotReachHere();
    } else {
      relocInfo::relocType rtype = $src->constant_reloc();
      assert(rtype == relocInfo::oop_type, "unexpected reloc type");
      __ set_narrow_oop(dst_reg, (jobject)con);
    }
  %}
3018 
3019   enc_class aarch64_enc_mov_n0(iRegN dst, immN0 src) %{
3020     MacroAssembler _masm(&cbuf);
3021     Register dst_reg = as_Register($dst$$reg);
3022     __ mov(dst_reg, zr);
3023   %}
3024 
  // Load a narrow (compressed) klass constant.
  enc_class aarch64_enc_mov_nk(iRegN dst, immNKlass src) %{
    MacroAssembler _masm(&cbuf);
    Register dst_reg = as_Register($dst$$reg);
    address con = (address)$src$$constant;
    if (con == NULL) {
      ShouldNotReachHere();
    } else {
      relocInfo::relocType rtype = $src->constant_reloc();
      assert(rtype == relocInfo::metadata_type, "unexpected reloc type");
      __ set_narrow_klass(dst_reg, (Klass *)con);
    }
  %}
3037 
3038   // arithmetic encodings
3039 
  // 32-bit add/subtract with immediate.  $primary distinguishes the two
  // rules sharing this encoding; negative effective constants are emitted
  // as the opposite operation with the magnitude.
  enc_class aarch64_enc_addsubw_imm(iRegI dst, iRegI src1, immIAddSub src2) %{
    MacroAssembler _masm(&cbuf);
    Register dst_reg = as_Register($dst$$reg);
    Register src_reg = as_Register($src1$$reg);
    int32_t con = (int32_t)$src2$$constant;
    // add has primary == 0, subtract has primary == 1
    if ($primary) { con = -con; }
    if (con < 0) {
      __ subw(dst_reg, src_reg, -con);
    } else {
      __ addw(dst_reg, src_reg, con);
    }
  %}
3053 
  // 64-bit add/subtract with immediate; same $primary convention as the
  // 32-bit variant above.  The constant is known to fit in 32 bits
  // (immLAddSub operand), hence the int32_t cast.
  enc_class aarch64_enc_addsub_imm(iRegL dst, iRegL src1, immLAddSub src2) %{
    MacroAssembler _masm(&cbuf);
    Register dst_reg = as_Register($dst$$reg);
    Register src_reg = as_Register($src1$$reg);
    int32_t con = (int32_t)$src2$$constant;
    // add has primary == 0, subtract has primary == 1
    if ($primary) { con = -con; }
    if (con < 0) {
      __ sub(dst_reg, src_reg, -con);
    } else {
      __ add(dst_reg, src_reg, con);
    }
  %}
3067 
3068   enc_class aarch64_enc_divw(iRegI dst, iRegI src1, iRegI src2) %{
3069     MacroAssembler _masm(&cbuf);
3070    Register dst_reg = as_Register($dst$$reg);
3071    Register src1_reg = as_Register($src1$$reg);
3072    Register src2_reg = as_Register($src2$$reg);
3073     __ corrected_idivl(dst_reg, src1_reg, src2_reg, false, rscratch1);
3074   %}
3075 
3076   enc_class aarch64_enc_div(iRegI dst, iRegI src1, iRegI src2) %{
3077     MacroAssembler _masm(&cbuf);
3078    Register dst_reg = as_Register($dst$$reg);
3079    Register src1_reg = as_Register($src1$$reg);
3080    Register src2_reg = as_Register($src2$$reg);
3081     __ corrected_idivq(dst_reg, src1_reg, src2_reg, false, rscratch1);
3082   %}
3083 
3084   enc_class aarch64_enc_modw(iRegI dst, iRegI src1, iRegI src2) %{
3085     MacroAssembler _masm(&cbuf);
3086    Register dst_reg = as_Register($dst$$reg);
3087    Register src1_reg = as_Register($src1$$reg);
3088    Register src2_reg = as_Register($src2$$reg);
3089     __ corrected_idivl(dst_reg, src1_reg, src2_reg, true, rscratch1);
3090   %}
3091 
3092   enc_class aarch64_enc_mod(iRegI dst, iRegI src1, iRegI src2) %{
3093     MacroAssembler _masm(&cbuf);
3094    Register dst_reg = as_Register($dst$$reg);
3095    Register src1_reg = as_Register($src1$$reg);
3096    Register src2_reg = as_Register($src2$$reg);
3097     __ corrected_idivq(dst_reg, src1_reg, src2_reg, true, rscratch1);
3098   %}
3099 
3100   // compare instruction encodings
3101 
3102   enc_class aarch64_enc_cmpw(iRegI src1, iRegI src2) %{
3103     MacroAssembler _masm(&cbuf);
3104     Register reg1 = as_Register($src1$$reg);
3105     Register reg2 = as_Register($src2$$reg);
3106     __ cmpw(reg1, reg2);
3107   %}
3108 
  // 32-bit compare with an add/sub-encodable immediate: compare-with-val
  // is a subs to the zero register; a negative immediate uses adds with
  // the magnitude instead.
  enc_class aarch64_enc_cmpw_imm_addsub(iRegI src1, immIAddSub src2) %{
    MacroAssembler _masm(&cbuf);
    Register reg = as_Register($src1$$reg);
    int32_t val = $src2$$constant;
    if (val >= 0) {
      __ subsw(zr, reg, val);
    } else {
      __ addsw(zr, reg, -val);
    }
  %}
3119 
  // 32-bit compare with an arbitrary immediate: materialize it in
  // rscratch1 first.
  enc_class aarch64_enc_cmpw_imm(iRegI src1, immI src2) %{
    MacroAssembler _masm(&cbuf);
    Register reg1 = as_Register($src1$$reg);
    u_int32_t val = (u_int32_t)$src2$$constant;
    __ movw(rscratch1, val);
    __ cmpw(reg1, rscratch1);
  %}
3127 
3128   enc_class aarch64_enc_cmp(iRegL src1, iRegL src2) %{
3129     MacroAssembler _masm(&cbuf);
3130     Register reg1 = as_Register($src1$$reg);
3131     Register reg2 = as_Register($src2$$reg);
3132     __ cmp(reg1, reg2);
3133   %}
3134 
  // 64-bit compare with a 12-bit-encodable immediate.  Negative values
  // use adds with the magnitude; Long.MIN_VALUE (the one value equal to
  // its own negation) cannot be negated, so it is materialized via orr
  // and compared register-to-register.
  enc_class aarch64_enc_cmp_imm_addsub(iRegL src1, immL12 src2) %{
    MacroAssembler _masm(&cbuf);
    Register reg = as_Register($src1$$reg);
    int64_t val = $src2$$constant;
    if (val >= 0) {
      __ subs(zr, reg, val);
    } else if (val != -val) {
      __ adds(zr, reg, -val);
    } else {
    // aargh, Long.MIN_VALUE is a special case
      __ orr(rscratch1, zr, (u_int64_t)val);
      __ subs(zr, reg, rscratch1);
    }
  %}
3149 
  // 64-bit compare with an arbitrary immediate: materialize it in
  // rscratch1 first.
  enc_class aarch64_enc_cmp_imm(iRegL src1, immL src2) %{
    MacroAssembler _masm(&cbuf);
    Register reg1 = as_Register($src1$$reg);
    u_int64_t val = (u_int64_t)$src2$$constant;
    __ mov(rscratch1, val);
    __ cmp(reg1, rscratch1);
  %}
3157 
3158   enc_class aarch64_enc_cmpp(iRegP src1, iRegP src2) %{
3159     MacroAssembler _masm(&cbuf);
3160     Register reg1 = as_Register($src1$$reg);
3161     Register reg2 = as_Register($src2$$reg);
3162     __ cmp(reg1, reg2);
3163   %}
3164 
3165   enc_class aarch64_enc_cmpn(iRegN src1, iRegN src2) %{
3166     MacroAssembler _masm(&cbuf);
3167     Register reg1 = as_Register($src1$$reg);
3168     Register reg2 = as_Register($src2$$reg);
3169     __ cmpw(reg1, reg2);
3170   %}
3171 
3172   enc_class aarch64_enc_testp(iRegP src) %{
3173     MacroAssembler _masm(&cbuf);
3174     Register reg = as_Register($src$$reg);
3175     __ cmp(reg, zr);
3176   %}
3177 
3178   enc_class aarch64_enc_testn(iRegN src) %{
3179     MacroAssembler _masm(&cbuf);
3180     Register reg = as_Register($src$$reg);
3181     __ cmpw(reg, zr);
3182   %}
3183 
3184   enc_class aarch64_enc_b(label lbl) %{
3185     MacroAssembler _masm(&cbuf);
3186     Label *L = $lbl$$label;
3187     __ b(*L);
3188   %}
3189 
  // Conditional branch; the condition code comes from the signed cmpOp
  // operand.
  enc_class aarch64_enc_br_con(cmpOp cmp, label lbl) %{
    MacroAssembler _masm(&cbuf);
    Label *L = $lbl$$label;
    __ br ((Assembler::Condition)$cmp$$cmpcode, *L);
  %}
3195 
  // Conditional branch with an unsigned cmpOpU condition; emission is
  // identical, the operand type supplies unsigned condition codes.
  enc_class aarch64_enc_br_conU(cmpOpU cmp, label lbl) %{
    MacroAssembler _masm(&cbuf);
    Label *L = $lbl$$label;
    __ br ((Assembler::Condition)$cmp$$cmpcode, *L);
  %}
3201 
  // Slow-path subtype check.  On a hit, $primary selects whether the
  // result register is zeroed before falling through to the miss label;
  // set_cond_codes leaves the flags reflecting the outcome.
  enc_class aarch64_enc_partial_subtype_check(iRegP sub, iRegP super, iRegP temp, iRegP result)
  %{
     Register sub_reg = as_Register($sub$$reg);
     Register super_reg = as_Register($super$$reg);
     Register temp_reg = as_Register($temp$$reg);
     Register result_reg = as_Register($result$$reg);

     Label miss;
     MacroAssembler _masm(&cbuf);
     __ check_klass_subtype_slow_path(sub_reg, super_reg, temp_reg, result_reg,
                                     NULL, &miss,
                                     /*set_cond_codes:*/ true);
     if ($primary) {
       __ mov(result_reg, zr);
     }
     __ bind(miss);
  %}
3219 
  // Java static call.  Runtime wrappers (no _method) get a plain
  // runtime-call trampoline; real Java targets get an (opt-)virtual or
  // static call relocation plus a to-interpreter stub.  Any NULL return
  // means the code cache is full, which is recorded as a bailout.
  enc_class aarch64_enc_java_static_call(method meth) %{
    MacroAssembler _masm(&cbuf);

    address addr = (address)$meth$$method;
    address call;
    if (!_method) {
      // A call to a runtime wrapper, e.g. new, new_typeArray_Java, uncommon_trap.
      call = __ trampoline_call(Address(addr, relocInfo::runtime_call_type), &cbuf);
    } else {
      int method_index = resolved_method_index(cbuf);
      RelocationHolder rspec = _optimized_virtual ? opt_virtual_call_Relocation::spec(method_index)
                                                  : static_call_Relocation::spec(method_index);
      call = __ trampoline_call(Address(addr, rspec), &cbuf);

      // Emit stub for static call
      address stub = CompiledStaticCall::emit_to_interp_stub(cbuf);
      if (stub == NULL) {
        ciEnv::current()->record_failure("CodeCache is full");
        return;
      }
    }
    if (call == NULL) {
      ciEnv::current()->record_failure("CodeCache is full");
      return;
    }
  %}
3246 
  // Java dynamic (inline-cache) call; NULL return means the code cache
  // is full and compilation bails out.
  enc_class aarch64_enc_java_dynamic_call(method meth) %{
    MacroAssembler _masm(&cbuf);
    int method_index = resolved_method_index(cbuf);
    address call = __ ic_call((address)$meth$$method, method_index);
    if (call == NULL) {
      ciEnv::current()->record_failure("CodeCache is full");
      return;
    }
  %}
3256 
  // Post-call epilog; the VerifyStackAtCalls check is not implemented on
  // AArch64 (call_Unimplemented traps if the flag is on).
  enc_class aarch64_enc_call_epilog() %{
    MacroAssembler _masm(&cbuf);
    if (VerifyStackAtCalls) {
      // Check that stack depth is unchanged: find majik cookie on stack
      __ call_Unimplemented();
    }
  %}
3264 
  // Call from compiled Java code into the runtime.  Targets inside the
  // code cache are reachable via a trampoline call; anything else goes
  // through blr, with the return pc stored on the stack first so
  // JavaFrameAnchor::capture_last_Java_pc() can find it.
  enc_class aarch64_enc_java_to_runtime(method meth) %{
    MacroAssembler _masm(&cbuf);

    // some calls to generated routines (arraycopy code) are scheduled
    // by C2 as runtime calls. if so we can call them using a br (they
    // will be in a reachable segment) otherwise we have to use a blr
    // which loads the absolute address into a register.
    address entry = (address)$meth$$method;
    CodeBlob *cb = CodeCache::find_blob(entry);
    if (cb) {
      address call = __ trampoline_call(Address(entry, relocInfo::runtime_call_type));
      if (call == NULL) {
        ciEnv::current()->record_failure("CodeCache is full");
        return;
      }
    } else {
      Label retaddr;
      __ adr(rscratch2, retaddr);
      __ lea(rscratch1, RuntimeAddress(entry));
      // Leave a breadcrumb for JavaFrameAnchor::capture_last_Java_pc()
      __ stp(zr, rscratch2, Address(__ pre(sp, -2 * wordSize)));
      __ blr(rscratch1);
      __ bind(retaddr);
      __ add(sp, sp, 2 * wordSize);
    }
  %}
3291 
  // Jump to the rethrow stub (far jump: the stub may be out of branch range).
  enc_class aarch64_enc_rethrow() %{
    MacroAssembler _masm(&cbuf);
    __ far_jump(RuntimeAddress(OptoRuntime::rethrow_stub()));
  %}
3296 
  // Method return via the link register.
  enc_class aarch64_enc_ret() %{
    MacroAssembler _masm(&cbuf);
    __ ret(lr);
  %}
3301 
3302   enc_class aarch64_enc_tail_call(iRegP jump_target) %{
3303     MacroAssembler _masm(&cbuf);
3304     Register target_reg = as_Register($jump_target$$reg);
3305     __ br(target_reg);
3306   %}
3307 
  // Tail jump used for exception forwarding: pass the popped return
  // address to the callee in r3 before jumping.
  enc_class aarch64_enc_tail_jmp(iRegP jump_target) %{
    MacroAssembler _masm(&cbuf);
    Register target_reg = as_Register($jump_target$$reg);
    // exception oop should be in r0
    // ret addr has been popped into lr
    // callee expects it in r3
    __ mov(r3, lr);
    __ br(target_reg);
  %}
3317 
  // Fast-path monitor enter.  Attempts, in order: biased locking (if
  // enabled), stack-locking via CAS of the box address into the object's
  // markOop, recursive stack-lock detection, and finally CAS of the
  // inflated monitor's owner field.  Exits with flags EQ on success and
  // NE on failure (slow path required); EmitSync bits can force the
  // runtime path or suppress the inflated-monitor handling.
  enc_class aarch64_enc_fast_lock(iRegP object, iRegP box, iRegP tmp, iRegP tmp2) %{
    MacroAssembler _masm(&cbuf);
    Register oop = as_Register($object$$reg);
    Register box = as_Register($box$$reg);
    Register disp_hdr = as_Register($tmp$$reg);
    Register tmp = as_Register($tmp2$$reg);
    Label cont;
    Label object_has_monitor;
    Label cas_failed;

    assert_different_registers(oop, box, tmp, disp_hdr);

    // Load markOop from object into displaced_header.
    __ ldr(disp_hdr, Address(oop, oopDesc::mark_offset_in_bytes()));

    // Always do locking in runtime.
    if (EmitSync & 0x01) {
      __ cmp(oop, zr);
      return;
    }

    if (UseBiasedLocking && !UseOptoBiasInlining) {
      __ biased_locking_enter(box, oop, disp_hdr, tmp, true, cont);
    }

    // Check for existing monitor
    if ((EmitSync & 0x02) == 0) {
      __ tbnz(disp_hdr, exact_log2(markOopDesc::monitor_value), object_has_monitor);
    }

    // Set tmp to be (markOop of object | UNLOCK_VALUE).
    __ orr(tmp, disp_hdr, markOopDesc::unlocked_value);

    // Initialize the box. (Must happen before we update the object mark!)
    __ str(tmp, Address(box, BasicLock::displaced_header_offset_in_bytes()));

    // Compare object markOop with an unlocked value (tmp) and if
    // equal exchange the stack address of our box with object markOop.
    // On failure disp_hdr contains the possibly locked markOop.
    __ cmpxchg(oop, tmp, box, Assembler::xword, /*acquire*/ true,
               /*release*/ true, /*weak*/ false, disp_hdr);
    __ br(Assembler::EQ, cont);

    assert(oopDesc::mark_offset_in_bytes() == 0, "offset of _mark is not 0");

    // If the compare-and-exchange succeeded, then we found an unlocked
    // object, will have now locked it will continue at label cont

    __ bind(cas_failed);
    // We did not see an unlocked object so try the fast recursive case.

    // Check if the owner is self by comparing the value in the
    // markOop of object (disp_hdr) with the stack pointer.
    __ mov(rscratch1, sp);
    __ sub(disp_hdr, disp_hdr, rscratch1);
    __ mov(tmp, (address) (~(os::vm_page_size()-1) | (uintptr_t)markOopDesc::lock_mask_in_place));
    // If condition is true we are cont and hence we can store 0 as the
    // displaced header in the box, which indicates that it is a recursive lock.
    __ ands(tmp/*==0?*/, disp_hdr, tmp);   // Sets flags for result
    __ str(tmp/*==0, perhaps*/, Address(box, BasicLock::displaced_header_offset_in_bytes()));

    if ((EmitSync & 0x02) == 0) {
      __ b(cont);

      // Handle existing monitor.
      __ bind(object_has_monitor);
      // The object's monitor m is unlocked iff m->owner == NULL,
      // otherwise m->owner may contain a thread or a stack address.
      //
      // Try to CAS m->owner from NULL to current thread.
      __ add(tmp, disp_hdr, (ObjectMonitor::owner_offset_in_bytes()-markOopDesc::monitor_value));
    __ cmpxchg(tmp, zr, rthread, Assembler::xword, /*acquire*/ true,
               /*release*/ true, /*weak*/ false, noreg); // Sets flags for result

      // Store a non-null value into the box to avoid looking like a re-entrant
      // lock. The fast-path monitor unlock code checks for
      // markOopDesc::monitor_value so use markOopDesc::unused_mark which has the
      // relevant bit set, and also matches ObjectSynchronizer::slow_enter.
      __ mov(tmp, (address)markOopDesc::unused_mark());
      __ str(tmp, Address(box, BasicLock::displaced_header_offset_in_bytes()));
    }

    __ bind(cont);
    // flag == EQ indicates success
    // flag == NE indicates failure
  %}
3404 
  enc_class aarch64_enc_fast_unlock(iRegP object, iRegP box, iRegP tmp, iRegP tmp2) %{
    // Fast-path monitor exit for C2-compiled code.  On completion the
    // condition flags carry the result: EQ == success, NE == failure
    // (the caller branches to the runtime slow path on NE).
    MacroAssembler _masm(&cbuf);
    Register oop = as_Register($object$$reg);      // object being unlocked
    Register box = as_Register($box$$reg);         // on-stack BasicLock
    Register disp_hdr = as_Register($tmp$$reg);    // displaced header (scratch)
    Register tmp = as_Register($tmp2$$reg);        // mark word / monitor (scratch)
    Label cont;
    Label object_has_monitor;

    assert_different_registers(oop, box, tmp, disp_hdr);

    // Always do locking in runtime.
    if (EmitSync & 0x01) {
      __ cmp(oop, zr); // Oop can't be 0 here => always false.
      return;
    }

    if (UseBiasedLocking && !UseOptoBiasInlining) {
      __ biased_locking_exit(oop, tmp, cont);
    }

    // Find the lock address and load the displaced header from the stack.
    __ ldr(disp_hdr, Address(box, BasicLock::displaced_header_offset_in_bytes()));

    // If the displaced header is 0, we have a recursive unlock.
    __ cmp(disp_hdr, zr);
    __ br(Assembler::EQ, cont);

    // Handle existing monitor.
    // Note: the monitor bit is tested on disp_hdr (the box's displaced
    // header), not on the freshly loaded mark word in tmp.  This works
    // because the locking code stores markOopDesc::unused_mark() -- which
    // has the monitor bit set -- into the box when the lock is inflated.
    // tmp is loaded here so the monitor path below can use the real mark.
    if ((EmitSync & 0x02) == 0) {
      __ ldr(tmp, Address(oop, oopDesc::mark_offset_in_bytes()));
      __ tbnz(disp_hdr, exact_log2(markOopDesc::monitor_value), object_has_monitor);
    }

    // Check if it is still a light weight lock; this is true if we
    // see the stack address of the basicLock in the markOop of the
    // object.  The cmpxchg sets the flags: EQ on success.

    __ cmpxchg(oop, box, disp_hdr, Assembler::xword, /*acquire*/ false,
               /*release*/ true, /*weak*/ false, tmp);
    __ b(cont);

    assert(oopDesc::mark_offset_in_bytes() == 0, "offset of _mark is not 0");

    // Handle existing monitor.
    if ((EmitSync & 0x02) == 0) {
      __ bind(object_has_monitor);
      // Strip the tag bits to get the ObjectMonitor* from the mark word.
      __ add(tmp, tmp, -markOopDesc::monitor_value); // monitor
      __ ldr(rscratch1, Address(tmp, ObjectMonitor::owner_offset_in_bytes()));
      __ ldr(disp_hdr, Address(tmp, ObjectMonitor::recursions_offset_in_bytes()));
      __ eor(rscratch1, rscratch1, rthread); // Will be 0 if we are the owner.
      __ orr(rscratch1, rscratch1, disp_hdr); // Will be 0 if there are 0 recursions
      __ cmp(rscratch1, zr); // Sets flags for result
      // Not the owner, or recursions != 0: bail out with NE set.
      __ br(Assembler::NE, cont);

      // Only release the monitor if no thread is waiting on it
      // (EntryList and cxq both empty); otherwise take the slow path.
      __ ldr(rscratch1, Address(tmp, ObjectMonitor::EntryList_offset_in_bytes()));
      __ ldr(disp_hdr, Address(tmp, ObjectMonitor::cxq_offset_in_bytes()));
      __ orr(rscratch1, rscratch1, disp_hdr); // Will be 0 if both are 0.
      __ cmp(rscratch1, zr); // Sets flags for result
      __ cbnz(rscratch1, cont);
      // need a release store here
      __ lea(tmp, Address(tmp, ObjectMonitor::owner_offset_in_bytes()));
      __ stlr(zr, tmp); // set unowned
    }

    __ bind(cont);
    // flag == EQ indicates success
    // flag == NE indicates failure
  %}
3474 
3475 %}
3476 
3477 //----------FRAME--------------------------------------------------------------
3478 // Definition of frame structure and management information.
3479 //
3480 //  S T A C K   L A Y O U T    Allocators stack-slot number
3481 //                             |   (to get allocators register number
3482 //  G  Owned by    |        |  v    add OptoReg::stack0())
3483 //  r   CALLER     |        |
3484 //  o     |        +--------+      pad to even-align allocators stack-slot
3485 //  w     V        |  pad0  |        numbers; owned by CALLER
3486 //  t   -----------+--------+----> Matcher::_in_arg_limit, unaligned
3487 //  h     ^        |   in   |  5
3488 //        |        |  args  |  4   Holes in incoming args owned by SELF
3489 //  |     |        |        |  3
3490 //  |     |        +--------+
3491 //  V     |        | old out|      Empty on Intel, window on Sparc
3492 //        |    old |preserve|      Must be even aligned.
3493 //        |     SP-+--------+----> Matcher::_old_SP, even aligned
3494 //        |        |   in   |  3   area for Intel ret address
3495 //     Owned by    |preserve|      Empty on Sparc.
3496 //       SELF      +--------+
3497 //        |        |  pad2  |  2   pad to align old SP
3498 //        |        +--------+  1
3499 //        |        | locks  |  0
3500 //        |        +--------+----> OptoReg::stack0(), even aligned
3501 //        |        |  pad1  | 11   pad to align new SP
3502 //        |        +--------+
3503 //        |        |        | 10
3504 //        |        | spills |  9   spills
3505 //        V        |        |  8   (pad0 slot for callee)
3506 //      -----------+--------+----> Matcher::_out_arg_limit, unaligned
3507 //        ^        |  out   |  7
3508 //        |        |  args  |  6   Holes in outgoing args owned by CALLEE
3509 //     Owned by    +--------+
3510 //      CALLEE     | new out|  6   Empty on Intel, window on Sparc
3511 //        |    new |preserve|      Must be even-aligned.
3512 //        |     SP-+--------+----> Matcher::_new_SP, even aligned
3513 //        |        |        |
3514 //
3515 // Note 1: Only region 8-11 is determined by the allocator.  Region 0-5 is
3516 //         known from SELF's arguments and the Java calling convention.
3517 //         Region 6-7 is determined per call site.
3518 // Note 2: If the calling convention leaves holes in the incoming argument
3519 //         area, those holes are owned by SELF.  Holes in the outgoing area
//         are owned by the CALLEE.  Holes should not be necessary in the
3521 //         incoming area, as the Java calling convention is completely under
3522 //         the control of the AD file.  Doubles can be sorted and packed to
//         avoid holes.  Holes in the outgoing arguments may be necessary for
3524 //         varargs C calling conventions.
3525 // Note 3: Region 0-3 is even aligned, with pad2 as needed.  Region 3-5 is
3526 //         even aligned with pad0 as needed.
3527 //         Region 6 is even aligned.  Region 6-7 is NOT even aligned;
3528 //           (the latter is true on Intel but is it false on AArch64?)
3529 //         region 6-11 is even aligned; it may be padded out more so that
3530 //         the region from SP to FP meets the minimum stack alignment.
3531 // Note 4: For I2C adapters, the incoming FP may not meet the minimum stack
3532 //         alignment.  Region 11, pad1, may be dynamically extended so that
3533 //         SP meets the minimum alignment.
3534 
frame %{
  // What direction does stack grow in (assumed to be same for C & Java)
  stack_direction(TOWARDS_LOW);

  // These three registers define part of the calling convention
  // between compiled code and the interpreter.

  // Inline Cache Register or methodOop for I2C.
  inline_cache_reg(R12);

  // Method Oop Register when calling interpreter.
  interpreter_method_oop_reg(R12);

  // Number of stack slots consumed by locking an object
  // (one BasicLock == one 64-bit word == two 32-bit slots)
  sync_stack_slots(2);

  // Compiled code's Frame Pointer
  // NOTE(review): R31 denotes SP in this register file -- compiled frames
  // address locals SP-relative; confirm against the register definitions.
  frame_pointer(R31);

  // Interpreter stores its frame pointer in a register which is
  // stored to the stack by I2CAdaptors.
  // I2CAdaptors convert from interpreted java to compiled java.
  interpreter_frame_pointer(R29);

  // Stack alignment requirement
  stack_alignment(StackAlignmentInBytes); // Alignment size in bytes (128-bit -> 16 bytes)

  // Number of stack slots between incoming argument block and the start of
  // a new frame.  The PROLOG must add this many slots to the stack.  The
  // EPILOG must remove this many slots. aarch64 needs two slots for
  // return address and fp.
  // TODO think this is correct but check
  in_preserve_stack_slots(4);

  // Number of outgoing stack slots killed above the out_preserve_stack_slots
  // for calls to C.  Supports the var-args backing area for register parms.
  varargs_C_out_slots_killed(frame::arg_reg_save_area_bytes/BytesPerInt);

  // The after-PROLOG location of the return address.  Location of
  // return address specifies a type (REG or STACK) and a number
  // representing the register number (i.e. - use a register name) or
  // stack slot.
  // Ret Addr is on stack in slot 0 if no locks or verification or alignment.
  // Otherwise, it is above the locks and verification slot and alignment word
  // TODO this may well be correct but need to check why that - 2 is there
  // ppc port uses 0 but we definitely need to allow for fixed_slots
  // which folds in the space used for monitors
  return_addr(STACK - 2 +
              align_up((Compile::current()->in_preserve_stack_slots() +
                        Compile::current()->fixed_slots()),
                       stack_alignment_in_slots()));

  // Body of function which returns an integer array locating
  // arguments either in registers or in stack slots.  Passed an array
  // of ideal registers called "sig" and a "length" count.  Stack-slot
  // offsets are based on outgoing arguments, i.e. a CALLER setting up
  // arguments for a CALLEE.  Incoming stack arguments are
  // automatically biased by the preserve_stack_slots field above.

  calling_convention
  %{
    // No difference between ingoing/outgoing just pass false
    SharedRuntime::java_calling_convention(sig_bt, regs, length, false);
  %}

  c_calling_convention
  %{
    // This is obviously always outgoing
    (void) SharedRuntime::c_calling_convention(sig_bt, regs, NULL, length);
  %}

  // Location of compiled Java return values.  Same as C for now.
  return_value
  %{
    // TODO do we allow ideal_reg == Op_RegN???
    assert(ideal_reg >= Op_RegI && ideal_reg <= Op_RegL,
           "only return normal values");

    // Low half of the return-value register pair, indexed by ideal type.
    static const int lo[Op_RegL + 1] = { // enum name
      0,                                 // Op_Node
      0,                                 // Op_Set
      R0_num,                            // Op_RegN
      R0_num,                            // Op_RegI
      R0_num,                            // Op_RegP
      V0_num,                            // Op_RegF
      V0_num,                            // Op_RegD
      R0_num                             // Op_RegL
    };

    // High half; OptoReg::Bad for 32-bit values that occupy one slot.
    static const int hi[Op_RegL + 1] = { // enum name
      0,                                 // Op_Node
      0,                                 // Op_Set
      OptoReg::Bad,                       // Op_RegN
      OptoReg::Bad,                      // Op_RegI
      R0_H_num,                          // Op_RegP
      OptoReg::Bad,                      // Op_RegF
      V0_H_num,                          // Op_RegD
      R0_H_num                           // Op_RegL
    };

    return OptoRegPair(hi[ideal_reg], lo[ideal_reg]);
  %}
%}
3638 
3639 //----------ATTRIBUTES---------------------------------------------------------
3640 //----------Operand Attributes-------------------------------------------------
op_attrib op_cost(1);        // Required cost attribute.  This is the
                             // default for every operand; cheap operands
                             // override it with op_cost(0).

//----------Instruction Attributes---------------------------------------------
ins_attrib ins_cost(INSN_COST); // Required cost attribute
ins_attrib ins_size(32);        // Required size attribute (in bits)
ins_attrib ins_short_branch(0); // Required flag: is this instruction
                                // a non-matching short branch variant
                                // of some long branch?
ins_attrib ins_alignment(4);    // Required alignment attribute (must
                                // be a power of 2) specifies the
                                // alignment that some part of the
                                // instruction (not necessarily the
                                // start) requires.  If > 1, a
                                // compute_padding() function must be
                                // provided for the instruction
3656 
3657 //----------OPERANDS-----------------------------------------------------------
3658 // Operand definitions must precede instruction definitions for correct parsing
3659 // in the ADLC because operands constitute user defined types which are used in
3660 // instruction definitions.
3661 
3662 //----------Simple Operands----------------------------------------------------
3663 
// Integer operands 32 bit
// 32 bit immediate
operand immI()
%{
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// 32 bit zero
operand immI0()
%{
  predicate(n->get_int() == 0);
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// 32 bit unit increment
operand immI_1()
%{
  predicate(n->get_int() == 1);
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// 32 bit unit decrement
operand immI_M1()
%{
  predicate(n->get_int() == -1);
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Shift values for add/sub extension shift
operand immIExt()
%{
  predicate(0 <= n->get_int() && (n->get_int() <= 4));
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// 32 bit integer no greater than 4 (may be negative)
operand immI_le_4()
%{
  predicate(n->get_int() <= 4);
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// The constant 31 -- presumably a 32-bit shift-count mask; confirm at
// the matching instruction rules.
operand immI_31()
%{
  predicate(n->get_int() == 31);
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Each of the following operands matches exactly one 32 bit constant.

// The constant 8
operand immI_8()
%{
  predicate(n->get_int() == 8);
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// The constant 16
operand immI_16()
%{
  predicate(n->get_int() == 16);
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// The constant 24
operand immI_24()
%{
  predicate(n->get_int() == 24);
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// The constant 32
operand immI_32()
%{
  predicate(n->get_int() == 32);
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// The constant 48
operand immI_48()
%{
  predicate(n->get_int() == 48);
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// The constant 56
operand immI_56()
%{
  predicate(n->get_int() == 56);
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// The constant 63 -- presumably a 64-bit shift-count mask; confirm at
// the matching instruction rules.
operand immI_63()
%{
  predicate(n->get_int() == 63);
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// The constant 64
operand immI_64()
%{
  predicate(n->get_int() == 64);
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// The constant 255 (0xff, low-byte mask)
operand immI_255()
%{
  predicate(n->get_int() == 255);
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// The constant 65535 (0xffff, low-halfword mask)
operand immI_65535()
%{
  predicate(n->get_int() == 65535);
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}
3838 
// 64 bit constant 255 (0xff, low-byte mask)
operand immL_255()
%{
  predicate(n->get_long() == 255L);
  match(ConL);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// 64 bit constant 65535 (0xffff, low-halfword mask)
operand immL_65535()
%{
  predicate(n->get_long() == 65535L);
  match(ConL);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// 64 bit constant 4294967295 (0xffffffff, low-word mask)
operand immL_4294967295()
%{
  predicate(n->get_long() == 4294967295L);
  match(ConL);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// 64 bit mask of contiguous low-order ones: value+1 is a power of two
// and the top two bits are clear, i.e. 0x1, 0x3, ... up to 62 one bits.
operand immL_bitmask()
%{
  predicate(((n->get_long() & 0xc000000000000000l) == 0)
            && is_power_of_2(n->get_long() + 1));
  match(ConL);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// 32 bit mask of contiguous low-order ones: value+1 is a power of two
// and the top two bits are clear, i.e. 0x1, 0x3, ... up to 30 one bits.
operand immI_bitmask()
%{
  predicate(((n->get_int() & 0xc0000000) == 0)
            && is_power_of_2(n->get_int() + 1));
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Scale values for scaled offset addressing modes (up to long but not quad)
operand immIScale()
%{
  predicate(0 <= n->get_int() && (n->get_int() <= 3));
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}
3901 
// 26 bit signed offset -- for pc-relative branches
operand immI26()
%{
  predicate(((-(1 << 25)) <= n->get_int()) && (n->get_int() < (1 << 25)));
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// 19 bit signed offset -- for pc-relative loads
operand immI19()
%{
  predicate(((-(1 << 18)) <= n->get_int()) && (n->get_int() < (1 << 18)));
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// 12 bit unsigned offset -- for base plus immediate loads
operand immIU12()
%{
  predicate((0 <= n->get_int()) && (n->get_int() < (1 << 12)));
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// 12 bit unsigned offset as a long constant
operand immLU12()
%{
  predicate((0 <= n->get_long()) && (n->get_long() < (1 << 12)));
  match(ConL);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Offset for scaled or unscaled immediate loads and stores
operand immIOffset()
%{
  predicate(Address::offset_ok_for_immed(n->get_int()));
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Offset valid for an access with size shift 2 (4-byte element)
operand immIOffset4()
%{
  predicate(Address::offset_ok_for_immed(n->get_int(), 2));
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Offset valid for an access with size shift 3 (8-byte element)
operand immIOffset8()
%{
  predicate(Address::offset_ok_for_immed(n->get_int(), 3));
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Offset valid for an access with size shift 4 (16-byte element)
operand immIOffset16()
%{
  predicate(Address::offset_ok_for_immed(n->get_int(), 4));
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Long variants of the offset operands above.

operand immLoffset()
%{
  predicate(Address::offset_ok_for_immed(n->get_long()));
  match(ConL);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Long offset valid for an access with size shift 2 (4-byte element)
operand immLoffset4()
%{
  predicate(Address::offset_ok_for_immed(n->get_long(), 2));
  match(ConL);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Long offset valid for an access with size shift 3 (8-byte element)
operand immLoffset8()
%{
  predicate(Address::offset_ok_for_immed(n->get_long(), 3));
  match(ConL);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Long offset valid for an access with size shift 4 (16-byte element)
operand immLoffset16()
%{
  predicate(Address::offset_ok_for_immed(n->get_long(), 4));
  match(ConL);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}
4025 
// 32 bit integer valid for add sub immediate
operand immIAddSub()
%{
  predicate(Assembler::operand_valid_for_add_sub_immediate((long)n->get_int()));
  match(ConI);
  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// 32 bit unsigned integer valid for logical immediate
// TODO -- check this is right when e.g the mask is 0x80000000
operand immILog()
%{
  predicate(Assembler::operand_valid_for_logical_immediate(/*is32*/true, (unsigned long)n->get_int()));
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Integer operands 64 bit
// 64 bit immediate
operand immL()
%{
  match(ConL);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// 64 bit zero
operand immL0()
%{
  predicate(n->get_long() == 0);
  match(ConL);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// 64 bit unit increment
operand immL_1()
%{
  predicate(n->get_long() == 1);
  match(ConL);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// 64 bit unit decrement
operand immL_M1()
%{
  predicate(n->get_long() == -1);
  match(ConL);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// 32 bit offset of pc in thread anchor
// (byte offset of last_Java_pc within the JavaThread's frame anchor)

operand immL_pc_off()
%{
  predicate(n->get_long() == in_bytes(JavaThread::frame_anchor_offset()) +
                             in_bytes(JavaFrameAnchor::last_Java_pc_offset()));
  match(ConL);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// 64 bit integer valid for add sub immediate
operand immLAddSub()
%{
  predicate(Assembler::operand_valid_for_add_sub_immediate(n->get_long()));
  match(ConL);
  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// 64 bit integer valid for logical immediate
operand immLLog()
%{
  predicate(Assembler::operand_valid_for_logical_immediate(/*is32*/false, (unsigned long)n->get_long()));
  match(ConL);
  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Long Immediate: low 32-bit mask
operand immL_32bits()
%{
  predicate(n->get_long() == 0xFFFFFFFFL);
  match(ConL);
  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}
4134 
// Pointer operands
// Pointer Immediate
operand immP()
%{
  match(ConP);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// NULL Pointer Immediate
operand immP0()
%{
  predicate(n->get_ptr() == 0);
  match(ConP);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Pointer Immediate One
// this is used in object initialization (initial object header)
operand immP_1()
%{
  predicate(n->get_ptr() == 1);
  match(ConP);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Polling Page Pointer Immediate
// (address of the VM's safepoint polling page)
operand immPollPage()
%{
  predicate((address)n->get_ptr() == os::get_polling_page());
  match(ConP);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Card Table Byte Map Base
operand immByteMapBase()
%{
  // Get base of card map
  predicate(BarrierSet::barrier_set()->is_a(BarrierSet::CardTableBarrierSet) &&
            (jbyte*)n->get_ptr() == ((CardTableBarrierSet*)(BarrierSet::barrier_set()))->card_table()->byte_map_base());
  match(ConP);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Pointer Immediate Minus One
// this is used when we want to write the current PC to the thread anchor
operand immP_M1()
%{
  predicate(n->get_ptr() == -1);
  match(ConP);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Pointer Immediate Minus Two
// this is used when we want to write the current PC to the thread anchor
operand immP_M2()
%{
  predicate(n->get_ptr() == -2);
  match(ConP);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}
4216 
// Float and Double operands
// Double Immediate
operand immD()
%{
  match(ConD);
  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Double Immediate: +0.0d
operand immD0()
%{
  predicate(jlong_cast(n->getd()) == 0);
  match(ConD);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Double Immediate: value encodable as an 8-bit packed FP immediate
// (suitable for fmov with an immediate operand)
operand immDPacked()
%{
  predicate(Assembler::operand_valid_for_float_immediate(n->getd()));
  match(ConD);
  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Float Immediate
operand immF()
%{
  match(ConF);
  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Float Immediate: +0.0f.
operand immF0()
%{
  predicate(jint_cast(n->getf()) == 0);
  match(ConF);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Float Immediate: value encodable as an 8-bit packed FP immediate
// (suitable for fmov with an immediate operand)
operand immFPacked()
%{
  predicate(Assembler::operand_valid_for_float_immediate((double)n->getf()));
  match(ConF);
  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Narrow pointer operands
// Narrow Pointer Immediate
operand immN()
%{
  match(ConN);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Narrow NULL Pointer Immediate
operand immN0()
%{
  predicate(n->get_narrowcon() == 0);
  match(ConN);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Narrow Klass Pointer Immediate
operand immNKlass()
%{
  match(ConNKlass);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}
4308 
// Integer 32 bit Register Operands
// Integer 32 bit Register (excludes SP)
operand iRegI()
%{
  constraint(ALLOC_IN_RC(any_reg32));
  match(RegI);
  match(iRegINoSp);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}

// Integer 32 bit Register not Special
operand iRegINoSp()
%{
  constraint(ALLOC_IN_RC(no_special_reg32));
  match(RegI);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}

// Integer 64 bit Register Operands
// Integer 64 bit Register (includes SP)
operand iRegL()
%{
  constraint(ALLOC_IN_RC(any_reg));
  match(RegL);
  match(iRegLNoSp);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}
4342 
// Integer 64 bit Register not Special
operand iRegLNoSp()
%{
  constraint(ALLOC_IN_RC(no_special_reg));
  match(RegL);
  match(iRegL_R0);
  // op_cost(0) for consistency with the sibling register operands
  // (iRegINoSp, iRegPNoSp, iRegL_R0, ...).  Without it this operand
  // silently inherits the default op_cost(1) from the op_attrib
  // declaration and looks more expensive to the matcher.
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}
4352 
4353 // Pointer Register Operands
4354 // Pointer Register
4355 operand iRegP()
4356 %{
4357   constraint(ALLOC_IN_RC(ptr_reg));
4358   match(RegP);
4359   match(iRegPNoSp);
4360   match(iRegP_R0);
4361   //match(iRegP_R2);
4362   //match(iRegP_R4);
4363   //match(iRegP_R5);
4364   match(thread_RegP);
4365   op_cost(0);
4366   format %{ %}
4367   interface(REG_INTER);
4368 %}
4369 
4370 // Pointer 64 bit Register not Special
4371 operand iRegPNoSp()
4372 %{
4373   constraint(ALLOC_IN_RC(no_special_ptr_reg));
4374   match(RegP);
4375   // match(iRegP);
4376   // match(iRegP_R0);
4377   // match(iRegP_R2);
4378   // match(iRegP_R4);
4379   // match(iRegP_R5);
4380   // match(thread_RegP);
4381   op_cost(0);
4382   format %{ %}
4383   interface(REG_INTER);
4384 %}
4385 
// Fixed-register pointer operands. Each one pins a 64-bit pointer value to a
// single general purpose register: ALLOC_IN_RC(rN_reg) restricts the register
// allocator to exactly that register, while the match rules let the operand
// absorb RegP values and iRegPNoSp operands.
4386 // Pointer 64 bit Register R0 only
4387 operand iRegP_R0()
4388 %{
4389   constraint(ALLOC_IN_RC(r0_reg));
4390   match(RegP);
4391   // match(iRegP);
4392   match(iRegPNoSp);
4393   op_cost(0);
4394   format %{ %}
4395   interface(REG_INTER);
4396 %}
4397
4398 // Pointer 64 bit Register R1 only
4399 operand iRegP_R1()
4400 %{
4401   constraint(ALLOC_IN_RC(r1_reg));
4402   match(RegP);
4403   // match(iRegP);
4404   match(iRegPNoSp);
4405   op_cost(0);
4406   format %{ %}
4407   interface(REG_INTER);
4408 %}
4409
4410 // Pointer 64 bit Register R2 only
4411 operand iRegP_R2()
4412 %{
4413   constraint(ALLOC_IN_RC(r2_reg));
4414   match(RegP);
4415   // match(iRegP);
4416   match(iRegPNoSp);
4417   op_cost(0);
4418   format %{ %}
4419   interface(REG_INTER);
4420 %}
4421
4422 // Pointer 64 bit Register R3 only
4423 operand iRegP_R3()
4424 %{
4425   constraint(ALLOC_IN_RC(r3_reg));
4426   match(RegP);
4427   // match(iRegP);
4428   match(iRegPNoSp);
4429   op_cost(0);
4430   format %{ %}
4431   interface(REG_INTER);
4432 %}
4433
4434 // Pointer 64 bit Register R4 only
4435 operand iRegP_R4()
4436 %{
4437   constraint(ALLOC_IN_RC(r4_reg));
4438   match(RegP);
4439   // match(iRegP);
4440   match(iRegPNoSp);
4441   op_cost(0);
4442   format %{ %}
4443   interface(REG_INTER);
4444 %}
4445
4446 // Pointer 64 bit Register R5 only
4447 operand iRegP_R5()
4448 %{
4449   constraint(ALLOC_IN_RC(r5_reg));
4450   match(RegP);
4451   // match(iRegP);
4452   match(iRegPNoSp);
4453   op_cost(0);
4454   format %{ %}
4455   interface(REG_INTER);
4456 %}
4457
4458 // Pointer 64 bit Register R10 only
4459 operand iRegP_R10()
4460 %{
4461   constraint(ALLOC_IN_RC(r10_reg));
4462   match(RegP);
4463   // match(iRegP);
4464   match(iRegPNoSp);
4465   op_cost(0);
4466   format %{ %}
4467   interface(REG_INTER);
4468 %}
4469 
// Fixed-register long operands: as with the iRegP_Rn operands, each pins a
// 64-bit long value to a single general purpose register via its one-register
// allocation class, matching RegL and iRegLNoSp.
4470 // Long 64 bit Register R0 only
4471 operand iRegL_R0()
4472 %{
4473   constraint(ALLOC_IN_RC(r0_reg));
4474   match(RegL);
4475   match(iRegLNoSp);
4476   op_cost(0);
4477   format %{ %}
4478   interface(REG_INTER);
4479 %}
4480
4481 // Long 64 bit Register R2 only
4482 operand iRegL_R2()
4483 %{
4484   constraint(ALLOC_IN_RC(r2_reg));
4485   match(RegL);
4486   match(iRegLNoSp);
4487   op_cost(0);
4488   format %{ %}
4489   interface(REG_INTER);
4490 %}
4491
4492 // Long 64 bit Register R3 only
4493 operand iRegL_R3()
4494 %{
4495   constraint(ALLOC_IN_RC(r3_reg));
4496   match(RegL);
4497   match(iRegLNoSp);
4498   op_cost(0);
4499   format %{ %}
4500   interface(REG_INTER);
4501 %}
4502
4503 // Long 64 bit Register R11 only
4504 operand iRegL_R11()
4505 %{
4506   constraint(ALLOC_IN_RC(r11_reg));
4507   match(RegL);
4508   match(iRegLNoSp);
4509   op_cost(0);
4510   format %{ %}
4511   interface(REG_INTER);
4512 %}
4513 
4514 // Pointer 64 bit Register FP only
// Unlike the iRegP_Rn operands above, this one deliberately does not match
// iRegPNoSp: only raw RegP values may land in the frame pointer register.
4515 operand iRegP_FP()
4516 %{
4517   constraint(ALLOC_IN_RC(fp_reg));
4518   match(RegP);
4519   // match(iRegP);
4520   op_cost(0);
4521   format %{ %}
4522   interface(REG_INTER);
4523 %}
4524 
// Fixed-register 32-bit int operands: each pins an int value to one general
// purpose register via the corresponding int_rN_reg class, matching RegI and
// iRegINoSp.
4525 // Register R0 only
4526 operand iRegI_R0()
4527 %{
4528   constraint(ALLOC_IN_RC(int_r0_reg));
4529   match(RegI);
4530   match(iRegINoSp);
4531   op_cost(0);
4532   format %{ %}
4533   interface(REG_INTER);
4534 %}
4535
4536 // Register R2 only
4537 operand iRegI_R2()
4538 %{
4539   constraint(ALLOC_IN_RC(int_r2_reg));
4540   match(RegI);
4541   match(iRegINoSp);
4542   op_cost(0);
4543   format %{ %}
4544   interface(REG_INTER);
4545 %}
4546
4547 // Register R3 only
4548 operand iRegI_R3()
4549 %{
4550   constraint(ALLOC_IN_RC(int_r3_reg));
4551   match(RegI);
4552   match(iRegINoSp);
4553   op_cost(0);
4554   format %{ %}
4555   interface(REG_INTER);
4556 %}
4557
4558
4559 // Register R4 only
4560 operand iRegI_R4()
4561 %{
4562   constraint(ALLOC_IN_RC(int_r4_reg));
4563   match(RegI);
4564   match(iRegINoSp);
4565   op_cost(0);
4566   format %{ %}
4567   interface(REG_INTER);
4568 %}
4569 
4570 
4571 // Pointer Register Operands
4572 // Narrow Pointer Register
// Narrow (compressed) oop operands: these are 32-bit register classes
// (any_reg32 / no_special_reg32) matching RegN values.
4573 operand iRegN()
4574 %{
4575   constraint(ALLOC_IN_RC(any_reg32));
4576   match(RegN);
4577   match(iRegNNoSp);
4578   op_cost(0);
4579   format %{ %}
4580   interface(REG_INTER);
4581 %}
4582
4583 operand iRegN_R0()
4584 %{
4585   constraint(ALLOC_IN_RC(r0_reg));
4586   match(iRegN);
4587   op_cost(0);
4588   format %{ %}
4589   interface(REG_INTER);
4590 %}
4591
4592 operand iRegN_R2()
4593 %{
4594   constraint(ALLOC_IN_RC(r2_reg));
4595   match(iRegN);
4596   op_cost(0);
4597   format %{ %}
4598   interface(REG_INTER);
4599 %}
4600
4601 operand iRegN_R3()
4602 %{
4603   constraint(ALLOC_IN_RC(r3_reg));
4604   match(iRegN);
4605   op_cost(0);
4606   format %{ %}
4607   interface(REG_INTER);
4608 %}
4609
4610 // Narrow Pointer (32 bit) Register not Special
4611 operand iRegNNoSp()
4612 %{
4613   constraint(ALLOC_IN_RC(no_special_reg32));
4614   match(RegN);
4615   op_cost(0);
4616   format %{ %}
4617   interface(REG_INTER);
4618 %}
4619
4620 // heap base register -- used for encoding immN0
4621
4622 operand iRegIHeapbase()
4623 %{
4624   constraint(ALLOC_IN_RC(heapbase_reg));
4625   match(RegI);
4626   op_cost(0);
4627   format %{ %}
4628   interface(REG_INTER);
4629 %}
4630 
// Floating-point and vector register operands. vRegF/vRegD allocate from the
// general float/double classes; vecD/vecX are the 64-bit and 128-bit SIMD
// vector classes; vRegD_V0..V3 pin a double to one specific V register.
4631 // Float Register
4632 // Float register operands
4633 operand vRegF()
4634 %{
4635   constraint(ALLOC_IN_RC(float_reg));
4636   match(RegF);
4637
4638   op_cost(0);
4639   format %{ %}
4640   interface(REG_INTER);
4641 %}
4642
4643 // Double Register
4644 // Double register operands
4645 operand vRegD()
4646 %{
4647   constraint(ALLOC_IN_RC(double_reg));
4648   match(RegD);
4649
4650   op_cost(0);
4651   format %{ %}
4652   interface(REG_INTER);
4653 %}
4654
4655 operand vecD()
4656 %{
4657   constraint(ALLOC_IN_RC(vectord_reg));
4658   match(VecD);
4659
4660   op_cost(0);
4661   format %{ %}
4662   interface(REG_INTER);
4663 %}
4664
4665 operand vecX()
4666 %{
4667   constraint(ALLOC_IN_RC(vectorx_reg));
4668   match(VecX);
4669
4670   op_cost(0);
4671   format %{ %}
4672   interface(REG_INTER);
4673 %}
4674
4675 operand vRegD_V0()
4676 %{
4677   constraint(ALLOC_IN_RC(v0_reg));
4678   match(RegD);
4679   op_cost(0);
4680   format %{ %}
4681   interface(REG_INTER);
4682 %}
4683
4684 operand vRegD_V1()
4685 %{
4686   constraint(ALLOC_IN_RC(v1_reg));
4687   match(RegD);
4688   op_cost(0);
4689   format %{ %}
4690   interface(REG_INTER);
4691 %}
4692
4693 operand vRegD_V2()
4694 %{
4695   constraint(ALLOC_IN_RC(v2_reg));
4696   match(RegD);
4697   op_cost(0);
4698   format %{ %}
4699   interface(REG_INTER);
4700 %}
4701
4702 operand vRegD_V3()
4703 %{
4704   constraint(ALLOC_IN_RC(v3_reg));
4705   match(RegD);
4706   op_cost(0);
4707   format %{ %}
4708   interface(REG_INTER);
4709 %}
4710 
4711 // Flags register, used as output of signed compare instructions
4712
4713 // note that on AArch64 we also use this register as the output for
4714 // for floating point compare instructions (CmpF CmpD). this ensures
4715 // that ordered inequality tests use GT, GE, LT or LE none of which
4716 // pass through cases where the result is unordered i.e. one or both
4717 // inputs to the compare is a NaN. this means that the ideal code can
4718 // replace e.g. a GT with an LE and not end up capturing the NaN case
4719 // (where the comparison should always fail). EQ and NE tests are
4720 // always generated in ideal code so that unordered folds into the NE
4721 // case, matching the behaviour of AArch64 NE.
4722 //
4723 // This differs from x86 where the outputs of FP compares use a
4724 // special FP flags registers and where compares based on this
4725 // register are distinguished into ordered inequalities (cmpOpUCF) and
4726 // EQ/NEQ tests (cmpOpUCF2). x86 has to special case the latter tests
4727 // to explicitly handle the unordered case in branches. x86 also has
4728 // to include extra CMoveX rules to accept a cmpOpUCF input.
4729
// Both flags operands below allocate from the same int_flags class and match
// RegFlags; they differ only in name and format string, which lets match
// rules distinguish signed from unsigned flag producers/consumers.
4730 operand rFlagsReg()
4731 %{
4732   constraint(ALLOC_IN_RC(int_flags));
4733   match(RegFlags);
4734
4735   op_cost(0);
4736   format %{ "RFLAGS" %}
4737   interface(REG_INTER);
4738 %}
4739
4740 // Flags register, used as output of unsigned compare instructions
4741 operand rFlagsRegU()
4742 %{
4743   constraint(ALLOC_IN_RC(int_flags));
4744   match(RegFlags);
4745
4746   op_cost(0);
4747   format %{ "RFLAGSU" %}
4748   interface(REG_INTER);
4749 %}
4750 
4751 // Special Registers
4752
4753 // Method Register
4754 operand inline_cache_RegP(iRegP reg)
4755 %{
4756   constraint(ALLOC_IN_RC(method_reg)); // inline_cache_reg
4757   match(reg);
4758   match(iRegPNoSp);
4759   op_cost(0);
4760   format %{ %}
4761   interface(REG_INTER);
4762 %}
4763
4764 operand interpreter_method_oop_RegP(iRegP reg)
4765 %{
4766   constraint(ALLOC_IN_RC(method_reg)); // interpreter_method_oop_reg
4767   match(reg);
4768   match(iRegPNoSp);
4769   op_cost(0);
4770   format %{ %}
4771   interface(REG_INTER);
4772 %}
4773
4774 // Thread Register
4775 operand thread_RegP(iRegP reg)
4776 %{
4777   constraint(ALLOC_IN_RC(thread_reg)); // thread_reg
4778   match(reg);
4779   op_cost(0);
4780   format %{ %}
4781   interface(REG_INTER);
4782 %}
4783
// Link register operand.
4784 operand lr_RegP(iRegP reg)
4785 %{
4786   constraint(ALLOC_IN_RC(lr_reg)); // link_reg
4787   match(reg);
4788   op_cost(0);
4789   format %{ %}
4790   interface(REG_INTER);
4791 %}
4792 
4793 //----------Memory Operands----------------------------------------------------
// Memory operands expose base/index/scale/disp to the matcher through
// MEMORY_INTER. Operands that take no index input use index(0xffffffff) as
// the "no index register" sentinel, as seen throughout this file.
4794
4795 operand indirect(iRegP reg)
4796 %{
4797   constraint(ALLOC_IN_RC(ptr_reg));
4798   match(reg);
4799   op_cost(0);
4800   format %{ "[$reg]" %}
4801   interface(MEMORY_INTER) %{
4802     base($reg);
4803     index(0xffffffff);
4804     scale(0x0);
4805     disp(0x0);
4806   %}
4807 %}
4808
// Base + sign-extended-int index, shifted by scale. The predicate checks that
// every memory use of the AddP can encode this scaled offset.
4809 operand indIndexScaledI2L(iRegP reg, iRegI ireg, immIScale scale)
4810 %{
4811   constraint(ALLOC_IN_RC(ptr_reg));
4812   predicate(size_fits_all_mem_uses(n->as_AddP(), n->in(AddPNode::Offset)->in(2)->get_int()));
4813   match(AddP reg (LShiftL (ConvI2L ireg) scale));
4814   op_cost(0);
4815   format %{ "$reg, $ireg sxtw($scale), 0, I2L" %}
4816   interface(MEMORY_INTER) %{
4817     base($reg);
4818     index($ireg);
4819     scale($scale);
4820     disp(0x0);
4821   %}
4822 %}
4823
// Base + long index, shifted by scale.
4824 operand indIndexScaled(iRegP reg, iRegL lreg, immIScale scale)
4825 %{
4826   constraint(ALLOC_IN_RC(ptr_reg));
4827   predicate(size_fits_all_mem_uses(n->as_AddP(), n->in(AddPNode::Offset)->in(2)->get_int()));
4828   match(AddP reg (LShiftL lreg scale));
4829   op_cost(0);
4830   format %{ "$reg, $lreg lsl($scale)" %}
4831   interface(MEMORY_INTER) %{
4832     base($reg);
4833     index($lreg);
4834     scale($scale);
4835     disp(0x0);
4836   %}
4837 %}
4838
// Base + sign-extended-int index, no scaling.
4839 operand indIndexI2L(iRegP reg, iRegI ireg)
4840 %{
4841   constraint(ALLOC_IN_RC(ptr_reg));
4842   match(AddP reg (ConvI2L ireg));
4843   op_cost(0);
4844   format %{ "$reg, $ireg, 0, I2L" %}
4845   interface(MEMORY_INTER) %{
4846     base($reg);
4847     index($ireg);
4848     scale(0x0);
4849     disp(0x0);
4850   %}
4851 %}
4852
// Base + long index, no scaling.
4853 operand indIndex(iRegP reg, iRegL lreg)
4854 %{
4855   constraint(ALLOC_IN_RC(ptr_reg));
4856   match(AddP reg lreg);
4857   op_cost(0);
4858   format %{ "$reg, $lreg" %}
4859   interface(MEMORY_INTER) %{
4860     base($reg);
4861     index($lreg);
4862     scale(0x0);
4863     disp(0x0);
4864   %}
4865 %}
4866 
// Base + immediate-offset addressing. The indOffI4/8/16 and indOffL4/8/16
// variants differ only in the immediate operand they accept, restricting the
// offset to one suitable for 4-, 8- or 16-byte wide accesses.
4867 operand indOffI(iRegP reg, immIOffset off)
4868 %{
4869   constraint(ALLOC_IN_RC(ptr_reg));
4870   match(AddP reg off);
4871   op_cost(0);
4872   format %{ "[$reg, $off]" %}
4873   interface(MEMORY_INTER) %{
4874     base($reg);
4875     index(0xffffffff);
4876     scale(0x0);
4877     disp($off);
4878   %}
4879 %}
4880
4881 operand indOffI4(iRegP reg, immIOffset4 off)
4882 %{
4883   constraint(ALLOC_IN_RC(ptr_reg));
4884   match(AddP reg off);
4885   op_cost(0);
4886   format %{ "[$reg, $off]" %}
4887   interface(MEMORY_INTER) %{
4888     base($reg);
4889     index(0xffffffff);
4890     scale(0x0);
4891     disp($off);
4892   %}
4893 %}
4894
4895 operand indOffI8(iRegP reg, immIOffset8 off)
4896 %{
4897   constraint(ALLOC_IN_RC(ptr_reg));
4898   match(AddP reg off);
4899   op_cost(0);
4900   format %{ "[$reg, $off]" %}
4901   interface(MEMORY_INTER) %{
4902     base($reg);
4903     index(0xffffffff);
4904     scale(0x0);
4905     disp($off);
4906   %}
4907 %}
4908
4909 operand indOffI16(iRegP reg, immIOffset16 off)
4910 %{
4911   constraint(ALLOC_IN_RC(ptr_reg));
4912   match(AddP reg off);
4913   op_cost(0);
4914   format %{ "[$reg, $off]" %}
4915   interface(MEMORY_INTER) %{
4916     base($reg);
4917     index(0xffffffff);
4918     scale(0x0);
4919     disp($off);
4920   %}
4921 %}
4922
4923 operand indOffL(iRegP reg, immLoffset off)
4924 %{
4925   constraint(ALLOC_IN_RC(ptr_reg));
4926   match(AddP reg off);
4927   op_cost(0);
4928   format %{ "[$reg, $off]" %}
4929   interface(MEMORY_INTER) %{
4930     base($reg);
4931     index(0xffffffff);
4932     scale(0x0);
4933     disp($off);
4934   %}
4935 %}
4936
4937 operand indOffL4(iRegP reg, immLoffset4 off)
4938 %{
4939   constraint(ALLOC_IN_RC(ptr_reg));
4940   match(AddP reg off);
4941   op_cost(0);
4942   format %{ "[$reg, $off]" %}
4943   interface(MEMORY_INTER) %{
4944     base($reg);
4945     index(0xffffffff);
4946     scale(0x0);
4947     disp($off);
4948   %}
4949 %}
4950
4951 operand indOffL8(iRegP reg, immLoffset8 off)
4952 %{
4953   constraint(ALLOC_IN_RC(ptr_reg));
4954   match(AddP reg off);
4955   op_cost(0);
4956   format %{ "[$reg, $off]" %}
4957   interface(MEMORY_INTER) %{
4958     base($reg);
4959     index(0xffffffff);
4960     scale(0x0);
4961     disp($off);
4962   %}
4963 %}
4964
4965 operand indOffL16(iRegP reg, immLoffset16 off)
4966 %{
4967   constraint(ALLOC_IN_RC(ptr_reg));
4968   match(AddP reg off);
4969   op_cost(0);
4970   format %{ "[$reg, $off]" %}
4971   interface(MEMORY_INTER) %{
4972     base($reg);
4973     index(0xffffffff);
4974     scale(0x0);
4975     disp($off);
4976   %}
4977 %}
4978 
// Narrow-oop memory operands: mirror the plain memory operands above but with
// a DecodeN base. All are guarded on narrow_oop_shift() == 0, i.e. the base
// register already holds the raw address so the decode can be folded away.
4979 operand indirectN(iRegN reg)
4980 %{
4981   predicate(Universe::narrow_oop_shift() == 0);
4982   constraint(ALLOC_IN_RC(ptr_reg));
4983   match(DecodeN reg);
4984   op_cost(0);
4985   format %{ "[$reg]\t# narrow" %}
4986   interface(MEMORY_INTER) %{
4987     base($reg);
4988     index(0xffffffff);
4989     scale(0x0);
4990     disp(0x0);
4991   %}
4992 %}
4993
4994 operand indIndexScaledI2LN(iRegN reg, iRegI ireg, immIScale scale)
4995 %{
4996   predicate(Universe::narrow_oop_shift() == 0 && size_fits_all_mem_uses(n->as_AddP(), n->in(AddPNode::Offset)->in(2)->get_int()));
4997   constraint(ALLOC_IN_RC(ptr_reg));
4998   match(AddP (DecodeN reg) (LShiftL (ConvI2L ireg) scale));
4999   op_cost(0);
5000   format %{ "$reg, $ireg sxtw($scale), 0, I2L\t# narrow" %}
5001   interface(MEMORY_INTER) %{
5002     base($reg);
5003     index($ireg);
5004     scale($scale);
5005     disp(0x0);
5006   %}
5007 %}
5008
5009 operand indIndexScaledN(iRegN reg, iRegL lreg, immIScale scale)
5010 %{
5011   predicate(Universe::narrow_oop_shift() == 0 && size_fits_all_mem_uses(n->as_AddP(), n->in(AddPNode::Offset)->in(2)->get_int()));
5012   constraint(ALLOC_IN_RC(ptr_reg));
5013   match(AddP (DecodeN reg) (LShiftL lreg scale));
5014   op_cost(0);
5015   format %{ "$reg, $lreg lsl($scale)\t# narrow" %}
5016   interface(MEMORY_INTER) %{
5017     base($reg);
5018     index($lreg);
5019     scale($scale);
5020     disp(0x0);
5021   %}
5022 %}
5023
5024 operand indIndexI2LN(iRegN reg, iRegI ireg)
5025 %{
5026   predicate(Universe::narrow_oop_shift() == 0);
5027   constraint(ALLOC_IN_RC(ptr_reg));
5028   match(AddP (DecodeN reg) (ConvI2L ireg));
5029   op_cost(0);
5030   format %{ "$reg, $ireg, 0, I2L\t# narrow" %}
5031   interface(MEMORY_INTER) %{
5032     base($reg);
5033     index($ireg);
5034     scale(0x0);
5035     disp(0x0);
5036   %}
5037 %}
5038
5039 operand indIndexN(iRegN reg, iRegL lreg)
5040 %{
5041   predicate(Universe::narrow_oop_shift() == 0);
5042   constraint(ALLOC_IN_RC(ptr_reg));
5043   match(AddP (DecodeN reg) lreg);
5044   op_cost(0);
5045   format %{ "$reg, $lreg\t# narrow" %}
5046   interface(MEMORY_INTER) %{
5047     base($reg);
5048     index($lreg);
5049     scale(0x0);
5050     disp(0x0);
5051   %}
5052 %}
5053
5054 operand indOffIN(iRegN reg, immIOffset off)
5055 %{
5056   predicate(Universe::narrow_oop_shift() == 0);
5057   constraint(ALLOC_IN_RC(ptr_reg));
5058   match(AddP (DecodeN reg) off);
5059   op_cost(0);
5060   format %{ "[$reg, $off]\t# narrow" %}
5061   interface(MEMORY_INTER) %{
5062     base($reg);
5063     index(0xffffffff);
5064     scale(0x0);
5065     disp($off);
5066   %}
5067 %}
5068
5069 operand indOffLN(iRegN reg, immLoffset off)
5070 %{
5071   predicate(Universe::narrow_oop_shift() == 0);
5072   constraint(ALLOC_IN_RC(ptr_reg));
5073   match(AddP (DecodeN reg) off);
5074   op_cost(0);
5075   format %{ "[$reg, $off]\t# narrow" %}
5076   interface(MEMORY_INTER) %{
5077     base($reg);
5078     index(0xffffffff);
5079     scale(0x0);
5080     disp($off);
5081   %}
5082 %}
5083 
5084 
5085 
5086 // AArch64 opto stubs need to write to the pc slot in the thread anchor
// Memory operand addressing thread + immL_pc_off: base is the thread register
// operand, displacement the pc-slot offset immediate.
5087 operand thread_anchor_pc(thread_RegP reg, immL_pc_off off)
5088 %{
5089   constraint(ALLOC_IN_RC(ptr_reg));
5090   match(AddP reg off);
5091   op_cost(0);
5092   format %{ "[$reg, $off]" %}
5093   interface(MEMORY_INTER) %{
5094     base($reg);
5095     index(0xffffffff);
5096     scale(0x0);
5097     disp($off);
5098   %}
5099 %}
5100 
5101 //----------Special Memory Operands--------------------------------------------
5102 // Stack Slot Operand - This operand is used for loading and storing temporary
5103 //                      values on the stack where a match requires a value to
5104 //                      flow through memory.
// NOTE(review): base(0x1e) is labelled "RSP" below (x86 terminology carried
// over); presumably this is the AArch64 stack pointer's encoding — confirm
// against the register definitions at the top of this file.
5105 operand stackSlotP(sRegP reg)
5106 %{
5107   constraint(ALLOC_IN_RC(stack_slots));
5108   op_cost(100);
5109   // No match rule because this operand is only generated in matching
5110   // match(RegP);
5111   format %{ "[$reg]" %}
5112   interface(MEMORY_INTER) %{
5113     base(0x1e);  // RSP
5114     index(0x0);  // No Index
5115     scale(0x0);  // No Scale
5116     disp($reg);  // Stack Offset
5117   %}
5118 %}
5119
5120 operand stackSlotI(sRegI reg)
5121 %{
5122   constraint(ALLOC_IN_RC(stack_slots));
5123   // No match rule because this operand is only generated in matching
5124   // match(RegI);
5125   format %{ "[$reg]" %}
5126   interface(MEMORY_INTER) %{
5127     base(0x1e);  // RSP
5128     index(0x0);  // No Index
5129     scale(0x0);  // No Scale
5130     disp($reg);  // Stack Offset
5131   %}
5132 %}
5133
5134 operand stackSlotF(sRegF reg)
5135 %{
5136   constraint(ALLOC_IN_RC(stack_slots));
5137   // No match rule because this operand is only generated in matching
5138   // match(RegF);
5139   format %{ "[$reg]" %}
5140   interface(MEMORY_INTER) %{
5141     base(0x1e);  // RSP
5142     index(0x0);  // No Index
5143     scale(0x0);  // No Scale
5144     disp($reg);  // Stack Offset
5145   %}
5146 %}
5147
5148 operand stackSlotD(sRegD reg)
5149 %{
5150   constraint(ALLOC_IN_RC(stack_slots));
5151   // No match rule because this operand is only generated in matching
5152   // match(RegD);
5153   format %{ "[$reg]" %}
5154   interface(MEMORY_INTER) %{
5155     base(0x1e);  // RSP
5156     index(0x0);  // No Index
5157     scale(0x0);  // No Scale
5158     disp($reg);  // Stack Offset
5159   %}
5160 %}
5161
5162 operand stackSlotL(sRegL reg)
5163 %{
5164   constraint(ALLOC_IN_RC(stack_slots));
5165   // No match rule because this operand is only generated in matching
5166   // match(RegL);
5167   format %{ "[$reg]" %}
5168   interface(MEMORY_INTER) %{
5169     base(0x1e);  // RSP
5170     index(0x0);  // No Index
5171     scale(0x0);  // No Scale
5172     disp($reg);  // Stack Offset
5173   %}
5174 %}
5175 
5176 // Operands for expressing Control Flow
5177 // NOTE: Label is a predefined operand which should not be redefined in
5178 //       the AD file. It is generically handled within the ADLC.
5179
5180 //----------Conditional Branch Operands----------------------------------------
5181 // Comparison Op  - This is the operation of the comparison, and is limited to
5182 //                  the following set of codes:
5183 //                  L (<), LE (<=), G (>), GE (>=), E (==), NE (!=)
5184 //
5185 // Other attributes of the comparison, such as unsignedness, are specified
5186 // by the comparison instruction that sets a condition code flags register.
5187 // That result is represented by a flags operand whose subtype is appropriate
5188 // to the unsignedness (etc.) of the comparison.
5189 //
5190 // Later, the instruction which matches both the Comparison Op (a Bool) and
5191 // the flags (produced by the Cmp) specifies the coding of the comparison op
5192 // by matching a specific subtype of Bool operand below, such as cmpOpU.
5193
5194 // used for signed integral comparisons and fp comparisons
5195
// In each COND_INTER below the hex value is the 4-bit AArch64 condition-code
// encoding and the string is the corresponding assembler mnemonic.
5196 operand cmpOp()
5197 %{
5198   match(Bool);
5199
5200   format %{ "" %}
5201   interface(COND_INTER) %{
5202     equal(0x0, "eq");
5203     not_equal(0x1, "ne");
5204     less(0xb, "lt");
5205     greater_equal(0xa, "ge");
5206     less_equal(0xd, "le");
5207     greater(0xc, "gt");
5208     overflow(0x6, "vs");
5209     no_overflow(0x7, "vc");
5210   %}
5211 %}
5212
5213 // used for unsigned integral comparisons
5214
5215 operand cmpOpU()
5216 %{
5217   match(Bool);
5218
5219   format %{ "" %}
5220   interface(COND_INTER) %{
5221     equal(0x0, "eq");
5222     not_equal(0x1, "ne");
5223     less(0x3, "lo");
5224     greater_equal(0x2, "hs");
5225     less_equal(0x9, "ls");
5226     greater(0x8, "hi");
5227     overflow(0x6, "vs");
5228     no_overflow(0x7, "vc");
5229   %}
5230 %}
5231 
5232 // used for certain integral comparisons which can be
5233 // converted to cbxx or tbxx instructions
5234
// The three restricted Bool operands below carry the same condition table as
// cmpOp; their predicates narrow the accepted tests so matching instruction
// rules can select compare-and-branch / test-and-branch forms.
5235 operand cmpOpEqNe()
5236 %{
5237   match(Bool);
5238   match(CmpOp);
5239   op_cost(0);
5240   predicate(n->as_Bool()->_test._test == BoolTest::ne
5241             || n->as_Bool()->_test._test == BoolTest::eq);
5242
5243   format %{ "" %}
5244   interface(COND_INTER) %{
5245     equal(0x0, "eq");
5246     not_equal(0x1, "ne");
5247     less(0xb, "lt");
5248     greater_equal(0xa, "ge");
5249     less_equal(0xd, "le");
5250     greater(0xc, "gt");
5251     overflow(0x6, "vs");
5252     no_overflow(0x7, "vc");
5253   %}
5254 %}
5255
5256 // used for certain integral comparisons which can be
5257 // converted to cbxx or tbxx instructions
5258
5259 operand cmpOpLtGe()
5260 %{
5261   match(Bool);
5262   match(CmpOp);
5263   op_cost(0);
5264
5265   predicate(n->as_Bool()->_test._test == BoolTest::lt
5266             || n->as_Bool()->_test._test == BoolTest::ge);
5267
5268   format %{ "" %}
5269   interface(COND_INTER) %{
5270     equal(0x0, "eq");
5271     not_equal(0x1, "ne");
5272     less(0xb, "lt");
5273     greater_equal(0xa, "ge");
5274     less_equal(0xd, "le");
5275     greater(0xc, "gt");
5276     overflow(0x6, "vs");
5277     no_overflow(0x7, "vc");
5278   %}
5279 %}
5280
5281 // used for certain unsigned integral comparisons which can be
5282 // converted to cbxx or tbxx instructions
5283
5284 operand cmpOpUEqNeLtGe()
5285 %{
5286   match(Bool);
5287   match(CmpOp);
5288   op_cost(0);
5289
5290   predicate(n->as_Bool()->_test._test == BoolTest::eq
5291             || n->as_Bool()->_test._test == BoolTest::ne
5292             || n->as_Bool()->_test._test == BoolTest::lt
5293             || n->as_Bool()->_test._test == BoolTest::ge);
5294
5295   format %{ "" %}
5296   interface(COND_INTER) %{
5297     equal(0x0, "eq");
5298     not_equal(0x1, "ne");
5299     less(0xb, "lt");
5300     greater_equal(0xa, "ge");
5301     less_equal(0xd, "le");
5302     greater(0xc, "gt");
5303     overflow(0x6, "vs");
5304     no_overflow(0x7, "vc");
5305   %}
5306 %}
5307 
5308 // Special operand allowing long args to int ops to be truncated for free
5309
// Matches a (ConvL2I reg) so 32-bit instruction rules can consume a long
// source directly; the truncation costs nothing (op_cost(0)) because AArch64
// 32-bit instructions only read the low word. Trailing semicolon added to the
// interface declaration for consistency with every other operand in this file.
5310 operand iRegL2I(iRegL reg) %{
5311
5312   op_cost(0);
5313
5314   match(ConvL2I reg);
5315
5316   format %{ "l2i($reg)" %}
5317
5318   interface(REG_INTER);
5319 %}
5320 
// vmemN group the addressing modes legal for N-byte vector loads/stores:
// plain indirect, register index, and the size-restricted immediate offsets.
5321 opclass vmem4(indirect, indIndex, indOffI4, indOffL4);
5322 opclass vmem8(indirect, indIndex, indOffI8, indOffL8);
5323 opclass vmem16(indirect, indIndex, indOffI16, indOffL16);
5324
5325 //----------OPERAND CLASSES----------------------------------------------------
5326 // Operand Classes are groups of operands that are used as to simplify
5327 // instruction definitions by not requiring the AD writer to specify
5328 // separate instructions for every form of operand when the
5329 // instruction accepts multiple operand types with the same basic
5330 // encoding and format. The classic case of this is memory operands.
5331
5332 // memory is used to define read/write location for load/store
5333 // instruction defs. we can turn a memory op into an Address
5334
5335 opclass memory(indirect, indIndexScaled, indIndexScaledI2L, indIndexI2L, indIndex, indOffI, indOffL,
5336                indirectN, indIndexScaledN, indIndexScaledI2LN, indIndexI2LN, indIndexN, indOffIN, indOffLN);
5337
5338 // iRegIorL2I is used for src inputs in rules for 32 bit int (I)
5339 // operations. it allows the src to be either an iRegI or a (ConvL2I
5340 // iRegL). in the latter case the l2i normally planted for a ConvL2I
5341 // can be elided because the 32-bit instruction will just employ the
5342 // lower 32 bits anyway.
5343 //
5344 // n.b. this does not elide all L2I conversions. if the truncated
5345 // value is consumed by more than one operation then the ConvL2I
5346 // cannot be bundled into the consuming nodes so an l2i gets planted
5347 // (actually a movw $dst $src) and the downstream instructions consume
5348 // the result of the l2i as an iRegI input. That's a shame since the
5349 // movw is actually redundant but its not too costly.
5350
5351 opclass iRegIorL2I(iRegI, iRegL2I);
5352 
5353 //----------PIPELINE-----------------------------------------------------------
5354 // Rules which define the behavior of the target architecture's pipeline.
5355
5356 // For specific pipelines, eg A53, define the stages of that pipeline
5357 //pipe_desc(ISS, EX1, EX2, WR);
// Map A53-style stage names onto the generic S0..S5 stages declared by the
// pipe_desc further down; the pipe_class definitions use ISS for issue.
5358 #define ISS S0
5359 #define EX1 S1
5360 #define EX2 S2
5361 #define WR  S3
5362 
5363 // Integer ALU reg operation
5364 pipeline %{
5365 
// Global pipeline attributes: fixed 4-byte instructions fetched in one
// 64-byte line, bundling up to two instructions (A53-like).
5366 attributes %{
5367   // ARM instructions are of fixed length
5368   fixed_size_instructions;        // Fixed size instructions
5369   max_instructions_per_bundle = 2;   // A53 = 2, A57 = 4
5370   // ARM instructions come in 32-bit word units
5371   instruction_unit_size = 4;         // An instruction is 4 bytes long
5372   instruction_fetch_unit_size = 64;  // The processor fetches one line
5373   instruction_fetch_units = 1;       // of 64 bytes
5374
5375   // List of nop instructions
5376   nops( MachNop );
5377 %}
5378 
5379 // We don't use an actual pipeline model so don't care about resources
5380 // or description. we do use pipeline classes to introduce fixed
5381 // latencies
5382
5383 //----------RESOURCES----------------------------------------------------------
5384 // Resources are the functional units available to the machine
5385
// INS01 is either issue slot; ALU either integer ALU. The remaining units
// (MAC, DIV, BRANCH, LDST, NEON_FP) model single dedicated pipes.
5386 resources( INS0, INS1, INS01 = INS0 | INS1,
5387            ALU0, ALU1, ALU = ALU0 | ALU1,
5388            MAC,
5389            DIV,
5390            BRANCH,
5391            LDST,
5392            NEON_FP);
5393
5394 //----------PIPELINE DESCRIPTION-----------------------------------------------
5395 // Pipeline Description specifies the stages in the machine's pipeline
5396
5397 // Define the pipeline as a generic 6 stage pipeline
5398 pipe_desc(S0, S1, S2, S3, S4, S5);
5399 
5400 //----------PIPELINE CLASSES---------------------------------------------------
5401 // Pipeline Classes describe the stages in which input and output are
5402 // referenced by the hardware pipeline.
5403
// Scalar FP pipe classes: single-issue NEON/FP instructions, sources read in
// S1/S2, result written in S5 (i.e. a fixed multi-cycle latency).
5404 pipe_class fp_dop_reg_reg_s(vRegF dst, vRegF src1, vRegF src2)
5405 %{
5406   single_instruction;
5407   src1   : S1(read);
5408   src2   : S2(read);
5409   dst    : S5(write);
5410   INS01  : ISS;
5411   NEON_FP : S5;
5412 %}
5413
5414 pipe_class fp_dop_reg_reg_d(vRegD dst, vRegD src1, vRegD src2)
5415 %{
5416   single_instruction;
5417   src1   : S1(read);
5418   src2   : S2(read);
5419   dst    : S5(write);
5420   INS01  : ISS;
5421   NEON_FP : S5;
5422 %}
5423
5424 pipe_class fp_uop_s(vRegF dst, vRegF src)
5425 %{
5426   single_instruction;
5427   src    : S1(read);
5428   dst    : S5(write);
5429   INS01  : ISS;
5430   NEON_FP : S5;
5431 %}
5432
5433 pipe_class fp_uop_d(vRegD dst, vRegD src)
5434 %{
5435   single_instruction;
5436   src    : S1(read);
5437   dst    : S5(write);
5438   INS01  : ISS;
5439   NEON_FP : S5;
5440 %}
5441
5442 pipe_class fp_d2f(vRegF dst, vRegD src)
5443 %{
5444   single_instruction;
5445   src    : S1(read);
5446   dst    : S5(write);
5447   INS01  : ISS;
5448   NEON_FP : S5;
5449 %}
5450
5451 pipe_class fp_f2d(vRegD dst, vRegF src)
5452 %{
5453   single_instruction;
5454   src    : S1(read);
5455   dst    : S5(write);
5456   INS01  : ISS;
5457   NEON_FP : S5;
5458 %}
5459
5460 pipe_class fp_f2i(iRegINoSp dst, vRegF src)
5461 %{
5462   single_instruction;
5463   src    : S1(read);
5464   dst    : S5(write);
5465   INS01  : ISS;
5466   NEON_FP : S5;
5467 %}
5468
5469 pipe_class fp_f2l(iRegLNoSp dst, vRegF src)
5470 %{
5471   single_instruction;
5472   src    : S1(read);
5473   dst    : S5(write);
5474   INS01  : ISS;
5475   NEON_FP : S5;
5476 %}
5477
5478 pipe_class fp_i2f(vRegF dst, iRegIorL2I src)
5479 %{
5480   single_instruction;
5481   src    : S1(read);
5482   dst    : S5(write);
5483   INS01  : ISS;
5484   NEON_FP : S5;
5485 %}
5486
5487 pipe_class fp_l2f(vRegF dst, iRegL src)
5488 %{
5489   single_instruction;
5490   src    : S1(read);
5491   dst    : S5(write);
5492   INS01  : ISS;
5493   NEON_FP : S5;
5494 %}
5495
5496 pipe_class fp_d2i(iRegINoSp dst, vRegD src)
5497 %{
5498   single_instruction;
5499   src    : S1(read);
5500   dst    : S5(write);
5501   INS01  : ISS;
5502   NEON_FP : S5;
5503 %}
5504
5505 pipe_class fp_d2l(iRegLNoSp dst, vRegD src)
5506 %{
5507   single_instruction;
5508   src    : S1(read);
5509   dst    : S5(write);
5510   INS01  : ISS;
5511   NEON_FP : S5;
5512 %}
5513
5514 pipe_class fp_i2d(vRegD dst, iRegIorL2I src)
5515 %{
5516   single_instruction;
5517   src    : S1(read);
5518   dst    : S5(write);
5519   INS01  : ISS;
5520   NEON_FP : S5;
5521 %}
5522
5523 pipe_class fp_l2d(vRegD dst, iRegIorL2I src)
5524 %{
5525   single_instruction;
5526   src    : S1(read);
5527   dst    : S5(write);
5528   INS01  : ISS;
5529   NEON_FP : S5;
5530 %}
5531 
// FP divide classes issue on INS0 only (single issue slot) with S5 latency;
// conditional-select and immediate-move classes complete earlier (S3/S4).
5532 pipe_class fp_div_s(vRegF dst, vRegF src1, vRegF src2)
5533 %{
5534   single_instruction;
5535   src1   : S1(read);
5536   src2   : S2(read);
5537   dst    : S5(write);
5538   INS0   : ISS;
5539   NEON_FP : S5;
5540 %}
5541
5542 pipe_class fp_div_d(vRegD dst, vRegD src1, vRegD src2)
5543 %{
5544   single_instruction;
5545   src1   : S1(read);
5546   src2   : S2(read);
5547   dst    : S5(write);
5548   INS0   : ISS;
5549   NEON_FP : S5;
5550 %}
5551
5552 pipe_class fp_cond_reg_reg_s(vRegF dst, vRegF src1, vRegF src2, rFlagsReg cr)
5553 %{
5554   single_instruction;
5555   cr     : S1(read);
5556   src1   : S1(read);
5557   src2   : S1(read);
5558   dst    : S3(write);
5559   INS01  : ISS;
5560   NEON_FP : S3;
5561 %}
5562
5563 pipe_class fp_cond_reg_reg_d(vRegD dst, vRegD src1, vRegD src2, rFlagsReg cr)
5564 %{
5565   single_instruction;
5566   cr     : S1(read);
5567   src1   : S1(read);
5568   src2   : S1(read);
5569   dst    : S3(write);
5570   INS01  : ISS;
5571   NEON_FP : S3;
5572 %}
5573
5574 pipe_class fp_imm_s(vRegF dst)
5575 %{
5576   single_instruction;
5577   dst    : S3(write);
5578   INS01  : ISS;
5579   NEON_FP : S3;
5580 %}
5581
5582 pipe_class fp_imm_d(vRegD dst)
5583 %{
5584   single_instruction;
5585   dst    : S3(write);
5586   INS01  : ISS;
5587   NEON_FP : S3;
5588 %}
5589
5590 pipe_class fp_load_constant_s(vRegF dst)
5591 %{
5592   single_instruction;
5593   dst    : S4(write);
5594   INS01  : ISS;
5595   NEON_FP : S4;
5596 %}
5597
5598 pipe_class fp_load_constant_d(vRegD dst)
5599 %{
5600   single_instruction;
5601   dst    : S4(write);
5602   INS01  : ISS;
5603   NEON_FP : S4;
5604 %}
5605 
// Vector integer multiply, 64-bit (D-form): sources read early (S1),
// result written at S5.
pipe_class vmul64(vecD dst, vecD src1, vecD src2)
%{
  single_instruction;
  dst    : S5(write);
  src1   : S1(read);
  src2   : S1(read);
  INS01  : ISS;
  NEON_FP : S5;
%}

// 128-bit (Q-form) variant: restricted to issue slot 0 (INS0).
pipe_class vmul128(vecX dst, vecX src1, vecX src2)
%{
  single_instruction;
  dst    : S5(write);
  src1   : S1(read);
  src2   : S1(read);
  INS0   : ISS;
  NEON_FP : S5;
%}

// Vector multiply-accumulate: dst appears a second time as a read because
// it is also the accumulator input.
pipe_class vmla64(vecD dst, vecD src1, vecD src2)
%{
  single_instruction;
  dst    : S5(write);
  src1   : S1(read);
  src2   : S1(read);
  dst    : S1(read);
  INS01  : ISS;
  NEON_FP : S5;
%}

pipe_class vmla128(vecX dst, vecX src1, vecX src2)
%{
  single_instruction;
  dst    : S5(write);
  src1   : S1(read);
  src2   : S1(read);
  dst    : S1(read);
  INS0   : ISS;
  NEON_FP : S5;
%}

// Vector integer "dyadic" ops (e.g. add/sub): sources needed at S2,
// result at S4.
pipe_class vdop64(vecD dst, vecD src1, vecD src2)
%{
  single_instruction;
  dst    : S4(write);
  src1   : S2(read);
  src2   : S2(read);
  INS01  : ISS;
  NEON_FP : S4;
%}

pipe_class vdop128(vecX dst, vecX src1, vecX src2)
%{
  single_instruction;
  dst    : S4(write);
  src1   : S2(read);
  src2   : S2(read);
  INS0   : ISS;
  NEON_FP : S4;
%}
5667 
// Vector bitwise/logical ops: shorter latency than arithmetic (result at S3).
pipe_class vlogical64(vecD dst, vecD src1, vecD src2)
%{
  single_instruction;
  dst    : S3(write);
  src1   : S2(read);
  src2   : S2(read);
  INS01  : ISS;
  NEON_FP : S3;
%}

pipe_class vlogical128(vecX dst, vecX src1, vecX src2)
%{
  single_instruction;
  dst    : S3(write);
  src1   : S2(read);
  src2   : S2(read);
  INS0   : ISS;
  NEON_FP : S3;
%}

// Vector shift by a register-held amount (shift counts live in a vector reg).
pipe_class vshift64(vecD dst, vecD src, vecX shift)
%{
  single_instruction;
  dst    : S3(write);
  src    : S1(read);
  shift  : S1(read);
  INS01  : ISS;
  NEON_FP : S3;
%}

pipe_class vshift128(vecX dst, vecX src, vecX shift)
%{
  single_instruction;
  dst    : S3(write);
  src    : S1(read);
  shift  : S1(read);
  INS0   : ISS;
  NEON_FP : S3;
%}

// Vector shift by an immediate: the shift amount is encoded, so it does not
// appear as a pipeline read.
pipe_class vshift64_imm(vecD dst, vecD src, immI shift)
%{
  single_instruction;
  dst    : S3(write);
  src    : S1(read);
  INS01  : ISS;
  NEON_FP : S3;
%}

pipe_class vshift128_imm(vecX dst, vecX src, immI shift)
%{
  single_instruction;
  dst    : S3(write);
  src    : S1(read);
  INS0   : ISS;
  NEON_FP : S3;
%}
5725 
// Vector FP dyadic ops (e.g. fadd/fsub): result at S5.
pipe_class vdop_fp64(vecD dst, vecD src1, vecD src2)
%{
  single_instruction;
  dst    : S5(write);
  src1   : S1(read);
  src2   : S1(read);
  INS01  : ISS;
  NEON_FP : S5;
%}

pipe_class vdop_fp128(vecX dst, vecX src1, vecX src2)
%{
  single_instruction;
  dst    : S5(write);
  src1   : S1(read);
  src2   : S1(read);
  INS0   : ISS;
  NEON_FP : S5;
%}

// Vector FP multiply/divide.
// NOTE(review): unlike other 64-bit NEON classes this uses INS0 (slot 0
// only) even for the D-form — confirm this matches the intended core model.
pipe_class vmuldiv_fp64(vecD dst, vecD src1, vecD src2)
%{
  single_instruction;
  dst    : S5(write);
  src1   : S1(read);
  src2   : S1(read);
  INS0   : ISS;
  NEON_FP : S5;
%}

pipe_class vmuldiv_fp128(vecX dst, vecX src1, vecX src2)
%{
  single_instruction;
  dst    : S5(write);
  src1   : S1(read);
  src2   : S1(read);
  INS0   : ISS;
  NEON_FP : S5;
%}

// Vector FP square root (only a 128-bit class is defined here).
pipe_class vsqrt_fp128(vecX dst, vecX src)
%{
  single_instruction;
  dst    : S5(write);
  src    : S1(read);
  INS0   : ISS;
  NEON_FP : S5;
%}

// Vector FP unary ops (e.g. fneg/fabs).
pipe_class vunop_fp64(vecD dst, vecD src)
%{
  single_instruction;
  dst    : S5(write);
  src    : S1(read);
  INS01  : ISS;
  NEON_FP : S5;
%}

pipe_class vunop_fp128(vecX dst, vecX src)
%{
  single_instruction;
  dst    : S5(write);
  src    : S1(read);
  INS0   : ISS;
  NEON_FP : S5;
%}
5792 
// DUP (replicate a scalar into all vector lanes) from a general register.
pipe_class vdup_reg_reg64(vecD dst, iRegI src)
%{
  single_instruction;
  dst    : S3(write);
  src    : S1(read);
  INS01  : ISS;
  NEON_FP : S3;
%}

pipe_class vdup_reg_reg128(vecX dst, iRegI src)
%{
  single_instruction;
  dst    : S3(write);
  src    : S1(read);
  INS01  : ISS;
  NEON_FP : S3;
%}

// DUP from an FP/SIMD register lane.
pipe_class vdup_reg_freg64(vecD dst, vRegF src)
%{
  single_instruction;
  dst    : S3(write);
  src    : S1(read);
  INS01  : ISS;
  NEON_FP : S3;
%}

pipe_class vdup_reg_freg128(vecX dst, vRegF src)
%{
  single_instruction;
  dst    : S3(write);
  src    : S1(read);
  INS01  : ISS;
  NEON_FP : S3;
%}

pipe_class vdup_reg_dreg128(vecX dst, vRegD src)
%{
  single_instruction;
  dst    : S3(write);
  src    : S1(read);
  INS01  : ISS;
  NEON_FP : S3;
%}

// MOVI (vector register from encoded immediate): no register sources.
pipe_class vmovi_reg_imm64(vecD dst)
%{
  single_instruction;
  dst    : S3(write);
  INS01  : ISS;
  NEON_FP : S3;
%}

pipe_class vmovi_reg_imm128(vecX dst)
%{
  single_instruction;
  dst    : S3(write);
  INS0   : ISS;
  NEON_FP : S3;
%}
5853 
// Vector load, 64-bit: address operand is consumed at issue (ISS),
// loaded value available at S5.
pipe_class vload_reg_mem64(vecD dst, vmem8 mem)
%{
  single_instruction;
  dst    : S5(write);
  mem    : ISS(read);
  INS01  : ISS;
  NEON_FP : S3;
%}

pipe_class vload_reg_mem128(vecX dst, vmem16 mem)
%{
  single_instruction;
  dst    : S5(write);
  mem    : ISS(read);
  INS01  : ISS;
  NEON_FP : S3;
%}

// Vector store, 64-bit: data register read at S2.
pipe_class vstore_reg_mem64(vecD src, vmem8 mem)
%{
  single_instruction;
  mem    : ISS(read);
  src    : S2(read);
  INS01  : ISS;
  NEON_FP : S3;
%}
5880 
// Vector store, 128-bit: data register read at S2.
// The source is a 128-bit (Q-form) vector, so the operand type is vecX —
// the previous declaration said vecD, inconsistent with vload_reg_mem128
// and with the vmem16 (16-byte) memory operand.
pipe_class vstore_reg_mem128(vecX src, vmem16 mem)
%{
  single_instruction;
  mem    : ISS(read);
  src    : S2(read);
  INS01  : ISS;
  NEON_FP : S3;
%}
5889 
//------- Integer ALU operations --------------------------

// Integer ALU reg-reg operation
// Operands needed in EX1, result generated in EX2
// Eg.  ADD     x0, x1, x2
pipe_class ialu_reg_reg(iRegI dst, iRegI src1, iRegI src2)
%{
  single_instruction;
  dst    : EX2(write);
  src1   : EX1(read);
  src2   : EX1(read);
  INS01  : ISS; // Dual issue as instruction 0 or 1
  ALU    : EX2;
%}

// Integer ALU reg-reg operation with constant shift
// Shifted register must be available in LATE_ISS instead of EX1
// Eg.  ADD     x0, x1, x2, LSL #2
pipe_class ialu_reg_reg_shift(iRegI dst, iRegI src1, iRegI src2, immI shift)
%{
  single_instruction;
  dst    : EX2(write);
  src1   : EX1(read);
  src2   : ISS(read);
  INS01  : ISS;
  ALU    : EX2;
%}

// Integer ALU reg operation with constant shift
// Eg.  LSL     x0, x1, #shift
pipe_class ialu_reg_shift(iRegI dst, iRegI src1)
%{
  single_instruction;
  dst    : EX2(write);
  src1   : ISS(read);
  INS01  : ISS;
  ALU    : EX2;
%}

// Integer ALU reg-reg operation with variable shift
// Both operands must be available in LATE_ISS instead of EX1
// Result is available in EX1 instead of EX2
// Eg.  LSLV    x0, x1, x2
pipe_class ialu_reg_reg_vshift(iRegI dst, iRegI src1, iRegI src2)
%{
  single_instruction;
  dst    : EX1(write);
  src1   : ISS(read);
  src2   : ISS(read);
  INS01  : ISS;
  ALU    : EX1;
%}

// Integer ALU reg-reg operation with extract
// As for _vshift above, but result generated in EX2
// Eg.  EXTR    x0, x1, x2, #N
pipe_class ialu_reg_reg_extr(iRegI dst, iRegI src1, iRegI src2)
%{
  single_instruction;
  dst    : EX2(write);
  src1   : ISS(read);
  src2   : ISS(read);
  INS1   : ISS; // Can only dual issue as Instruction 1
  ALU    : EX1; // NOTE(review): ALU resource held at EX1 though dst is written at EX2 — confirm against the core model
%}

// Integer ALU reg operation
// Eg.  NEG     x0, x1
pipe_class ialu_reg(iRegI dst, iRegI src)
%{
  single_instruction;
  dst    : EX2(write);
  src    : EX1(read);
  INS01  : ISS;
  ALU    : EX2;
%}

// Integer ALU reg immediate operation
// Eg.  ADD     x0, x1, #N
pipe_class ialu_reg_imm(iRegI dst, iRegI src1)
%{
  single_instruction;
  dst    : EX2(write);
  src1   : EX1(read);
  INS01  : ISS;
  ALU    : EX2;
%}

// Integer ALU immediate operation (no source operands)
// Eg.  MOV     x0, #N
pipe_class ialu_imm(iRegI dst)
%{
  single_instruction;
  dst    : EX1(write);
  INS01  : ISS;
  ALU    : EX1;
%}
5987 
//------- Compare operation -------------------------------

// Compare reg-reg
// Eg.  CMP     x0, x1
pipe_class icmp_reg_reg(rFlagsReg cr, iRegI op1, iRegI op2)
%{
  single_instruction;
//  fixed_latency(16);
  cr     : EX2(write);
  op1    : EX1(read);
  op2    : EX1(read);
  INS01  : ISS;
  ALU    : EX2;
%}

// Compare reg-imm
// Eg.  CMP     x0, #N
pipe_class icmp_reg_imm(rFlagsReg cr, iRegI op1)
%{
  single_instruction;
//  fixed_latency(16);
  cr     : EX2(write);
  op1    : EX1(read);
  INS01  : ISS;
  ALU    : EX2;
%}

//------- Conditional instructions ------------------------

// Conditional no operands
// Eg.  CSINC   x0, zr, zr, <cond>
pipe_class icond_none(iRegI dst, rFlagsReg cr)
%{
  single_instruction;
  cr     : EX1(read);
  dst    : EX2(write);
  INS01  : ISS;
  ALU    : EX2;
%}

// Conditional 2 operand
// EG.  CSEL    X0, X1, X2, <cond>
pipe_class icond_reg_reg(iRegI dst, iRegI src1, iRegI src2, rFlagsReg cr)
%{
  single_instruction;
  cr     : EX1(read);
  src1   : EX1(read);
  src2   : EX1(read);
  dst    : EX2(write);
  INS01  : ISS;
  ALU    : EX2;
%}

// Conditional 1 operand
// EG.  CSEL    X0, X1, zr, <cond>
pipe_class icond_reg(iRegI dst, iRegI src, rFlagsReg cr)
%{
  single_instruction;
  cr     : EX1(read);
  src    : EX1(read);
  dst    : EX2(write);
  INS01  : ISS;
  ALU    : EX2;
%}
6052 
//------- Multiply pipeline operations --------------------

// Multiply reg-reg (32 bit)
// Eg.  MUL     w0, w1, w2
pipe_class imul_reg_reg(iRegI dst, iRegI src1, iRegI src2)
%{
  single_instruction;
  dst    : WR(write);
  src1   : ISS(read);
  src2   : ISS(read);
  INS01  : ISS;
  MAC    : WR;
%}

// Multiply accumulate (32 bit)
// Eg.  MADD    w0, w1, w2, w3
pipe_class imac_reg_reg(iRegI dst, iRegI src1, iRegI src2, iRegI src3)
%{
  single_instruction;
  dst    : WR(write);
  src1   : ISS(read);
  src2   : ISS(read);
  src3   : ISS(read);
  INS01  : ISS;
  MAC    : WR;
%}

// Multiply reg-reg (64 bit)
// Eg.  MUL     x0, x1, x2
pipe_class lmul_reg_reg(iRegI dst, iRegI src1, iRegI src2)
%{
  single_instruction;
  fixed_latency(3); // Maximum latency for 64 bit mul
  dst    : WR(write);
  src1   : ISS(read);
  src2   : ISS(read);
  INS01  : ISS;
  MAC    : WR;
%}

// Multiply accumulate (64 bit)
// Eg.  MADD    x0, x1, x2, x3
pipe_class lmac_reg_reg(iRegI dst, iRegI src1, iRegI src2, iRegI src3)
%{
  single_instruction;
  fixed_latency(3); // Maximum latency for 64 bit mul
  dst    : WR(write);
  src1   : ISS(read);
  src2   : ISS(read);
  src3   : ISS(read);
  INS01  : ISS;
  MAC    : WR;
%}

//------- Divide pipeline operations --------------------

// Eg.  SDIV    w0, w1, w2
pipe_class idiv_reg_reg(iRegI dst, iRegI src1, iRegI src2)
%{
  single_instruction;
  fixed_latency(8); // Maximum latency for 32 bit divide
  dst    : WR(write);
  src1   : ISS(read);
  src2   : ISS(read);
  INS0   : ISS; // Can only dual issue as instruction 0
  DIV    : WR;
%}

// Eg.  SDIV    x0, x1, x2
pipe_class ldiv_reg_reg(iRegI dst, iRegI src1, iRegI src2)
%{
  single_instruction;
  fixed_latency(16); // Maximum latency for 64 bit divide
  dst    : WR(write);
  src1   : ISS(read);
  src2   : ISS(read);
  INS0   : ISS; // Can only dual issue as instruction 0
  DIV    : WR;
%}
6131 
//------- Load pipeline operations ------------------------

// Load - prefetch
// Eg.  PFRM    <mem>
pipe_class iload_prefetch(memory mem)
%{
  single_instruction;
  mem    : ISS(read);
  INS01  : ISS;
  LDST   : WR;
%}

// Load - reg, mem
// Eg.  LDR     x0, <mem>
pipe_class iload_reg_mem(iRegI dst, memory mem)
%{
  single_instruction;
  dst    : WR(write);
  mem    : ISS(read);
  INS01  : ISS;
  LDST   : WR;
%}

// Load - reg, reg (register-offset addressing)
// Eg.  LDR     x0, [sp, x1]
pipe_class iload_reg_reg(iRegI dst, iRegI src)
%{
  single_instruction;
  dst    : WR(write);
  src    : ISS(read);
  INS01  : ISS;
  LDST   : WR;
%}

//------- Store pipeline operations -----------------------

// Store - zr, mem (stores of the zero register need no data read)
// Eg.  STR     zr, <mem>
pipe_class istore_mem(memory mem)
%{
  single_instruction;
  mem    : ISS(read);
  INS01  : ISS;
  LDST   : WR;
%}

// Store - reg, mem (data register read late, at EX2)
// Eg.  STR     x0, <mem>
pipe_class istore_reg_mem(iRegI src, memory mem)
%{
  single_instruction;
  mem    : ISS(read);
  src    : EX2(read);
  INS01  : ISS;
  LDST   : WR;
%}

// Store - reg, reg (register-offset addressing)
// Eg. STR      x0, [sp, x1]
pipe_class istore_reg_reg(iRegI dst, iRegI src)
%{
  single_instruction;
  dst    : ISS(read);
  src    : EX2(read);
  INS01  : ISS;
  LDST   : WR;
%}
6199 
//------- Branch pipeline operations ----------------------

// Branch
pipe_class pipe_branch()
%{
  single_instruction;
  INS01  : ISS;
  BRANCH : EX1;
%}

// Conditional branch
pipe_class pipe_branch_cond(rFlagsReg cr)
%{
  single_instruction;
  cr     : EX1(read);
  INS01  : ISS;
  BRANCH : EX1;
%}

// Compare & Branch
// EG.  CBZ/CBNZ
pipe_class pipe_cmp_branch(iRegI op1)
%{
  single_instruction;
  op1    : EX1(read);
  INS01  : ISS;
  BRANCH : EX1;
%}

//------- Synchronisation operations ----------------------

// Any operation requiring serialization.
// EG.  DMB/Atomic Ops/Load Acquire/Str Release
pipe_class pipe_serial()
%{
  single_instruction;
  force_serialization;
  fixed_latency(16);
  INS01  : ISS(2); // Cannot dual issue with any other instruction
  LDST   : WR;
%}

// Generic big/slow expanded idiom - also serialized
pipe_class pipe_slow()
%{
  instruction_count(10);
  multiple_bundles;
  force_serialization;
  fixed_latency(16);
  INS01  : ISS(2); // Cannot dual issue with any other instruction
  LDST   : WR;
%}

// Empty pipeline class
pipe_class pipe_class_empty()
%{
  single_instruction;
  fixed_latency(0);
%}

// Default pipeline class.
pipe_class pipe_class_default()
%{
  single_instruction;
  fixed_latency(2);
%}

// Pipeline class for compares.
pipe_class pipe_class_compare()
%{
  single_instruction;
  fixed_latency(16);
%}

// Pipeline class for memory operations.
pipe_class pipe_class_memory()
%{
  single_instruction;
  fixed_latency(16);
%}

// Pipeline class for call.
pipe_class pipe_class_call()
%{
  single_instruction;
  fixed_latency(100);
%}

// Define the class for the Nop node.
define %{
   MachNop = pipe_class_empty;
%}
6292 
6293 %}
6294 //----------INSTRUCTIONS-------------------------------------------------------
6295 //
6296 // match      -- States which machine-independent subtree may be replaced
6297 //               by this instruction.
6298 // ins_cost   -- The estimated cost of this instruction is used by instruction
6299 //               selection to identify a minimum cost tree of machine
6300 //               instructions that matches a tree of machine-independent
6301 //               instructions.
6302 // format     -- A string providing the disassembly for this instruction.
6303 //               The value of an instruction's operand may be inserted
6304 //               by referring to it with a '$' prefix.
6305 // opcode     -- Three instruction opcodes may be provided.  These are referred
6306 //               to within an encode class as $primary, $secondary, and $tertiary
//               respectively. The primary opcode is commonly used to
6308 //               indicate the type of machine instruction, while secondary
6309 //               and tertiary are often used for prefix options or addressing
6310 //               modes.
6311 // ins_encode -- A list of encode classes with parameters. The encode class
6312 //               name must have been defined in an 'enc_class' specification
6313 //               in the encode section of the architecture description.
6314 
// ============================================================================
// Memory (Load/Store) Instructions

// Load Instructions
//
// These rules match only plain (non-acquire) loads: the
// needs_acquiring_load predicate excludes volatile loads, which are
// matched by separate acquiring-load rules elsewhere in this file.

// Load Byte (8 bit signed)
instruct loadB(iRegINoSp dst, memory mem)
%{
  match(Set dst (LoadB mem));
  predicate(!needs_acquiring_load(n));

  ins_cost(4 * INSN_COST);
  format %{ "ldrsbw  $dst, $mem\t# byte" %}

  ins_encode(aarch64_enc_ldrsbw(dst, mem));

  ins_pipe(iload_reg_mem);
%}

// Load Byte (8 bit signed) into long
instruct loadB2L(iRegLNoSp dst, memory mem)
%{
  match(Set dst (ConvI2L (LoadB mem)));
  predicate(!needs_acquiring_load(n->in(1)));

  ins_cost(4 * INSN_COST);
  format %{ "ldrsb  $dst, $mem\t# byte" %}

  ins_encode(aarch64_enc_ldrsb(dst, mem));

  ins_pipe(iload_reg_mem);
%}

// Load Byte (8 bit unsigned)
instruct loadUB(iRegINoSp dst, memory mem)
%{
  match(Set dst (LoadUB mem));
  predicate(!needs_acquiring_load(n));

  ins_cost(4 * INSN_COST);
  format %{ "ldrbw  $dst, $mem\t# byte" %}

  ins_encode(aarch64_enc_ldrb(dst, mem));

  ins_pipe(iload_reg_mem);
%}

// Load Byte (8 bit unsigned) into long
instruct loadUB2L(iRegLNoSp dst, memory mem)
%{
  match(Set dst (ConvI2L (LoadUB mem)));
  predicate(!needs_acquiring_load(n->in(1)));

  ins_cost(4 * INSN_COST);
  format %{ "ldrb  $dst, $mem\t# byte" %}

  ins_encode(aarch64_enc_ldrb(dst, mem));

  ins_pipe(iload_reg_mem);
%}

// Load Short (16 bit signed)
instruct loadS(iRegINoSp dst, memory mem)
%{
  match(Set dst (LoadS mem));
  predicate(!needs_acquiring_load(n));

  ins_cost(4 * INSN_COST);
  format %{ "ldrshw  $dst, $mem\t# short" %}

  ins_encode(aarch64_enc_ldrshw(dst, mem));

  ins_pipe(iload_reg_mem);
%}

// Load Short (16 bit signed) into long
instruct loadS2L(iRegLNoSp dst, memory mem)
%{
  match(Set dst (ConvI2L (LoadS mem)));
  predicate(!needs_acquiring_load(n->in(1)));

  ins_cost(4 * INSN_COST);
  format %{ "ldrsh  $dst, $mem\t# short" %}

  ins_encode(aarch64_enc_ldrsh(dst, mem));

  ins_pipe(iload_reg_mem);
%}

// Load Char (16 bit unsigned)
instruct loadUS(iRegINoSp dst, memory mem)
%{
  match(Set dst (LoadUS mem));
  predicate(!needs_acquiring_load(n));

  ins_cost(4 * INSN_COST);
  format %{ "ldrh  $dst, $mem\t# short" %}

  ins_encode(aarch64_enc_ldrh(dst, mem));

  ins_pipe(iload_reg_mem);
%}

// Load Short/Char (16 bit unsigned) into long
instruct loadUS2L(iRegLNoSp dst, memory mem)
%{
  match(Set dst (ConvI2L (LoadUS mem)));
  predicate(!needs_acquiring_load(n->in(1)));

  ins_cost(4 * INSN_COST);
  format %{ "ldrh  $dst, $mem\t# short" %}

  ins_encode(aarch64_enc_ldrh(dst, mem));

  ins_pipe(iload_reg_mem);
%}

// Load Integer (32 bit signed)
instruct loadI(iRegINoSp dst, memory mem)
%{
  match(Set dst (LoadI mem));
  predicate(!needs_acquiring_load(n));

  ins_cost(4 * INSN_COST);
  format %{ "ldrw  $dst, $mem\t# int" %}

  ins_encode(aarch64_enc_ldrw(dst, mem));

  ins_pipe(iload_reg_mem);
%}

// Load Integer (32 bit signed) into long (ldrsw sign-extends to 64 bits)
instruct loadI2L(iRegLNoSp dst, memory mem)
%{
  match(Set dst (ConvI2L (LoadI mem)));
  predicate(!needs_acquiring_load(n->in(1)));

  ins_cost(4 * INSN_COST);
  format %{ "ldrsw  $dst, $mem\t# int" %}

  ins_encode(aarch64_enc_ldrsw(dst, mem));

  ins_pipe(iload_reg_mem);
%}
6459 
// Load Integer (32 bit unsigned) into long
// A 32-bit ldrw zero-extends, so (AndL (ConvI2L (LoadI ...)) 0xFFFFFFFF)
// collapses to a single load; the mask operand is consumed by the match.
instruct loadUI2L(iRegLNoSp dst, memory mem, immL_32bits mask)
%{
  match(Set dst (AndL (ConvI2L (LoadI mem)) mask));
  predicate(!needs_acquiring_load(n->in(1)->in(1)->as_Load()));

  ins_cost(4 * INSN_COST);
  format %{ "ldrw  $dst, $mem\t# int" %}

  ins_encode(aarch64_enc_ldrw(dst, mem));

  ins_pipe(iload_reg_mem);
%}
6473 
// Load Long (64 bit signed)
// NOTE(review): the format text says "# int" although this is a 64-bit
// load — debug-listing text only, the encoding is correct.
instruct loadL(iRegLNoSp dst, memory mem)
%{
  match(Set dst (LoadL mem));
  predicate(!needs_acquiring_load(n));

  ins_cost(4 * INSN_COST);
  format %{ "ldr  $dst, $mem\t# int" %}

  ins_encode(aarch64_enc_ldr(dst, mem));

  ins_pipe(iload_reg_mem);
%}

// Load Range (array length; never an acquiring load, so no predicate)
instruct loadRange(iRegINoSp dst, memory mem)
%{
  match(Set dst (LoadRange mem));

  ins_cost(4 * INSN_COST);
  format %{ "ldrw  $dst, $mem\t# range" %}

  ins_encode(aarch64_enc_ldrw(dst, mem));

  ins_pipe(iload_reg_mem);
%}

// Load Pointer
instruct loadP(iRegPNoSp dst, memory mem)
%{
  match(Set dst (LoadP mem));
  predicate(!needs_acquiring_load(n));

  ins_cost(4 * INSN_COST);
  format %{ "ldr  $dst, $mem\t# ptr" %}

  ins_encode(aarch64_enc_ldr(dst, mem));

  ins_pipe(iload_reg_mem);
%}

// Load Compressed Pointer
instruct loadN(iRegNNoSp dst, memory mem)
%{
  match(Set dst (LoadN mem));
  predicate(!needs_acquiring_load(n));

  ins_cost(4 * INSN_COST);
  format %{ "ldrw  $dst, $mem\t# compressed ptr" %}

  ins_encode(aarch64_enc_ldrw(dst, mem));

  ins_pipe(iload_reg_mem);
%}

// Load Klass Pointer
instruct loadKlass(iRegPNoSp dst, memory mem)
%{
  match(Set dst (LoadKlass mem));
  predicate(!needs_acquiring_load(n));

  ins_cost(4 * INSN_COST);
  format %{ "ldr  $dst, $mem\t# class" %}

  ins_encode(aarch64_enc_ldr(dst, mem));

  ins_pipe(iload_reg_mem);
%}

// Load Narrow Klass Pointer
instruct loadNKlass(iRegNNoSp dst, memory mem)
%{
  match(Set dst (LoadNKlass mem));
  predicate(!needs_acquiring_load(n));

  ins_cost(4 * INSN_COST);
  format %{ "ldrw  $dst, $mem\t# compressed class ptr" %}

  ins_encode(aarch64_enc_ldrw(dst, mem));

  ins_pipe(iload_reg_mem);
%}

// Load Float
instruct loadF(vRegF dst, memory mem)
%{
  match(Set dst (LoadF mem));
  predicate(!needs_acquiring_load(n));

  ins_cost(4 * INSN_COST);
  format %{ "ldrs  $dst, $mem\t# float" %}

  ins_encode( aarch64_enc_ldrs(dst, mem) );

  ins_pipe(pipe_class_memory);
%}

// Load Double
instruct loadD(vRegD dst, memory mem)
%{
  match(Set dst (LoadD mem));
  predicate(!needs_acquiring_load(n));

  ins_cost(4 * INSN_COST);
  format %{ "ldrd  $dst, $mem\t# double" %}

  ins_encode( aarch64_enc_ldrd(dst, mem) );

  ins_pipe(pipe_class_memory);
%}
6584 
6585 
// Load Int Constant (materialize a 32-bit immediate)
instruct loadConI(iRegINoSp dst, immI src)
%{
  match(Set dst src);

  ins_cost(INSN_COST);
  format %{ "mov $dst, $src\t# int" %}

  ins_encode( aarch64_enc_movw_imm(dst, src) );

  ins_pipe(ialu_imm);
%}

// Load Long Constant (materialize a 64-bit immediate)
instruct loadConL(iRegLNoSp dst, immL src)
%{
  match(Set dst src);

  ins_cost(INSN_COST);
  format %{ "mov $dst, $src\t# long" %}

  ins_encode( aarch64_enc_mov_imm(dst, src) );

  ins_pipe(ialu_imm);
%}
6611 
// Load Pointer Constant
// Costed higher than a plain immediate since a full pointer may need a
// multi-instruction sequence (or relocation).

instruct loadConP(iRegPNoSp dst, immP con)
%{
  match(Set dst con);

  ins_cost(INSN_COST * 4);
  format %{
    "mov  $dst, $con\t# ptr\n\t"
  %}

  ins_encode(aarch64_enc_mov_p(dst, con));

  ins_pipe(ialu_imm);
%}

// Load Null Pointer Constant

instruct loadConP0(iRegPNoSp dst, immP0 con)
%{
  match(Set dst con);

  ins_cost(INSN_COST);
  format %{ "mov  $dst, $con\t# NULL ptr" %}

  ins_encode(aarch64_enc_mov_p0(dst, con));

  ins_pipe(ialu_imm);
%}
6641 
// Load Pointer Constant One
// (The format previously said "NULL ptr", copy-pasted from loadConP0;
// this rule materializes the constant pointer value 1.)

instruct loadConP1(iRegPNoSp dst, immP_1 con)
%{
  match(Set dst con);

  ins_cost(INSN_COST);
  format %{ "mov  $dst, $con\t# ptr 1" %}

  ins_encode(aarch64_enc_mov_p1(dst, con));

  ins_pipe(ialu_imm);
%}
6655 
// Load Poll Page Constant (address of the safepoint polling page)

instruct loadConPollPage(iRegPNoSp dst, immPollPage con)
%{
  match(Set dst con);

  ins_cost(INSN_COST);
  format %{ "adr  $dst, $con\t# Poll Page Ptr" %}

  ins_encode(aarch64_enc_mov_poll_page(dst, con));

  ins_pipe(ialu_imm);
%}

// Load Byte Map Base Constant (card-table base for GC barriers)

instruct loadByteMapBase(iRegPNoSp dst, immByteMapBase con)
%{
  match(Set dst con);

  ins_cost(INSN_COST);
  format %{ "adr  $dst, $con\t# Byte Map Base" %}

  ins_encode(aarch64_enc_mov_byte_map_base(dst, con));

  ins_pipe(ialu_imm);
%}

// Load Narrow Pointer Constant

instruct loadConN(iRegNNoSp dst, immN con)
%{
  match(Set dst con);

  ins_cost(INSN_COST * 4);
  format %{ "mov  $dst, $con\t# compressed ptr" %}

  ins_encode(aarch64_enc_mov_n(dst, con));

  ins_pipe(ialu_imm);
%}

// Load Narrow Null Pointer Constant

instruct loadConN0(iRegNNoSp dst, immN0 con)
%{
  match(Set dst con);

  ins_cost(INSN_COST);
  format %{ "mov  $dst, $con\t# compressed NULL ptr" %}

  ins_encode(aarch64_enc_mov_n0(dst, con));

  ins_pipe(ialu_imm);
%}

// Load Narrow Klass Constant

instruct loadConNKlass(iRegNNoSp dst, immNKlass con)
%{
  match(Set dst con);

  ins_cost(INSN_COST);
  format %{ "mov  $dst, $con\t# compressed klass ptr" %}

  ins_encode(aarch64_enc_mov_nk(dst, con));

  ins_pipe(ialu_imm);
%}
6725 
// Load Packed Float Constant
// "Packed" = representable as an FMOV 8-bit encoded immediate, so no
// constant-table load is needed. The cast to double matches the
// assembler's fmovs(FloatRegister, double) signature.

instruct loadConF_packed(vRegF dst, immFPacked con) %{
  match(Set dst con);
  ins_cost(INSN_COST * 4);
  format %{ "fmovs  $dst, $con"%}
  ins_encode %{
    __ fmovs(as_FloatRegister($dst$$reg), (double)$con$$constant);
  %}

  ins_pipe(fp_imm_s);
%}

// Load Float Constant (general case: fetched from the constant table)

instruct loadConF(vRegF dst, immF con) %{
  match(Set dst con);

  ins_cost(INSN_COST * 4);

  format %{
    "ldrs $dst, [$constantaddress]\t# load from constant table: float=$con\n\t"
  %}

  ins_encode %{
    __ ldrs(as_FloatRegister($dst$$reg), $constantaddress($con));
  %}

  ins_pipe(fp_load_constant_s);
%}
6756 
// Load Packed Double Constant
// "Packed" = representable as an FMOV 8-bit encoded immediate.

instruct loadConD_packed(vRegD dst, immDPacked con) %{
  match(Set dst con);
  ins_cost(INSN_COST);
  format %{ "fmovd  $dst, $con"%}
  ins_encode %{
    __ fmovd(as_FloatRegister($dst$$reg), $con$$constant);
  %}

  ins_pipe(fp_imm_d);
%}
6769 
// Load Double Constant (general case: fetched from the constant table).
// (The format previously said "float=$con", copy-pasted from loadConF.)

instruct loadConD(vRegD dst, immD con) %{
  match(Set dst con);

  ins_cost(INSN_COST * 5);
  format %{
    "ldrd $dst, [$constantaddress]\t# load from constant table: double=$con\n\t"
  %}

  ins_encode %{
    __ ldrd(as_FloatRegister($dst$$reg), $constantaddress($con));
  %}

  ins_pipe(fp_load_constant_d);
%}
6786 
// Store Instructions
//
// Plain (non-releasing) stores only; volatile stores are matched by
// separate releasing-store rules elsewhere in this file.

// Store CMS card-mark Immediate (StoreStore barrier elided when the
// predicate proves it unnecessary)
instruct storeimmCM0(immI0 zero, memory mem)
%{
  match(Set mem (StoreCM mem zero));
  predicate(unnecessary_storestore(n));

  ins_cost(INSN_COST);
  format %{ "storestore (elided)\n\t"
            "strb zr, $mem\t# byte" %}

  ins_encode(aarch64_enc_strb0(mem));

  ins_pipe(istore_mem);
%}

// Store CMS card-mark Immediate with intervening StoreStore
// needed when using CMS with no conditional card marking
instruct storeimmCM0_ordered(immI0 zero, memory mem)
%{
  match(Set mem (StoreCM mem zero));

  ins_cost(INSN_COST * 2);
  format %{ "storestore\n\t"
            "dmb ishst"
            "\n\tstrb zr, $mem\t# byte" %}

  ins_encode(aarch64_enc_strb0_ordered(mem));

  ins_pipe(istore_mem);
%}
6819 
// Store Byte
instruct storeB(iRegIorL2I src, memory mem)
%{
  match(Set mem (StoreB mem src));
  predicate(!needs_releasing_store(n));

  ins_cost(INSN_COST);
  format %{ "strb  $src, $mem\t# byte" %}

  ins_encode(aarch64_enc_strb(src, mem));

  ins_pipe(istore_reg_mem);
%}
6833 
6834 
// Store zero byte. Uses the same aarch64_enc_strb0 encoding as
// storeimmCM0, which stores the zero register — the format now prints
// "strb zr" to match (the old text, "strb rscractch2", was a typo and
// named the wrong register).
instruct storeimmB0(immI0 zero, memory mem)
%{
  match(Set mem (StoreB mem zero));
  predicate(!needs_releasing_store(n));

  ins_cost(INSN_COST);
  format %{ "strb zr, $mem\t# byte" %}

  ins_encode(aarch64_enc_strb0(mem));

  ins_pipe(istore_mem);
%}
6847 
// Store Char/Short
instruct storeC(iRegIorL2I src, memory mem)
%{
  match(Set mem (StoreC mem src));
  predicate(!needs_releasing_store(n));

  ins_cost(INSN_COST);
  format %{ "strh  $src, $mem\t# short" %}

  ins_encode(aarch64_enc_strh(src, mem));

  ins_pipe(istore_reg_mem);
%}

// Store zero Char/Short (stores zr, no data register needed)
instruct storeimmC0(immI0 zero, memory mem)
%{
  match(Set mem (StoreC mem zero));
  predicate(!needs_releasing_store(n));

  ins_cost(INSN_COST);
  format %{ "strh  zr, $mem\t# short" %}

  ins_encode(aarch64_enc_strh0(mem));

  ins_pipe(istore_mem);
%}
6874 
6875 // Store Integer
6876 
6877 instruct storeI(iRegIorL2I src, memory mem)
6878 %{
6879   match(Set mem(StoreI mem src));
6880   predicate(!needs_releasing_store(n));
6881 
6882   ins_cost(INSN_COST);
6883   format %{ "strw  $src, $mem\t# int" %}
6884 
6885   ins_encode(aarch64_enc_strw(src, mem));
6886 
6887   ins_pipe(istore_reg_mem);
6888 %}
6889 
6890 instruct storeimmI0(immI0 zero, memory mem)
6891 %{
6892   match(Set mem(StoreI mem zero));
6893   predicate(!needs_releasing_store(n));
6894 
6895   ins_cost(INSN_COST);
6896   format %{ "strw  zr, $mem\t# int" %}
6897 
6898   ins_encode(aarch64_enc_strw0(mem));
6899 
6900   ins_pipe(istore_mem);
6901 %}
6902 
6903 // Store Long (64 bit signed)
// Plain (non-volatile) 64-bit store (str) of $src to $mem.
instruct storeL(iRegL src, memory mem)
%{
  match(Set mem (StoreL mem src));
  predicate(!needs_releasing_store(n));

  ins_cost(INSN_COST);
  // Annotation corrected from "# int": this rule matches StoreL (64-bit).
  format %{ "str  $src, $mem\t# long" %}

  ins_encode(aarch64_enc_str(src, mem));

  ins_pipe(istore_reg_mem);
%}
6916 
6917 // Store Long (64 bit signed)
// Store Immediate Long Zero: non-volatile 64-bit str of the zero register.
instruct storeimmL0(immL0 zero, memory mem)
%{
  match(Set mem (StoreL mem zero));
  predicate(!needs_releasing_store(n));

  ins_cost(INSN_COST);
  // Annotation corrected from "# int": this rule matches StoreL (64-bit).
  format %{ "str  zr, $mem\t# long" %}

  ins_encode(aarch64_enc_str0(mem));

  ins_pipe(istore_mem);
%}
6930 
6931 // Store Pointer
// Plain (non-volatile) 64-bit store of an uncompressed pointer.
instruct storeP(iRegP src, memory mem)
%{
  match(Set mem (StoreP mem src));
  predicate(!needs_releasing_store(n));

  ins_cost(INSN_COST);
  format %{ "str  $src, $mem\t# ptr" %}

  ins_encode(aarch64_enc_str(src, mem));

  ins_pipe(istore_reg_mem);
%}
6944 
6945 // Store Pointer
// Store Immediate Pointer NULL: non-volatile 64-bit str of the zero register.
instruct storeimmP0(immP0 zero, memory mem)
%{
  match(Set mem (StoreP mem zero));
  predicate(!needs_releasing_store(n));

  ins_cost(INSN_COST);
  format %{ "str zr, $mem\t# ptr" %}

  ins_encode(aarch64_enc_str0(mem));

  ins_pipe(istore_mem);
%}
6958 
6959 // Store Compressed Pointer
// Plain (non-volatile) 32-bit store of a compressed (narrow) oop.
instruct storeN(iRegN src, memory mem)
%{
  match(Set mem (StoreN mem src));
  predicate(!needs_releasing_store(n));

  ins_cost(INSN_COST);
  format %{ "strw  $src, $mem\t# compressed ptr" %}

  ins_encode(aarch64_enc_strw(src, mem));

  ins_pipe(istore_reg_mem);
%}
6972 
// Store Compressed-Pointer Zero by reusing rheapbase as the zero source.
// Only valid when both the narrow-oop and narrow-klass bases are NULL, so
// rheapbase is known to hold zero; predicate also excludes releasing stores.
instruct storeImmN0(iRegIHeapbase heapbase, immN0 zero, memory mem)
%{
  match(Set mem (StoreN mem zero));
  predicate(Universe::narrow_oop_base() == NULL &&
            Universe::narrow_klass_base() == NULL &&
            (!needs_releasing_store(n)));

  ins_cost(INSN_COST);
  format %{ "strw  rheapbase, $mem\t# compressed ptr (rheapbase==0)" %}

  ins_encode(aarch64_enc_strw(heapbase, mem));

  ins_pipe(istore_reg_mem);
%}
6987 
6988 // Store Float
// Plain (non-volatile) single-precision FP store (strs) from a vector reg.
instruct storeF(vRegF src, memory mem)
%{
  match(Set mem (StoreF mem src));
  predicate(!needs_releasing_store(n));

  ins_cost(INSN_COST);
  format %{ "strs  $src, $mem\t# float" %}

  ins_encode( aarch64_enc_strs(src, mem) );

  ins_pipe(pipe_class_memory);
%}
7001 
7002 // TODO
7003 // implement storeImmF0 and storeFImmPacked
7004 
7005 // Store Double
// Plain (non-volatile) double-precision FP store (strd) from a vector reg.
instruct storeD(vRegD src, memory mem)
%{
  match(Set mem (StoreD mem src));
  predicate(!needs_releasing_store(n));

  ins_cost(INSN_COST);
  format %{ "strd  $src, $mem\t# double" %}

  ins_encode( aarch64_enc_strd(src, mem) );

  ins_pipe(pipe_class_memory);
%}
7018 
7019 // Store Compressed Klass Pointer
// Plain (non-volatile) 32-bit store of a compressed klass pointer.
// (predicate precedes match here; ADL accepts either ordering.)
instruct storeNKlass(iRegN src, memory mem)
%{
  predicate(!needs_releasing_store(n));
  match(Set mem (StoreNKlass mem src));

  ins_cost(INSN_COST);
  format %{ "strw  $src, $mem\t# compressed klass ptr" %}

  ins_encode(aarch64_enc_strw(src, mem));

  ins_pipe(istore_reg_mem);
%}
7032 
7033 // TODO
7034 // implement storeImmD0 and storeDImmPacked
7035 
7036 // prefetch instructions
7037 // Must be safe to execute with invalid address (cannot fault).
7038 
// Prefetch for allocation (PrefetchAllocation): write-prefetch into L1.
// PRFM never faults, so it is safe even for an invalid address.
instruct prefetchalloc( memory mem ) %{
  match(PrefetchAllocation mem);

  ins_cost(INSN_COST);
  format %{ "prfm $mem, PSTL1KEEP\t# Prefetch into level 1 cache write keep" %}

  ins_encode( aarch64_enc_prefetchw(mem) );

  ins_pipe(iload_prefetch);
%}
7049 
7050 //  ---------------- volatile loads and stores ----------------
7051 
7052 // Load Byte (8 bit signed)
// Volatile load of a signed byte using load-acquire (ldarsb).
// Volatile accesses use a plain indirect address (no offset/index forms).
instruct loadB_volatile(iRegINoSp dst, /* sync_memory*/indirect mem)
%{
  match(Set dst (LoadB mem));

  ins_cost(VOLATILE_REF_COST);
  format %{ "ldarsb  $dst, $mem\t# byte" %}

  ins_encode(aarch64_enc_ldarsb(dst, mem));

  ins_pipe(pipe_serial);
%}
7064 
7065 // Load Byte (8 bit signed) into long
// Volatile load of a signed byte, sign-extended into a long (ldarsb).
instruct loadB2L_volatile(iRegLNoSp dst, /* sync_memory*/indirect mem)
%{
  match(Set dst (ConvI2L (LoadB mem)));

  ins_cost(VOLATILE_REF_COST);
  format %{ "ldarsb  $dst, $mem\t# byte" %}

  ins_encode(aarch64_enc_ldarsb(dst, mem));

  ins_pipe(pipe_serial);
%}
7077 
7078 // Load Byte (8 bit unsigned)
// Volatile load of an unsigned byte using load-acquire (ldarb).
instruct loadUB_volatile(iRegINoSp dst, /* sync_memory*/indirect mem)
%{
  match(Set dst (LoadUB mem));

  ins_cost(VOLATILE_REF_COST);
  format %{ "ldarb  $dst, $mem\t# byte" %}

  ins_encode(aarch64_enc_ldarb(dst, mem));

  ins_pipe(pipe_serial);
%}
7090 
7091 // Load Byte (8 bit unsigned) into long
// Volatile load of an unsigned byte, zero-extended into a long (ldarb).
instruct loadUB2L_volatile(iRegLNoSp dst, /* sync_memory*/indirect mem)
%{
  match(Set dst (ConvI2L (LoadUB mem)));

  ins_cost(VOLATILE_REF_COST);
  format %{ "ldarb  $dst, $mem\t# byte" %}

  ins_encode(aarch64_enc_ldarb(dst, mem));

  ins_pipe(pipe_serial);
%}
7103 
7104 // Load Short (16 bit signed)
// Volatile load of a signed short using load-acquire (ldarshw).
instruct loadS_volatile(iRegINoSp dst, /* sync_memory*/indirect mem)
%{
  match(Set dst (LoadS mem));

  ins_cost(VOLATILE_REF_COST);
  format %{ "ldarshw  $dst, $mem\t# short" %}

  ins_encode(aarch64_enc_ldarshw(dst, mem));

  ins_pipe(pipe_serial);
%}
7116 
// Volatile load of an unsigned short/char using load-acquire (ldarhw).
instruct loadUS_volatile(iRegINoSp dst, /* sync_memory*/indirect mem)
%{
  match(Set dst (LoadUS mem));

  ins_cost(VOLATILE_REF_COST);
  format %{ "ldarhw  $dst, $mem\t# short" %}

  ins_encode(aarch64_enc_ldarhw(dst, mem));

  ins_pipe(pipe_serial);
%}
7128 
7129 // Load Short/Char (16 bit unsigned) into long
// Volatile load of an unsigned short/char, zero-extended into a long (ldarh).
instruct loadUS2L_volatile(iRegLNoSp dst, /* sync_memory*/indirect mem)
%{
  match(Set dst (ConvI2L (LoadUS mem)));

  ins_cost(VOLATILE_REF_COST);
  format %{ "ldarh  $dst, $mem\t# short" %}

  ins_encode(aarch64_enc_ldarh(dst, mem));

  ins_pipe(pipe_serial);
%}
7141 
7142 // Load Short/Char (16 bit signed) into long
// Volatile load of a signed short/char, sign-extended into a long.
instruct loadS2L_volatile(iRegLNoSp dst, /* sync_memory*/indirect mem)
%{
  match(Set dst (ConvI2L (LoadS mem)));

  ins_cost(VOLATILE_REF_COST);
  // Format corrected from "ldarh": the encoding below is the signed
  // load-acquire (aarch64_enc_ldarsh), so the disassembly comment must
  // say ldarsh to match the emitted instruction.
  format %{ "ldarsh  $dst, $mem\t# short" %}

  ins_encode(aarch64_enc_ldarsh(dst, mem));

  ins_pipe(pipe_serial);
%}
7154 
7155 // Load Integer (32 bit signed)
// Volatile 32-bit load using load-acquire (ldarw).
instruct loadI_volatile(iRegINoSp dst, /* sync_memory*/indirect mem)
%{
  match(Set dst (LoadI mem));

  ins_cost(VOLATILE_REF_COST);
  format %{ "ldarw  $dst, $mem\t# int" %}

  ins_encode(aarch64_enc_ldarw(dst, mem));

  ins_pipe(pipe_serial);
%}
7167 
7168 // Load Integer (32 bit unsigned) into long
// Volatile 32-bit load zero-extended into a long: matches the
// (long)load & 0xFFFFFFFF idiom; ldarw zero-extends, so the AndL is free.
instruct loadUI2L_volatile(iRegLNoSp dst, /* sync_memory*/indirect mem, immL_32bits mask)
%{
  match(Set dst (AndL (ConvI2L (LoadI mem)) mask));

  ins_cost(VOLATILE_REF_COST);
  format %{ "ldarw  $dst, $mem\t# int" %}

  ins_encode(aarch64_enc_ldarw(dst, mem));

  ins_pipe(pipe_serial);
%}
7180 
7181 // Load Long (64 bit signed)
// Volatile 64-bit load using load-acquire (ldar).
instruct loadL_volatile(iRegLNoSp dst, /* sync_memory*/indirect mem)
%{
  match(Set dst (LoadL mem));

  ins_cost(VOLATILE_REF_COST);
  // Annotation corrected from "# int": this rule matches LoadL (64-bit).
  format %{ "ldar  $dst, $mem\t# long" %}

  ins_encode(aarch64_enc_ldar(dst, mem));

  ins_pipe(pipe_serial);
%}
7193 
7194 // Load Pointer
// Volatile load of an uncompressed pointer using load-acquire (ldar).
instruct loadP_volatile(iRegPNoSp dst, /* sync_memory*/indirect mem)
%{
  match(Set dst (LoadP mem));

  ins_cost(VOLATILE_REF_COST);
  format %{ "ldar  $dst, $mem\t# ptr" %}

  ins_encode(aarch64_enc_ldar(dst, mem));

  ins_pipe(pipe_serial);
%}
7206 
7207 // Load Compressed Pointer
// Volatile load of a compressed (narrow) oop using load-acquire (ldarw).
instruct loadN_volatile(iRegNNoSp dst, /* sync_memory*/indirect mem)
%{
  match(Set dst (LoadN mem));

  ins_cost(VOLATILE_REF_COST);
  format %{ "ldarw  $dst, $mem\t# compressed ptr" %}

  ins_encode(aarch64_enc_ldarw(dst, mem));

  ins_pipe(pipe_serial);
%}
7219 
7220 // Load Float
// Volatile single-precision FP load with acquire semantics (fldars encoding).
instruct loadF_volatile(vRegF dst, /* sync_memory*/indirect mem)
%{
  match(Set dst (LoadF mem));

  ins_cost(VOLATILE_REF_COST);
  format %{ "ldars  $dst, $mem\t# float" %}

  ins_encode( aarch64_enc_fldars(dst, mem) );

  ins_pipe(pipe_serial);
%}
7232 
7233 // Load Double
// Volatile double-precision FP load with acquire semantics (fldard encoding).
instruct loadD_volatile(vRegD dst, /* sync_memory*/indirect mem)
%{
  match(Set dst (LoadD mem));

  ins_cost(VOLATILE_REF_COST);
  format %{ "ldard  $dst, $mem\t# double" %}

  ins_encode( aarch64_enc_fldard(dst, mem) );

  ins_pipe(pipe_serial);
%}
7245 
7246 // Store Byte
// Volatile byte store using store-release (stlrb).
instruct storeB_volatile(iRegIorL2I src, /* sync_memory*/indirect mem)
%{
  match(Set mem (StoreB mem src));

  ins_cost(VOLATILE_REF_COST);
  format %{ "stlrb  $src, $mem\t# byte" %}

  ins_encode(aarch64_enc_stlrb(src, mem));

  ins_pipe(pipe_class_memory);
%}
7258 
7259 // Store Char/Short
// Volatile 16-bit store using store-release (stlrh).
instruct storeC_volatile(iRegIorL2I src, /* sync_memory*/indirect mem)
%{
  match(Set mem (StoreC mem src));

  ins_cost(VOLATILE_REF_COST);
  format %{ "stlrh  $src, $mem\t# short" %}

  ins_encode(aarch64_enc_stlrh(src, mem));

  ins_pipe(pipe_class_memory);
%}
7271 
7272 // Store Integer
7273 
// Volatile 32-bit store using store-release (stlrw).
instruct storeI_volatile(iRegIorL2I src, /* sync_memory*/indirect mem)
%{
  match(Set mem(StoreI mem src));

  ins_cost(VOLATILE_REF_COST);
  format %{ "stlrw  $src, $mem\t# int" %}

  ins_encode(aarch64_enc_stlrw(src, mem));

  ins_pipe(pipe_class_memory);
%}
7285 
7286 // Store Long (64 bit signed)
// Volatile 64-bit store using store-release (stlr).
instruct storeL_volatile(iRegL src, /* sync_memory*/indirect mem)
%{
  match(Set mem (StoreL mem src));

  ins_cost(VOLATILE_REF_COST);
  // Annotation corrected from "# int": this rule matches StoreL (64-bit).
  format %{ "stlr  $src, $mem\t# long" %}

  ins_encode(aarch64_enc_stlr(src, mem));

  ins_pipe(pipe_class_memory);
%}
7298 
7299 // Store Pointer
// Volatile pointer store using store-release (stlr).
instruct storeP_volatile(iRegP src, /* sync_memory*/indirect mem)
%{
  match(Set mem (StoreP mem src));

  ins_cost(VOLATILE_REF_COST);
  format %{ "stlr  $src, $mem\t# ptr" %}

  ins_encode(aarch64_enc_stlr(src, mem));

  ins_pipe(pipe_class_memory);
%}
7311 
7312 // Store Compressed Pointer
// Volatile compressed-oop store using store-release (stlrw).
instruct storeN_volatile(iRegN src, /* sync_memory*/indirect mem)
%{
  match(Set mem (StoreN mem src));

  ins_cost(VOLATILE_REF_COST);
  format %{ "stlrw  $src, $mem\t# compressed ptr" %}

  ins_encode(aarch64_enc_stlrw(src, mem));

  ins_pipe(pipe_class_memory);
%}
7324 
7325 // Store Float
// Volatile single-precision FP store with release semantics (fstlrs encoding).
instruct storeF_volatile(vRegF src, /* sync_memory*/indirect mem)
%{
  match(Set mem (StoreF mem src));

  ins_cost(VOLATILE_REF_COST);
  format %{ "stlrs  $src, $mem\t# float" %}

  ins_encode( aarch64_enc_fstlrs(src, mem) );

  ins_pipe(pipe_class_memory);
%}
7337 
7338 // TODO
7339 // implement storeImmF0 and storeFImmPacked
7340 
7341 // Store Double
// Volatile double-precision FP store with release semantics (fstlrd encoding).
instruct storeD_volatile(vRegD src, /* sync_memory*/indirect mem)
%{
  match(Set mem (StoreD mem src));

  ins_cost(VOLATILE_REF_COST);
  format %{ "stlrd  $src, $mem\t# double" %}

  ins_encode( aarch64_enc_fstlrd(src, mem) );

  ins_pipe(pipe_class_memory);
%}
7353 
7354 //  ---------------- end of volatile loads and stores ----------------
7355 
7356 // ============================================================================
7357 // BSWAP Instructions
7358 
// Byte-swap a 32-bit value (ReverseBytesI) with a single revw.
instruct bytes_reverse_int(iRegINoSp dst, iRegIorL2I src) %{
  match(Set dst (ReverseBytesI src));

  ins_cost(INSN_COST);
  format %{ "revw  $dst, $src" %}

  ins_encode %{
    __ revw(as_Register($dst$$reg), as_Register($src$$reg));
  %}

  ins_pipe(ialu_reg);
%}
7371 
// Byte-swap a 64-bit value (ReverseBytesL) with a single rev.
instruct bytes_reverse_long(iRegLNoSp dst, iRegL src) %{
  match(Set dst (ReverseBytesL src));

  ins_cost(INSN_COST);
  format %{ "rev  $dst, $src" %}

  ins_encode %{
    __ rev(as_Register($dst$$reg), as_Register($src$$reg));
  %}

  ins_pipe(ialu_reg);
%}
7384 
// Byte-swap an unsigned 16-bit value (ReverseBytesUS) with rev16w; no
// extension needed since the result is used as an unsigned short.
instruct bytes_reverse_unsigned_short(iRegINoSp dst, iRegIorL2I src) %{
  match(Set dst (ReverseBytesUS src));

  ins_cost(INSN_COST);
  format %{ "rev16w  $dst, $src" %}

  ins_encode %{
    __ rev16w(as_Register($dst$$reg), as_Register($src$$reg));
  %}

  ins_pipe(ialu_reg);
%}
7397 
// Byte-swap a signed 16-bit value (ReverseBytesS): rev16w swaps the
// bytes, then sbfmw #0,#15 sign-extends the low 16 bits into $dst.
instruct bytes_reverse_short(iRegINoSp dst, iRegIorL2I src) %{
  match(Set dst (ReverseBytesS src));

  ins_cost(INSN_COST);
  format %{ "rev16w  $dst, $src\n\t"
            "sbfmw $dst, $dst, #0, #15" %}

  ins_encode %{
    __ rev16w(as_Register($dst$$reg), as_Register($src$$reg));
    __ sbfmw(as_Register($dst$$reg), as_Register($dst$$reg), 0U, 15U);
  %}

  ins_pipe(ialu_reg);
%}
7412 
7413 // ============================================================================
7414 // Zero Count Instructions
7415 
// Count leading zeros of a 32-bit value with a single clzw.
instruct countLeadingZerosI(iRegINoSp dst, iRegIorL2I src) %{
  match(Set dst (CountLeadingZerosI src));

  ins_cost(INSN_COST);
  format %{ "clzw  $dst, $src" %}
  ins_encode %{
    __ clzw(as_Register($dst$$reg), as_Register($src$$reg));
  %}

  ins_pipe(ialu_reg);
%}
7427 
// Count leading zeros of a 64-bit value with a single clz.
instruct countLeadingZerosL(iRegINoSp dst, iRegL src) %{
  match(Set dst (CountLeadingZerosL src));

  ins_cost(INSN_COST);
  format %{ "clz   $dst, $src" %}
  ins_encode %{
    __ clz(as_Register($dst$$reg), as_Register($src$$reg));
  %}

  ins_pipe(ialu_reg);
%}
7439 
// Count trailing zeros (32-bit): reverse the bits (rbitw) then count
// leading zeros (clzw) — AArch64 has no direct trailing-zero-count.
instruct countTrailingZerosI(iRegINoSp dst, iRegIorL2I src) %{
  match(Set dst (CountTrailingZerosI src));

  ins_cost(INSN_COST * 2);
  format %{ "rbitw  $dst, $src\n\t"
            "clzw   $dst, $dst" %}
  ins_encode %{
    __ rbitw(as_Register($dst$$reg), as_Register($src$$reg));
    __ clzw(as_Register($dst$$reg), as_Register($dst$$reg));
  %}

  ins_pipe(ialu_reg);
%}
7453 
// Count trailing zeros (64-bit): rbit then clz, as in the 32-bit rule.
instruct countTrailingZerosL(iRegINoSp dst, iRegL src) %{
  match(Set dst (CountTrailingZerosL src));

  ins_cost(INSN_COST * 2);
  format %{ "rbit   $dst, $src\n\t"
            "clz    $dst, $dst" %}
  ins_encode %{
    __ rbit(as_Register($dst$$reg), as_Register($src$$reg));
    __ clz(as_Register($dst$$reg), as_Register($dst$$reg));
  %}

  ins_pipe(ialu_reg);
%}
7467 
7468 //---------- Population Count Instructions -------------------------------------
7469 //
7470 
// Population count (32-bit) via the SIMD cnt/addv sequence: move the
// zero-extended value into a vector register, count bits per byte (cnt),
// sum the byte counts (addv), and move the result back.
instruct popCountI(iRegINoSp dst, iRegIorL2I src, vRegF tmp) %{
  predicate(UsePopCountInstruction);
  match(Set dst (PopCountI src));
  effect(TEMP tmp);
  ins_cost(INSN_COST * 13);

  format %{ "movw   $src, $src\n\t"
            "mov    $tmp, $src\t# vector (1D)\n\t"
            "cnt    $tmp, $tmp\t# vector (8B)\n\t"
            "addv   $tmp, $tmp\t# vector (8B)\n\t"
            "mov    $dst, $tmp\t# vector (1D)" %}
  ins_encode %{
    // NOTE(review): this writes $src (movw clears the upper 32 bits in
    // place); presumably safe because iRegIorL2I upper bits carry no
    // information — confirm against the register-class definition.
    __ movw($src$$Register, $src$$Register); // ensure top 32 bits 0
    __ mov($tmp$$FloatRegister, __ T1D, 0, $src$$Register);
    __ cnt($tmp$$FloatRegister, __ T8B, $tmp$$FloatRegister);
    __ addv($tmp$$FloatRegister, __ T8B, $tmp$$FloatRegister);
    __ mov($dst$$Register, $tmp$$FloatRegister, __ T1D, 0);
  %}

  ins_pipe(pipe_class_default);
%}
7492 
// Population count of a 32-bit value loaded from memory: load straight
// into the vector temp (ldrs), then the same cnt/addv/mov sequence.
instruct popCountI_mem(iRegINoSp dst, memory mem, vRegF tmp) %{
  predicate(UsePopCountInstruction);
  match(Set dst (PopCountI (LoadI mem)));
  effect(TEMP tmp);
  ins_cost(INSN_COST * 13);

  format %{ "ldrs   $tmp, $mem\n\t"
            "cnt    $tmp, $tmp\t# vector (8B)\n\t"
            "addv   $tmp, $tmp\t# vector (8B)\n\t"
            "mov    $dst, $tmp\t# vector (1D)" %}
  ins_encode %{
    FloatRegister tmp_reg = as_FloatRegister($tmp$$reg);
    // Emit the load through the generic loadStore helper so all memory
    // operand addressing modes (base/index/scale/disp) are handled.
    loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldrs, tmp_reg, $mem->opcode(),
               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
    __ cnt($tmp$$FloatRegister, __ T8B, $tmp$$FloatRegister);
    __ addv($tmp$$FloatRegister, __ T8B, $tmp$$FloatRegister);
    __ mov($dst$$Register, $tmp$$FloatRegister, __ T1D, 0);
  %}

  ins_pipe(pipe_class_default);
%}
7514 
7515 // Note: Long.bitCount(long) returns an int.
// Population count (64-bit) via the SIMD cnt/addv sequence.
// Note: Long.bitCount(long) returns an int.
instruct popCountL(iRegINoSp dst, iRegL src, vRegD tmp) %{
  predicate(UsePopCountInstruction);
  match(Set dst (PopCountL src));
  effect(TEMP tmp);
  ins_cost(INSN_COST * 13);

  format %{ "mov    $tmp, $src\t# vector (1D)\n\t"
            "cnt    $tmp, $tmp\t# vector (8B)\n\t"
            "addv   $tmp, $tmp\t# vector (8B)\n\t"
            "mov    $dst, $tmp\t# vector (1D)" %}
  ins_encode %{
    __ mov($tmp$$FloatRegister, __ T1D, 0, $src$$Register);
    __ cnt($tmp$$FloatRegister, __ T8B, $tmp$$FloatRegister);
    __ addv($tmp$$FloatRegister, __ T8B, $tmp$$FloatRegister);
    __ mov($dst$$Register, $tmp$$FloatRegister, __ T1D, 0);
  %}

  ins_pipe(pipe_class_default);
%}
7535 
// Population count of a 64-bit value loaded from memory: load into the
// vector temp (ldrd), then the cnt/addv/mov sequence.
instruct popCountL_mem(iRegINoSp dst, memory mem, vRegD tmp) %{
  predicate(UsePopCountInstruction);
  match(Set dst (PopCountL (LoadL mem)));
  effect(TEMP tmp);
  ins_cost(INSN_COST * 13);

  format %{ "ldrd   $tmp, $mem\n\t"
            "cnt    $tmp, $tmp\t# vector (8B)\n\t"
            "addv   $tmp, $tmp\t# vector (8B)\n\t"
            "mov    $dst, $tmp\t# vector (1D)" %}
  ins_encode %{
    FloatRegister tmp_reg = as_FloatRegister($tmp$$reg);
    // Emit the load through the generic loadStore helper so all memory
    // operand addressing modes (base/index/scale/disp) are handled.
    loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldrd, tmp_reg, $mem->opcode(),
               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
    __ cnt($tmp$$FloatRegister, __ T8B, $tmp$$FloatRegister);
    __ addv($tmp$$FloatRegister, __ T8B, $tmp$$FloatRegister);
    __ mov($dst$$Register, $tmp$$FloatRegister, __ T1D, 0);
  %}

  ins_pipe(pipe_class_default);
%}
7557 
7558 // ============================================================================
7559 // MemBar Instruction
7560 
// LoadFence: orders subsequent loads/stores after prior loads
// (LoadLoad|LoadStore barrier).
instruct load_fence() %{
  match(LoadFence);
  ins_cost(VOLATILE_REF_COST);

  format %{ "load_fence" %}

  ins_encode %{
    __ membar(Assembler::LoadLoad|Assembler::LoadStore);
  %}
  ins_pipe(pipe_serial);
%}
7572 
// Elide a MemBarAcquire when the predicate proves the preceding access
// already provides acquire semantics; emits only a block comment.
instruct unnecessary_membar_acquire() %{
  predicate(unnecessary_acquire(n));
  match(MemBarAcquire);
  ins_cost(0);

  format %{ "membar_acquire (elided)" %}

  ins_encode %{
    __ block_comment("membar_acquire (elided)");
  %}

  ins_pipe(pipe_class_empty);
%}
7586 
// Full MemBarAcquire: LoadLoad|LoadStore barrier (emitted as dmb ish).
instruct membar_acquire() %{
  match(MemBarAcquire);
  ins_cost(VOLATILE_REF_COST);

  format %{ "membar_acquire\n\t"
            "dmb ish" %}

  ins_encode %{
    __ block_comment("membar_acquire");
    __ membar(Assembler::LoadLoad|Assembler::LoadStore);
  %}

  ins_pipe(pipe_serial);
%}
7601 
7602 
// MemBarAcquireLock is always elided: only a block comment is emitted.
instruct membar_acquire_lock() %{
  match(MemBarAcquireLock);
  ins_cost(VOLATILE_REF_COST);

  format %{ "membar_acquire_lock (elided)" %}

  ins_encode %{
    __ block_comment("membar_acquire_lock (elided)");
  %}

  ins_pipe(pipe_serial);
%}
7615 
// StoreFence: orders subsequent stores after prior loads/stores
// (LoadStore|StoreStore barrier).
instruct store_fence() %{
  match(StoreFence);
  ins_cost(VOLATILE_REF_COST);

  format %{ "store_fence" %}

  ins_encode %{
    __ membar(Assembler::LoadStore|Assembler::StoreStore);
  %}
  ins_pipe(pipe_serial);
%}
7627 
// Elide a MemBarRelease when the predicate proves the following access
// already provides release semantics; emits only a block comment.
instruct unnecessary_membar_release() %{
  predicate(unnecessary_release(n));
  match(MemBarRelease);
  ins_cost(0);

  format %{ "membar_release (elided)" %}

  ins_encode %{
    __ block_comment("membar_release (elided)");
  %}
  ins_pipe(pipe_serial);
%}
7640 
// Full MemBarRelease: LoadStore|StoreStore barrier (emitted as dmb ish).
instruct membar_release() %{
  match(MemBarRelease);
  ins_cost(VOLATILE_REF_COST);

  format %{ "membar_release\n\t"
            "dmb ish" %}

  ins_encode %{
    __ block_comment("membar_release");
    __ membar(Assembler::LoadStore|Assembler::StoreStore);
  %}
  ins_pipe(pipe_serial);
%}
7654 
// MemBarStoreStore: store-store ordering only.
instruct membar_storestore() %{
  match(MemBarStoreStore);
  ins_cost(VOLATILE_REF_COST);

  format %{ "MEMBAR-store-store" %}

  ins_encode %{
    __ membar(Assembler::StoreStore);
  %}
  ins_pipe(pipe_serial);
%}
7666 
// MemBarReleaseLock is always elided: only a block comment is emitted.
instruct membar_release_lock() %{
  match(MemBarReleaseLock);
  ins_cost(VOLATILE_REF_COST);

  format %{ "membar_release_lock (elided)" %}

  ins_encode %{
    __ block_comment("membar_release_lock (elided)");
  %}

  ins_pipe(pipe_serial);
%}
7679 
// Elide a MemBarVolatile when the predicate proves surrounding accesses
// already provide the ordering; emits only a block comment.
instruct unnecessary_membar_volatile() %{
  predicate(unnecessary_volatile(n));
  match(MemBarVolatile);
  ins_cost(0);

  format %{ "membar_volatile (elided)" %}

  ins_encode %{
    __ block_comment("membar_volatile (elided)");
  %}

  ins_pipe(pipe_serial);
%}
7693 
// Full MemBarVolatile: StoreLoad barrier. Cost is inflated (x100) to
// strongly prefer the elided rule above when its predicate holds.
instruct membar_volatile() %{
  match(MemBarVolatile);
  ins_cost(VOLATILE_REF_COST*100);

  format %{ "membar_volatile\n\t"
             "dmb ish"%}

  ins_encode %{
    __ block_comment("membar_volatile");
    __ membar(Assembler::StoreLoad);
  %}

  ins_pipe(pipe_serial);
%}
7708 
7709 // ============================================================================
7710 // Cast/Convert Instructions
7711 
// CastX2P (long -> pointer): a register move, skipped when dst == src.
instruct castX2P(iRegPNoSp dst, iRegL src) %{
  match(Set dst (CastX2P src));

  ins_cost(INSN_COST);
  format %{ "mov $dst, $src\t# long -> ptr" %}

  ins_encode %{
    if ($dst$$reg != $src$$reg) {
      __ mov(as_Register($dst$$reg), as_Register($src$$reg));
    }
  %}

  ins_pipe(ialu_reg);
%}
7726 
// CastP2X (pointer -> long): a register move, skipped when dst == src.
instruct castP2X(iRegLNoSp dst, iRegP src) %{
  match(Set dst (CastP2X src));

  ins_cost(INSN_COST);
  format %{ "mov $dst, $src\t# ptr -> long" %}

  ins_encode %{
    if ($dst$$reg != $src$$reg) {
      __ mov(as_Register($dst$$reg), as_Register($src$$reg));
    }
  %}

  ins_pipe(ialu_reg);
%}
7741 
7742 // Convert oop into int for vectors alignment masking
// Truncate an oop to int (ConvL2I of CastP2X) with a 32-bit movw;
// used for vector-alignment masking.
instruct convP2I(iRegINoSp dst, iRegP src) %{
  match(Set dst (ConvL2I (CastP2X src)));

  ins_cost(INSN_COST);
  format %{ "movw $dst, $src\t# ptr -> int" %}
  ins_encode %{
    __ movw($dst$$Register, $src$$Register);
  %}

  ins_pipe(ialu_reg);
%}
7754 
7755 // Convert compressed oop into int for vectors alignment masking
7756 // in case of 32bit oops (heap < 4Gb).
// Convert a compressed oop into an int for vector-alignment masking when
// the narrow-oop shift is zero (heap < 4Gb): the decode is a no-op, so a
// 32-bit movw of the narrow value suffices.
instruct convN2I(iRegINoSp dst, iRegN src)
%{
  predicate(Universe::narrow_oop_shift() == 0);
  match(Set dst (ConvL2I (CastP2X (DecodeN src))));

  ins_cost(INSN_COST);
  // Format fixed: $dst was missing its '$' (printed literally as "dst"),
  // and the encoding emits movw, not mov.
  format %{ "movw $dst, $src\t# compressed ptr -> int" %}
  ins_encode %{
    __ movw($dst$$Register, $src$$Register);
  %}

  ins_pipe(ialu_reg);
%}
7770 
7771 
7772 // Convert oop pointer into compressed form
// Encode a possibly-null oop to compressed form; the null check inside
// encode_heap_oop can set flags, hence KILL cr.
instruct encodeHeapOop(iRegNNoSp dst, iRegP src, rFlagsReg cr) %{
  predicate(n->bottom_type()->make_ptr()->ptr() != TypePtr::NotNull);
  match(Set dst (EncodeP src));
  effect(KILL cr);
  ins_cost(INSN_COST * 3);
  format %{ "encode_heap_oop $dst, $src" %}
  ins_encode %{
    Register s = $src$$Register;
    Register d = $dst$$Register;
    __ encode_heap_oop(d, s);
  %}
  ins_pipe(ialu_reg);
%}
7786 
// Encode a known-non-null oop; no null check is needed.
// NOTE(review): cr appears in the signature without an effect(KILL cr) —
// presumably encode_heap_oop_not_null leaves flags alone; confirm.
instruct encodeHeapOop_not_null(iRegNNoSp dst, iRegP src, rFlagsReg cr) %{
  predicate(n->bottom_type()->make_ptr()->ptr() == TypePtr::NotNull);
  match(Set dst (EncodeP src));
  ins_cost(INSN_COST * 3);
  format %{ "encode_heap_oop_not_null $dst, $src" %}
  ins_encode %{
    __ encode_heap_oop_not_null($dst$$Register, $src$$Register);
  %}
  ins_pipe(ialu_reg);
%}
7797 
// Decode a possibly-null compressed oop; applies only when the type is
// neither provably non-null nor constant (those use the rule below).
instruct decodeHeapOop(iRegPNoSp dst, iRegN src, rFlagsReg cr) %{
  predicate(n->bottom_type()->is_ptr()->ptr() != TypePtr::NotNull &&
            n->bottom_type()->is_ptr()->ptr() != TypePtr::Constant);
  match(Set dst (DecodeN src));
  ins_cost(INSN_COST * 3);
  format %{ "decode_heap_oop $dst, $src" %}
  ins_encode %{
    Register s = $src$$Register;
    Register d = $dst$$Register;
    __ decode_heap_oop(d, s);
  %}
  ins_pipe(ialu_reg);
%}
7811 
// Decode a compressed oop known to be non-null (or a constant); skips
// the null check of the general decode rule.
instruct decodeHeapOop_not_null(iRegPNoSp dst, iRegN src, rFlagsReg cr) %{
  predicate(n->bottom_type()->is_ptr()->ptr() == TypePtr::NotNull ||
            n->bottom_type()->is_ptr()->ptr() == TypePtr::Constant);
  match(Set dst (DecodeN src));
  ins_cost(INSN_COST * 3);
  format %{ "decode_heap_oop_not_null $dst, $src" %}
  ins_encode %{
    Register s = $src$$Register;
    Register d = $dst$$Register;
    __ decode_heap_oop_not_null(d, s);
  %}
  ins_pipe(ialu_reg);
%}
7825 
7826 // n.b. AArch64 implementations of encode_klass_not_null and
7827 // decode_klass_not_null do not modify the flags register so, unlike
7828 // Intel, we don't kill CR as a side effect here
7829 
// Encode a klass pointer to compressed form; per the note above, the
// AArch64 implementation does not modify flags, so no KILL cr effect.
instruct encodeKlass_not_null(iRegNNoSp dst, iRegP src) %{
  match(Set dst (EncodePKlass src));

  ins_cost(INSN_COST * 3);
  format %{ "encode_klass_not_null $dst,$src" %}

  ins_encode %{
    Register src_reg = as_Register($src$$reg);
    Register dst_reg = as_Register($dst$$reg);
    __ encode_klass_not_null(dst_reg, src_reg);
  %}

   ins_pipe(ialu_reg);
%}
7844 
// Decode a compressed klass pointer; uses the in-place single-register
// form of decode_klass_not_null when dst and src are the same register.
instruct decodeKlass_not_null(iRegPNoSp dst, iRegN src) %{
  match(Set dst (DecodeNKlass src));

  ins_cost(INSN_COST * 3);
  format %{ "decode_klass_not_null $dst,$src" %}

  ins_encode %{
    Register src_reg = as_Register($src$$reg);
    Register dst_reg = as_Register($dst$$reg);
    if (dst_reg != src_reg) {
      __ decode_klass_not_null(dst_reg, src_reg);
    } else {
      __ decode_klass_not_null(dst_reg);
    }
  %}

   ins_pipe(ialu_reg);
%}
7863 
// CheckCastPP is a type-system-only node: zero size, no code emitted.
instruct checkCastPP(iRegPNoSp dst)
%{
  match(Set dst (CheckCastPP dst));

  size(0);
  format %{ "# checkcastPP of $dst" %}
  ins_encode(/* empty encoding */);
  ins_pipe(pipe_class_empty);
%}
7873 
// CastPP is a type-system-only node: zero size, no code emitted.
instruct castPP(iRegPNoSp dst)
%{
  match(Set dst (CastPP dst));

  size(0);
  format %{ "# castPP of $dst" %}
  ins_encode(/* empty encoding */);
  ins_pipe(pipe_class_empty);
%}
7883 
// CastII is a type-system-only node: zero size and cost, no code emitted.
instruct castII(iRegI dst)
%{
  match(Set dst (CastII dst));

  size(0);
  format %{ "# castII of $dst" %}
  ins_encode(/* empty encoding */);
  ins_cost(0);
  ins_pipe(pipe_class_empty);
%}
7894 
7895 // ============================================================================
7896 // Atomic operation instructions
7897 //
7898 // Intel and SPARC both implement Ideal Node LoadPLocked and
7899 // Store{PIL}Conditional instructions using a normal load for the
7900 // LoadPLocked and a CAS for the Store{PIL}Conditional.
7901 //
7902 // The ideal code appears only to use LoadPLocked/StorePLocked as a
7903 // pair to lock object allocations from Eden space when not using
7904 // TLABs.
7905 //
7906 // There does not appear to be a Load{IL}Locked Ideal Node and the
7907 // Ideal code appears to use Store{IL}Conditional as an alias for CAS
7908 // and to use StoreIConditional only for 32-bit and StoreLConditional
7909 // only for 64-bit.
7910 //
7911 // We implement LoadPLocked and StorePLocked instructions using,
7912 // respectively the AArch64 hw load-exclusive and store-conditional
7913 // instructions. Whereas we must implement each of
7914 // Store{IL}Conditional using a CAS which employs a pair of
7915 // instructions comprising a load-exclusive followed by a
7916 // store-conditional.
7917 
7918 
7919 // Locked-load (linked load) of the current heap-top
7920 // used when updating the eden heap top
7921 // implemented using ldaxr on AArch64
7922 
// LoadPLocked: linked (exclusive) acquire load of the heap top, paired
// with storePConditional below for eden allocation; uses ldaxr.
instruct loadPLocked(iRegPNoSp dst, indirect mem)
%{
  match(Set dst (LoadPLocked mem));

  ins_cost(VOLATILE_REF_COST);

  format %{ "ldaxr $dst, $mem\t# ptr linked acquire" %}

  ins_encode(aarch64_enc_ldaxr(dst, mem));

  ins_pipe(pipe_serial);
%}
7935 
7936 // Conditional-store of the updated heap-top.
7937 // Used during allocation of the shared heap.
7938 // Sets flag (EQ) on success.
7939 // implemented using stlxr on AArch64.
7940 
// StorePConditional: release store-exclusive (stlxr) of the updated heap
// top; pairs with loadPLocked. Sets EQ on success (the stlxr status in
// rscratch1 is compared against zero).
instruct storePConditional(memory heap_top_ptr, iRegP oldval, iRegP newval, rFlagsReg cr)
%{
  match(Set cr (StorePConditional heap_top_ptr (Binary oldval newval)));

  ins_cost(VOLATILE_REF_COST);

 // TODO
 // do we need to do a store-conditional release or can we just use a
 // plain store-conditional?

  // NOTE(review): the two format strings concatenate without a "\n\t"
  // separator, so the debug listing prints them on one line — confirm
  // whether that is intended (cf. membar_acquire's format).
  format %{
    "stlxr rscratch1, $newval, $heap_top_ptr\t# ptr cond release"
    "cmpw rscratch1, zr\t# EQ on successful write"
  %}

  ins_encode(aarch64_enc_stlxr(newval, heap_top_ptr));

  ins_pipe(pipe_serial);
%}
7960 
7961 
7962 // storeLConditional is used by PhaseMacroExpand::expand_lock_node
7963 // when attempting to rebias a lock towards the current thread.  We
7964 // must use the acquire form of cmpxchg in order to guarantee acquire
7965 // semantics in this case.
// storeLConditional is used by PhaseMacroExpand::expand_lock_node
// when attempting to rebias a lock towards the current thread.  We
// must use the acquire form of cmpxchg in order to guarantee acquire
// semantics in this case. Sets EQ on a successful exchange.
instruct storeLConditional(indirect mem, iRegLNoSp oldval, iRegLNoSp newval, rFlagsReg cr)
%{
  match(Set cr (StoreLConditional mem (Binary oldval newval)));

  ins_cost(VOLATILE_REF_COST);

  format %{
    "cmpxchg rscratch1, $mem, $oldval, $newval, $mem\t# if $mem == $oldval then $mem <-- $newval"
    "cmpw rscratch1, zr\t# EQ on successful write"
  %}

  ins_encode(aarch64_enc_cmpxchg_acq(mem, oldval, newval));

  ins_pipe(pipe_slow);
%}
7981 
7982 // storeIConditional also has acquire semantics, for no better reason
7983 // than matching storeLConditional.  At the time of writing this
7984 // comment storeIConditional was not used anywhere by AArch64.
// storeIConditional also has acquire semantics, for no better reason
// than matching storeLConditional.  At the time of writing this
// comment storeIConditional was not used anywhere by AArch64.
// Sets EQ on a successful exchange.
instruct storeIConditional(indirect mem, iRegINoSp oldval, iRegINoSp newval, rFlagsReg cr)
%{
  match(Set cr (StoreIConditional mem (Binary oldval newval)));

  ins_cost(VOLATILE_REF_COST);

  format %{
    "cmpxchgw rscratch1, $mem, $oldval, $newval, $mem\t# if $mem == $oldval then $mem <-- $newval"
    "cmpw rscratch1, zr\t# EQ on successful write"
  %}

  ins_encode(aarch64_enc_cmpxchgw_acq(mem, oldval, newval));

  ins_pipe(pipe_slow);
%}
8000 
// standard CompareAndSwapX when we are using barriers
// these have higher priority than the rules selected by a predicate

// XXX No flag versions for CompareAndSwap{I,L,P,N} because matcher
// can't match them

// Each rule below attempts "if *mem == oldval then *mem <- newval"
// and materializes the success flag: res <- (EQ ? 1 : 0).  The
// condition flags are clobbered (KILL cr).  n.b. the acquiring
// variants further down carry a lower ins_cost (VOLATILE_REF_COST vs
// 2 * VOLATILE_REF_COST) -- presumably so that they are preferred
// whenever their predicate holds; confirm against matcher cost rules.

// byte flavour.  n.b. the "(int)" in the format text below is debug
// output only and mislabels the operand width.
instruct compareAndSwapB(iRegINoSp res, indirect mem, iRegINoSp oldval, iRegINoSp newval, rFlagsReg cr) %{

  match(Set res (CompareAndSwapB mem (Binary oldval newval)));
  ins_cost(2 * VOLATILE_REF_COST);

  effect(KILL cr);

  format %{
    "cmpxchgb $mem, $oldval, $newval\t# (int) if $mem == $oldval then $mem <-- $newval"
    "cset $res, EQ\t# $res <-- (EQ ? 1 : 0)"
  %}

  ins_encode(aarch64_enc_cmpxchgb(mem, oldval, newval),
            aarch64_enc_cset_eq(res));

  ins_pipe(pipe_slow);
%}

// short flavour.  n.b. the "(int)" in the format text below is debug
// output only and mislabels the operand width.
instruct compareAndSwapS(iRegINoSp res, indirect mem, iRegINoSp oldval, iRegINoSp newval, rFlagsReg cr) %{

  match(Set res (CompareAndSwapS mem (Binary oldval newval)));
  ins_cost(2 * VOLATILE_REF_COST);

  effect(KILL cr);

  format %{
    "cmpxchgs $mem, $oldval, $newval\t# (int) if $mem == $oldval then $mem <-- $newval"
    "cset $res, EQ\t# $res <-- (EQ ? 1 : 0)"
  %}

  ins_encode(aarch64_enc_cmpxchgs(mem, oldval, newval),
            aarch64_enc_cset_eq(res));

  ins_pipe(pipe_slow);
%}

// int flavour
instruct compareAndSwapI(iRegINoSp res, indirect mem, iRegINoSp oldval, iRegINoSp newval, rFlagsReg cr) %{

  match(Set res (CompareAndSwapI mem (Binary oldval newval)));
  ins_cost(2 * VOLATILE_REF_COST);

  effect(KILL cr);

 format %{
    "cmpxchgw $mem, $oldval, $newval\t# (int) if $mem == $oldval then $mem <-- $newval"
    "cset $res, EQ\t# $res <-- (EQ ? 1 : 0)"
 %}

 ins_encode(aarch64_enc_cmpxchgw(mem, oldval, newval),
            aarch64_enc_cset_eq(res));

  ins_pipe(pipe_slow);
%}

// long flavour
instruct compareAndSwapL(iRegINoSp res, indirect mem, iRegLNoSp oldval, iRegLNoSp newval, rFlagsReg cr) %{

  match(Set res (CompareAndSwapL mem (Binary oldval newval)));
  ins_cost(2 * VOLATILE_REF_COST);

  effect(KILL cr);

 format %{
    "cmpxchg $mem, $oldval, $newval\t# (long) if $mem == $oldval then $mem <-- $newval"
    "cset $res, EQ\t# $res <-- (EQ ? 1 : 0)"
 %}

 ins_encode(aarch64_enc_cmpxchg(mem, oldval, newval),
            aarch64_enc_cset_eq(res));

  ins_pipe(pipe_slow);
%}

// pointer flavour (64-bit cmpxchg, same encoding as long)
instruct compareAndSwapP(iRegINoSp res, indirect mem, iRegP oldval, iRegP newval, rFlagsReg cr) %{

  match(Set res (CompareAndSwapP mem (Binary oldval newval)));
  ins_cost(2 * VOLATILE_REF_COST);

  effect(KILL cr);

 format %{
    "cmpxchg $mem, $oldval, $newval\t# (ptr) if $mem == $oldval then $mem <-- $newval"
    "cset $res, EQ\t# $res <-- (EQ ? 1 : 0)"
 %}

 ins_encode(aarch64_enc_cmpxchg(mem, oldval, newval),
            aarch64_enc_cset_eq(res));

  ins_pipe(pipe_slow);
%}

// narrow-oop flavour (32-bit cmpxchg, same encoding as int)
instruct compareAndSwapN(iRegINoSp res, indirect mem, iRegNNoSp oldval, iRegNNoSp newval, rFlagsReg cr) %{

  match(Set res (CompareAndSwapN mem (Binary oldval newval)));
  ins_cost(2 * VOLATILE_REF_COST);

  effect(KILL cr);

 format %{
    "cmpxchgw $mem, $oldval, $newval\t# (narrow oop) if $mem == $oldval then $mem <-- $newval"
    "cset $res, EQ\t# $res <-- (EQ ? 1 : 0)"
 %}

 ins_encode(aarch64_enc_cmpxchgw(mem, oldval, newval),
            aarch64_enc_cset_eq(res));

  ins_pipe(pipe_slow);
%}
8114 
// alternative CompareAndSwapX when we are eliding barriers

// These match the same ideal nodes as the rules above but are guarded
// by needs_acquiring_load_exclusive(n) and use the acquiring
// (ldaxr-based) cmpxchg encodings.  Their lower ins_cost
// (VOLATILE_REF_COST) lets them take precedence over the
// barrier-using forms when the predicate holds.

// int flavour, acquiring
instruct compareAndSwapIAcq(iRegINoSp res, indirect mem, iRegINoSp oldval, iRegINoSp newval, rFlagsReg cr) %{

  predicate(needs_acquiring_load_exclusive(n));
  match(Set res (CompareAndSwapI mem (Binary oldval newval)));
  ins_cost(VOLATILE_REF_COST);

  effect(KILL cr);

 format %{
    "cmpxchgw_acq $mem, $oldval, $newval\t# (int) if $mem == $oldval then $mem <-- $newval"
    "cset $res, EQ\t# $res <-- (EQ ? 1 : 0)"
 %}

 ins_encode(aarch64_enc_cmpxchgw_acq(mem, oldval, newval),
            aarch64_enc_cset_eq(res));

  ins_pipe(pipe_slow);
%}

// long flavour, acquiring
instruct compareAndSwapLAcq(iRegINoSp res, indirect mem, iRegLNoSp oldval, iRegLNoSp newval, rFlagsReg cr) %{

  predicate(needs_acquiring_load_exclusive(n));
  match(Set res (CompareAndSwapL mem (Binary oldval newval)));
  ins_cost(VOLATILE_REF_COST);

  effect(KILL cr);

 format %{
    "cmpxchg_acq $mem, $oldval, $newval\t# (long) if $mem == $oldval then $mem <-- $newval"
    "cset $res, EQ\t# $res <-- (EQ ? 1 : 0)"
 %}

 ins_encode(aarch64_enc_cmpxchg_acq(mem, oldval, newval),
            aarch64_enc_cset_eq(res));

  ins_pipe(pipe_slow);
%}

// pointer flavour, acquiring (64-bit cmpxchg)
instruct compareAndSwapPAcq(iRegINoSp res, indirect mem, iRegP oldval, iRegP newval, rFlagsReg cr) %{

  predicate(needs_acquiring_load_exclusive(n));
  match(Set res (CompareAndSwapP mem (Binary oldval newval)));
  ins_cost(VOLATILE_REF_COST);

  effect(KILL cr);

 format %{
    "cmpxchg_acq $mem, $oldval, $newval\t# (ptr) if $mem == $oldval then $mem <-- $newval"
    "cset $res, EQ\t# $res <-- (EQ ? 1 : 0)"
 %}

 ins_encode(aarch64_enc_cmpxchg_acq(mem, oldval, newval),
            aarch64_enc_cset_eq(res));

  ins_pipe(pipe_slow);
%}

// narrow-oop flavour, acquiring (32-bit cmpxchg)
instruct compareAndSwapNAcq(iRegINoSp res, indirect mem, iRegNNoSp oldval, iRegNNoSp newval, rFlagsReg cr) %{

  predicate(needs_acquiring_load_exclusive(n));
  match(Set res (CompareAndSwapN mem (Binary oldval newval)));
  ins_cost(VOLATILE_REF_COST);

  effect(KILL cr);

 format %{
    "cmpxchgw_acq $mem, $oldval, $newval\t# (narrow oop) if $mem == $oldval then $mem <-- $newval"
    "cset $res, EQ\t# $res <-- (EQ ? 1 : 0)"
 %}

 ins_encode(aarch64_enc_cmpxchgw_acq(mem, oldval, newval),
            aarch64_enc_cset_eq(res));

  ins_pipe(pipe_slow);
%}
8192 
8193 
8194 // ---------------------------------------------------------------------
8195 
8196 
8197 // BEGIN This section of the file is automatically generated. Do not edit --------------
8198 
8199 // Sundry CAS operations.  Note that release is always true,
8200 // regardless of the memory ordering of the CAS.  This is because we
8201 // need the volatile case to be sequentially consistent but there is
8202 // no trailing StoreLoad barrier emitted by C2.  Unfortunately we
8203 // can't check the type of memory ordering here, so we always emit a
8204 // STLXR.
8205 
8206 // This section is generated from aarch64_ad_cas.m4
8207 
8208 
8209 
// Strong compare-and-exchange: res receives the value found in
// memory (not a success flag), hence TEMP_DEF res so it cannot alias
// an input.  All of these pass /*weak*/ false to MacroAssembler::
// cmpxchg, i.e. they are STRONG operations.
//
// NOTE(review): the format texts previously said "(..., weak)",
// contradicting the /*weak*/ false encodings; corrected here (debug
// output only).  This section is generated from aarch64_ad_cas.m4,
// so the same fix must be mirrored in the m4 source.

instruct compareAndExchangeB(iRegINoSp res, indirect mem, iRegI oldval, iRegI newval, rFlagsReg cr) %{
  match(Set res (CompareAndExchangeB mem (Binary oldval newval)));
  ins_cost(2 * VOLATILE_REF_COST);
  effect(TEMP_DEF res, KILL cr);
  format %{
    "cmpxchgb $res = $mem, $oldval, $newval\t# (byte) if $mem == $oldval then $mem <-- $newval"
  %}
  ins_encode %{
    __ cmpxchg($mem$$Register, $oldval$$Register, $newval$$Register,
               Assembler::byte, /*acquire*/ false, /*release*/ true,
               /*weak*/ false, $res$$Register);
    // The sub-word result comes back zero-extended; Java bytes are
    // signed, so sign-extend into the int result register.
    __ sxtbw($res$$Register, $res$$Register);
  %}
  ins_pipe(pipe_slow);
%}

instruct compareAndExchangeS(iRegINoSp res, indirect mem, iRegI oldval, iRegI newval, rFlagsReg cr) %{
  match(Set res (CompareAndExchangeS mem (Binary oldval newval)));
  ins_cost(2 * VOLATILE_REF_COST);
  effect(TEMP_DEF res, KILL cr);
  format %{
    "cmpxchgs $res = $mem, $oldval, $newval\t# (short) if $mem == $oldval then $mem <-- $newval"
  %}
  ins_encode %{
    __ cmpxchg($mem$$Register, $oldval$$Register, $newval$$Register,
               Assembler::halfword, /*acquire*/ false, /*release*/ true,
               /*weak*/ false, $res$$Register);
    // Sign-extend the zero-extended halfword result (Java shorts are
    // signed).
    __ sxthw($res$$Register, $res$$Register);
  %}
  ins_pipe(pipe_slow);
%}

instruct compareAndExchangeI(iRegINoSp res, indirect mem, iRegI oldval, iRegI newval, rFlagsReg cr) %{
  match(Set res (CompareAndExchangeI mem (Binary oldval newval)));
  ins_cost(2 * VOLATILE_REF_COST);
  effect(TEMP_DEF res, KILL cr);
  format %{
    "cmpxchgw $res = $mem, $oldval, $newval\t# (int) if $mem == $oldval then $mem <-- $newval"
  %}
  ins_encode %{
    __ cmpxchg($mem$$Register, $oldval$$Register, $newval$$Register,
               Assembler::word, /*acquire*/ false, /*release*/ true,
               /*weak*/ false, $res$$Register);
  %}
  ins_pipe(pipe_slow);
%}

instruct compareAndExchangeL(iRegLNoSp res, indirect mem, iRegL oldval, iRegL newval, rFlagsReg cr) %{
  match(Set res (CompareAndExchangeL mem (Binary oldval newval)));
  ins_cost(2 * VOLATILE_REF_COST);
  effect(TEMP_DEF res, KILL cr);
  format %{
    "cmpxchg $res = $mem, $oldval, $newval\t# (long) if $mem == $oldval then $mem <-- $newval"
  %}
  ins_encode %{
    __ cmpxchg($mem$$Register, $oldval$$Register, $newval$$Register,
               Assembler::xword, /*acquire*/ false, /*release*/ true,
               /*weak*/ false, $res$$Register);
  %}
  ins_pipe(pipe_slow);
%}

instruct compareAndExchangeN(iRegNNoSp res, indirect mem, iRegN oldval, iRegN newval, rFlagsReg cr) %{
  match(Set res (CompareAndExchangeN mem (Binary oldval newval)));
  ins_cost(2 * VOLATILE_REF_COST);
  effect(TEMP_DEF res, KILL cr);
  format %{
    "cmpxchgw $res = $mem, $oldval, $newval\t# (narrow oop) if $mem == $oldval then $mem <-- $newval"
  %}
  ins_encode %{
    __ cmpxchg($mem$$Register, $oldval$$Register, $newval$$Register,
               Assembler::word, /*acquire*/ false, /*release*/ true,
               /*weak*/ false, $res$$Register);
  %}
  ins_pipe(pipe_slow);
%}

instruct compareAndExchangeP(iRegPNoSp res, indirect mem, iRegP oldval, iRegP newval, rFlagsReg cr) %{
  match(Set res (CompareAndExchangeP mem (Binary oldval newval)));
  ins_cost(2 * VOLATILE_REF_COST);
  effect(TEMP_DEF res, KILL cr);
  format %{
    "cmpxchg $res = $mem, $oldval, $newval\t# (ptr) if $mem == $oldval then $mem <-- $newval"
  %}
  ins_encode %{
    __ cmpxchg($mem$$Register, $oldval$$Register, $newval$$Register,
               Assembler::xword, /*acquire*/ false, /*release*/ true,
               /*weak*/ false, $res$$Register);
  %}
  ins_pipe(pipe_slow);
%}
8301 
// Weak CAS: may fail spuriously (encodings pass /*weak*/ true) and
// yields only a success flag, res <- (EQ ? 1 : 0).  The value loaded
// from memory is discarded (result register noreg), unlike the
// compareAndExchange rules above, so no sign-extension is needed for
// the sub-word flavours.

instruct weakCompareAndSwapB(iRegINoSp res, indirect mem, iRegI oldval, iRegI newval, rFlagsReg cr) %{
  match(Set res (WeakCompareAndSwapB mem (Binary oldval newval)));
  ins_cost(2 * VOLATILE_REF_COST);
  effect(KILL cr);
  format %{
    "cmpxchg $res = $mem, $oldval, $newval\t# (byte, weak) if $mem == $oldval then $mem <-- $newval"
    "csetw $res, EQ\t# $res <-- (EQ ? 1 : 0)"
  %}
  ins_encode %{
    __ cmpxchg($mem$$Register, $oldval$$Register, $newval$$Register,
               Assembler::byte, /*acquire*/ false, /*release*/ true,
               /*weak*/ true, noreg);
    __ csetw($res$$Register, Assembler::EQ);
  %}
  ins_pipe(pipe_slow);
%}

instruct weakCompareAndSwapS(iRegINoSp res, indirect mem, iRegI oldval, iRegI newval, rFlagsReg cr) %{
  match(Set res (WeakCompareAndSwapS mem (Binary oldval newval)));
  ins_cost(2 * VOLATILE_REF_COST);
  effect(KILL cr);
  format %{
    "cmpxchg $res = $mem, $oldval, $newval\t# (short, weak) if $mem == $oldval then $mem <-- $newval"
    "csetw $res, EQ\t# $res <-- (EQ ? 1 : 0)"
  %}
  ins_encode %{
    __ cmpxchg($mem$$Register, $oldval$$Register, $newval$$Register,
               Assembler::halfword, /*acquire*/ false, /*release*/ true,
               /*weak*/ true, noreg);
    __ csetw($res$$Register, Assembler::EQ);
  %}
  ins_pipe(pipe_slow);
%}

instruct weakCompareAndSwapI(iRegINoSp res, indirect mem, iRegI oldval, iRegI newval, rFlagsReg cr) %{
  match(Set res (WeakCompareAndSwapI mem (Binary oldval newval)));
  ins_cost(2 * VOLATILE_REF_COST);
  effect(KILL cr);
  format %{
    "cmpxchg $res = $mem, $oldval, $newval\t# (int, weak) if $mem == $oldval then $mem <-- $newval"
    "csetw $res, EQ\t# $res <-- (EQ ? 1 : 0)"
  %}
  ins_encode %{
    __ cmpxchg($mem$$Register, $oldval$$Register, $newval$$Register,
               Assembler::word, /*acquire*/ false, /*release*/ true,
               /*weak*/ true, noreg);
    __ csetw($res$$Register, Assembler::EQ);
  %}
  ins_pipe(pipe_slow);
%}

instruct weakCompareAndSwapL(iRegINoSp res, indirect mem, iRegL oldval, iRegL newval, rFlagsReg cr) %{
  match(Set res (WeakCompareAndSwapL mem (Binary oldval newval)));
  ins_cost(2 * VOLATILE_REF_COST);
  effect(KILL cr);
  format %{
    "cmpxchg $res = $mem, $oldval, $newval\t# (long, weak) if $mem == $oldval then $mem <-- $newval"
    "csetw $res, EQ\t# $res <-- (EQ ? 1 : 0)"
  %}
  ins_encode %{
    __ cmpxchg($mem$$Register, $oldval$$Register, $newval$$Register,
               Assembler::xword, /*acquire*/ false, /*release*/ true,
               /*weak*/ true, noreg);
    __ csetw($res$$Register, Assembler::EQ);
  %}
  ins_pipe(pipe_slow);
%}

instruct weakCompareAndSwapN(iRegINoSp res, indirect mem, iRegN oldval, iRegN newval, rFlagsReg cr) %{
  match(Set res (WeakCompareAndSwapN mem (Binary oldval newval)));
  ins_cost(2 * VOLATILE_REF_COST);
  effect(KILL cr);
  format %{
    "cmpxchg $res = $mem, $oldval, $newval\t# (narrow oop, weak) if $mem == $oldval then $mem <-- $newval"
    "csetw $res, EQ\t# $res <-- (EQ ? 1 : 0)"
  %}
  ins_encode %{
    __ cmpxchg($mem$$Register, $oldval$$Register, $newval$$Register,
               Assembler::word, /*acquire*/ false, /*release*/ true,
               /*weak*/ true, noreg);
    __ csetw($res$$Register, Assembler::EQ);
  %}
  ins_pipe(pipe_slow);
%}

instruct weakCompareAndSwapP(iRegINoSp res, indirect mem, iRegP oldval, iRegP newval, rFlagsReg cr) %{
  match(Set res (WeakCompareAndSwapP mem (Binary oldval newval)));
  ins_cost(2 * VOLATILE_REF_COST);
  effect(KILL cr);
  format %{
    "cmpxchg $res = $mem, $oldval, $newval\t# (ptr, weak) if $mem == $oldval then $mem <-- $newval"
    "csetw $res, EQ\t# $res <-- (EQ ? 1 : 0)"
  %}
  ins_encode %{
    __ cmpxchg($mem$$Register, $oldval$$Register, $newval$$Register,
               Assembler::xword, /*acquire*/ false, /*release*/ true,
               /*weak*/ true, noreg);
    __ csetw($res$$Register, Assembler::EQ);
  %}
  ins_pipe(pipe_slow);
%}
8403 
8404 // END This section of the file is automatically generated. Do not edit --------------
8405 // ---------------------------------------------------------------------
8406 
// Atomic exchange: prev <- *mem; *mem <- newv.  The memory operand is
// restricted to a plain base-register address ('indirect'), since the
// encodings use only $mem$$base.

// int flavour (32-bit exchange)
instruct get_and_setI(indirect mem, iRegI newv, iRegINoSp prev) %{
  match(Set prev (GetAndSetI mem newv));
  format %{ "atomic_xchgw  $prev, $newv, [$mem]" %}
  ins_encode %{
    __ atomic_xchgw($prev$$Register, $newv$$Register, as_Register($mem$$base));
  %}
  ins_pipe(pipe_serial);
%}

// long flavour (64-bit exchange)
instruct get_and_setL(indirect mem, iRegL newv, iRegLNoSp prev) %{
  match(Set prev (GetAndSetL mem newv));
  format %{ "atomic_xchg  $prev, $newv, [$mem]" %}
  ins_encode %{
    __ atomic_xchg($prev$$Register, $newv$$Register, as_Register($mem$$base));
  %}
  ins_pipe(pipe_serial);
%}

// narrow-oop flavour (32-bit exchange)
instruct get_and_setN(indirect mem, iRegN newv, iRegINoSp prev) %{
  match(Set prev (GetAndSetN mem newv));
  format %{ "atomic_xchgw $prev, $newv, [$mem]" %}
  ins_encode %{
    __ atomic_xchgw($prev$$Register, $newv$$Register, as_Register($mem$$base));
  %}
  ins_pipe(pipe_serial);
%}

// pointer flavour (64-bit exchange)
instruct get_and_setP(indirect mem, iRegP newv, iRegPNoSp prev) %{
  match(Set prev (GetAndSetP mem newv));
  format %{ "atomic_xchg  $prev, $newv, [$mem]" %}
  ins_encode %{
    __ atomic_xchg($prev$$Register, $newv$$Register, as_Register($mem$$base));
  %}
  ins_pipe(pipe_serial);
%}
8442 
8443 
// Atomic fetch-and-add rules.  n.b. the destination operand is named
// 'newval' but it receives the value GetAndAddX yields -- presumably
// the value loaded from memory; confirm against
// MacroAssembler::atomic_add.  The *_no_res variants apply when the
// result is unused (result_not_used()) and pass noreg to discard the
// loaded value; their slightly lower ins_cost makes them preferred.
// The *i variants take an add/sub-encodable immediate increment.

// long, register increment
instruct get_and_addL(indirect mem, iRegLNoSp newval, iRegL incr) %{
  match(Set newval (GetAndAddL mem incr));
  ins_cost(INSN_COST * 10);
  format %{ "get_and_addL $newval, [$mem], $incr" %}
  ins_encode %{
    __ atomic_add($newval$$Register, $incr$$Register, as_Register($mem$$base));
  %}
  ins_pipe(pipe_serial);
%}

// long, register increment, result unused
instruct get_and_addL_no_res(indirect mem, Universe dummy, iRegL incr) %{
  predicate(n->as_LoadStore()->result_not_used());
  match(Set dummy (GetAndAddL mem incr));
  ins_cost(INSN_COST * 9);
  format %{ "get_and_addL [$mem], $incr" %}
  ins_encode %{
    __ atomic_add(noreg, $incr$$Register, as_Register($mem$$base));
  %}
  ins_pipe(pipe_serial);
%}

// long, immediate increment
instruct get_and_addLi(indirect mem, iRegLNoSp newval, immLAddSub incr) %{
  match(Set newval (GetAndAddL mem incr));
  ins_cost(INSN_COST * 10);
  format %{ "get_and_addL $newval, [$mem], $incr" %}
  ins_encode %{
    __ atomic_add($newval$$Register, $incr$$constant, as_Register($mem$$base));
  %}
  ins_pipe(pipe_serial);
%}

// long, immediate increment, result unused
instruct get_and_addLi_no_res(indirect mem, Universe dummy, immLAddSub incr) %{
  predicate(n->as_LoadStore()->result_not_used());
  match(Set dummy (GetAndAddL mem incr));
  ins_cost(INSN_COST * 9);
  format %{ "get_and_addL [$mem], $incr" %}
  ins_encode %{
    __ atomic_add(noreg, $incr$$constant, as_Register($mem$$base));
  %}
  ins_pipe(pipe_serial);
%}

// int, register increment
instruct get_and_addI(indirect mem, iRegINoSp newval, iRegIorL2I incr) %{
  match(Set newval (GetAndAddI mem incr));
  ins_cost(INSN_COST * 10);
  format %{ "get_and_addI $newval, [$mem], $incr" %}
  ins_encode %{
    __ atomic_addw($newval$$Register, $incr$$Register, as_Register($mem$$base));
  %}
  ins_pipe(pipe_serial);
%}

// int, register increment, result unused
instruct get_and_addI_no_res(indirect mem, Universe dummy, iRegIorL2I incr) %{
  predicate(n->as_LoadStore()->result_not_used());
  match(Set dummy (GetAndAddI mem incr));
  ins_cost(INSN_COST * 9);
  format %{ "get_and_addI [$mem], $incr" %}
  ins_encode %{
    __ atomic_addw(noreg, $incr$$Register, as_Register($mem$$base));
  %}
  ins_pipe(pipe_serial);
%}

// int, immediate increment
instruct get_and_addIi(indirect mem, iRegINoSp newval, immIAddSub incr) %{
  match(Set newval (GetAndAddI mem incr));
  ins_cost(INSN_COST * 10);
  format %{ "get_and_addI $newval, [$mem], $incr" %}
  ins_encode %{
    __ atomic_addw($newval$$Register, $incr$$constant, as_Register($mem$$base));
  %}
  ins_pipe(pipe_serial);
%}

// int, immediate increment, result unused
instruct get_and_addIi_no_res(indirect mem, Universe dummy, immIAddSub incr) %{
  predicate(n->as_LoadStore()->result_not_used());
  match(Set dummy (GetAndAddI mem incr));
  ins_cost(INSN_COST * 9);
  format %{ "get_and_addI [$mem], $incr" %}
  ins_encode %{
    __ atomic_addw(noreg, $incr$$constant, as_Register($mem$$base));
  %}
  ins_pipe(pipe_serial);
%}
8527 
// Manifest a CmpL result in an integer register.
// (src1 < src2) ? -1 : ((src1 > src2) ? 1 : 0)
instruct cmpL3_reg_reg(iRegINoSp dst, iRegL src1, iRegL src2, rFlagsReg flags)
%{
  match(Set dst (CmpL3 src1 src2));
  effect(KILL flags);

  ins_cost(INSN_COST * 6);
  format %{
      "cmp $src1, $src2"
      "csetw $dst, ne"
      "cnegw $dst, lt"
  %}
  // format %{ "CmpL3 $dst, $src1, $src2" %}
  ins_encode %{
    // dst = (src1 != src2) ? 1 : 0, then negate when src1 < src2,
    // yielding the -1/0/1 three-way result.
    __ cmp($src1$$Register, $src2$$Register);
    __ csetw($dst$$Register, Assembler::NE);
    __ cnegw($dst$$Register, $dst$$Register, Assembler::LT);
  %}

  ins_pipe(pipe_class_default);
%}
8550 
// Manifest a CmpL-against-immediate result in an integer register:
// (src1 < src2) ? -1 : ((src1 > src2) ? 1 : 0).  Same technique as
// cmpL3_reg_reg above but comparing against an add/sub-encodable
// immediate.
instruct cmpL3_reg_imm(iRegINoSp dst, iRegL src1, immLAddSub src2, rFlagsReg flags)
%{
  match(Set dst (CmpL3 src1 src2));
  effect(KILL flags);

  ins_cost(INSN_COST * 6);
  format %{
      "cmp $src1, $src2"
      "csetw $dst, ne"
      "cnegw $dst, lt"
  %}
  ins_encode %{
    int32_t con = (int32_t)$src2$$constant;
    // There is no 'cmp reg, -imm' form, so compare with a negative
    // immediate by adding its magnitude instead.  immLAddSub only
    // admits valid add/sub immediates, so -con cannot overflow.
    if (con < 0) {
      __ adds(zr, $src1$$Register, -con);
    } else {
      __ subs(zr, $src1$$Register, con);
    }
    // dst = (src1 != src2) ? 1 : 0, negated when src1 < src2.
    __ csetw($dst$$Register, Assembler::NE);
    __ cnegw($dst$$Register, $dst$$Register, Assembler::LT);
  %}

  ins_pipe(pipe_class_default);
%}
8575 
8576 // ============================================================================
8577 // Conditional Move Instructions
8578 
// n.b. we have identical rules for both a signed compare op (cmpOp)
// and an unsigned compare op (cmpOpU). it would be nice if we could
// define an op class which merged both inputs and use it to type the
// argument to a single rule. unfortunately this fails because the
// opclass does not live up to the COND_INTER interface of its
// component operands. When the generic code tries to negate the
// operand it ends up running the generic MachOper::negate method
// which throws a ShouldNotHappen. So, we have to provide two flavours
// of each rule, one for a cmpOp and a second for a cmpOpU (sigh).
8588 
// Conditional move, int: dst = cmp ? src2 : src1.  n.b. operand
// order -- csel selects its first source operand ($src2) when the
// condition holds, per the format string.
instruct cmovI_reg_reg(cmpOp cmp, rFlagsReg cr, iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2) %{
  match(Set dst (CMoveI (Binary cmp cr) (Binary src1 src2)));

  ins_cost(INSN_COST * 2);
  format %{ "cselw $dst, $src2, $src1 $cmp\t# signed, int"  %}

  ins_encode %{
    __ cselw(as_Register($dst$$reg),
             as_Register($src2$$reg),
             as_Register($src1$$reg),
             (Assembler::Condition)$cmp$$cmpcode);
  %}

  ins_pipe(icond_reg_reg);
%}

// Unsigned-compare flavour of the rule above; see the note preceding
// this section for why both exist.
instruct cmovUI_reg_reg(cmpOpU cmp, rFlagsRegU cr, iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2) %{
  match(Set dst (CMoveI (Binary cmp cr) (Binary src1 src2)));

  ins_cost(INSN_COST * 2);
  format %{ "cselw $dst, $src2, $src1 $cmp\t# unsigned, int"  %}

  ins_encode %{
    __ cselw(as_Register($dst$$reg),
             as_Register($src2$$reg),
             as_Register($src1$$reg),
             (Assembler::Condition)$cmp$$cmpcode);
  %}

  ins_pipe(icond_reg_reg);
%}
8620 
// special cases where one arg is zero

// n.b. this is selected in preference to the rule above because it
// avoids loading constant 0 into a source register

// TODO
// we ought only to be able to cull one of these variants as the ideal
// transforms ought always to order the zero consistently (to left/right?)

// dst = cmp ? src : 0 (zr used for the zero source)
instruct cmovI_zero_reg(cmpOp cmp, rFlagsReg cr, iRegINoSp dst, immI0 zero, iRegIorL2I src) %{
  match(Set dst (CMoveI (Binary cmp cr) (Binary zero src)));

  ins_cost(INSN_COST * 2);
  format %{ "cselw $dst, $src, zr $cmp\t# signed, int"  %}

  ins_encode %{
    __ cselw(as_Register($dst$$reg),
             as_Register($src$$reg),
             zr,
             (Assembler::Condition)$cmp$$cmpcode);
  %}

  ins_pipe(icond_reg);
%}

// unsigned-compare flavour of cmovI_zero_reg
instruct cmovUI_zero_reg(cmpOpU cmp, rFlagsRegU cr, iRegINoSp dst, immI0 zero, iRegIorL2I src) %{
  match(Set dst (CMoveI (Binary cmp cr) (Binary zero src)));

  ins_cost(INSN_COST * 2);
  format %{ "cselw $dst, $src, zr $cmp\t# unsigned, int"  %}

  ins_encode %{
    __ cselw(as_Register($dst$$reg),
             as_Register($src$$reg),
             zr,
             (Assembler::Condition)$cmp$$cmpcode);
  %}

  ins_pipe(icond_reg);
%}

// dst = cmp ? 0 : src
instruct cmovI_reg_zero(cmpOp cmp, rFlagsReg cr, iRegINoSp dst, iRegIorL2I src, immI0 zero) %{
  match(Set dst (CMoveI (Binary cmp cr) (Binary src zero)));

  ins_cost(INSN_COST * 2);
  format %{ "cselw $dst, zr, $src $cmp\t# signed, int"  %}

  ins_encode %{
    __ cselw(as_Register($dst$$reg),
             zr,
             as_Register($src$$reg),
             (Assembler::Condition)$cmp$$cmpcode);
  %}

  ins_pipe(icond_reg);
%}

// unsigned-compare flavour of cmovI_reg_zero
instruct cmovUI_reg_zero(cmpOpU cmp, rFlagsRegU cr, iRegINoSp dst, iRegIorL2I src, immI0 zero) %{
  match(Set dst (CMoveI (Binary cmp cr) (Binary src zero)));

  ins_cost(INSN_COST * 2);
  format %{ "cselw $dst, zr, $src $cmp\t# unsigned, int"  %}

  ins_encode %{
    __ cselw(as_Register($dst$$reg),
             zr,
             as_Register($src$$reg),
             (Assembler::Condition)$cmp$$cmpcode);
  %}

  ins_pipe(icond_reg);
%}
8693 
// special case for creating a boolean 0 or 1

// n.b. this is selected in preference to the rule above because it
// avoids loading constants 0 and 1 into a source register

// dst = cmp ? 0 : 1 via csinc (csinc a, zr, zr, cond yields cond ? 0 : 1)
instruct cmovI_reg_zero_one(cmpOp cmp, rFlagsReg cr, iRegINoSp dst, immI0 zero, immI_1 one) %{
  match(Set dst (CMoveI (Binary cmp cr) (Binary one zero)));

  ins_cost(INSN_COST * 2);
  format %{ "csincw $dst, zr, zr $cmp\t# signed, int"  %}

  ins_encode %{
    // equivalently
    // cset(as_Register($dst$$reg),
    //      negate_condition((Assembler::Condition)$cmp$$cmpcode));
    __ csincw(as_Register($dst$$reg),
             zr,
             zr,
             (Assembler::Condition)$cmp$$cmpcode);
  %}

  ins_pipe(icond_none);
%}

// unsigned-compare flavour of cmovI_reg_zero_one
instruct cmovUI_reg_zero_one(cmpOpU cmp, rFlagsRegU cr, iRegINoSp dst, immI0 zero, immI_1 one) %{
  match(Set dst (CMoveI (Binary cmp cr) (Binary one zero)));

  ins_cost(INSN_COST * 2);
  format %{ "csincw $dst, zr, zr $cmp\t# unsigned, int"  %}

  ins_encode %{
    // equivalently
    // cset(as_Register($dst$$reg),
    //      negate_condition((Assembler::Condition)$cmp$$cmpcode));
    __ csincw(as_Register($dst$$reg),
             zr,
             zr,
             (Assembler::Condition)$cmp$$cmpcode);
  %}

  ins_pipe(icond_none);
%}
8736 
// Conditional move, long: dst = cmp ? src2 : src1 (csel selects its
// first source when the condition holds).
instruct cmovL_reg_reg(cmpOp cmp, rFlagsReg cr, iRegLNoSp dst, iRegL src1, iRegL src2) %{
  match(Set dst (CMoveL (Binary cmp cr) (Binary src1 src2)));

  ins_cost(INSN_COST * 2);
  format %{ "csel $dst, $src2, $src1 $cmp\t# signed, long"  %}

  ins_encode %{
    __ csel(as_Register($dst$$reg),
            as_Register($src2$$reg),
            as_Register($src1$$reg),
            (Assembler::Condition)$cmp$$cmpcode);
  %}

  ins_pipe(icond_reg_reg);
%}

// unsigned-compare flavour of cmovL_reg_reg
instruct cmovUL_reg_reg(cmpOpU cmp, rFlagsRegU cr, iRegLNoSp dst, iRegL src1, iRegL src2) %{
  match(Set dst (CMoveL (Binary cmp cr) (Binary src1 src2)));

  ins_cost(INSN_COST * 2);
  format %{ "csel $dst, $src2, $src1 $cmp\t# unsigned, long"  %}

  ins_encode %{
    __ csel(as_Register($dst$$reg),
            as_Register($src2$$reg),
            as_Register($src1$$reg),
            (Assembler::Condition)$cmp$$cmpcode);
  %}

  ins_pipe(icond_reg_reg);
%}
8768 
// special cases where one arg is zero

// dst = cmp ? 0 : src (zr used for the zero source)
instruct cmovL_reg_zero(cmpOp cmp, rFlagsReg cr, iRegLNoSp dst, iRegL src, immL0 zero) %{
  match(Set dst (CMoveL (Binary cmp cr) (Binary src zero)));

  ins_cost(INSN_COST * 2);
  format %{ "csel $dst, zr, $src $cmp\t# signed, long"  %}

  ins_encode %{
    __ csel(as_Register($dst$$reg),
            zr,
            as_Register($src$$reg),
            (Assembler::Condition)$cmp$$cmpcode);
  %}

  ins_pipe(icond_reg);
%}

// unsigned-compare flavour of cmovL_reg_zero
instruct cmovUL_reg_zero(cmpOpU cmp, rFlagsRegU cr, iRegLNoSp dst, iRegL src, immL0 zero) %{
  match(Set dst (CMoveL (Binary cmp cr) (Binary src zero)));

  ins_cost(INSN_COST * 2);
  format %{ "csel $dst, zr, $src $cmp\t# unsigned, long"  %}

  ins_encode %{
    __ csel(as_Register($dst$$reg),
            zr,
            as_Register($src$$reg),
            (Assembler::Condition)$cmp$$cmpcode);
  %}

  ins_pipe(icond_reg);
%}

// dst = cmp ? src : 0
instruct cmovL_zero_reg(cmpOp cmp, rFlagsReg cr, iRegLNoSp dst, immL0 zero, iRegL src) %{
  match(Set dst (CMoveL (Binary cmp cr) (Binary zero src)));

  ins_cost(INSN_COST * 2);
  format %{ "csel $dst, $src, zr $cmp\t# signed, long"  %}

  ins_encode %{
    __ csel(as_Register($dst$$reg),
            as_Register($src$$reg),
            zr,
            (Assembler::Condition)$cmp$$cmpcode);
  %}

  ins_pipe(icond_reg);
%}

// unsigned-compare flavour of cmovL_zero_reg
instruct cmovUL_zero_reg(cmpOpU cmp, rFlagsRegU cr, iRegLNoSp dst, immL0 zero, iRegL src) %{
  match(Set dst (CMoveL (Binary cmp cr) (Binary zero src)));

  ins_cost(INSN_COST * 2);
  format %{ "csel $dst, $src, zr $cmp\t# unsigned, long"  %}

  ins_encode %{
    __ csel(as_Register($dst$$reg),
            as_Register($src$$reg),
            zr,
            (Assembler::Condition)$cmp$$cmpcode);
  %}

  ins_pipe(icond_reg);
%}
8834 
// Conditional move, pointer: dst = cmp ? src2 : src1 (csel selects
// its first source when the condition holds).
instruct cmovP_reg_reg(cmpOp cmp, rFlagsReg cr, iRegPNoSp dst, iRegP src1, iRegP src2) %{
  match(Set dst (CMoveP (Binary cmp cr) (Binary src1 src2)));

  ins_cost(INSN_COST * 2);
  format %{ "csel $dst, $src2, $src1 $cmp\t# signed, ptr"  %}

  ins_encode %{
    __ csel(as_Register($dst$$reg),
            as_Register($src2$$reg),
            as_Register($src1$$reg),
            (Assembler::Condition)$cmp$$cmpcode);
  %}

  ins_pipe(icond_reg_reg);
%}

// unsigned-compare flavour of cmovP_reg_reg
instruct cmovUP_reg_reg(cmpOpU cmp, rFlagsRegU cr, iRegPNoSp dst, iRegP src1, iRegP src2) %{
  match(Set dst (CMoveP (Binary cmp cr) (Binary src1 src2)));

  ins_cost(INSN_COST * 2);
  format %{ "csel $dst, $src2, $src1 $cmp\t# unsigned, ptr"  %}

  ins_encode %{
    __ csel(as_Register($dst$$reg),
            as_Register($src2$$reg),
            as_Register($src1$$reg),
            (Assembler::Condition)$cmp$$cmpcode);
  %}

  ins_pipe(icond_reg_reg);
%}
8866 
8867 // special cases where one arg is zero
8868 
// Conditional move, pointer, signed compare, second value operand is the
// constant zero: csel dst, zr, src, cond.
instruct cmovP_reg_zero(cmpOp cmp, rFlagsReg cr, iRegPNoSp dst, iRegP src, immP0 zero) %{
  match(Set dst (CMoveP (Binary cmp cr) (Binary src zero)));

  ins_cost(INSN_COST * 2);
  format %{ "csel $dst, zr, $src $cmp\t# signed, ptr"  %}

  ins_encode %{
    __ csel(as_Register($dst$$reg),
            zr,
            as_Register($src$$reg),
            (Assembler::Condition)$cmp$$cmpcode);
  %}

  ins_pipe(icond_reg);
%}

// As above, unsigned compare flavour.
instruct cmovUP_reg_zero(cmpOpU cmp, rFlagsRegU cr, iRegPNoSp dst, iRegP src, immP0 zero) %{
  match(Set dst (CMoveP (Binary cmp cr) (Binary src zero)));

  ins_cost(INSN_COST * 2);
  format %{ "csel $dst, zr, $src $cmp\t# unsigned, ptr"  %}

  ins_encode %{
    __ csel(as_Register($dst$$reg),
            zr,
            as_Register($src$$reg),
            (Assembler::Condition)$cmp$$cmpcode);
  %}

  ins_pipe(icond_reg);
%}

// Conditional move, pointer, signed compare, first value operand is the
// constant zero: csel dst, src, zr, cond.
instruct cmovP_zero_reg(cmpOp cmp, rFlagsReg cr, iRegPNoSp dst, immP0 zero, iRegP src) %{
  match(Set dst (CMoveP (Binary cmp cr) (Binary zero src)));

  ins_cost(INSN_COST * 2);
  format %{ "csel $dst, $src, zr $cmp\t# signed, ptr"  %}

  ins_encode %{
    __ csel(as_Register($dst$$reg),
            as_Register($src$$reg),
            zr,
            (Assembler::Condition)$cmp$$cmpcode);
  %}

  ins_pipe(icond_reg);
%}

// As above, unsigned compare flavour.
instruct cmovUP_zero_reg(cmpOpU cmp, rFlagsRegU cr, iRegPNoSp dst, immP0 zero, iRegP src) %{
  match(Set dst (CMoveP (Binary cmp cr) (Binary zero src)));

  ins_cost(INSN_COST * 2);
  format %{ "csel $dst, $src, zr $cmp\t# unsigned, ptr"  %}

  ins_encode %{
    __ csel(as_Register($dst$$reg),
            as_Register($src$$reg),
            zr,
            (Assembler::Condition)$cmp$$cmpcode);
  %}

  ins_pipe(icond_reg);
%}
8932 
// Conditional move, compressed pointer (narrow oop), signed compare:
// 32-bit cselw dst, src2, src1, cond.
instruct cmovN_reg_reg(cmpOp cmp, rFlagsReg cr, iRegNNoSp dst, iRegN src1, iRegN src2) %{
  match(Set dst (CMoveN (Binary cmp cr) (Binary src1 src2)));

  ins_cost(INSN_COST * 2);
  format %{ "cselw $dst, $src2, $src1 $cmp\t# signed, compressed ptr"  %}

  ins_encode %{
    __ cselw(as_Register($dst$$reg),
             as_Register($src2$$reg),
             as_Register($src1$$reg),
             (Assembler::Condition)$cmp$$cmpcode);
  %}

  ins_pipe(icond_reg_reg);
%}
8948 
// Conditional move, compressed pointer (narrow oop), unsigned compare:
// 32-bit cselw dst, src2, src1, cond.
instruct cmovUN_reg_reg(cmpOpU cmp, rFlagsRegU cr, iRegNNoSp dst, iRegN src1, iRegN src2) %{
  match(Set dst (CMoveN (Binary cmp cr) (Binary src1 src2)));

  ins_cost(INSN_COST * 2);
  // This is the unsigned-compare rule (cmpOpU/rFlagsRegU); the disassembly
  // note previously said "signed" — fixed to match the sibling cmovUN rules.
  format %{ "cselw $dst, $src2, $src1 $cmp\t# unsigned, compressed ptr"  %}

  ins_encode %{
    __ cselw(as_Register($dst$$reg),
             as_Register($src2$$reg),
             as_Register($src1$$reg),
             (Assembler::Condition)$cmp$$cmpcode);
  %}

  ins_pipe(icond_reg_reg);
%}
8964 
8965 // special cases where one arg is zero
8966 
// Conditional move, compressed pointer, signed compare, second value operand
// is the constant zero: cselw dst, zr, src, cond.
instruct cmovN_reg_zero(cmpOp cmp, rFlagsReg cr, iRegNNoSp dst, iRegN src, immN0 zero) %{
  match(Set dst (CMoveN (Binary cmp cr) (Binary src zero)));

  ins_cost(INSN_COST * 2);
  format %{ "cselw $dst, zr, $src $cmp\t# signed, compressed ptr"  %}

  ins_encode %{
    __ cselw(as_Register($dst$$reg),
             zr,
             as_Register($src$$reg),
             (Assembler::Condition)$cmp$$cmpcode);
  %}

  ins_pipe(icond_reg);
%}

// As above, unsigned compare flavour.
instruct cmovUN_reg_zero(cmpOpU cmp, rFlagsRegU cr, iRegNNoSp dst, iRegN src, immN0 zero) %{
  match(Set dst (CMoveN (Binary cmp cr) (Binary src zero)));

  ins_cost(INSN_COST * 2);
  format %{ "cselw $dst, zr, $src $cmp\t# unsigned, compressed ptr"  %}

  ins_encode %{
    __ cselw(as_Register($dst$$reg),
             zr,
             as_Register($src$$reg),
             (Assembler::Condition)$cmp$$cmpcode);
  %}

  ins_pipe(icond_reg);
%}

// Conditional move, compressed pointer, signed compare, first value operand
// is the constant zero: cselw dst, src, zr, cond.
instruct cmovN_zero_reg(cmpOp cmp, rFlagsReg cr, iRegNNoSp dst, immN0 zero, iRegN src) %{
  match(Set dst (CMoveN (Binary cmp cr) (Binary zero src)));

  ins_cost(INSN_COST * 2);
  format %{ "cselw $dst, $src, zr $cmp\t# signed, compressed ptr"  %}

  ins_encode %{
    __ cselw(as_Register($dst$$reg),
             as_Register($src$$reg),
             zr,
             (Assembler::Condition)$cmp$$cmpcode);
  %}

  ins_pipe(icond_reg);
%}

// As above, unsigned compare flavour.
instruct cmovUN_zero_reg(cmpOpU cmp, rFlagsRegU cr, iRegNNoSp dst, immN0 zero, iRegN src) %{
  match(Set dst (CMoveN (Binary cmp cr) (Binary zero src)));

  ins_cost(INSN_COST * 2);
  format %{ "cselw $dst, $src, zr $cmp\t# unsigned, compressed ptr"  %}

  ins_encode %{
    __ cselw(as_Register($dst$$reg),
             as_Register($src$$reg),
             zr,
             (Assembler::Condition)$cmp$$cmpcode);
  %}

  ins_pipe(icond_reg);
%}
9030 
// Conditional move, float, signed compare: fcsels dst, src2, src1, cond.
instruct cmovF_reg(cmpOp cmp, rFlagsReg cr, vRegF dst, vRegF src1,  vRegF src2)
%{
  match(Set dst (CMoveF (Binary cmp cr) (Binary src1 src2)));

  ins_cost(INSN_COST * 3);

  format %{ "fcsels $dst, $src1, $src2, $cmp\t# signed cmove float\n\t" %}
  ins_encode %{
    Assembler::Condition cond = (Assembler::Condition)$cmp$$cmpcode;
    __ fcsels(as_FloatRegister($dst$$reg),
              as_FloatRegister($src2$$reg),
              as_FloatRegister($src1$$reg),
              cond);
  %}

  ins_pipe(fp_cond_reg_reg_s);
%}

// As above, unsigned compare flavour.
instruct cmovUF_reg(cmpOpU cmp, rFlagsRegU cr, vRegF dst, vRegF src1,  vRegF src2)
%{
  match(Set dst (CMoveF (Binary cmp cr) (Binary src1 src2)));

  ins_cost(INSN_COST * 3);

  format %{ "fcsels $dst, $src1, $src2, $cmp\t# unsigned cmove float\n\t" %}
  ins_encode %{
    Assembler::Condition cond = (Assembler::Condition)$cmp$$cmpcode;
    __ fcsels(as_FloatRegister($dst$$reg),
              as_FloatRegister($src2$$reg),
              as_FloatRegister($src1$$reg),
              cond);
  %}

  ins_pipe(fp_cond_reg_reg_s);
%}
9066 
// Conditional move, double, signed compare: fcseld dst, src2, src1, cond.
instruct cmovD_reg(cmpOp cmp, rFlagsReg cr, vRegD dst, vRegD src1,  vRegD src2)
%{
  match(Set dst (CMoveD (Binary cmp cr) (Binary src1 src2)));

  ins_cost(INSN_COST * 3);

  // Disassembly note previously said "cmove float"; this rule matches CMoveD
  // and emits fcseld, so it is a double cmove.
  format %{ "fcseld $dst, $src1, $src2, $cmp\t# signed cmove double\n\t" %}
  ins_encode %{
    Assembler::Condition cond = (Assembler::Condition)$cmp$$cmpcode;
    __ fcseld(as_FloatRegister($dst$$reg),
              as_FloatRegister($src2$$reg),
              as_FloatRegister($src1$$reg),
              cond);
  %}

  ins_pipe(fp_cond_reg_reg_d);
%}
9084 
// Conditional move, double, unsigned compare: fcseld dst, src2, src1, cond.
instruct cmovUD_reg(cmpOpU cmp, rFlagsRegU cr, vRegD dst, vRegD src1,  vRegD src2)
%{
  match(Set dst (CMoveD (Binary cmp cr) (Binary src1 src2)));

  ins_cost(INSN_COST * 3);

  // Disassembly note previously said "cmove float"; this rule matches CMoveD
  // and emits fcseld, so it is a double cmove.
  format %{ "fcseld $dst, $src1, $src2, $cmp\t# unsigned cmove double\n\t" %}
  ins_encode %{
    Assembler::Condition cond = (Assembler::Condition)$cmp$$cmpcode;
    __ fcseld(as_FloatRegister($dst$$reg),
              as_FloatRegister($src2$$reg),
              as_FloatRegister($src1$$reg),
              cond);
  %}

  ins_pipe(fp_cond_reg_reg_d);
%}
9102 
9103 // ============================================================================
9104 // Arithmetic Instructions
9105 //
9106 
9107 // Integer Addition
9108 
9109 // TODO
9110 // these currently employ operations which do not set CR and hence are
9111 // not flagged as killing CR but we would like to isolate the cases
9112 // where we want to set flags from those where we don't. need to work
9113 // out how to do that.
9114 
// Integer add, register-register: addw dst, src1, src2.
instruct addI_reg_reg(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2) %{
  match(Set dst (AddI src1 src2));

  ins_cost(INSN_COST);
  format %{ "addw  $dst, $src1, $src2" %}

  ins_encode %{
    __ addw(as_Register($dst$$reg),
            as_Register($src1$$reg),
            as_Register($src2$$reg));
  %}

  ins_pipe(ialu_reg_reg);
%}

// Integer add, register-immediate (shared addsubw encoder; opcode 0x0 = add).
instruct addI_reg_imm(iRegINoSp dst, iRegIorL2I src1, immIAddSub src2) %{
  match(Set dst (AddI src1 src2));

  ins_cost(INSN_COST);
  format %{ "addw $dst, $src1, $src2" %}

  // use opcode to indicate that this is an add not a sub
  opcode(0x0);

  ins_encode(aarch64_enc_addsubw_imm(dst, src1, src2));

  ins_pipe(ialu_reg_imm);
%}

// Integer add of a narrowed long (ConvL2I) and an immediate.
instruct addI_reg_imm_i2l(iRegINoSp dst, iRegL src1, immIAddSub src2) %{
  match(Set dst (AddI (ConvL2I src1) src2));

  ins_cost(INSN_COST);
  format %{ "addw $dst, $src1, $src2" %}

  // use opcode to indicate that this is an add not a sub
  opcode(0x0);

  ins_encode(aarch64_enc_addsubw_imm(dst, src1, src2));

  ins_pipe(ialu_reg_imm);
%}
9157 
9158 // Pointer Addition
// Pointer add, register-register: add dst, src1, src2.
instruct addP_reg_reg(iRegPNoSp dst, iRegP src1, iRegL src2) %{
  match(Set dst (AddP src1 src2));

  ins_cost(INSN_COST);
  format %{ "add $dst, $src1, $src2\t# ptr" %}

  ins_encode %{
    __ add(as_Register($dst$$reg),
           as_Register($src1$$reg),
           as_Register($src2$$reg));
  %}

  ins_pipe(ialu_reg_reg);
%}

// Pointer add with a sign-extended (sxtw) int index, folding the ConvI2L.
instruct addP_reg_reg_ext(iRegPNoSp dst, iRegP src1, iRegIorL2I src2) %{
  match(Set dst (AddP src1 (ConvI2L src2)));

  ins_cost(1.9 * INSN_COST);
  format %{ "add $dst, $src1, $src2, sxtw\t# ptr" %}

  ins_encode %{
    __ add(as_Register($dst$$reg),
           as_Register($src1$$reg),
           as_Register($src2$$reg), ext::sxtw);
  %}

  ins_pipe(ialu_reg_reg);
%}

// Pointer add with a left-shifted long index, emitted as an lea with an
// lsl-scaled address operand.
instruct addP_reg_reg_lsl(iRegPNoSp dst, iRegP src1, iRegL src2, immIScale scale) %{
  match(Set dst (AddP src1 (LShiftL src2 scale)));

  ins_cost(1.9 * INSN_COST);
  format %{ "add $dst, $src1, $src2, LShiftL $scale\t# ptr" %}

  ins_encode %{
    __ lea(as_Register($dst$$reg),
           Address(as_Register($src1$$reg), as_Register($src2$$reg),
                   Address::lsl($scale$$constant)));
  %}

  ins_pipe(ialu_reg_reg_shift);
%}

// Pointer add with a sign-extended, shifted int index (ConvI2L then LShiftL),
// emitted as an lea with an sxtw-scaled address operand.
instruct addP_reg_reg_ext_shift(iRegPNoSp dst, iRegP src1, iRegIorL2I src2, immIScale scale) %{
  match(Set dst (AddP src1 (LShiftL (ConvI2L src2) scale)));

  ins_cost(1.9 * INSN_COST);
  format %{ "add $dst, $src1, $src2, I2L $scale\t# ptr" %}

  ins_encode %{
    __ lea(as_Register($dst$$reg),
           Address(as_Register($src1$$reg), as_Register($src2$$reg),
                   Address::sxtw($scale$$constant)));
  %}

  ins_pipe(ialu_reg_reg_shift);
%}

// Left shift of a sign-extended int, folded into a single sbfiz; the field
// width is capped at 32 bits (MIN(32, ...)) since the source is an int.
instruct lshift_ext(iRegLNoSp dst, iRegIorL2I src, immI scale, rFlagsReg cr) %{
  match(Set dst (LShiftL (ConvI2L src) scale));

  ins_cost(INSN_COST);
  format %{ "sbfiz $dst, $src, $scale & 63, -$scale & 63\t" %}

  ins_encode %{
    __ sbfiz(as_Register($dst$$reg),
          as_Register($src$$reg),
          $scale$$constant & 63, MIN(32, (-$scale$$constant) & 63));
  %}

  ins_pipe(ialu_reg_shift);
%}
9233 
9234 // Pointer Immediate Addition
9235 // n.b. this needs to be more expensive than using an indirect memory
9236 // operand
// Pointer add, register-immediate (shared addsub encoder; opcode 0x0 = add).
instruct addP_reg_imm(iRegPNoSp dst, iRegP src1, immLAddSub src2) %{
  match(Set dst (AddP src1 src2));

  ins_cost(INSN_COST);
  format %{ "add $dst, $src1, $src2\t# ptr" %}

  // use opcode to indicate that this is an add not a sub
  opcode(0x0);

  ins_encode( aarch64_enc_addsub_imm(dst, src1, src2) );

  ins_pipe(ialu_reg_imm);
%}
9250 
9251 // Long Addition
// Long add, register-register: add dst, src1, src2.
instruct addL_reg_reg(iRegLNoSp dst, iRegL src1, iRegL src2) %{

  match(Set dst (AddL src1 src2));

  ins_cost(INSN_COST);
  format %{ "add  $dst, $src1, $src2" %}

  ins_encode %{
    __ add(as_Register($dst$$reg),
           as_Register($src1$$reg),
           as_Register($src2$$reg));
  %}

  ins_pipe(ialu_reg_reg);
%}
9267 
// Long Immediate Addition. No constant pool entries required.
// Long add, register-immediate (shared addsub encoder; opcode 0x0 = add).
instruct addL_reg_imm(iRegLNoSp dst, iRegL src1, immLAddSub src2) %{
  match(Set dst (AddL src1 src2));

  ins_cost(INSN_COST);
  format %{ "add $dst, $src1, $src2" %}

  // use opcode to indicate that this is an add not a sub
  opcode(0x0);

  ins_encode( aarch64_enc_addsub_imm(dst, src1, src2) );

  ins_pipe(ialu_reg_imm);
%}
9282 
9283 // Integer Subtraction
// Integer subtract, register-register: subw dst, src1, src2.
instruct subI_reg_reg(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2) %{
  match(Set dst (SubI src1 src2));

  ins_cost(INSN_COST);
  format %{ "subw  $dst, $src1, $src2" %}

  ins_encode %{
    __ subw(as_Register($dst$$reg),
            as_Register($src1$$reg),
            as_Register($src2$$reg));
  %}

  ins_pipe(ialu_reg_reg);
%}

// Integer subtract, register-immediate (shared addsubw encoder; opcode 0x1 = sub).
instruct subI_reg_imm(iRegINoSp dst, iRegIorL2I src1, immIAddSub src2) %{
  match(Set dst (SubI src1 src2));

  ins_cost(INSN_COST);
  format %{ "subw $dst, $src1, $src2" %}

  // use opcode to indicate that this is a sub not an add
  opcode(0x1);

  ins_encode(aarch64_enc_addsubw_imm(dst, src1, src2));

  ins_pipe(ialu_reg_imm);
%}
9313 
9314 // Long Subtraction
// Long subtract, register-register: sub dst, src1, src2.
instruct subL_reg_reg(iRegLNoSp dst, iRegL src1, iRegL src2) %{

  match(Set dst (SubL src1 src2));

  ins_cost(INSN_COST);
  format %{ "sub  $dst, $src1, $src2" %}

  ins_encode %{
    __ sub(as_Register($dst$$reg),
           as_Register($src1$$reg),
           as_Register($src2$$reg));
  %}

  ins_pipe(ialu_reg_reg);
%}
9330 
// Long Immediate Subtraction. No constant pool entries required.
// Long subtract, register-immediate (shared addsub encoder; opcode 0x1 = sub).
instruct subL_reg_imm(iRegLNoSp dst, iRegL src1, immLAddSub src2) %{
  match(Set dst (SubL src1 src2));

  ins_cost(INSN_COST);
  // Fixed format string: was "sub$dst" (missing space after the mnemonic).
  format %{ "sub $dst, $src1, $src2" %}

  // use opcode to indicate that this is a sub not an add
  opcode(0x1);

  ins_encode( aarch64_enc_addsub_imm(dst, src1, src2) );

  ins_pipe(ialu_reg_imm);
%}
9345 
9346 // Integer Negation (special case for sub)
9347 
// Integer negation (SubI of zero): negw dst, src.
instruct negI_reg(iRegINoSp dst, iRegIorL2I src, immI0 zero, rFlagsReg cr) %{
  match(Set dst (SubI zero src));

  ins_cost(INSN_COST);
  format %{ "negw $dst, $src\t# int" %}

  ins_encode %{
    __ negw(as_Register($dst$$reg),
            as_Register($src$$reg));
  %}

  ins_pipe(ialu_reg);
%}

// Long negation (SubL of zero): neg dst, src.
instruct negL_reg(iRegLNoSp dst, iRegL src, immL0 zero, rFlagsReg cr) %{
  match(Set dst (SubL zero src));

  ins_cost(INSN_COST);
  format %{ "neg $dst, $src\t# long" %}

  ins_encode %{
    __ neg(as_Register($dst$$reg),
           as_Register($src$$reg));
  %}

  ins_pipe(ialu_reg);
%}
9377 
9378 // Integer Multiply
9379 
// Integer multiply: mulw dst, src1, src2.
instruct mulI(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2) %{
  match(Set dst (MulI src1 src2));

  ins_cost(INSN_COST * 3);
  format %{ "mulw  $dst, $src1, $src2" %}

  ins_encode %{
    __ mulw(as_Register($dst$$reg),
            as_Register($src1$$reg),
            as_Register($src2$$reg));
  %}

  ins_pipe(imul_reg_reg);
%}

// Long multiply of two sign-extended ints, folded into a single smull.
instruct smulI(iRegLNoSp dst, iRegIorL2I src1, iRegIorL2I src2) %{
  match(Set dst (MulL (ConvI2L src1) (ConvI2L src2)));

  ins_cost(INSN_COST * 3);
  format %{ "smull  $dst, $src1, $src2" %}

  ins_encode %{
    __ smull(as_Register($dst$$reg),
             as_Register($src1$$reg),
             as_Register($src2$$reg));
  %}

  ins_pipe(imul_reg_reg);
%}
9409 
9410 // Long Multiply
9411 
// Long multiply: mul dst, src1, src2.
instruct mulL(iRegLNoSp dst, iRegL src1, iRegL src2) %{
  match(Set dst (MulL src1 src2));

  ins_cost(INSN_COST * 5);
  format %{ "mul  $dst, $src1, $src2" %}

  ins_encode %{
    __ mul(as_Register($dst$$reg),
           as_Register($src1$$reg),
           as_Register($src2$$reg));
  %}

  ins_pipe(lmul_reg_reg);
%}
9426 
// High 64 bits of a signed 64x64-bit multiply: smulh dst, src1, src2.
instruct mulHiL_rReg(iRegLNoSp dst, iRegL src1, iRegL src2, rFlagsReg cr)
%{
  match(Set dst (MulHiL src1 src2));

  ins_cost(INSN_COST * 7);
  // Fixed format string: removed the stray ", " before the tab.
  format %{ "smulh   $dst, $src1, $src2\t# mulhi" %}

  ins_encode %{
    __ smulh(as_Register($dst$$reg),
             as_Register($src1$$reg),
             as_Register($src2$$reg));
  %}

  ins_pipe(lmul_reg_reg);
%}
9442 
9443 // Combined Integer Multiply & Add/Sub
9444 
// Combined integer multiply-add: maddw dst, src1, src2, src3
// (dst = src3 + src1 * src2).
instruct maddI(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2, iRegIorL2I src3) %{
  match(Set dst (AddI src3 (MulI src1 src2)));

  ins_cost(INSN_COST * 3);
  // Fixed format string: the encoding emits the 32-bit maddw, not madd.
  format %{ "maddw  $dst, $src1, $src2, $src3" %}

  ins_encode %{
    __ maddw(as_Register($dst$$reg),
             as_Register($src1$$reg),
             as_Register($src2$$reg),
             as_Register($src3$$reg));
  %}

  ins_pipe(imac_reg_reg);
%}
9460 
// Combined integer multiply-subtract: msubw dst, src1, src2, src3
// (dst = src3 - src1 * src2).
instruct msubI(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2, iRegIorL2I src3) %{
  match(Set dst (SubI src3 (MulI src1 src2)));

  ins_cost(INSN_COST * 3);
  // Fixed format string: the encoding emits the 32-bit msubw, not msub.
  format %{ "msubw  $dst, $src1, $src2, $src3" %}

  ins_encode %{
    __ msubw(as_Register($dst$$reg),
             as_Register($src1$$reg),
             as_Register($src2$$reg),
             as_Register($src3$$reg));
  %}

  ins_pipe(imac_reg_reg);
%}
9476 
9477 // Combined Long Multiply & Add/Sub
9478 
// Combined long multiply-add: madd dst, src1, src2, src3
// (dst = src3 + src1 * src2).
instruct maddL(iRegLNoSp dst, iRegL src1, iRegL src2, iRegL src3) %{
  match(Set dst (AddL src3 (MulL src1 src2)));

  ins_cost(INSN_COST * 5);
  format %{ "madd  $dst, $src1, $src2, $src3" %}

  ins_encode %{
    __ madd(as_Register($dst$$reg),
            as_Register($src1$$reg),
            as_Register($src2$$reg),
            as_Register($src3$$reg));
  %}

  ins_pipe(lmac_reg_reg);
%}

// Combined long multiply-subtract: msub dst, src1, src2, src3
// (dst = src3 - src1 * src2).
instruct msubL(iRegLNoSp dst, iRegL src1, iRegL src2, iRegL src3) %{
  match(Set dst (SubL src3 (MulL src1 src2)));

  ins_cost(INSN_COST * 5);
  format %{ "msub  $dst, $src1, $src2, $src3" %}

  ins_encode %{
    __ msub(as_Register($dst$$reg),
            as_Register($src1$$reg),
            as_Register($src2$$reg),
            as_Register($src3$$reg));
  %}

  ins_pipe(lmac_reg_reg);
%}
9510 
9511 // Integer Divide
9512 
// Integer divide via the shared divw encoder.
instruct divI(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2) %{
  match(Set dst (DivI src1 src2));

  ins_cost(INSN_COST * 19);
  format %{ "sdivw  $dst, $src1, $src2" %}

  ins_encode(aarch64_enc_divw(dst, src1, src2));
  ins_pipe(idiv_reg_reg);
%}

// (src1 >> 31) >>> 31 collapsed to a single logical shift right by 31,
// extracting the sign bit as 0 or 1.
instruct signExtract(iRegINoSp dst, iRegIorL2I src1, immI_31 div1, immI_31 div2) %{
  match(Set dst (URShiftI (RShiftI src1 div1) div2));
  ins_cost(INSN_COST);
  format %{ "lsrw $dst, $src1, $div1" %}
  ins_encode %{
    __ lsrw(as_Register($dst$$reg), as_Register($src1$$reg), 31);
  %}
  ins_pipe(ialu_reg_shift);
%}

// src + ((src >> 31) >>> 31) collapsed to addw with a shifted operand:
// addw dst, src, src, LSR 31.
instruct div2Round(iRegINoSp dst, iRegIorL2I src, immI_31 div1, immI_31 div2) %{
  match(Set dst (AddI src (URShiftI (RShiftI src div1) div2)));
  ins_cost(INSN_COST);
  format %{ "addw $dst, $src, LSR $div1" %}

  ins_encode %{
    __ addw(as_Register($dst$$reg),
              as_Register($src$$reg),
              as_Register($src$$reg),
              Assembler::LSR, 31);
  %}
  ins_pipe(ialu_reg);
%}
9546 
9547 // Long Divide
9548 
// Long divide via the shared div encoder.
instruct divL(iRegLNoSp dst, iRegL src1, iRegL src2) %{
  match(Set dst (DivL src1 src2));

  ins_cost(INSN_COST * 35);
  format %{ "sdiv   $dst, $src1, $src2" %}

  ins_encode(aarch64_enc_div(dst, src1, src2));
  ins_pipe(ldiv_reg_reg);
%}

// (src1 >> 63) >>> 63 collapsed to a single logical shift right by 63,
// extracting the sign bit as 0 or 1.
instruct signExtractL(iRegLNoSp dst, iRegL src1, immI_63 div1, immI_63 div2) %{
  match(Set dst (URShiftL (RShiftL src1 div1) div2));
  ins_cost(INSN_COST);
  format %{ "lsr $dst, $src1, $div1" %}
  ins_encode %{
    __ lsr(as_Register($dst$$reg), as_Register($src1$$reg), 63);
  %}
  ins_pipe(ialu_reg_shift);
%}
9568 
// src + ((src >> 63) >>> 63) collapsed to add with a shifted operand:
// add dst, src, src, LSR 63 (long counterpart of div2Round).
instruct div2RoundL(iRegLNoSp dst, iRegL src, immI_63 div1, immI_63 div2) %{
  match(Set dst (AddL src (URShiftL (RShiftL src div1) div2)));
  ins_cost(INSN_COST);
  // Fixed format string: added the missing "LSR" so the note matches the
  // shifted-register add actually emitted (cf. div2Round).
  format %{ "add $dst, $src, LSR $div1" %}

  ins_encode %{
    __ add(as_Register($dst$$reg),
              as_Register($src$$reg),
              as_Register($src$$reg),
              Assembler::LSR, 63);
  %}
  ins_pipe(ialu_reg);
%}
9582 
9583 // Integer Remainder
9584 
// Integer remainder via the shared modw encoder (sdivw + msubw sequence).
instruct modI(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2) %{
  match(Set dst (ModI src1 src2));

  ins_cost(INSN_COST * 22);
  // Fixed format string: the second line was garbled ("msubw($dst, ..." with
  // a stray, unbalanced parenthesis).
  format %{ "sdivw  rscratch1, $src1, $src2\n\t"
            "msubw  $dst, rscratch1, $src2, $src1" %}

  ins_encode(aarch64_enc_modw(dst, src1, src2));
  ins_pipe(idiv_reg_reg);
%}
9595 
9596 // Long Remainder
9597 
// Long remainder via the shared mod encoder (sdiv + msub sequence).
instruct modL(iRegLNoSp dst, iRegL src1, iRegL src2) %{
  match(Set dst (ModL src1 src2));

  ins_cost(INSN_COST * 38);
  // Fixed format string: the second line was garbled ("msub($dst, ..." with a
  // stray, unbalanced parenthesis) and the first line lacked the "\t" after
  // "\n" used by the parallel modI rule.
  format %{ "sdiv   rscratch1, $src1, $src2\n\t"
            "msub   $dst, rscratch1, $src2, $src1" %}

  ins_encode(aarch64_enc_mod(dst, src1, src2));
  ins_pipe(ldiv_reg_reg);
%}
9608 
9609 // Integer Shifts
9610 
9611 // Shift Left Register
// Integer shift left, variable amount: lslvw dst, src1, src2.
instruct lShiftI_reg_reg(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2) %{
  match(Set dst (LShiftI src1 src2));

  ins_cost(INSN_COST * 2);
  format %{ "lslvw  $dst, $src1, $src2" %}

  ins_encode %{
    __ lslvw(as_Register($dst$$reg),
             as_Register($src1$$reg),
             as_Register($src2$$reg));
  %}

  ins_pipe(ialu_reg_reg_vshift);
%}

// Shift Left Immediate
// Shift amount is masked to 5 bits ($src2 & 0x1f) for the 32-bit shift.
instruct lShiftI_reg_imm(iRegINoSp dst, iRegIorL2I src1, immI src2) %{
  match(Set dst (LShiftI src1 src2));

  ins_cost(INSN_COST);
  format %{ "lslw $dst, $src1, ($src2 & 0x1f)" %}

  ins_encode %{
    __ lslw(as_Register($dst$$reg),
            as_Register($src1$$reg),
            $src2$$constant & 0x1f);
  %}

  ins_pipe(ialu_reg_shift);
%}

// Shift Right Logical Register
instruct urShiftI_reg_reg(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2) %{
  match(Set dst (URShiftI src1 src2));

  ins_cost(INSN_COST * 2);
  format %{ "lsrvw  $dst, $src1, $src2" %}

  ins_encode %{
    __ lsrvw(as_Register($dst$$reg),
             as_Register($src1$$reg),
             as_Register($src2$$reg));
  %}

  ins_pipe(ialu_reg_reg_vshift);
%}

// Shift Right Logical Immediate
// Shift amount is masked to 5 bits ($src2 & 0x1f) for the 32-bit shift.
instruct urShiftI_reg_imm(iRegINoSp dst, iRegIorL2I src1, immI src2) %{
  match(Set dst (URShiftI src1 src2));

  ins_cost(INSN_COST);
  format %{ "lsrw $dst, $src1, ($src2 & 0x1f)" %}

  ins_encode %{
    __ lsrw(as_Register($dst$$reg),
            as_Register($src1$$reg),
            $src2$$constant & 0x1f);
  %}

  ins_pipe(ialu_reg_shift);
%}

// Shift Right Arithmetic Register
instruct rShiftI_reg_reg(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2) %{
  match(Set dst (RShiftI src1 src2));

  ins_cost(INSN_COST * 2);
  format %{ "asrvw  $dst, $src1, $src2" %}

  ins_encode %{
    __ asrvw(as_Register($dst$$reg),
             as_Register($src1$$reg),
             as_Register($src2$$reg));
  %}

  ins_pipe(ialu_reg_reg_vshift);
%}

// Shift Right Arithmetic Immediate
// Shift amount is masked to 5 bits ($src2 & 0x1f) for the 32-bit shift.
instruct rShiftI_reg_imm(iRegINoSp dst, iRegIorL2I src1, immI src2) %{
  match(Set dst (RShiftI src1 src2));

  ins_cost(INSN_COST);
  format %{ "asrw $dst, $src1, ($src2 & 0x1f)" %}

  ins_encode %{
    __ asrw(as_Register($dst$$reg),
            as_Register($src1$$reg),
            $src2$$constant & 0x1f);
  %}

  ins_pipe(ialu_reg_shift);
%}
9706 
9707 // Combined Int Mask and Right Shift (using UBFM)
9708 // TODO
9709 
9710 // Long Shifts
9711 
9712 // Shift Left Register
// Long shift left, variable amount: lslv dst, src1, src2.
instruct lShiftL_reg_reg(iRegLNoSp dst, iRegL src1, iRegIorL2I src2) %{
  match(Set dst (LShiftL src1 src2));

  ins_cost(INSN_COST * 2);
  format %{ "lslv  $dst, $src1, $src2" %}

  ins_encode %{
    __ lslv(as_Register($dst$$reg),
            as_Register($src1$$reg),
            as_Register($src2$$reg));
  %}

  ins_pipe(ialu_reg_reg_vshift);
%}

// Shift Left Immediate
// Shift amount is masked to 6 bits ($src2 & 0x3f) for the 64-bit shift.
instruct lShiftL_reg_imm(iRegLNoSp dst, iRegL src1, immI src2) %{
  match(Set dst (LShiftL src1 src2));

  ins_cost(INSN_COST);
  format %{ "lsl $dst, $src1, ($src2 & 0x3f)" %}

  ins_encode %{
    __ lsl(as_Register($dst$$reg),
            as_Register($src1$$reg),
            $src2$$constant & 0x3f);
  %}

  ins_pipe(ialu_reg_shift);
%}

// Shift Right Logical Register
instruct urShiftL_reg_reg(iRegLNoSp dst, iRegL src1, iRegIorL2I src2) %{
  match(Set dst (URShiftL src1 src2));

  ins_cost(INSN_COST * 2);
  format %{ "lsrv  $dst, $src1, $src2" %}

  ins_encode %{
    __ lsrv(as_Register($dst$$reg),
            as_Register($src1$$reg),
            as_Register($src2$$reg));
  %}

  ins_pipe(ialu_reg_reg_vshift);
%}

// Shift Right Logical Immediate
// Shift amount is masked to 6 bits ($src2 & 0x3f) for the 64-bit shift.
instruct urShiftL_reg_imm(iRegLNoSp dst, iRegL src1, immI src2) %{
  match(Set dst (URShiftL src1 src2));

  ins_cost(INSN_COST);
  format %{ "lsr $dst, $src1, ($src2 & 0x3f)" %}

  ins_encode %{
    __ lsr(as_Register($dst$$reg),
           as_Register($src1$$reg),
           $src2$$constant & 0x3f);
  %}

  ins_pipe(ialu_reg_shift);
%}

// A special-case pattern for card table stores.
// Matches a logical right shift applied to a pointer reinterpreted as an
// integer (CastP2X).
instruct urShiftP_reg_imm(iRegLNoSp dst, iRegP src1, immI src2) %{
  match(Set dst (URShiftL (CastP2X src1) src2));

  ins_cost(INSN_COST);
  format %{ "lsr $dst, p2x($src1), ($src2 & 0x3f)" %}

  ins_encode %{
    __ lsr(as_Register($dst$$reg),
           as_Register($src1$$reg),
           $src2$$constant & 0x3f);
  %}

  ins_pipe(ialu_reg_shift);
%}

// Shift Right Arithmetic Register
instruct rShiftL_reg_reg(iRegLNoSp dst, iRegL src1, iRegIorL2I src2) %{
  match(Set dst (RShiftL src1 src2));

  ins_cost(INSN_COST * 2);
  format %{ "asrv  $dst, $src1, $src2" %}

  ins_encode %{
    __ asrv(as_Register($dst$$reg),
            as_Register($src1$$reg),
            as_Register($src2$$reg));
  %}

  ins_pipe(ialu_reg_reg_vshift);
%}

// Shift Right Arithmetic Immediate
// Shift amount is masked to 6 bits ($src2 & 0x3f) for the 64-bit shift.
instruct rShiftL_reg_imm(iRegLNoSp dst, iRegL src1, immI src2) %{
  match(Set dst (RShiftL src1 src2));

  ins_cost(INSN_COST);
  format %{ "asr $dst, $src1, ($src2 & 0x3f)" %}

  ins_encode %{
    __ asr(as_Register($dst$$reg),
           as_Register($src1$$reg),
           $src2$$constant & 0x3f);
  %}

  ins_pipe(ialu_reg_shift);
%}
9823 
9824 // BEGIN This section of the file is automatically generated. Do not edit --------------
9825 
9826 instruct regL_not_reg(iRegLNoSp dst,
9827                          iRegL src1, immL_M1 m1,
9828                          rFlagsReg cr) %{
9829   match(Set dst (XorL src1 m1));
9830   ins_cost(INSN_COST);
9831   format %{ "eon  $dst, $src1, zr" %}
9832 
9833   ins_encode %{
9834     __ eon(as_Register($dst$$reg),
9835               as_Register($src1$$reg),
9836               zr,
9837               Assembler::LSL, 0);
9838   %}
9839 
9840   ins_pipe(ialu_reg);
9841 %}
9842 instruct regI_not_reg(iRegINoSp dst,
9843                          iRegIorL2I src1, immI_M1 m1,
9844                          rFlagsReg cr) %{
9845   match(Set dst (XorI src1 m1));
9846   ins_cost(INSN_COST);
9847   format %{ "eonw  $dst, $src1, zr" %}
9848 
9849   ins_encode %{
9850     __ eonw(as_Register($dst$$reg),
9851               as_Register($src1$$reg),
9852               zr,
9853               Assembler::LSL, 0);
9854   %}
9855 
9856   ins_pipe(ialu_reg);
9857 %}
9858 
9859 instruct AndI_reg_not_reg(iRegINoSp dst,
9860                          iRegIorL2I src1, iRegIorL2I src2, immI_M1 m1,
9861                          rFlagsReg cr) %{
9862   match(Set dst (AndI src1 (XorI src2 m1)));
9863   ins_cost(INSN_COST);
9864   format %{ "bicw  $dst, $src1, $src2" %}
9865 
9866   ins_encode %{
9867     __ bicw(as_Register($dst$$reg),
9868               as_Register($src1$$reg),
9869               as_Register($src2$$reg),
9870               Assembler::LSL, 0);
9871   %}
9872 
9873   ins_pipe(ialu_reg_reg);
9874 %}
9875 
9876 instruct AndL_reg_not_reg(iRegLNoSp dst,
9877                          iRegL src1, iRegL src2, immL_M1 m1,
9878                          rFlagsReg cr) %{
9879   match(Set dst (AndL src1 (XorL src2 m1)));
9880   ins_cost(INSN_COST);
9881   format %{ "bic  $dst, $src1, $src2" %}
9882 
9883   ins_encode %{
9884     __ bic(as_Register($dst$$reg),
9885               as_Register($src1$$reg),
9886               as_Register($src2$$reg),
9887               Assembler::LSL, 0);
9888   %}
9889 
9890   ins_pipe(ialu_reg_reg);
9891 %}
9892 
9893 instruct OrI_reg_not_reg(iRegINoSp dst,
9894                          iRegIorL2I src1, iRegIorL2I src2, immI_M1 m1,
9895                          rFlagsReg cr) %{
9896   match(Set dst (OrI src1 (XorI src2 m1)));
9897   ins_cost(INSN_COST);
9898   format %{ "ornw  $dst, $src1, $src2" %}
9899 
9900   ins_encode %{
9901     __ ornw(as_Register($dst$$reg),
9902               as_Register($src1$$reg),
9903               as_Register($src2$$reg),
9904               Assembler::LSL, 0);
9905   %}
9906 
9907   ins_pipe(ialu_reg_reg);
9908 %}
9909 
// dst = src1 | ~src2 (64-bit).  Folds the (XorL src2 -1) bitwise-not into a
// single ORN (or-not).
instruct OrL_reg_not_reg(iRegLNoSp dst,
                         iRegL src1, iRegL src2, immL_M1 m1,
                         rFlagsReg cr) %{
  match(Set dst (OrL src1 (XorL src2 m1)));
  ins_cost(INSN_COST);
  format %{ "orn  $dst, $src1, $src2" %}

  ins_encode %{
    __ orn(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSL, 0);
  %}

  ins_pipe(ialu_reg_reg);
%}
9926 
// dst = ~(src1 ^ src2) (32-bit).  The ideal tree is -1 ^ (src2 ^ src1);
// xoring with -1 is bitwise-not, so the whole thing collapses to one EONW
// (exclusive-or-not, word form).
instruct XorI_reg_not_reg(iRegINoSp dst,
                         iRegIorL2I src1, iRegIorL2I src2, immI_M1 m1,
                         rFlagsReg cr) %{
  match(Set dst (XorI m1 (XorI src2 src1)));
  ins_cost(INSN_COST);
  format %{ "eonw  $dst, $src1, $src2" %}

  ins_encode %{
    __ eonw(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSL, 0);
  %}

  ins_pipe(ialu_reg_reg);
%}
9943 
// dst = ~(src1 ^ src2) (64-bit).  The ideal tree is -1 ^ (src2 ^ src1),
// collapsed to one EON (exclusive-or-not).
instruct XorL_reg_not_reg(iRegLNoSp dst,
                         iRegL src1, iRegL src2, immL_M1 m1,
                         rFlagsReg cr) %{
  match(Set dst (XorL m1 (XorL src2 src1)));
  ins_cost(INSN_COST);
  format %{ "eon  $dst, $src1, $src2" %}

  ins_encode %{
    __ eon(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSL, 0);
  %}

  ins_pipe(ialu_reg_reg);
%}
9960 
// dst = src1 & ~(src2 >>> src3) (32-bit): the not-of-shifted operand is
// folded into BICW's LSR shifted-register form.  Shift count is masked to
// 0x1f — the valid range for a w-register shift.
instruct AndI_reg_URShift_not_reg(iRegINoSp dst,
                         iRegIorL2I src1, iRegIorL2I src2,
                         immI src3, immI_M1 src4, rFlagsReg cr) %{
  match(Set dst (AndI src1 (XorI(URShiftI src2 src3) src4)));
  ins_cost(1.9 * INSN_COST);
  format %{ "bicw  $dst, $src1, $src2, LSR $src3" %}

  ins_encode %{
    __ bicw(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSR,
              $src3$$constant & 0x1f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
9978 
// dst = src1 & ~(src2 >>> src3) (64-bit): BIC with an LSR shifted-register
// operand.  Shift count masked to 0x3f — the valid range for an x-register.
instruct AndL_reg_URShift_not_reg(iRegLNoSp dst,
                         iRegL src1, iRegL src2,
                         immI src3, immL_M1 src4, rFlagsReg cr) %{
  match(Set dst (AndL src1 (XorL(URShiftL src2 src3) src4)));
  ins_cost(1.9 * INSN_COST);
  format %{ "bic  $dst, $src1, $src2, LSR $src3" %}

  ins_encode %{
    __ bic(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSR,
              $src3$$constant & 0x3f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
9996 
// dst = src1 & ~(src2 >> src3) (32-bit, arithmetic shift): BICW with an ASR
// shifted-register operand.
instruct AndI_reg_RShift_not_reg(iRegINoSp dst,
                         iRegIorL2I src1, iRegIorL2I src2,
                         immI src3, immI_M1 src4, rFlagsReg cr) %{
  match(Set dst (AndI src1 (XorI(RShiftI src2 src3) src4)));
  ins_cost(1.9 * INSN_COST);
  format %{ "bicw  $dst, $src1, $src2, ASR $src3" %}

  ins_encode %{
    __ bicw(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::ASR,
              $src3$$constant & 0x1f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
10014 
// dst = src1 & ~(src2 >> src3) (64-bit, arithmetic shift): BIC with an ASR
// shifted-register operand.
instruct AndL_reg_RShift_not_reg(iRegLNoSp dst,
                         iRegL src1, iRegL src2,
                         immI src3, immL_M1 src4, rFlagsReg cr) %{
  match(Set dst (AndL src1 (XorL(RShiftL src2 src3) src4)));
  ins_cost(1.9 * INSN_COST);
  format %{ "bic  $dst, $src1, $src2, ASR $src3" %}

  ins_encode %{
    __ bic(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::ASR,
              $src3$$constant & 0x3f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
10032 
// dst = src1 & ~(src2 << src3) (32-bit): BICW with an LSL shifted-register
// operand.
instruct AndI_reg_LShift_not_reg(iRegINoSp dst,
                         iRegIorL2I src1, iRegIorL2I src2,
                         immI src3, immI_M1 src4, rFlagsReg cr) %{
  match(Set dst (AndI src1 (XorI(LShiftI src2 src3) src4)));
  ins_cost(1.9 * INSN_COST);
  format %{ "bicw  $dst, $src1, $src2, LSL $src3" %}

  ins_encode %{
    __ bicw(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSL,
              $src3$$constant & 0x1f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
10050 
// dst = src1 & ~(src2 << src3) (64-bit): BIC with an LSL shifted-register
// operand.
instruct AndL_reg_LShift_not_reg(iRegLNoSp dst,
                         iRegL src1, iRegL src2,
                         immI src3, immL_M1 src4, rFlagsReg cr) %{
  match(Set dst (AndL src1 (XorL(LShiftL src2 src3) src4)));
  ins_cost(1.9 * INSN_COST);
  format %{ "bic  $dst, $src1, $src2, LSL $src3" %}

  ins_encode %{
    __ bic(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSL,
              $src3$$constant & 0x3f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
10068 
// dst = ~(src1 ^ (src2 >>> src3)) (32-bit).  Ideal tree is
// -1 ^ ((src2 >>> src3) ^ src1); the outer xor-with-minus-one is the not,
// so everything folds into one EONW with an LSR shifted-register operand.
instruct XorI_reg_URShift_not_reg(iRegINoSp dst,
                         iRegIorL2I src1, iRegIorL2I src2,
                         immI src3, immI_M1 src4, rFlagsReg cr) %{
  match(Set dst (XorI src4 (XorI(URShiftI src2 src3) src1)));
  ins_cost(1.9 * INSN_COST);
  format %{ "eonw  $dst, $src1, $src2, LSR $src3" %}

  ins_encode %{
    __ eonw(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSR,
              $src3$$constant & 0x1f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
10086 
// dst = ~(src1 ^ (src2 >>> src3)) (64-bit): EON with an LSR shifted-register
// operand.
instruct XorL_reg_URShift_not_reg(iRegLNoSp dst,
                         iRegL src1, iRegL src2,
                         immI src3, immL_M1 src4, rFlagsReg cr) %{
  match(Set dst (XorL src4 (XorL(URShiftL src2 src3) src1)));
  ins_cost(1.9 * INSN_COST);
  format %{ "eon  $dst, $src1, $src2, LSR $src3" %}

  ins_encode %{
    __ eon(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSR,
              $src3$$constant & 0x3f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
10104 
// dst = ~(src1 ^ (src2 >> src3)) (32-bit, arithmetic shift): EONW with an
// ASR shifted-register operand.
instruct XorI_reg_RShift_not_reg(iRegINoSp dst,
                         iRegIorL2I src1, iRegIorL2I src2,
                         immI src3, immI_M1 src4, rFlagsReg cr) %{
  match(Set dst (XorI src4 (XorI(RShiftI src2 src3) src1)));
  ins_cost(1.9 * INSN_COST);
  format %{ "eonw  $dst, $src1, $src2, ASR $src3" %}

  ins_encode %{
    __ eonw(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::ASR,
              $src3$$constant & 0x1f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
10122 
// dst = ~(src1 ^ (src2 >> src3)) (64-bit, arithmetic shift): EON with an
// ASR shifted-register operand.
instruct XorL_reg_RShift_not_reg(iRegLNoSp dst,
                         iRegL src1, iRegL src2,
                         immI src3, immL_M1 src4, rFlagsReg cr) %{
  match(Set dst (XorL src4 (XorL(RShiftL src2 src3) src1)));
  ins_cost(1.9 * INSN_COST);
  format %{ "eon  $dst, $src1, $src2, ASR $src3" %}

  ins_encode %{
    __ eon(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::ASR,
              $src3$$constant & 0x3f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
10140 
// dst = ~(src1 ^ (src2 << src3)) (32-bit): EONW with an LSL shifted-register
// operand.
instruct XorI_reg_LShift_not_reg(iRegINoSp dst,
                         iRegIorL2I src1, iRegIorL2I src2,
                         immI src3, immI_M1 src4, rFlagsReg cr) %{
  match(Set dst (XorI src4 (XorI(LShiftI src2 src3) src1)));
  ins_cost(1.9 * INSN_COST);
  format %{ "eonw  $dst, $src1, $src2, LSL $src3" %}

  ins_encode %{
    __ eonw(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSL,
              $src3$$constant & 0x1f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
10158 
// dst = ~(src1 ^ (src2 << src3)) (64-bit): EON with an LSL shifted-register
// operand.
instruct XorL_reg_LShift_not_reg(iRegLNoSp dst,
                         iRegL src1, iRegL src2,
                         immI src3, immL_M1 src4, rFlagsReg cr) %{
  match(Set dst (XorL src4 (XorL(LShiftL src2 src3) src1)));
  ins_cost(1.9 * INSN_COST);
  format %{ "eon  $dst, $src1, $src2, LSL $src3" %}

  ins_encode %{
    __ eon(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSL,
              $src3$$constant & 0x3f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
10176 
// dst = src1 | ~(src2 >>> src3) (32-bit): ORNW with an LSR shifted-register
// operand.
instruct OrI_reg_URShift_not_reg(iRegINoSp dst,
                         iRegIorL2I src1, iRegIorL2I src2,
                         immI src3, immI_M1 src4, rFlagsReg cr) %{
  match(Set dst (OrI src1 (XorI(URShiftI src2 src3) src4)));
  ins_cost(1.9 * INSN_COST);
  format %{ "ornw  $dst, $src1, $src2, LSR $src3" %}

  ins_encode %{
    __ ornw(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSR,
              $src3$$constant & 0x1f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
10194 
// dst = src1 | ~(src2 >>> src3) (64-bit): ORN with an LSR shifted-register
// operand.
instruct OrL_reg_URShift_not_reg(iRegLNoSp dst,
                         iRegL src1, iRegL src2,
                         immI src3, immL_M1 src4, rFlagsReg cr) %{
  match(Set dst (OrL src1 (XorL(URShiftL src2 src3) src4)));
  ins_cost(1.9 * INSN_COST);
  format %{ "orn  $dst, $src1, $src2, LSR $src3" %}

  ins_encode %{
    __ orn(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSR,
              $src3$$constant & 0x3f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
10212 
// dst = src1 | ~(src2 >> src3) (32-bit, arithmetic shift): ORNW with an ASR
// shifted-register operand.
instruct OrI_reg_RShift_not_reg(iRegINoSp dst,
                         iRegIorL2I src1, iRegIorL2I src2,
                         immI src3, immI_M1 src4, rFlagsReg cr) %{
  match(Set dst (OrI src1 (XorI(RShiftI src2 src3) src4)));
  ins_cost(1.9 * INSN_COST);
  format %{ "ornw  $dst, $src1, $src2, ASR $src3" %}

  ins_encode %{
    __ ornw(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::ASR,
              $src3$$constant & 0x1f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
10230 
// dst = src1 | ~(src2 >> src3) (64-bit, arithmetic shift): ORN with an ASR
// shifted-register operand.
instruct OrL_reg_RShift_not_reg(iRegLNoSp dst,
                         iRegL src1, iRegL src2,
                         immI src3, immL_M1 src4, rFlagsReg cr) %{
  match(Set dst (OrL src1 (XorL(RShiftL src2 src3) src4)));
  ins_cost(1.9 * INSN_COST);
  format %{ "orn  $dst, $src1, $src2, ASR $src3" %}

  ins_encode %{
    __ orn(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::ASR,
              $src3$$constant & 0x3f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
10248 
// dst = src1 | ~(src2 << src3) (32-bit): ORNW with an LSL shifted-register
// operand.
instruct OrI_reg_LShift_not_reg(iRegINoSp dst,
                         iRegIorL2I src1, iRegIorL2I src2,
                         immI src3, immI_M1 src4, rFlagsReg cr) %{
  match(Set dst (OrI src1 (XorI(LShiftI src2 src3) src4)));
  ins_cost(1.9 * INSN_COST);
  format %{ "ornw  $dst, $src1, $src2, LSL $src3" %}

  ins_encode %{
    __ ornw(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSL,
              $src3$$constant & 0x1f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
10266 
// dst = src1 | ~(src2 << src3) (64-bit): ORN with an LSL shifted-register
// operand.
instruct OrL_reg_LShift_not_reg(iRegLNoSp dst,
                         iRegL src1, iRegL src2,
                         immI src3, immL_M1 src4, rFlagsReg cr) %{
  match(Set dst (OrL src1 (XorL(LShiftL src2 src3) src4)));
  ins_cost(1.9 * INSN_COST);
  format %{ "orn  $dst, $src1, $src2, LSL $src3" %}

  ins_encode %{
    __ orn(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSL,
              $src3$$constant & 0x3f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
10284 
// dst = src1 & (src2 >>> src3) (32-bit): single ANDW using the LSR
// shifted-register operand form, instead of a separate shift plus and.
instruct AndI_reg_URShift_reg(iRegINoSp dst,
                         iRegIorL2I src1, iRegIorL2I src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (AndI src1 (URShiftI src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "andw  $dst, $src1, $src2, LSR $src3" %}

  ins_encode %{
    __ andw(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSR,
              $src3$$constant & 0x1f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
10303 
// dst = src1 & (src2 >>> src3) (64-bit): AND (assembler name "andr") with an
// LSR shifted-register operand.
instruct AndL_reg_URShift_reg(iRegLNoSp dst,
                         iRegL src1, iRegL src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (AndL src1 (URShiftL src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "andr  $dst, $src1, $src2, LSR $src3" %}

  ins_encode %{
    __ andr(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSR,
              $src3$$constant & 0x3f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
10322 
// dst = src1 & (src2 >> src3) (32-bit, arithmetic shift): ANDW with an ASR
// shifted-register operand.
instruct AndI_reg_RShift_reg(iRegINoSp dst,
                         iRegIorL2I src1, iRegIorL2I src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (AndI src1 (RShiftI src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "andw  $dst, $src1, $src2, ASR $src3" %}

  ins_encode %{
    __ andw(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::ASR,
              $src3$$constant & 0x1f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
10341 
// dst = src1 & (src2 >> src3) (64-bit, arithmetic shift): AND with an ASR
// shifted-register operand.
instruct AndL_reg_RShift_reg(iRegLNoSp dst,
                         iRegL src1, iRegL src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (AndL src1 (RShiftL src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "andr  $dst, $src1, $src2, ASR $src3" %}

  ins_encode %{
    __ andr(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::ASR,
              $src3$$constant & 0x3f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
10360 
// dst = src1 & (src2 << src3) (32-bit): ANDW with an LSL shifted-register
// operand.
instruct AndI_reg_LShift_reg(iRegINoSp dst,
                         iRegIorL2I src1, iRegIorL2I src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (AndI src1 (LShiftI src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "andw  $dst, $src1, $src2, LSL $src3" %}

  ins_encode %{
    __ andw(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSL,
              $src3$$constant & 0x1f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
10379 
// dst = src1 & (src2 << src3) (64-bit): AND with an LSL shifted-register
// operand.
instruct AndL_reg_LShift_reg(iRegLNoSp dst,
                         iRegL src1, iRegL src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (AndL src1 (LShiftL src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "andr  $dst, $src1, $src2, LSL $src3" %}

  ins_encode %{
    __ andr(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSL,
              $src3$$constant & 0x3f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
10398 
// dst = src1 ^ (src2 >>> src3) (32-bit): EORW with an LSR shifted-register
// operand.
instruct XorI_reg_URShift_reg(iRegINoSp dst,
                         iRegIorL2I src1, iRegIorL2I src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (XorI src1 (URShiftI src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "eorw  $dst, $src1, $src2, LSR $src3" %}

  ins_encode %{
    __ eorw(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSR,
              $src3$$constant & 0x1f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
10417 
// dst = src1 ^ (src2 >>> src3) (64-bit): EOR with an LSR shifted-register
// operand.
instruct XorL_reg_URShift_reg(iRegLNoSp dst,
                         iRegL src1, iRegL src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (XorL src1 (URShiftL src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "eor  $dst, $src1, $src2, LSR $src3" %}

  ins_encode %{
    __ eor(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSR,
              $src3$$constant & 0x3f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
10436 
// dst = src1 ^ (src2 >> src3) (32-bit, arithmetic shift): EORW with an ASR
// shifted-register operand.
instruct XorI_reg_RShift_reg(iRegINoSp dst,
                         iRegIorL2I src1, iRegIorL2I src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (XorI src1 (RShiftI src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "eorw  $dst, $src1, $src2, ASR $src3" %}

  ins_encode %{
    __ eorw(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::ASR,
              $src3$$constant & 0x1f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
10455 
// dst = src1 ^ (src2 >> src3) (64-bit, arithmetic shift): EOR with an ASR
// shifted-register operand.
instruct XorL_reg_RShift_reg(iRegLNoSp dst,
                         iRegL src1, iRegL src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (XorL src1 (RShiftL src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "eor  $dst, $src1, $src2, ASR $src3" %}

  ins_encode %{
    __ eor(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::ASR,
              $src3$$constant & 0x3f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
10474 
// dst = src1 ^ (src2 << src3) (32-bit): EORW with an LSL shifted-register
// operand.
instruct XorI_reg_LShift_reg(iRegINoSp dst,
                         iRegIorL2I src1, iRegIorL2I src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (XorI src1 (LShiftI src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "eorw  $dst, $src1, $src2, LSL $src3" %}

  ins_encode %{
    __ eorw(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSL,
              $src3$$constant & 0x1f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
10493 
// dst = src1 ^ (src2 << src3) (64-bit): EOR with an LSL shifted-register
// operand.
instruct XorL_reg_LShift_reg(iRegLNoSp dst,
                         iRegL src1, iRegL src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (XorL src1 (LShiftL src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "eor  $dst, $src1, $src2, LSL $src3" %}

  ins_encode %{
    __ eor(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSL,
              $src3$$constant & 0x3f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
10512 
// dst = src1 | (src2 >>> src3) (32-bit): ORRW with an LSR shifted-register
// operand.
instruct OrI_reg_URShift_reg(iRegINoSp dst,
                         iRegIorL2I src1, iRegIorL2I src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (OrI src1 (URShiftI src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "orrw  $dst, $src1, $src2, LSR $src3" %}

  ins_encode %{
    __ orrw(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSR,
              $src3$$constant & 0x1f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
10531 
// dst = src1 | (src2 >>> src3) (64-bit): ORR with an LSR shifted-register
// operand.
instruct OrL_reg_URShift_reg(iRegLNoSp dst,
                         iRegL src1, iRegL src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (OrL src1 (URShiftL src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "orr  $dst, $src1, $src2, LSR $src3" %}

  ins_encode %{
    __ orr(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSR,
              $src3$$constant & 0x3f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
10550 
// dst = src1 | (src2 >> src3) (32-bit, arithmetic shift): ORRW with an ASR
// shifted-register operand.
instruct OrI_reg_RShift_reg(iRegINoSp dst,
                         iRegIorL2I src1, iRegIorL2I src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (OrI src1 (RShiftI src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "orrw  $dst, $src1, $src2, ASR $src3" %}

  ins_encode %{
    __ orrw(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::ASR,
              $src3$$constant & 0x1f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
10569 
// dst = src1 | (src2 >> src3) (64-bit, arithmetic shift): ORR with an ASR
// shifted-register operand.
instruct OrL_reg_RShift_reg(iRegLNoSp dst,
                         iRegL src1, iRegL src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (OrL src1 (RShiftL src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "orr  $dst, $src1, $src2, ASR $src3" %}

  ins_encode %{
    __ orr(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::ASR,
              $src3$$constant & 0x3f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
10588 
// dst = src1 | (src2 << src3) (32-bit): ORRW with an LSL shifted-register
// operand.
instruct OrI_reg_LShift_reg(iRegINoSp dst,
                         iRegIorL2I src1, iRegIorL2I src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (OrI src1 (LShiftI src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "orrw  $dst, $src1, $src2, LSL $src3" %}

  ins_encode %{
    __ orrw(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSL,
              $src3$$constant & 0x1f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
10607 
// dst = src1 | (src2 << src3) (64-bit): ORR with an LSL shifted-register
// operand.
instruct OrL_reg_LShift_reg(iRegLNoSp dst,
                         iRegL src1, iRegL src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (OrL src1 (LShiftL src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "orr  $dst, $src1, $src2, LSL $src3" %}

  ins_encode %{
    __ orr(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSL,
              $src3$$constant & 0x3f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
10626 
// dst = src1 + (src2 >>> src3) (32-bit): ADDW with an LSR shifted-register
// operand — one instruction instead of shift + add.
instruct AddI_reg_URShift_reg(iRegINoSp dst,
                         iRegIorL2I src1, iRegIorL2I src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (AddI src1 (URShiftI src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "addw  $dst, $src1, $src2, LSR $src3" %}

  ins_encode %{
    __ addw(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSR,
              $src3$$constant & 0x1f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
10645 
// dst = src1 + (src2 >>> src3) (64-bit): ADD with an LSR shifted-register
// operand.
instruct AddL_reg_URShift_reg(iRegLNoSp dst,
                         iRegL src1, iRegL src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (AddL src1 (URShiftL src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "add  $dst, $src1, $src2, LSR $src3" %}

  ins_encode %{
    __ add(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSR,
              $src3$$constant & 0x3f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
10664 
// dst = src1 + (src2 >> src3) (32-bit, arithmetic shift): ADDW with an ASR
// shifted-register operand.
instruct AddI_reg_RShift_reg(iRegINoSp dst,
                         iRegIorL2I src1, iRegIorL2I src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (AddI src1 (RShiftI src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "addw  $dst, $src1, $src2, ASR $src3" %}

  ins_encode %{
    __ addw(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::ASR,
              $src3$$constant & 0x1f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
10683 
// dst = src1 + (src2 >> src3) (64-bit, arithmetic shift): ADD with an ASR
// shifted-register operand.
instruct AddL_reg_RShift_reg(iRegLNoSp dst,
                         iRegL src1, iRegL src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (AddL src1 (RShiftL src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "add  $dst, $src1, $src2, ASR $src3" %}

  ins_encode %{
    __ add(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::ASR,
              $src3$$constant & 0x3f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
10702 
// dst = src1 + (src2 << src3) (32-bit): ADDW with an LSL shifted-register
// operand.
instruct AddI_reg_LShift_reg(iRegINoSp dst,
                         iRegIorL2I src1, iRegIorL2I src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (AddI src1 (LShiftI src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "addw  $dst, $src1, $src2, LSL $src3" %}

  ins_encode %{
    __ addw(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSL,
              $src3$$constant & 0x1f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
10721 
// dst = src1 + (src2 << src3) (64-bit): ADD with an LSL shifted-register
// operand.
instruct AddL_reg_LShift_reg(iRegLNoSp dst,
                         iRegL src1, iRegL src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (AddL src1 (LShiftL src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "add  $dst, $src1, $src2, LSL $src3" %}

  ins_encode %{
    __ add(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSL,
              $src3$$constant & 0x3f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
10740 
// dst = src1 - (src2 >>> src3) (32-bit): SUBW with an LSR shifted-register
// operand.
instruct SubI_reg_URShift_reg(iRegINoSp dst,
                         iRegIorL2I src1, iRegIorL2I src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (SubI src1 (URShiftI src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "subw  $dst, $src1, $src2, LSR $src3" %}

  ins_encode %{
    __ subw(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSR,
              $src3$$constant & 0x1f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
10759 
// dst = src1 - (src2 >>> src3) (64-bit): SUB with an LSR shifted-register
// operand.
instruct SubL_reg_URShift_reg(iRegLNoSp dst,
                         iRegL src1, iRegL src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (SubL src1 (URShiftL src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "sub  $dst, $src1, $src2, LSR $src3" %}

  ins_encode %{
    __ sub(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSR,
              $src3$$constant & 0x3f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
10778 
// dst = src1 - (src2 >> src3) (32-bit, arithmetic shift): SUBW with an ASR
// shifted-register operand.
instruct SubI_reg_RShift_reg(iRegINoSp dst,
                         iRegIorL2I src1, iRegIorL2I src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (SubI src1 (RShiftI src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "subw  $dst, $src1, $src2, ASR $src3" %}

  ins_encode %{
    __ subw(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::ASR,
              $src3$$constant & 0x1f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
10797 
// dst = src1 - (src2 >> src3) (64-bit, arithmetic shift): SUB with an ASR
// shifted-register operand.
instruct SubL_reg_RShift_reg(iRegLNoSp dst,
                         iRegL src1, iRegL src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (SubL src1 (RShiftL src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "sub  $dst, $src1, $src2, ASR $src3" %}

  ins_encode %{
    __ sub(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::ASR,
              $src3$$constant & 0x3f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
10816 
// dst = src1 - (src2 << src3) (32-bit): SUBW with an LSL shifted-register
// operand.
instruct SubI_reg_LShift_reg(iRegINoSp dst,
                         iRegIorL2I src1, iRegIorL2I src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (SubI src1 (LShiftI src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "subw  $dst, $src1, $src2, LSL $src3" %}

  ins_encode %{
    __ subw(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSL,
              $src3$$constant & 0x1f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
10835 
// dst = src1 - (src2 << src3) (64-bit): SUB with an LSL shifted-register
// operand.
instruct SubL_reg_LShift_reg(iRegLNoSp dst,
                         iRegL src1, iRegL src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (SubL src1 (LShiftL src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "sub  $dst, $src1, $src2, LSL $src3" %}

  ins_encode %{
    __ sub(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSL,
              $src3$$constant & 0x3f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
10854 
10855 
10856 
// Shift Left followed by Shift Right.
// This idiom is used by the compiler for the i2b bytecode etc.
// The pair (src << lshift) >> rshift is a signed bit-field move and is
// encoded as a single SBFM with immr = (rshift - lshift) mod width and
// imms = width-1 - lshift.
instruct sbfmL(iRegLNoSp dst, iRegL src, immI lshift_count, immI rshift_count)
%{
  match(Set dst (RShiftL (LShiftL src lshift_count) rshift_count));
  // Make sure we are not going to exceed what sbfm can do.
  predicate((unsigned int)n->in(2)->get_int() <= 63
            && (unsigned int)n->in(1)->in(2)->get_int() <= 63);

  ins_cost(INSN_COST * 2);
  format %{ "sbfm  $dst, $src, $rshift_count - $lshift_count, #63 - $lshift_count" %}
  ins_encode %{
    int lshift = $lshift_count$$constant, rshift = $rshift_count$$constant;
    int s = 63 - lshift;               // imms: top bit of the field
    int r = (rshift - lshift) & 63;    // immr: rotate amount, mod 64
    __ sbfm(as_Register($dst$$reg),
            as_Register($src$$reg),
            r, s);
  %}

  ins_pipe(ialu_reg_shift);
%}

// Shift Left followed by Shift Right.
// This idiom is used by the compiler for the i2b bytecode etc.
// 32-bit variant of sbfmL above; see there for the immr/imms derivation.
instruct sbfmwI(iRegINoSp dst, iRegIorL2I src, immI lshift_count, immI rshift_count)
%{
  match(Set dst (RShiftI (LShiftI src lshift_count) rshift_count));
  // Make sure we are not going to exceed what sbfmw can do.
  predicate((unsigned int)n->in(2)->get_int() <= 31
            && (unsigned int)n->in(1)->in(2)->get_int() <= 31);

  ins_cost(INSN_COST * 2);
  format %{ "sbfmw  $dst, $src, $rshift_count - $lshift_count, #31 - $lshift_count" %}
  ins_encode %{
    int lshift = $lshift_count$$constant, rshift = $rshift_count$$constant;
    int s = 31 - lshift;               // imms: top bit of the field
    int r = (rshift - lshift) & 31;    // immr: rotate amount, mod 32
    __ sbfmw(as_Register($dst$$reg),
            as_Register($src$$reg),
            r, s);
  %}

  ins_pipe(ialu_reg_shift);
%}

// Shift Left followed by Shift Right.
// This idiom is used by the compiler for the i2b bytecode etc.
// Unsigned counterpart of sbfmL: the right shift is logical (URShiftL),
// so a UBFM (zero-extending bit-field move) is emitted instead.
instruct ubfmL(iRegLNoSp dst, iRegL src, immI lshift_count, immI rshift_count)
%{
  match(Set dst (URShiftL (LShiftL src lshift_count) rshift_count));
  // Make sure we are not going to exceed what ubfm can do.
  predicate((unsigned int)n->in(2)->get_int() <= 63
            && (unsigned int)n->in(1)->in(2)->get_int() <= 63);

  ins_cost(INSN_COST * 2);
  format %{ "ubfm  $dst, $src, $rshift_count - $lshift_count, #63 - $lshift_count" %}
  ins_encode %{
    int lshift = $lshift_count$$constant, rshift = $rshift_count$$constant;
    int s = 63 - lshift;               // imms: top bit of the field
    int r = (rshift - lshift) & 63;    // immr: rotate amount, mod 64
    __ ubfm(as_Register($dst$$reg),
            as_Register($src$$reg),
            r, s);
  %}

  ins_pipe(ialu_reg_shift);
%}

// Shift Left followed by Shift Right.
// This idiom is used by the compiler for the i2b bytecode etc.
// 32-bit variant of ubfmL above.
instruct ubfmwI(iRegINoSp dst, iRegIorL2I src, immI lshift_count, immI rshift_count)
%{
  match(Set dst (URShiftI (LShiftI src lshift_count) rshift_count));
  // Make sure we are not going to exceed what ubfmw can do.
  predicate((unsigned int)n->in(2)->get_int() <= 31
            && (unsigned int)n->in(1)->in(2)->get_int() <= 31);

  ins_cost(INSN_COST * 2);
  format %{ "ubfmw  $dst, $src, $rshift_count - $lshift_count, #31 - $lshift_count" %}
  ins_encode %{
    int lshift = $lshift_count$$constant, rshift = $rshift_count$$constant;
    int s = 31 - lshift;               // imms: top bit of the field
    int r = (rshift - lshift) & 31;    // immr: rotate amount, mod 32
    __ ubfmw(as_Register($dst$$reg),
            as_Register($src$$reg),
            r, s);
  %}

  ins_pipe(ialu_reg_shift);
%}
// Bitfield extract with shift & mask
//
// (src >>> rshift) & mask becomes a single UBFX when mask is of the form
// 2^width - 1; the bitmask operand types guarantee that shape, so
// exact_log2(mask+1) recovers the field width.

instruct ubfxwI(iRegINoSp dst, iRegIorL2I src, immI rshift, immI_bitmask mask)
%{
  match(Set dst (AndI (URShiftI src rshift) mask));

  ins_cost(INSN_COST);
  format %{ "ubfxw $dst, $src, $rshift, $mask" %}
  ins_encode %{
    int rshift = $rshift$$constant;
    long mask = $mask$$constant;
    int width = exact_log2(mask+1);   // mask == 2^width - 1 (immI_bitmask)
    __ ubfxw(as_Register($dst$$reg),
            as_Register($src$$reg), rshift, width);
  %}
  ins_pipe(ialu_reg_shift);
%}
// 64-bit variant of ubfxwI.
// NOTE(review): uses exact_log2 on a 64-bit mask+1 while the ubfizL
// predicate below uses exact_log2_long — confirm exact_log2 accepts the
// full immL_bitmask range on all builds.
instruct ubfxL(iRegLNoSp dst, iRegL src, immI rshift, immL_bitmask mask)
%{
  match(Set dst (AndL (URShiftL src rshift) mask));

  ins_cost(INSN_COST);
  format %{ "ubfx $dst, $src, $rshift, $mask" %}
  ins_encode %{
    int rshift = $rshift$$constant;
    long mask = $mask$$constant;
    int width = exact_log2(mask+1);   // mask == 2^width - 1 (immL_bitmask)
    __ ubfx(as_Register($dst$$reg),
            as_Register($src$$reg), rshift, width);
  %}
  ins_pipe(ialu_reg_shift);
%}

// We can use ubfx when extending an And with a mask when we know mask
// is positive.  We know that because immI_bitmask guarantees it.
// ubfx zero-extends the field, so the ConvI2L is free.
instruct ubfxIConvI2L(iRegLNoSp dst, iRegIorL2I src, immI rshift, immI_bitmask mask)
%{
  match(Set dst (ConvI2L (AndI (URShiftI src rshift) mask)));

  ins_cost(INSN_COST * 2);
  format %{ "ubfx $dst, $src, $rshift, $mask" %}
  ins_encode %{
    int rshift = $rshift$$constant;
    long mask = $mask$$constant;
    int width = exact_log2(mask+1);   // mask == 2^width - 1 (immI_bitmask)
    __ ubfx(as_Register($dst$$reg),
            as_Register($src$$reg), rshift, width);
  %}
  ins_pipe(ialu_reg_shift);
%}
10998 
// We can use ubfiz when masking by a positive number and then left shifting the result.
// We know that the mask is positive because immI_bitmask guarantees it.
// The predicate ensures the shifted field still fits in the register:
// lshift <= 31 and width + lshift <= 32.
instruct ubfizwI(iRegINoSp dst, iRegIorL2I src, immI lshift, immI_bitmask mask)
%{
  match(Set dst (LShiftI (AndI src mask) lshift));
  predicate((unsigned int)n->in(2)->get_int() <= 31 &&
    (exact_log2(n->in(1)->in(2)->get_int()+1) + (unsigned int)n->in(2)->get_int()) <= (31+1));

  ins_cost(INSN_COST);
  format %{ "ubfizw $dst, $src, $lshift, $mask" %}
  ins_encode %{
    int lshift = $lshift$$constant;
    long mask = $mask$$constant;
    int width = exact_log2(mask+1);   // mask == 2^width - 1 (immI_bitmask)
    __ ubfizw(as_Register($dst$$reg),
          as_Register($src$$reg), lshift, width);
  %}
  ins_pipe(ialu_reg_shift);
%}
// We can use ubfiz when masking by a positive number and then left shifting the result.
// We know that the mask is positive because immL_bitmask guarantees it.
// 64-bit variant: lshift <= 63 and width + lshift <= 64.
instruct ubfizL(iRegLNoSp dst, iRegL src, immI lshift, immL_bitmask mask)
%{
  match(Set dst (LShiftL (AndL src mask) lshift));
  predicate((unsigned int)n->in(2)->get_int() <= 63 &&
    (exact_log2_long(n->in(1)->in(2)->get_long()+1) + (unsigned int)n->in(2)->get_int()) <= (63+1));

  ins_cost(INSN_COST);
  format %{ "ubfiz $dst, $src, $lshift, $mask" %}
  ins_encode %{
    int lshift = $lshift$$constant;
    long mask = $mask$$constant;
    int width = exact_log2(mask+1);   // mask == 2^width - 1 (immL_bitmask)
    __ ubfiz(as_Register($dst$$reg),
          as_Register($src$$reg), lshift, width);
  %}
  ins_pipe(ialu_reg_shift);
%}

// If there is a convert I to L block between and AndI and a LShiftL, we can also match ubfiz
// (ubfiz zero-extends, so the ConvI2L comes for free).
instruct ubfizIConvI2L(iRegLNoSp dst, iRegIorL2I src, immI lshift, immI_bitmask mask)
%{
  match(Set dst (LShiftL (ConvI2L(AndI src mask)) lshift));
  predicate((unsigned int)n->in(2)->get_int() <= 31 &&
    (exact_log2((unsigned int)n->in(1)->in(1)->in(2)->get_int()+1) + (unsigned int)n->in(2)->get_int()) <= 32);

  ins_cost(INSN_COST);
  format %{ "ubfiz $dst, $src, $lshift, $mask" %}
  ins_encode %{
    int lshift = $lshift$$constant;
    long mask = $mask$$constant;
    int width = exact_log2(mask+1);   // mask == 2^width - 1 (immI_bitmask)
    __ ubfiz(as_Register($dst$$reg),
             as_Register($src$$reg), lshift, width);
  %}
  ins_pipe(ialu_reg_shift);
%}
11056 
// Rotations
//
// (src1 << lshift) | (src2 >>> rshift) is an EXTR (extract) when the two
// shift amounts sum to the register width; the predicates check
// (lshift + rshift) mod width == 0.  With src1 == src2 this is a rotate.

instruct extrOrL(iRegLNoSp dst, iRegL src1, iRegL src2, immI lshift, immI rshift, rFlagsReg cr)
%{
  match(Set dst (OrL (LShiftL src1 lshift) (URShiftL src2 rshift)));
  predicate(0 == ((n->in(1)->in(2)->get_int() + n->in(2)->in(2)->get_int()) & 63));

  ins_cost(INSN_COST);
  format %{ "extr $dst, $src1, $src2, #$rshift" %}

  ins_encode %{
    __ extr(as_Register($dst$$reg), as_Register($src1$$reg), as_Register($src2$$reg),
            $rshift$$constant & 63);
  %}
  ins_pipe(ialu_reg_reg_extr);
%}

// 32-bit variant: emits extrw.
// NOTE(review): format string says "extr" though extrw is emitted — cosmetic only.
instruct extrOrI(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2, immI lshift, immI rshift, rFlagsReg cr)
%{
  match(Set dst (OrI (LShiftI src1 lshift) (URShiftI src2 rshift)));
  predicate(0 == ((n->in(1)->in(2)->get_int() + n->in(2)->in(2)->get_int()) & 31));

  ins_cost(INSN_COST);
  format %{ "extr $dst, $src1, $src2, #$rshift" %}

  ins_encode %{
    __ extrw(as_Register($dst$$reg), as_Register($src1$$reg), as_Register($src2$$reg),
            $rshift$$constant & 31);
  %}
  ins_pipe(ialu_reg_reg_extr);
%}

// Same as extrOrL but the two halves are combined with AddL; with disjoint
// bit ranges (guaranteed by the complementary shifts) add == or.
instruct extrAddL(iRegLNoSp dst, iRegL src1, iRegL src2, immI lshift, immI rshift, rFlagsReg cr)
%{
  match(Set dst (AddL (LShiftL src1 lshift) (URShiftL src2 rshift)));
  predicate(0 == ((n->in(1)->in(2)->get_int() + n->in(2)->in(2)->get_int()) & 63));

  ins_cost(INSN_COST);
  format %{ "extr $dst, $src1, $src2, #$rshift" %}

  ins_encode %{
    __ extr(as_Register($dst$$reg), as_Register($src1$$reg), as_Register($src2$$reg),
            $rshift$$constant & 63);
  %}
  ins_pipe(ialu_reg_reg_extr);
%}

// 32-bit variant of extrAddL; emits extrw.
instruct extrAddI(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2, immI lshift, immI rshift, rFlagsReg cr)
%{
  match(Set dst (AddI (LShiftI src1 lshift) (URShiftI src2 rshift)));
  predicate(0 == ((n->in(1)->in(2)->get_int() + n->in(2)->in(2)->get_int()) & 31));

  ins_cost(INSN_COST);
  format %{ "extr $dst, $src1, $src2, #$rshift" %}

  ins_encode %{
    __ extrw(as_Register($dst$$reg), as_Register($src1$$reg), as_Register($src2$$reg),
            $rshift$$constant & 31);
  %}
  ins_pipe(ialu_reg_reg_extr);
%}
11118 
11119 
// rol expander
//
// AArch64 has no rotate-left instruction; rotate left by s is implemented
// as rotate right by (-s mod width).  The negated shift is materialized in
// rscratch1, which this expander uses as a temporary.

instruct rolL_rReg(iRegLNoSp dst, iRegL src, iRegI shift, rFlagsReg cr)
%{
  effect(DEF dst, USE src, USE shift);

  format %{ "rol    $dst, $src, $shift" %}
  ins_cost(INSN_COST * 3);
  ins_encode %{
    __ subw(rscratch1, zr, as_Register($shift$$reg));  // rscratch1 = -shift
    __ rorv(as_Register($dst$$reg), as_Register($src$$reg),
            rscratch1);
    %}
  ins_pipe(ialu_reg_reg_vshift);
%}

// rol expander
// 32-bit variant of rolL_rReg; same negate-then-ror-variable scheme.

instruct rolI_rReg(iRegINoSp dst, iRegI src, iRegI shift, rFlagsReg cr)
%{
  effect(DEF dst, USE src, USE shift);

  format %{ "rol    $dst, $src, $shift" %}
  ins_cost(INSN_COST * 3);
  ins_encode %{
    __ subw(rscratch1, zr, as_Register($shift$$reg));  // rscratch1 = -shift
    __ rorvw(as_Register($dst$$reg), as_Register($src$$reg),
            rscratch1);
    %}
  ins_pipe(ialu_reg_reg_vshift);
%}

// Match the (x << s) | (x >>> (64 - s)) rotate-left idiom.
instruct rolL_rReg_Var_C_64(iRegLNoSp dst, iRegL src, iRegI shift, immI_64 c_64, rFlagsReg cr)
%{
  match(Set dst (OrL (LShiftL src shift) (URShiftL src (SubI c_64 shift))));

  expand %{
    rolL_rReg(dst, src, shift, cr);
  %}
%}

// Same idiom written with (0 - s); shift counts are taken mod 64 so the
// two forms are equivalent.
instruct rolL_rReg_Var_C0(iRegLNoSp dst, iRegL src, iRegI shift, immI0 c0, rFlagsReg cr)
%{
  match(Set dst (OrL (LShiftL src shift) (URShiftL src (SubI c0 shift))));

  expand %{
    rolL_rReg(dst, src, shift, cr);
  %}
%}

// Match the (x << s) | (x >>> (32 - s)) rotate-left idiom.
instruct rolI_rReg_Var_C_32(iRegINoSp dst, iRegI src, iRegI shift, immI_32 c_32, rFlagsReg cr)
%{
  match(Set dst (OrI (LShiftI src shift) (URShiftI src (SubI c_32 shift))));

  expand %{
    rolI_rReg(dst, src, shift, cr);
  %}
%}

// Same idiom written with (0 - s).
instruct rolI_rReg_Var_C0(iRegINoSp dst, iRegI src, iRegI shift, immI0 c0, rFlagsReg cr)
%{
  match(Set dst (OrI (LShiftI src shift) (URShiftI src (SubI c0 shift))));

  expand %{
    rolI_rReg(dst, src, shift, cr);
  %}
%}

// ror expander
//
// Rotate right maps directly onto rorv; no temporary needed, hence the
// lower cost than the rol expanders above.

instruct rorL_rReg(iRegLNoSp dst, iRegL src, iRegI shift, rFlagsReg cr)
%{
  effect(DEF dst, USE src, USE shift);

  format %{ "ror    $dst, $src, $shift" %}
  ins_cost(INSN_COST);
  ins_encode %{
    __ rorv(as_Register($dst$$reg), as_Register($src$$reg),
            as_Register($shift$$reg));
    %}
  ins_pipe(ialu_reg_reg_vshift);
%}

// ror expander
// 32-bit variant of rorL_rReg.

instruct rorI_rReg(iRegINoSp dst, iRegI src, iRegI shift, rFlagsReg cr)
%{
  effect(DEF dst, USE src, USE shift);

  format %{ "ror    $dst, $src, $shift" %}
  ins_cost(INSN_COST);
  ins_encode %{
    __ rorvw(as_Register($dst$$reg), as_Register($src$$reg),
            as_Register($shift$$reg));
    %}
  ins_pipe(ialu_reg_reg_vshift);
%}

// Match the (x >>> s) | (x << (64 - s)) rotate-right idiom.
instruct rorL_rReg_Var_C_64(iRegLNoSp dst, iRegL src, iRegI shift, immI_64 c_64, rFlagsReg cr)
%{
  match(Set dst (OrL (URShiftL src shift) (LShiftL src (SubI c_64 shift))));

  expand %{
    rorL_rReg(dst, src, shift, cr);
  %}
%}

// Same idiom written with (0 - s).
instruct rorL_rReg_Var_C0(iRegLNoSp dst, iRegL src, iRegI shift, immI0 c0, rFlagsReg cr)
%{
  match(Set dst (OrL (URShiftL src shift) (LShiftL src (SubI c0 shift))));

  expand %{
    rorL_rReg(dst, src, shift, cr);
  %}
%}

// Match the (x >>> s) | (x << (32 - s)) rotate-right idiom.
instruct rorI_rReg_Var_C_32(iRegINoSp dst, iRegI src, iRegI shift, immI_32 c_32, rFlagsReg cr)
%{
  match(Set dst (OrI (URShiftI src shift) (LShiftI src (SubI c_32 shift))));

  expand %{
    rorI_rReg(dst, src, shift, cr);
  %}
%}

// Same idiom written with (0 - s).
instruct rorI_rReg_Var_C0(iRegINoSp dst, iRegI src, iRegI shift, immI0 c0, rFlagsReg cr)
%{
  match(Set dst (OrI (URShiftI src shift) (LShiftI src (SubI c0 shift))));

  expand %{
    rorI_rReg(dst, src, shift, cr);
  %}
%}
11253 
// Add/subtract (extended)
//
// These rules fold an explicit sign-extension idiom (ConvI2L, or a
// left-shift/right-shift pair) feeding an add or sub into the
// extended-register form of the instruction (sxtb/sxth/sxtw/uxtb).

// add long += sign-extended int: ConvI2L folds into add ..., sxtw.
instruct AddExtI(iRegLNoSp dst, iRegL src1, iRegIorL2I src2, rFlagsReg cr)
%{
  match(Set dst (AddL src1 (ConvI2L src2)));
  ins_cost(INSN_COST);
  format %{ "add  $dst, $src1, $src2, sxtw" %}

   ins_encode %{
     __ add(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::sxtw);
   %}
  ins_pipe(ialu_reg_reg);
%};

// sub long -= sign-extended int.
instruct SubExtI(iRegLNoSp dst, iRegL src1, iRegIorL2I src2, rFlagsReg cr)
%{
  match(Set dst (SubL src1 (ConvI2L src2)));
  ins_cost(INSN_COST);
  format %{ "sub  $dst, $src1, $src2, sxtw" %}

   ins_encode %{
     __ sub(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::sxtw);
   %}
  ins_pipe(ialu_reg_reg);
%};


// (src2 << 16) >> 16 sign-extends the low 16 bits: add ..., sxth.
instruct AddExtI_sxth(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2, immI_16 lshift, immI_16 rshift, rFlagsReg cr)
%{
  match(Set dst (AddI src1 (RShiftI (LShiftI src2 lshift) rshift)));
  ins_cost(INSN_COST);
  format %{ "add  $dst, $src1, $src2, sxth" %}

   ins_encode %{
     __ add(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::sxth);
   %}
  ins_pipe(ialu_reg_reg);
%}

// (src2 << 24) >> 24 sign-extends the low 8 bits: add ..., sxtb.
instruct AddExtI_sxtb(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2, immI_24 lshift, immI_24 rshift, rFlagsReg cr)
%{
  match(Set dst (AddI src1 (RShiftI (LShiftI src2 lshift) rshift)));
  ins_cost(INSN_COST);
  format %{ "add  $dst, $src1, $src2, sxtb" %}

   ins_encode %{
     __ add(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::sxtb);
   %}
  ins_pipe(ialu_reg_reg);
%}

// (src2 << 24) >>> 24 zero-extends the low 8 bits: add ..., uxtb.
instruct AddExtI_uxtb(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2, immI_24 lshift, immI_24 rshift, rFlagsReg cr)
%{
  match(Set dst (AddI src1 (URShiftI (LShiftI src2 lshift) rshift)));
  ins_cost(INSN_COST);
  format %{ "add  $dst, $src1, $src2, uxtb" %}

   ins_encode %{
     __ add(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::uxtb);
   %}
  ins_pipe(ialu_reg_reg);
%}

// Long forms of the same shift-pair sign-extension idiom.
// (src2 << 48) >> 48: sign-extend 16 bits.
instruct AddExtL_sxth(iRegLNoSp dst, iRegL src1, iRegL src2, immI_48 lshift, immI_48 rshift, rFlagsReg cr)
%{
  match(Set dst (AddL src1 (RShiftL (LShiftL src2 lshift) rshift)));
  ins_cost(INSN_COST);
  format %{ "add  $dst, $src1, $src2, sxth" %}

   ins_encode %{
     __ add(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::sxth);
   %}
  ins_pipe(ialu_reg_reg);
%}

// (src2 << 32) >> 32: sign-extend 32 bits.
instruct AddExtL_sxtw(iRegLNoSp dst, iRegL src1, iRegL src2, immI_32 lshift, immI_32 rshift, rFlagsReg cr)
%{
  match(Set dst (AddL src1 (RShiftL (LShiftL src2 lshift) rshift)));
  ins_cost(INSN_COST);
  format %{ "add  $dst, $src1, $src2, sxtw" %}

   ins_encode %{
     __ add(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::sxtw);
   %}
  ins_pipe(ialu_reg_reg);
%}

// (src2 << 56) >> 56: sign-extend 8 bits.
instruct AddExtL_sxtb(iRegLNoSp dst, iRegL src1, iRegL src2, immI_56 lshift, immI_56 rshift, rFlagsReg cr)
%{
  match(Set dst (AddL src1 (RShiftL (LShiftL src2 lshift) rshift)));
  ins_cost(INSN_COST);
  format %{ "add  $dst, $src1, $src2, sxtb" %}

   ins_encode %{
     __ add(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::sxtb);
   %}
  ins_pipe(ialu_reg_reg);
%}

// (src2 << 56) >>> 56: zero-extend 8 bits.
instruct AddExtL_uxtb(iRegLNoSp dst, iRegL src1, iRegL src2, immI_56 lshift, immI_56 rshift, rFlagsReg cr)
%{
  match(Set dst (AddL src1 (URShiftL (LShiftL src2 lshift) rshift)));
  ins_cost(INSN_COST);
  format %{ "add  $dst, $src1, $src2, uxtb" %}

   ins_encode %{
     __ add(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::uxtb);
   %}
  ins_pipe(ialu_reg_reg);
%}
11373 
11374 
// Add/subtract (extended), zero-extension written as an And with a mask:
// (x & 0xff) -> uxtb, (x & 0xffff) -> uxth, (x & 0xffffffff) -> uxtw.
// The fixed-value mask operand types select the extension kind.
instruct AddExtI_uxtb_and(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2, immI_255 mask, rFlagsReg cr)
%{
  match(Set dst (AddI src1 (AndI src2 mask)));
  ins_cost(INSN_COST);
  format %{ "addw  $dst, $src1, $src2, uxtb" %}

   ins_encode %{
     __ addw(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::uxtb);
   %}
  ins_pipe(ialu_reg_reg);
%}

// int: add (src2 & 0xffff) via uxth.
instruct AddExtI_uxth_and(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2, immI_65535 mask, rFlagsReg cr)
%{
  match(Set dst (AddI src1 (AndI src2 mask)));
  ins_cost(INSN_COST);
  format %{ "addw  $dst, $src1, $src2, uxth" %}

   ins_encode %{
     __ addw(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::uxth);
   %}
  ins_pipe(ialu_reg_reg);
%}

// long: add (src2 & 0xff) via uxtb.
instruct AddExtL_uxtb_and(iRegLNoSp dst, iRegL src1, iRegL src2, immL_255 mask, rFlagsReg cr)
%{
  match(Set dst (AddL src1 (AndL src2 mask)));
  ins_cost(INSN_COST);
  format %{ "add  $dst, $src1, $src2, uxtb" %}

   ins_encode %{
     __ add(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::uxtb);
   %}
  ins_pipe(ialu_reg_reg);
%}

// long: add (src2 & 0xffff) via uxth.
instruct AddExtL_uxth_and(iRegLNoSp dst, iRegL src1, iRegL src2, immL_65535 mask, rFlagsReg cr)
%{
  match(Set dst (AddL src1 (AndL src2 mask)));
  ins_cost(INSN_COST);
  format %{ "add  $dst, $src1, $src2, uxth" %}

   ins_encode %{
     __ add(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::uxth);
   %}
  ins_pipe(ialu_reg_reg);
%}

// long: add (src2 & 0xffffffff) via uxtw.
instruct AddExtL_uxtw_and(iRegLNoSp dst, iRegL src1, iRegL src2, immL_4294967295 mask, rFlagsReg cr)
%{
  match(Set dst (AddL src1 (AndL src2 mask)));
  ins_cost(INSN_COST);
  format %{ "add  $dst, $src1, $src2, uxtw" %}

   ins_encode %{
     __ add(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::uxtw);
   %}
  ins_pipe(ialu_reg_reg);
%}

// Sub counterparts of the masked-extension rules above.
instruct SubExtI_uxtb_and(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2, immI_255 mask, rFlagsReg cr)
%{
  match(Set dst (SubI src1 (AndI src2 mask)));
  ins_cost(INSN_COST);
  format %{ "subw  $dst, $src1, $src2, uxtb" %}

   ins_encode %{
     __ subw(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::uxtb);
   %}
  ins_pipe(ialu_reg_reg);
%}

// int: sub (src2 & 0xffff) via uxth.
instruct SubExtI_uxth_and(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2, immI_65535 mask, rFlagsReg cr)
%{
  match(Set dst (SubI src1 (AndI src2 mask)));
  ins_cost(INSN_COST);
  format %{ "subw  $dst, $src1, $src2, uxth" %}

   ins_encode %{
     __ subw(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::uxth);
   %}
  ins_pipe(ialu_reg_reg);
%}

// long: sub (src2 & 0xff) via uxtb.
instruct SubExtL_uxtb_and(iRegLNoSp dst, iRegL src1, iRegL src2, immL_255 mask, rFlagsReg cr)
%{
  match(Set dst (SubL src1 (AndL src2 mask)));
  ins_cost(INSN_COST);
  format %{ "sub  $dst, $src1, $src2, uxtb" %}

   ins_encode %{
     __ sub(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::uxtb);
   %}
  ins_pipe(ialu_reg_reg);
%}

// long: sub (src2 & 0xffff) via uxth.
instruct SubExtL_uxth_and(iRegLNoSp dst, iRegL src1, iRegL src2, immL_65535 mask, rFlagsReg cr)
%{
  match(Set dst (SubL src1 (AndL src2 mask)));
  ins_cost(INSN_COST);
  format %{ "sub  $dst, $src1, $src2, uxth" %}

   ins_encode %{
     __ sub(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::uxth);
   %}
  ins_pipe(ialu_reg_reg);
%}

// long: sub (src2 & 0xffffffff) via uxtw.
instruct SubExtL_uxtw_and(iRegLNoSp dst, iRegL src1, iRegL src2, immL_4294967295 mask, rFlagsReg cr)
%{
  match(Set dst (SubL src1 (AndL src2 mask)));
  ins_cost(INSN_COST);
  format %{ "sub  $dst, $src1, $src2, uxtw" %}

   ins_encode %{
     __ sub(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::uxtw);
   %}
  ins_pipe(ialu_reg_reg);
%}
11504 
11505 
// Add/subtract (extended register with shift): a sign-extension shift pair
// followed by a further left shift (lshift2) folds into one extended-register
// add/sub with a shift amount.  The immIExt operand bounds lshift2 to the
// range the extended-register encoding permits (see its operand definition).
instruct AddExtL_sxtb_shift(iRegLNoSp dst, iRegL src1, iRegL src2, immIExt lshift2, immI_56 lshift1, immI_56 rshift1, rFlagsReg cr)
%{
  match(Set dst (AddL src1 (LShiftL (RShiftL (LShiftL src2 lshift1) rshift1) lshift2)));
  ins_cost(1.9 * INSN_COST);
  format %{ "add  $dst, $src1, $src2, sxtb #lshift2" %}

   ins_encode %{
     __ add(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::sxtb, ($lshift2$$constant));
   %}
  ins_pipe(ialu_reg_reg_shift);
%}

// long: add (sxth(src2) << lshift2).
instruct AddExtL_sxth_shift(iRegLNoSp dst, iRegL src1, iRegL src2, immIExt lshift2, immI_48 lshift1, immI_48 rshift1, rFlagsReg cr)
%{
  match(Set dst (AddL src1 (LShiftL (RShiftL (LShiftL src2 lshift1) rshift1) lshift2)));
  ins_cost(1.9 * INSN_COST);
  format %{ "add  $dst, $src1, $src2, sxth #lshift2" %}

   ins_encode %{
     __ add(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::sxth, ($lshift2$$constant));
   %}
  ins_pipe(ialu_reg_reg_shift);
%}

// long: add (sxtw(src2) << lshift2).
instruct AddExtL_sxtw_shift(iRegLNoSp dst, iRegL src1, iRegL src2, immIExt lshift2, immI_32 lshift1, immI_32 rshift1, rFlagsReg cr)
%{
  match(Set dst (AddL src1 (LShiftL (RShiftL (LShiftL src2 lshift1) rshift1) lshift2)));
  ins_cost(1.9 * INSN_COST);
  format %{ "add  $dst, $src1, $src2, sxtw #lshift2" %}

   ins_encode %{
     __ add(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::sxtw, ($lshift2$$constant));
   %}
  ins_pipe(ialu_reg_reg_shift);
%}

// long: sub (sxtb(src2) << lshift2).
instruct SubExtL_sxtb_shift(iRegLNoSp dst, iRegL src1, iRegL src2, immIExt lshift2, immI_56 lshift1, immI_56 rshift1, rFlagsReg cr)
%{
  match(Set dst (SubL src1 (LShiftL (RShiftL (LShiftL src2 lshift1) rshift1) lshift2)));
  ins_cost(1.9 * INSN_COST);
  format %{ "sub  $dst, $src1, $src2, sxtb #lshift2" %}

   ins_encode %{
     __ sub(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::sxtb, ($lshift2$$constant));
   %}
  ins_pipe(ialu_reg_reg_shift);
%}

// long: sub (sxth(src2) << lshift2).
instruct SubExtL_sxth_shift(iRegLNoSp dst, iRegL src1, iRegL src2, immIExt lshift2, immI_48 lshift1, immI_48 rshift1, rFlagsReg cr)
%{
  match(Set dst (SubL src1 (LShiftL (RShiftL (LShiftL src2 lshift1) rshift1) lshift2)));
  ins_cost(1.9 * INSN_COST);
  format %{ "sub  $dst, $src1, $src2, sxth #lshift2" %}

   ins_encode %{
     __ sub(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::sxth, ($lshift2$$constant));
   %}
  ins_pipe(ialu_reg_reg_shift);
%}

// long: sub (sxtw(src2) << lshift2).
instruct SubExtL_sxtw_shift(iRegLNoSp dst, iRegL src1, iRegL src2, immIExt lshift2, immI_32 lshift1, immI_32 rshift1, rFlagsReg cr)
%{
  match(Set dst (SubL src1 (LShiftL (RShiftL (LShiftL src2 lshift1) rshift1) lshift2)));
  ins_cost(1.9 * INSN_COST);
  format %{ "sub  $dst, $src1, $src2, sxtw #lshift2" %}

   ins_encode %{
     __ sub(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::sxtw, ($lshift2$$constant));
   %}
  ins_pipe(ialu_reg_reg_shift);
%}

// int: addw (sxtb(src2) << lshift2).
instruct AddExtI_sxtb_shift(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2, immIExt lshift2, immI_24 lshift1, immI_24 rshift1, rFlagsReg cr)
%{
  match(Set dst (AddI src1 (LShiftI (RShiftI (LShiftI src2 lshift1) rshift1) lshift2)));
  ins_cost(1.9 * INSN_COST);
  format %{ "addw  $dst, $src1, $src2, sxtb #lshift2" %}

   ins_encode %{
     __ addw(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::sxtb, ($lshift2$$constant));
   %}
  ins_pipe(ialu_reg_reg_shift);
%}

// int: addw (sxth(src2) << lshift2).
instruct AddExtI_sxth_shift(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2, immIExt lshift2, immI_16 lshift1, immI_16 rshift1, rFlagsReg cr)
%{
  match(Set dst (AddI src1 (LShiftI (RShiftI (LShiftI src2 lshift1) rshift1) lshift2)));
  ins_cost(1.9 * INSN_COST);
  format %{ "addw  $dst, $src1, $src2, sxth #lshift2" %}

   ins_encode %{
     __ addw(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::sxth, ($lshift2$$constant));
   %}
  ins_pipe(ialu_reg_reg_shift);
%}

// int: subw (sxtb(src2) << lshift2).
instruct SubExtI_sxtb_shift(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2, immIExt lshift2, immI_24 lshift1, immI_24 rshift1, rFlagsReg cr)
%{
  match(Set dst (SubI src1 (LShiftI (RShiftI (LShiftI src2 lshift1) rshift1) lshift2)));
  ins_cost(1.9 * INSN_COST);
  format %{ "subw  $dst, $src1, $src2, sxtb #lshift2" %}

   ins_encode %{
     __ subw(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::sxtb, ($lshift2$$constant));
   %}
  ins_pipe(ialu_reg_reg_shift);
%}

// int: subw (sxth(src2) << lshift2).
instruct SubExtI_sxth_shift(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2, immIExt lshift2, immI_16 lshift1, immI_16 rshift1, rFlagsReg cr)
%{
  match(Set dst (SubI src1 (LShiftI (RShiftI (LShiftI src2 lshift1) rshift1) lshift2)));
  ins_cost(1.9 * INSN_COST);
  format %{ "subw  $dst, $src1, $src2, sxth #lshift2" %}

   ins_encode %{
     __ subw(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::sxth, ($lshift2$$constant));
   %}
  ins_pipe(ialu_reg_reg_shift);
%}
11635 
11636 
// add long += (sign-extended int << lshift): the ConvI2L plus shift folds
// into add ..., sxtw #lshift (extended register with shift).
instruct AddExtI_shift(iRegLNoSp dst, iRegL src1, iRegIorL2I src2, immIExt lshift, rFlagsReg cr)
%{
  match(Set dst (AddL src1 (LShiftL (ConvI2L src2) lshift)));
  ins_cost(1.9 * INSN_COST);
  format %{ "add  $dst, $src1, $src2, sxtw #lshift" %}

   ins_encode %{
     __ add(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::sxtw, ($lshift$$constant));
   %}
  ins_pipe(ialu_reg_reg_shift);
%};

// sub long -= (sign-extended int << lshift).
instruct SubExtI_shift(iRegLNoSp dst, iRegL src1, iRegIorL2I src2, immIExt lshift, rFlagsReg cr)
%{
  match(Set dst (SubL src1 (LShiftL (ConvI2L src2) lshift)));
  ins_cost(1.9 * INSN_COST);
  format %{ "sub  $dst, $src1, $src2, sxtw #lshift" %}

   ins_encode %{
     __ sub(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::sxtw, ($lshift$$constant));
   %}
  ins_pipe(ialu_reg_reg_shift);
%};
11662 
11663 
// Masked zero-extension followed by a left shift, folded into the
// extended-register-with-shift form: (src2 & 0xff) << lshift -> uxtb #lshift.
instruct AddExtL_uxtb_and_shift(iRegLNoSp dst, iRegL src1, iRegL src2, immL_255 mask, immIExt lshift, rFlagsReg cr)
%{
  match(Set dst (AddL src1 (LShiftL (AndL src2 mask) lshift)));
  ins_cost(1.9 * INSN_COST);
  format %{ "add  $dst, $src1, $src2, uxtb #lshift" %}

   ins_encode %{
     __ add(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::uxtb, ($lshift$$constant));
   %}
  ins_pipe(ialu_reg_reg_shift);
%}

// long: add ((src2 & 0xffff) << lshift) via uxth #lshift.
instruct AddExtL_uxth_and_shift(iRegLNoSp dst, iRegL src1, iRegL src2, immL_65535 mask, immIExt lshift, rFlagsReg cr)
%{
  match(Set dst (AddL src1 (LShiftL (AndL src2 mask) lshift)));
  ins_cost(1.9 * INSN_COST);
  format %{ "add  $dst, $src1, $src2, uxth #lshift" %}

   ins_encode %{
     __ add(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::uxth, ($lshift$$constant));
   %}
  ins_pipe(ialu_reg_reg_shift);
%}

// long: add ((src2 & 0xffffffff) << lshift) via uxtw #lshift.
instruct AddExtL_uxtw_and_shift(iRegLNoSp dst, iRegL src1, iRegL src2, immL_4294967295 mask, immIExt lshift, rFlagsReg cr)
%{
  match(Set dst (AddL src1 (LShiftL (AndL src2 mask) lshift)));
  ins_cost(1.9 * INSN_COST);
  format %{ "add  $dst, $src1, $src2, uxtw #lshift" %}

   ins_encode %{
     __ add(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::uxtw, ($lshift$$constant));
   %}
  ins_pipe(ialu_reg_reg_shift);
%}

// long: sub ((src2 & 0xff) << lshift) via uxtb #lshift.
instruct SubExtL_uxtb_and_shift(iRegLNoSp dst, iRegL src1, iRegL src2, immL_255 mask, immIExt lshift, rFlagsReg cr)
%{
  match(Set dst (SubL src1 (LShiftL (AndL src2 mask) lshift)));
  ins_cost(1.9 * INSN_COST);
  format %{ "sub  $dst, $src1, $src2, uxtb #lshift" %}

   ins_encode %{
     __ sub(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::uxtb, ($lshift$$constant));
   %}
  ins_pipe(ialu_reg_reg_shift);
%}

// long: sub ((src2 & 0xffff) << lshift) via uxth #lshift.
instruct SubExtL_uxth_and_shift(iRegLNoSp dst, iRegL src1, iRegL src2, immL_65535 mask, immIExt lshift, rFlagsReg cr)
%{
  match(Set dst (SubL src1 (LShiftL (AndL src2 mask) lshift)));
  ins_cost(1.9 * INSN_COST);
  format %{ "sub  $dst, $src1, $src2, uxth #lshift" %}

   ins_encode %{
     __ sub(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::uxth, ($lshift$$constant));
   %}
  ins_pipe(ialu_reg_reg_shift);
%}
11728 
11729 instruct SubExtL_uxtw_and_shift(iRegLNoSp dst, iRegL src1, iRegL src2, immL_4294967295 mask, immIExt lshift, rFlagsReg cr)
11730 %{
11731   match(Set dst (SubL src1 (LShiftL (AndL src2 mask) lshift)));
11732   ins_cost(1.9 * INSN_COST);
11733   format %{ "sub  $dst, $src1, $src2, uxtw #lshift" %}
11734 
11735    ins_encode %{
11736      __ sub(as_Register($dst$$reg), as_Register($src1$$reg),
11737             as_Register($src2$$reg), ext::uxtw, ($lshift$$constant));
11738    %}
11739   ins_pipe(ialu_reg_reg_shift);
11740 %}
11741 
11742 instruct AddExtI_uxtb_and_shift(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2, immI_255 mask, immIExt lshift, rFlagsReg cr)
11743 %{
11744   match(Set dst (AddI src1 (LShiftI (AndI src2 mask) lshift)));
11745   ins_cost(1.9 * INSN_COST);
11746   format %{ "addw  $dst, $src1, $src2, uxtb #lshift" %}
11747 
11748    ins_encode %{
11749      __ addw(as_Register($dst$$reg), as_Register($src1$$reg),
11750             as_Register($src2$$reg), ext::uxtb, ($lshift$$constant));
11751    %}
11752   ins_pipe(ialu_reg_reg_shift);
11753 %}
11754 
11755 instruct AddExtI_uxth_and_shift(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2, immI_65535 mask, immIExt lshift, rFlagsReg cr)
11756 %{
11757   match(Set dst (AddI src1 (LShiftI (AndI src2 mask) lshift)));
11758   ins_cost(1.9 * INSN_COST);
11759   format %{ "addw  $dst, $src1, $src2, uxth #lshift" %}
11760 
11761    ins_encode %{
11762      __ addw(as_Register($dst$$reg), as_Register($src1$$reg),
11763             as_Register($src2$$reg), ext::uxth, ($lshift$$constant));
11764    %}
11765   ins_pipe(ialu_reg_reg_shift);
11766 %}
11767 
11768 instruct SubExtI_uxtb_and_shift(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2, immI_255 mask, immIExt lshift, rFlagsReg cr)
11769 %{
11770   match(Set dst (SubI src1 (LShiftI (AndI src2 mask) lshift)));
11771   ins_cost(1.9 * INSN_COST);
11772   format %{ "subw  $dst, $src1, $src2, uxtb #lshift" %}
11773 
11774    ins_encode %{
11775      __ subw(as_Register($dst$$reg), as_Register($src1$$reg),
11776             as_Register($src2$$reg), ext::uxtb, ($lshift$$constant));
11777    %}
11778   ins_pipe(ialu_reg_reg_shift);
11779 %}
11780 
11781 instruct SubExtI_uxth_and_shift(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2, immI_65535 mask, immIExt lshift, rFlagsReg cr)
11782 %{
11783   match(Set dst (SubI src1 (LShiftI (AndI src2 mask) lshift)));
11784   ins_cost(1.9 * INSN_COST);
11785   format %{ "subw  $dst, $src1, $src2, uxth #lshift" %}
11786 
11787    ins_encode %{
11788      __ subw(as_Register($dst$$reg), as_Register($src1$$reg),
11789             as_Register($src2$$reg), ext::uxth, ($lshift$$constant));
11790    %}
11791   ins_pipe(ialu_reg_reg_shift);
11792 %}
11793 // END This section of the file is automatically generated. Do not edit --------------
11794 
11795 // ============================================================================
11796 // Floating Point Arithmetic Instructions
11797 
// Scalar FP add/sub/mul: one A64 instruction per rule; the trailing
// 's'/'d' on the mnemonic selects single/double precision.

// float: dst = src1 + src2
instruct addF_reg_reg(vRegF dst, vRegF src1, vRegF src2) %{
  match(Set dst (AddF src1 src2));

  ins_cost(INSN_COST * 5);
  format %{ "fadds   $dst, $src1, $src2" %}

  ins_encode %{
    __ fadds(as_FloatRegister($dst$$reg),
             as_FloatRegister($src1$$reg),
             as_FloatRegister($src2$$reg));
  %}

  ins_pipe(fp_dop_reg_reg_s);
%}

// double: dst = src1 + src2
instruct addD_reg_reg(vRegD dst, vRegD src1, vRegD src2) %{
  match(Set dst (AddD src1 src2));

  ins_cost(INSN_COST * 5);
  format %{ "faddd   $dst, $src1, $src2" %}

  ins_encode %{
    __ faddd(as_FloatRegister($dst$$reg),
             as_FloatRegister($src1$$reg),
             as_FloatRegister($src2$$reg));
  %}

  ins_pipe(fp_dop_reg_reg_d);
%}

// float: dst = src1 - src2
instruct subF_reg_reg(vRegF dst, vRegF src1, vRegF src2) %{
  match(Set dst (SubF src1 src2));

  ins_cost(INSN_COST * 5);
  format %{ "fsubs   $dst, $src1, $src2" %}

  ins_encode %{
    __ fsubs(as_FloatRegister($dst$$reg),
             as_FloatRegister($src1$$reg),
             as_FloatRegister($src2$$reg));
  %}

  ins_pipe(fp_dop_reg_reg_s);
%}

// double: dst = src1 - src2
instruct subD_reg_reg(vRegD dst, vRegD src1, vRegD src2) %{
  match(Set dst (SubD src1 src2));

  ins_cost(INSN_COST * 5);
  format %{ "fsubd   $dst, $src1, $src2" %}

  ins_encode %{
    __ fsubd(as_FloatRegister($dst$$reg),
             as_FloatRegister($src1$$reg),
             as_FloatRegister($src2$$reg));
  %}

  ins_pipe(fp_dop_reg_reg_d);
%}

// float: dst = src1 * src2
instruct mulF_reg_reg(vRegF dst, vRegF src1, vRegF src2) %{
  match(Set dst (MulF src1 src2));

  ins_cost(INSN_COST * 6);
  format %{ "fmuls   $dst, $src1, $src2" %}

  ins_encode %{
    __ fmuls(as_FloatRegister($dst$$reg),
             as_FloatRegister($src1$$reg),
             as_FloatRegister($src2$$reg));
  %}

  ins_pipe(fp_dop_reg_reg_s);
%}

// double: dst = src1 * src2
instruct mulD_reg_reg(vRegD dst, vRegD src1, vRegD src2) %{
  match(Set dst (MulD src1 src2));

  ins_cost(INSN_COST * 6);
  format %{ "fmuld   $dst, $src1, $src2" %}

  ins_encode %{
    __ fmuld(as_FloatRegister($dst$$reg),
             as_FloatRegister($src1$$reg),
             as_FloatRegister($src2$$reg));
  %}

  ins_pipe(fp_dop_reg_reg_d);
%}
11887 
// Scalar fused multiply-accumulate rules, enabled only when UseFMA is set.
// The ideal FmaF/FmaD node computes src1 * src2 + src3; the Neg-wrapped
// match alternatives map the sign combinations onto the four A64
// instructions fmadd/fmsub/fnmadd/fnmsub.  Where two match() lines are
// given, either operand of the multiply may carry the negation.

// src1 * src2 + src3
instruct maddF_reg_reg(vRegF dst, vRegF src1, vRegF src2, vRegF src3) %{
  predicate(UseFMA);
  match(Set dst (FmaF src3 (Binary src1 src2)));

  format %{ "fmadds   $dst, $src1, $src2, $src3" %}

  ins_encode %{
    __ fmadds(as_FloatRegister($dst$$reg),
             as_FloatRegister($src1$$reg),
             as_FloatRegister($src2$$reg),
             as_FloatRegister($src3$$reg));
  %}

  ins_pipe(pipe_class_default);
%}

// src1 * src2 + src3
instruct maddD_reg_reg(vRegD dst, vRegD src1, vRegD src2, vRegD src3) %{
  predicate(UseFMA);
  match(Set dst (FmaD src3 (Binary src1 src2)));

  format %{ "fmaddd   $dst, $src1, $src2, $src3" %}

  ins_encode %{
    __ fmaddd(as_FloatRegister($dst$$reg),
             as_FloatRegister($src1$$reg),
             as_FloatRegister($src2$$reg),
             as_FloatRegister($src3$$reg));
  %}

  ins_pipe(pipe_class_default);
%}

// -src1 * src2 + src3
instruct msubF_reg_reg(vRegF dst, vRegF src1, vRegF src2, vRegF src3) %{
  predicate(UseFMA);
  match(Set dst (FmaF src3 (Binary (NegF src1) src2)));
  match(Set dst (FmaF src3 (Binary src1 (NegF src2))));

  format %{ "fmsubs   $dst, $src1, $src2, $src3" %}

  ins_encode %{
    __ fmsubs(as_FloatRegister($dst$$reg),
              as_FloatRegister($src1$$reg),
              as_FloatRegister($src2$$reg),
              as_FloatRegister($src3$$reg));
  %}

  ins_pipe(pipe_class_default);
%}

// -src1 * src2 + src3
instruct msubD_reg_reg(vRegD dst, vRegD src1, vRegD src2, vRegD src3) %{
  predicate(UseFMA);
  match(Set dst (FmaD src3 (Binary (NegD src1) src2)));
  match(Set dst (FmaD src3 (Binary src1 (NegD src2))));

  format %{ "fmsubd   $dst, $src1, $src2, $src3" %}

  ins_encode %{
    __ fmsubd(as_FloatRegister($dst$$reg),
              as_FloatRegister($src1$$reg),
              as_FloatRegister($src2$$reg),
              as_FloatRegister($src3$$reg));
  %}

  ins_pipe(pipe_class_default);
%}

// -src1 * src2 - src3
instruct mnaddF_reg_reg(vRegF dst, vRegF src1, vRegF src2, vRegF src3) %{
  predicate(UseFMA);
  match(Set dst (FmaF (NegF src3) (Binary (NegF src1) src2)));
  match(Set dst (FmaF (NegF src3) (Binary src1 (NegF src2))));

  format %{ "fnmadds  $dst, $src1, $src2, $src3" %}

  ins_encode %{
    __ fnmadds(as_FloatRegister($dst$$reg),
               as_FloatRegister($src1$$reg),
               as_FloatRegister($src2$$reg),
               as_FloatRegister($src3$$reg));
  %}

  ins_pipe(pipe_class_default);
%}

// -src1 * src2 - src3
instruct mnaddD_reg_reg(vRegD dst, vRegD src1, vRegD src2, vRegD src3) %{
  predicate(UseFMA);
  match(Set dst (FmaD (NegD src3) (Binary (NegD src1) src2)));
  match(Set dst (FmaD (NegD src3) (Binary src1 (NegD src2))));

  format %{ "fnmaddd   $dst, $src1, $src2, $src3" %}

  ins_encode %{
    __ fnmaddd(as_FloatRegister($dst$$reg),
               as_FloatRegister($src1$$reg),
               as_FloatRegister($src2$$reg),
               as_FloatRegister($src3$$reg));
  %}

  ins_pipe(pipe_class_default);
%}

// src1 * src2 - src3
// NOTE(review): the 'zero' operand is not referenced by the match rule or
// the encoder (same in the double rule below) -- looks like a leftover;
// confirm against history before removing.
instruct mnsubF_reg_reg(vRegF dst, vRegF src1, vRegF src2, vRegF src3, immF0 zero) %{
  predicate(UseFMA);
  match(Set dst (FmaF (NegF src3) (Binary src1 src2)));

  format %{ "fnmsubs  $dst, $src1, $src2, $src3" %}

  ins_encode %{
    __ fnmsubs(as_FloatRegister($dst$$reg),
               as_FloatRegister($src1$$reg),
               as_FloatRegister($src2$$reg),
               as_FloatRegister($src3$$reg));
  %}

  ins_pipe(pipe_class_default);
%}

// src1 * src2 - src3
instruct mnsubD_reg_reg(vRegD dst, vRegD src1, vRegD src2, vRegD src3, immD0 zero) %{
  predicate(UseFMA);
  match(Set dst (FmaD (NegD src3) (Binary src1 src2)));

  format %{ "fnmsubd   $dst, $src1, $src2, $src3" %}

  ins_encode %{
  // n.b. insn name should be fnmsubd
    __ fnmsub(as_FloatRegister($dst$$reg),
              as_FloatRegister($src1$$reg),
              as_FloatRegister($src2$$reg),
              as_FloatRegister($src3$$reg));
  %}

  ins_pipe(pipe_class_default);
%}
12028 
12029 
// float: dst = src1 / src2.  Division is iterative, hence the much higher
// cost than the other FP ops.
instruct divF_reg_reg(vRegF dst, vRegF src1, vRegF src2) %{
  match(Set dst (DivF src1  src2));

  ins_cost(INSN_COST * 18);
  format %{ "fdivs   $dst, $src1, $src2" %}

  ins_encode %{
    __ fdivs(as_FloatRegister($dst$$reg),
             as_FloatRegister($src1$$reg),
             as_FloatRegister($src2$$reg));
  %}

  ins_pipe(fp_div_s);
%}

// double: dst = src1 / src2 (costlier still than the float divide).
instruct divD_reg_reg(vRegD dst, vRegD src1, vRegD src2) %{
  match(Set dst (DivD src1  src2));

  ins_cost(INSN_COST * 32);
  format %{ "fdivd   $dst, $src1, $src2" %}

  ins_encode %{
    __ fdivd(as_FloatRegister($dst$$reg),
             as_FloatRegister($src1$$reg),
             as_FloatRegister($src2$$reg));
  %}

  ins_pipe(fp_div_d);
%}
12059 
// float: dst = -src.
instruct negF_reg_reg(vRegF dst, vRegF src) %{
  match(Set dst (NegF src));

  ins_cost(INSN_COST * 3);
  // Fixed format string: the encoder emits fnegs; the old "fneg" did not
  // name the actual instruction (cf. negD_reg_reg, which prints "fnegd").
  format %{ "fnegs   $dst, $src" %}

  ins_encode %{
    __ fnegs(as_FloatRegister($dst$$reg),
             as_FloatRegister($src$$reg));
  %}

  ins_pipe(fp_uop_s);
%}
12073 
// double: dst = -src.
instruct negD_reg_reg(vRegD dst, vRegD src) %{
  match(Set dst (NegD src));

  ins_cost(INSN_COST * 3);
  format %{ "fnegd   $dst, $src" %}

  ins_encode %{
    __ fnegd(as_FloatRegister($dst$$reg),
             as_FloatRegister($src$$reg));
  %}

  ins_pipe(fp_uop_d);
%}
12087 
// float: dst = |src|.
instruct absF_reg(vRegF dst, vRegF src) %{
  match(Set dst (AbsF src));

  ins_cost(INSN_COST * 3);
  format %{ "fabss   $dst, $src" %}
  ins_encode %{
    __ fabss(as_FloatRegister($dst$$reg),
             as_FloatRegister($src$$reg));
  %}

  ins_pipe(fp_uop_s);
%}

// double: dst = |src|.
instruct absD_reg(vRegD dst, vRegD src) %{
  match(Set dst (AbsD src));

  ins_cost(INSN_COST * 3);
  format %{ "fabsd   $dst, $src" %}
  ins_encode %{
    __ fabsd(as_FloatRegister($dst$$reg),
             as_FloatRegister($src$$reg));
  %}

  ins_pipe(fp_uop_d);
%}
12113 
// double: dst = sqrt(src).
instruct sqrtD_reg(vRegD dst, vRegD src) %{
  match(Set dst (SqrtD src));

  ins_cost(INSN_COST * 50);
  format %{ "fsqrtd  $dst, $src" %}
  ins_encode %{
    __ fsqrtd(as_FloatRegister($dst$$reg),
             as_FloatRegister($src$$reg));
  %}

  // Fixed pipeline class: this rule emits the double-precision fsqrtd but
  // was tagged fp_div_s -- the s/d classes were swapped with sqrtF_reg.
  ins_pipe(fp_div_d);
%}
12126 
// float: dst = sqrt(src).  The ideal graph only has a double Sqrt node, so
// float sqrt appears as ConvD2F(SqrtD(ConvF2D src)); fsqrts computes the
// same correctly-rounded single-precision result directly.
instruct sqrtF_reg(vRegF dst, vRegF src) %{
  match(Set dst (ConvD2F (SqrtD (ConvF2D src))));

  ins_cost(INSN_COST * 50);
  format %{ "fsqrts  $dst, $src" %}
  ins_encode %{
    __ fsqrts(as_FloatRegister($dst$$reg),
             as_FloatRegister($src$$reg));
  %}

  // Fixed pipeline class: single-precision fsqrts was tagged fp_div_d --
  // the s/d classes were swapped with sqrtD_reg.
  ins_pipe(fp_div_s);
%}
12139 
12140 // ============================================================================
12141 // Logical Instructions
12142 
12143 // Integer Logical Instructions
12144 
12145 // And Instructions
12146 
12147 
// int: dst = src1 & src2.
// NOTE(review): cr is declared but there is no effect(KILL cr) and andw
// does not set flags -- looks vestigial; confirm before removing.
instruct andI_reg_reg(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2, rFlagsReg cr) %{
  match(Set dst (AndI src1 src2));

  format %{ "andw  $dst, $src1, $src2\t# int" %}

  ins_cost(INSN_COST);
  ins_encode %{
    __ andw(as_Register($dst$$reg),
            as_Register($src1$$reg),
            as_Register($src2$$reg));
  %}

  ins_pipe(ialu_reg_reg);
%}
12162 
// int: dst = src1 & constant; immILog restricts the immediate to values
// encodable as an A64 logical immediate.
instruct andI_reg_imm(iRegINoSp dst, iRegIorL2I src1, immILog src2, rFlagsReg cr) %{
  match(Set dst (AndI src1 src2));

  // Fixed format string: the encoder emits andw (no flag update); the old
  // "andsw" wrongly suggested the flag-setting ANDS form.
  format %{ "andw  $dst, $src1, $src2\t# int" %}

  ins_cost(INSN_COST);
  ins_encode %{
    __ andw(as_Register($dst$$reg),
            as_Register($src1$$reg),
            (unsigned long)($src2$$constant));
  %}

  ins_pipe(ialu_reg_imm);
%}
12177 
// Or Instructions

// int: dst = src1 | src2.
instruct orI_reg_reg(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2) %{
  match(Set dst (OrI src1 src2));

  format %{ "orrw  $dst, $src1, $src2\t# int" %}

  ins_cost(INSN_COST);
  ins_encode %{
    __ orrw(as_Register($dst$$reg),
            as_Register($src1$$reg),
            as_Register($src2$$reg));
  %}

  ins_pipe(ialu_reg_reg);
%}

// int: dst = src1 | constant (immILog = encodable logical immediate).
instruct orI_reg_imm(iRegINoSp dst, iRegIorL2I src1, immILog src2) %{
  match(Set dst (OrI src1 src2));

  format %{ "orrw  $dst, $src1, $src2\t# int" %}

  ins_cost(INSN_COST);
  ins_encode %{
    __ orrw(as_Register($dst$$reg),
            as_Register($src1$$reg),
            (unsigned long)($src2$$constant));
  %}

  ins_pipe(ialu_reg_imm);
%}

// Xor Instructions

// int: dst = src1 ^ src2.
instruct xorI_reg_reg(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2) %{
  match(Set dst (XorI src1 src2));

  format %{ "eorw  $dst, $src1, $src2\t# int" %}

  ins_cost(INSN_COST);
  ins_encode %{
    __ eorw(as_Register($dst$$reg),
            as_Register($src1$$reg),
            as_Register($src2$$reg));
  %}

  ins_pipe(ialu_reg_reg);
%}

// int: dst = src1 ^ constant (immILog = encodable logical immediate).
instruct xorI_reg_imm(iRegINoSp dst, iRegIorL2I src1, immILog src2) %{
  match(Set dst (XorI src1 src2));

  format %{ "eorw  $dst, $src1, $src2\t# int" %}

  ins_cost(INSN_COST);
  ins_encode %{
    __ eorw(as_Register($dst$$reg),
            as_Register($src1$$reg),
            (unsigned long)($src2$$constant));
  %}

  ins_pipe(ialu_reg_imm);
%}
12241 
12242 // Long Logical Instructions
12243 // TODO
12244 
// 64-bit logical instructions.  Format-string comments previously said
// "# int"; corrected to "# long" to match the AndL/OrL/XorL nodes matched
// and the 64-bit register forms emitted.

// long: dst = src1 & src2.
// NOTE(review): cr is declared but unused (andr does not set flags).
instruct andL_reg_reg(iRegLNoSp dst, iRegL src1, iRegL src2, rFlagsReg cr) %{
  match(Set dst (AndL src1 src2));

  format %{ "and  $dst, $src1, $src2\t# long" %}

  ins_cost(INSN_COST);
  ins_encode %{
    __ andr(as_Register($dst$$reg),
            as_Register($src1$$reg),
            as_Register($src2$$reg));
  %}

  ins_pipe(ialu_reg_reg);
%}

// long: dst = src1 & constant (immLLog = encodable logical immediate).
instruct andL_reg_imm(iRegLNoSp dst, iRegL src1, immLLog src2, rFlagsReg cr) %{
  match(Set dst (AndL src1 src2));

  format %{ "and  $dst, $src1, $src2\t# long" %}

  ins_cost(INSN_COST);
  ins_encode %{
    __ andr(as_Register($dst$$reg),
            as_Register($src1$$reg),
            (unsigned long)($src2$$constant));
  %}

  ins_pipe(ialu_reg_imm);
%}

// Or Instructions

// long: dst = src1 | src2.
instruct orL_reg_reg(iRegLNoSp dst, iRegL src1, iRegL src2) %{
  match(Set dst (OrL src1 src2));

  format %{ "orr  $dst, $src1, $src2\t# long" %}

  ins_cost(INSN_COST);
  ins_encode %{
    __ orr(as_Register($dst$$reg),
           as_Register($src1$$reg),
           as_Register($src2$$reg));
  %}

  ins_pipe(ialu_reg_reg);
%}

// long: dst = src1 | constant (immLLog = encodable logical immediate).
instruct orL_reg_imm(iRegLNoSp dst, iRegL src1, immLLog src2) %{
  match(Set dst (OrL src1 src2));

  format %{ "orr  $dst, $src1, $src2\t# long" %}

  ins_cost(INSN_COST);
  ins_encode %{
    __ orr(as_Register($dst$$reg),
           as_Register($src1$$reg),
           (unsigned long)($src2$$constant));
  %}

  ins_pipe(ialu_reg_imm);
%}

// Xor Instructions

// long: dst = src1 ^ src2.
instruct xorL_reg_reg(iRegLNoSp dst, iRegL src1, iRegL src2) %{
  match(Set dst (XorL src1 src2));

  format %{ "eor  $dst, $src1, $src2\t# long" %}

  ins_cost(INSN_COST);
  ins_encode %{
    __ eor(as_Register($dst$$reg),
           as_Register($src1$$reg),
           as_Register($src2$$reg));
  %}

  ins_pipe(ialu_reg_reg);
%}

// long: dst = src1 ^ constant (immLLog = encodable logical immediate).
instruct xorL_reg_imm(iRegLNoSp dst, iRegL src1, immLLog src2) %{
  match(Set dst (XorL src1 src2));

  ins_cost(INSN_COST);
  format %{ "eor  $dst, $src1, $src2\t# long" %}

  ins_encode %{
    __ eor(as_Register($dst$$reg),
           as_Register($src1$$reg),
           (unsigned long)($src2$$constant));
  %}

  ins_pipe(ialu_reg_imm);
%}
12338 
// Sign-extend int to long: sbfm with immr=0, imms=31 is the sxtw alias.
instruct convI2L_reg_reg(iRegLNoSp dst, iRegIorL2I src)
%{
  match(Set dst (ConvI2L src));

  ins_cost(INSN_COST);
  format %{ "sxtw  $dst, $src\t# i2l" %}
  ins_encode %{
    __ sbfm($dst$$Register, $src$$Register, 0, 31);
  %}
  ins_pipe(ialu_reg_shift);
%}

// this pattern occurs in bigmath arithmetic
// Zero-extend int to long: the AndL with the 0xffffffff mask is folded
// away and ubfm (== uxtw) performs the zero-extension directly.
instruct convUI2L_reg_reg(iRegLNoSp dst, iRegIorL2I src, immL_32bits mask)
%{
  match(Set dst (AndL (ConvI2L src) mask));

  ins_cost(INSN_COST);
  format %{ "ubfm  $dst, $src, 0, 31\t# ui2l" %}
  ins_encode %{
    __ ubfm($dst$$Register, $src$$Register, 0, 31);
  %}

  ins_pipe(ialu_reg_shift);
%}
12364 
// Truncate long to int: a 32-bit movw copies the low word (writing a W
// register zeroes the upper half, which is all truncation needs).
instruct convL2I_reg(iRegINoSp dst, iRegL src) %{
  match(Set dst (ConvL2I src));

  ins_cost(INSN_COST);
  format %{ "movw  $dst, $src \t// l2i" %}

  ins_encode %{
    __ movw(as_Register($dst$$reg), as_Register($src$$reg));
  %}

  ins_pipe(ialu_reg);
%}

// Normalize an int to a boolean (0/1): compare against zero then cset on
// NE.  Flags are clobbered, hence KILL cr.
instruct convI2B(iRegINoSp dst, iRegIorL2I src, rFlagsReg cr)
%{
  match(Set dst (Conv2B src));
  effect(KILL cr);

  format %{
    "cmpw $src, zr\n\t"
    "cset $dst, ne"
  %}

  ins_encode %{
    __ cmpw(as_Register($src$$reg), zr);
    __ cset(as_Register($dst$$reg), Assembler::NE);
  %}

  ins_pipe(ialu_reg);
%}

// Normalize a pointer to a boolean (0/1): 64-bit compare + cset.
instruct convP2B(iRegINoSp dst, iRegP src, rFlagsReg cr)
%{
  match(Set dst (Conv2B src));
  effect(KILL cr);

  format %{
    "cmp  $src, zr\n\t"
    "cset $dst, ne"
  %}

  ins_encode %{
    __ cmp(as_Register($src$$reg), zr);
    __ cset(as_Register($dst$$reg), Assembler::NE);
  %}

  ins_pipe(ialu_reg);
%}
12413 
12414 instruct convD2F_reg(vRegF dst, vRegD src) %{
12415   match(Set dst (ConvD2F src));
12416 
12417   ins_cost(INSN_COST * 5);
12418   format %{ "fcvtd  $dst, $src \t// d2f" %}
12419 
12420   ins_encode %{
12421     __ fcvtd(as_FloatRegister($dst$$reg), as_FloatRegister($src$$reg));
12422   %}
12423 
12424   ins_pipe(fp_d2f);
12425 %}
12426 
12427 instruct convF2D_reg(vRegD dst, vRegF src) %{
12428   match(Set dst (ConvF2D src));
12429 
12430   ins_cost(INSN_COST * 5);
12431   format %{ "fcvts  $dst, $src \t// f2d" %}
12432 
12433   ins_encode %{
12434     __ fcvts(as_FloatRegister($dst$$reg), as_FloatRegister($src$$reg));
12435   %}
12436 
12437   ins_pipe(fp_f2d);
12438 %}
12439 
12440 instruct convF2I_reg_reg(iRegINoSp dst, vRegF src) %{
12441   match(Set dst (ConvF2I src));
12442 
12443   ins_cost(INSN_COST * 5);
12444   format %{ "fcvtzsw  $dst, $src \t// f2i" %}
12445 
12446   ins_encode %{
12447     __ fcvtzsw(as_Register($dst$$reg), as_FloatRegister($src$$reg));
12448   %}
12449 
12450   ins_pipe(fp_f2i);
12451 %}
12452 
12453 instruct convF2L_reg_reg(iRegLNoSp dst, vRegF src) %{
12454   match(Set dst (ConvF2L src));
12455 
12456   ins_cost(INSN_COST * 5);
12457   format %{ "fcvtzs  $dst, $src \t// f2l" %}
12458 
12459   ins_encode %{
12460     __ fcvtzs(as_Register($dst$$reg), as_FloatRegister($src$$reg));
12461   %}
12462 
12463   ins_pipe(fp_f2l);
12464 %}
12465 
12466 instruct convI2F_reg_reg(vRegF dst, iRegIorL2I src) %{
12467   match(Set dst (ConvI2F src));
12468 
12469   ins_cost(INSN_COST * 5);
12470   format %{ "scvtfws  $dst, $src \t// i2f" %}
12471 
12472   ins_encode %{
12473     __ scvtfws(as_FloatRegister($dst$$reg), as_Register($src$$reg));
12474   %}
12475 
12476   ins_pipe(fp_i2f);
12477 %}
12478 
12479 instruct convL2F_reg_reg(vRegF dst, iRegL src) %{
12480   match(Set dst (ConvL2F src));
12481 
12482   ins_cost(INSN_COST * 5);
12483   format %{ "scvtfs  $dst, $src \t// l2f" %}
12484 
12485   ins_encode %{
12486     __ scvtfs(as_FloatRegister($dst$$reg), as_Register($src$$reg));
12487   %}
12488 
12489   ins_pipe(fp_l2f);
12490 %}
12491 
12492 instruct convD2I_reg_reg(iRegINoSp dst, vRegD src) %{
12493   match(Set dst (ConvD2I src));
12494 
12495   ins_cost(INSN_COST * 5);
12496   format %{ "fcvtzdw  $dst, $src \t// d2i" %}
12497 
12498   ins_encode %{
12499     __ fcvtzdw(as_Register($dst$$reg), as_FloatRegister($src$$reg));
12500   %}
12501 
12502   ins_pipe(fp_d2i);
12503 %}
12504 
12505 instruct convD2L_reg_reg(iRegLNoSp dst, vRegD src) %{
12506   match(Set dst (ConvD2L src));
12507 
12508   ins_cost(INSN_COST * 5);
12509   format %{ "fcvtzd  $dst, $src \t// d2l" %}
12510 
12511   ins_encode %{
12512     __ fcvtzd(as_Register($dst$$reg), as_FloatRegister($src$$reg));
12513   %}
12514 
12515   ins_pipe(fp_d2l);
12516 %}
12517 
12518 instruct convI2D_reg_reg(vRegD dst, iRegIorL2I src) %{
12519   match(Set dst (ConvI2D src));
12520 
12521   ins_cost(INSN_COST * 5);
12522   format %{ "scvtfwd  $dst, $src \t// i2d" %}
12523 
12524   ins_encode %{
12525     __ scvtfwd(as_FloatRegister($dst$$reg), as_Register($src$$reg));
12526   %}
12527 
12528   ins_pipe(fp_i2d);
12529 %}
12530 
12531 instruct convL2D_reg_reg(vRegD dst, iRegL src) %{
12532   match(Set dst (ConvL2D src));
12533 
12534   ins_cost(INSN_COST * 5);
12535   format %{ "scvtfd  $dst, $src \t// l2d" %}
12536 
12537   ins_encode %{
12538     __ scvtfd(as_FloatRegister($dst$$reg), as_Register($src$$reg));
12539   %}
12540 
12541   ins_pipe(fp_l2d);
12542 %}
12543 
12544 // stack <-> reg and reg <-> reg shuffles with no conversion
12545 
// Bit-for-bit reinterpretation moves (Float.floatToRawIntBits and
// friends) where the source lives in a stack slot: load the raw bits from
// the spill slot straight into a register of the other register file.

// stack float bits -> int register
instruct MoveF2I_stack_reg(iRegINoSp dst, stackSlotF src) %{

  match(Set dst (MoveF2I src));

  effect(DEF dst, USE src);

  ins_cost(4 * INSN_COST);

  format %{ "ldrw $dst, $src\t# MoveF2I_stack_reg" %}

  ins_encode %{
    __ ldrw($dst$$Register, Address(sp, $src$$disp));
  %}

  ins_pipe(iload_reg_reg);

%}

// stack int bits -> float register
instruct MoveI2F_stack_reg(vRegF dst, stackSlotI src) %{

  match(Set dst (MoveI2F src));

  effect(DEF dst, USE src);

  ins_cost(4 * INSN_COST);

  format %{ "ldrs $dst, $src\t# MoveI2F_stack_reg" %}

  ins_encode %{
    __ ldrs(as_FloatRegister($dst$$reg), Address(sp, $src$$disp));
  %}

  ins_pipe(pipe_class_memory);

%}

// stack double bits -> long register
instruct MoveD2L_stack_reg(iRegLNoSp dst, stackSlotD src) %{

  match(Set dst (MoveD2L src));

  effect(DEF dst, USE src);

  ins_cost(4 * INSN_COST);

  format %{ "ldr $dst, $src\t# MoveD2L_stack_reg" %}

  ins_encode %{
    __ ldr($dst$$Register, Address(sp, $src$$disp));
  %}

  ins_pipe(iload_reg_reg);

%}

// stack long bits -> double register
instruct MoveL2D_stack_reg(vRegD dst, stackSlotL src) %{

  match(Set dst (MoveL2D src));

  effect(DEF dst, USE src);

  ins_cost(4 * INSN_COST);

  format %{ "ldrd $dst, $src\t# MoveL2D_stack_reg" %}

  ins_encode %{
    __ ldrd(as_FloatRegister($dst$$reg), Address(sp, $src$$disp));
  %}

  ins_pipe(pipe_class_memory);

%}
12617 
// Reinterpretation moves where the destination is a stack slot: store the
// raw register bits into the spill slot of the other type.

// float register bits -> stack int slot
instruct MoveF2I_reg_stack(stackSlotI dst, vRegF src) %{

  match(Set dst (MoveF2I src));

  effect(DEF dst, USE src);

  ins_cost(INSN_COST);

  format %{ "strs $src, $dst\t# MoveF2I_reg_stack" %}

  ins_encode %{
    __ strs(as_FloatRegister($src$$reg), Address(sp, $dst$$disp));
  %}

  ins_pipe(pipe_class_memory);

%}

// int register bits -> stack float slot
instruct MoveI2F_reg_stack(stackSlotF dst, iRegI src) %{

  match(Set dst (MoveI2F src));

  effect(DEF dst, USE src);

  ins_cost(INSN_COST);

  format %{ "strw $src, $dst\t# MoveI2F_reg_stack" %}

  ins_encode %{
    __ strw($src$$Register, Address(sp, $dst$$disp));
  %}

  ins_pipe(istore_reg_reg);

%}
12653 
// double register bits -> stack long slot (bit-for-bit move).
instruct MoveD2L_reg_stack(stackSlotL dst, vRegD src) %{

  match(Set dst (MoveD2L src));

  effect(DEF dst, USE src);

  ins_cost(INSN_COST);

  // Fixed format string: operands were printed as "$dst, $src", but a
  // store names the source register first (cf. MoveF2I_reg_stack above
  // and the strd call below).
  format %{ "strd $src, $dst\t# MoveD2L_reg_stack" %}

  ins_encode %{
    __ strd(as_FloatRegister($src$$reg), Address(sp, $dst$$disp));
  %}

  ins_pipe(pipe_class_memory);

%}
12671 
// long register bits -> stack double slot (bit-for-bit move).
instruct MoveL2D_reg_stack(stackSlotD dst, iRegL src) %{

  match(Set dst (MoveL2D src));

  effect(DEF dst, USE src);

  ins_cost(INSN_COST);

  format %{ "str $src, $dst\t# MoveL2D_reg_stack" %}

  ins_encode %{
    __ str($src$$Register, Address(sp, $dst$$disp));
  %}

  ins_pipe(istore_reg_reg);

%}
12689 
// Register-to-register reinterpretation moves: fmov copies raw bits
// between the general and FP register files with no conversion.

// float bits -> int register
instruct MoveF2I_reg_reg(iRegINoSp dst, vRegF src) %{

  match(Set dst (MoveF2I src));

  effect(DEF dst, USE src);

  ins_cost(INSN_COST);

  format %{ "fmovs $dst, $src\t# MoveF2I_reg_reg" %}

  ins_encode %{
    __ fmovs($dst$$Register, as_FloatRegister($src$$reg));
  %}

  ins_pipe(fp_f2i);

%}

// int bits -> float register
instruct MoveI2F_reg_reg(vRegF dst, iRegI src) %{

  match(Set dst (MoveI2F src));

  effect(DEF dst, USE src);

  ins_cost(INSN_COST);

  format %{ "fmovs $dst, $src\t# MoveI2F_reg_reg" %}

  ins_encode %{
    __ fmovs(as_FloatRegister($dst$$reg), $src$$Register);
  %}

  ins_pipe(fp_i2f);

%}

// double bits -> long register
instruct MoveD2L_reg_reg(iRegLNoSp dst, vRegD src) %{

  match(Set dst (MoveD2L src));

  effect(DEF dst, USE src);

  ins_cost(INSN_COST);

  format %{ "fmovd $dst, $src\t# MoveD2L_reg_reg" %}

  ins_encode %{
    __ fmovd($dst$$Register, as_FloatRegister($src$$reg));
  %}

  ins_pipe(fp_d2l);

%}

// long bits -> double register
instruct MoveL2D_reg_reg(vRegD dst, iRegL src) %{

  match(Set dst (MoveL2D src));

  effect(DEF dst, USE src);

  ins_cost(INSN_COST);

  format %{ "fmovd $dst, $src\t# MoveL2D_reg_reg" %}

  ins_encode %{
    __ fmovd(as_FloatRegister($dst$$reg), $src$$Register);
  %}

  ins_pipe(fp_l2d);

%}
12761 
12762 // ============================================================================
12763 // clearing of an array
12764 
// Zero cnt words starting at base.  Operands are pinned to r10/r11 and
// clobbered (USE_KILL) because the zero_words stub runs on those registers.
instruct clearArray_reg_reg(iRegL_R11 cnt, iRegP_R10 base, Universe dummy, rFlagsReg cr)
%{
  match(Set dummy (ClearArray cnt base));
  effect(USE_KILL cnt, USE_KILL base);

  ins_cost(4 * INSN_COST);
  format %{ "ClearArray $cnt, $base" %}

  ins_encode %{
    __ zero_words($base$$Register, $cnt$$Register);
  %}

  ins_pipe(pipe_class_memory);
%}

// Constant-length variant: only applies when the word count is below
// BlockZeroingLowLimit (scaled from bytes to words), i.e. small enough that
// the inline constant-count expansion of zero_words is preferable.
instruct clearArray_imm_reg(immL cnt, iRegP_R10 base, Universe dummy, rFlagsReg cr)
%{
  predicate((u_int64_t)n->in(2)->get_long()
            < (u_int64_t)(BlockZeroingLowLimit >> LogBytesPerWord));
  match(Set dummy (ClearArray cnt base));
  effect(USE_KILL base);

  ins_cost(4 * INSN_COST);
  format %{ "ClearArray $cnt, $base" %}

  ins_encode %{
    __ zero_words($base$$Register, (u_int64_t)$cnt$$constant);
  %}

  ins_pipe(pipe_class_memory);
%}
12796 
12797 // ============================================================================
12798 // Overflow Math Instructions
12799 
// Int add overflow check: CMNW sets flags as for op1 + op2; signed
// overflow is observed via the V flag (BoolTest::overflow).
instruct overflowAddI_reg_reg(rFlagsReg cr, iRegIorL2I op1, iRegIorL2I op2)
%{
  match(Set cr (OverflowAddI op1 op2));

  format %{ "cmnw  $op1, $op2\t# overflow check int" %}
  ins_cost(INSN_COST);
  ins_encode %{
    __ cmnw($op1$$Register, $op2$$Register);
  %}

  ins_pipe(icmp_reg_reg);
%}

// As above, with an add/sub-encodable immediate second operand.
instruct overflowAddI_reg_imm(rFlagsReg cr, iRegIorL2I op1, immIAddSub op2)
%{
  match(Set cr (OverflowAddI op1 op2));

  format %{ "cmnw  $op1, $op2\t# overflow check int" %}
  ins_cost(INSN_COST);
  ins_encode %{
    __ cmnw($op1$$Register, $op2$$constant);
  %}

  ins_pipe(icmp_reg_imm);
%}

// Long add overflow check: 64-bit CMN form of the pattern above.
instruct overflowAddL_reg_reg(rFlagsReg cr, iRegL op1, iRegL op2)
%{
  match(Set cr (OverflowAddL op1 op2));

  format %{ "cmn   $op1, $op2\t# overflow check long" %}
  ins_cost(INSN_COST);
  ins_encode %{
    __ cmn($op1$$Register, $op2$$Register);
  %}

  ins_pipe(icmp_reg_reg);
%}

// As above, with an add/sub-encodable immediate second operand.
instruct overflowAddL_reg_imm(rFlagsReg cr, iRegL op1, immLAddSub op2)
%{
  match(Set cr (OverflowAddL op1 op2));

  format %{ "cmn   $op1, $op2\t# overflow check long" %}
  ins_cost(INSN_COST);
  ins_encode %{
    __ cmn($op1$$Register, $op2$$constant);
  %}

  ins_pipe(icmp_reg_imm);
%}

// Int subtract overflow check: CMPW sets flags as for op1 - op2.
instruct overflowSubI_reg_reg(rFlagsReg cr, iRegIorL2I op1, iRegIorL2I op2)
%{
  match(Set cr (OverflowSubI op1 op2));

  format %{ "cmpw  $op1, $op2\t# overflow check int" %}
  ins_cost(INSN_COST);
  ins_encode %{
    __ cmpw($op1$$Register, $op2$$Register);
  %}

  ins_pipe(icmp_reg_reg);
%}

// As above, with an add/sub-encodable immediate second operand.
instruct overflowSubI_reg_imm(rFlagsReg cr, iRegIorL2I op1, immIAddSub op2)
%{
  match(Set cr (OverflowSubI op1 op2));

  format %{ "cmpw  $op1, $op2\t# overflow check int" %}
  ins_cost(INSN_COST);
  ins_encode %{
    __ cmpw($op1$$Register, $op2$$constant);
  %}

  ins_pipe(icmp_reg_imm);
%}

// Long subtract overflow check: 64-bit CMP form of the pattern above.
instruct overflowSubL_reg_reg(rFlagsReg cr, iRegL op1, iRegL op2)
%{
  match(Set cr (OverflowSubL op1 op2));

  format %{ "cmp   $op1, $op2\t# overflow check long" %}
  ins_cost(INSN_COST);
  ins_encode %{
    __ cmp($op1$$Register, $op2$$Register);
  %}

  ins_pipe(icmp_reg_reg);
%}

// As above, with an add/sub-encodable immediate second operand.
instruct overflowSubL_reg_imm(rFlagsReg cr, iRegL op1, immLAddSub op2)
%{
  match(Set cr (OverflowSubL op1 op2));

  format %{ "cmp   $op1, $op2\t# overflow check long" %}
  ins_cost(INSN_COST);
  ins_encode %{
    __ cmp($op1$$Register, $op2$$constant);
  %}

  ins_pipe(icmp_reg_imm);
%}

// Int negate overflow check (0 - op1): compares zr against op1, so V is
// set exactly when op1 == Integer.MIN_VALUE.
instruct overflowNegI_reg(rFlagsReg cr, immI0 zero, iRegIorL2I op1)
%{
  match(Set cr (OverflowSubI zero op1));

  format %{ "cmpw  zr, $op1\t# overflow check int" %}
  ins_cost(INSN_COST);
  ins_encode %{
    __ cmpw(zr, $op1$$Register);
  %}

  ins_pipe(icmp_reg_imm);
%}

// Long negate overflow check (0 - op1), 64-bit form.
// NOTE(review): the zero operand is declared immI0 even though the rule is
// OverflowSubL — confirm against the matcher's constant typing upstream.
instruct overflowNegL_reg(rFlagsReg cr, immI0 zero, iRegL op1)
%{
  match(Set cr (OverflowSubL zero op1));

  format %{ "cmp   zr, $op1\t# overflow check long" %}
  ins_cost(INSN_COST);
  ins_encode %{
    __ cmp(zr, $op1$$Register);
  %}

  ins_pipe(icmp_reg_imm);
%}
12929 
// Int multiply overflow check producing a flags result.  The 64-bit smull
// result overflows 32 bits iff it differs from its own sign extension
// (subs ... sxtw => NE).  The NE/EQ outcome is then converted into V
// (overflow) by materializing 0x80000000 and comparing it with 1, so a
// plain cmpOp overflow/no_overflow test works on cr.
instruct overflowMulI_reg(rFlagsReg cr, iRegIorL2I op1, iRegIorL2I op2)
%{
  match(Set cr (OverflowMulI op1 op2));

  format %{ "smull rscratch1, $op1, $op2\t# overflow check int\n\t"
            "cmp   rscratch1, rscratch1, sxtw\n\t"
            "movw  rscratch1, #0x80000000\n\t"
            "cselw rscratch1, rscratch1, zr, NE\n\t"
            "cmpw  rscratch1, #1" %}
  ins_cost(5 * INSN_COST);
  ins_encode %{
    __ smull(rscratch1, $op1$$Register, $op2$$Register);
    __ subs(zr, rscratch1, rscratch1, ext::sxtw);      // NE => overflow
    __ movw(rscratch1, 0x80000000);                    // Develop 0 (EQ),
    __ cselw(rscratch1, rscratch1, zr, Assembler::NE); // or 0x80000000 (NE)
    __ cmpw(rscratch1, 1);                             // 0x80000000 - 1 => VS
  %}

  ins_pipe(pipe_slow);
%}

// Fused form when the overflow check directly feeds a branch: skips the
// flag conversion and branches on NE (overflow) / EQ (no overflow).
instruct overflowMulI_reg_branch(cmpOp cmp, iRegIorL2I op1, iRegIorL2I op2, label labl, rFlagsReg cr)
%{
  match(If cmp (OverflowMulI op1 op2));
  predicate(n->in(1)->as_Bool()->_test._test == BoolTest::overflow
            || n->in(1)->as_Bool()->_test._test == BoolTest::no_overflow);
  effect(USE labl, KILL cr);

  format %{ "smull rscratch1, $op1, $op2\t# overflow check int\n\t"
            "cmp   rscratch1, rscratch1, sxtw\n\t"
            "b$cmp   $labl" %}
  ins_cost(3 * INSN_COST); // Branch is rare so treat as INSN_COST
  ins_encode %{
    Label* L = $labl$$label;
    Assembler::Condition cond = (Assembler::Condition)$cmp$$cmpcode;
    __ smull(rscratch1, $op1$$Register, $op2$$Register);
    __ subs(zr, rscratch1, rscratch1, ext::sxtw);      // NE => overflow
    __ br(cond == Assembler::VS ? Assembler::NE : Assembler::EQ, *L);
  %}

  ins_pipe(pipe_serial);
%}

// Long multiply overflow check: the 128-bit product's high half (smulh)
// must equal the sign extension of the low half (mul), i.e. rscratch1 ASR 63.
// NE/EQ is converted into V exactly as in overflowMulI_reg.
instruct overflowMulL_reg(rFlagsReg cr, iRegL op1, iRegL op2)
%{
  match(Set cr (OverflowMulL op1 op2));

  format %{ "mul   rscratch1, $op1, $op2\t#overflow check long\n\t"
            "smulh rscratch2, $op1, $op2\n\t"
            "cmp   rscratch2, rscratch1, ASR #63\n\t"
            "movw  rscratch1, #0x80000000\n\t"
            "cselw rscratch1, rscratch1, zr, NE\n\t"
            "cmpw  rscratch1, #1" %}
  ins_cost(6 * INSN_COST);
  ins_encode %{
    __ mul(rscratch1, $op1$$Register, $op2$$Register);   // Result bits 0..63
    __ smulh(rscratch2, $op1$$Register, $op2$$Register); // Result bits 64..127
    __ cmp(rscratch2, rscratch1, Assembler::ASR, 63);    // Top is pure sign ext
    __ movw(rscratch1, 0x80000000);                    // Develop 0 (EQ),
    __ cselw(rscratch1, rscratch1, zr, Assembler::NE); // or 0x80000000 (NE)
    __ cmpw(rscratch1, 1);                             // 0x80000000 - 1 => VS
  %}

  ins_pipe(pipe_slow);
%}

// Fused long-multiply-overflow-and-branch, analogous to the int variant.
instruct overflowMulL_reg_branch(cmpOp cmp, iRegL op1, iRegL op2, label labl, rFlagsReg cr)
%{
  match(If cmp (OverflowMulL op1 op2));
  predicate(n->in(1)->as_Bool()->_test._test == BoolTest::overflow
            || n->in(1)->as_Bool()->_test._test == BoolTest::no_overflow);
  effect(USE labl, KILL cr);

  format %{ "mul   rscratch1, $op1, $op2\t#overflow check long\n\t"
            "smulh rscratch2, $op1, $op2\n\t"
            "cmp   rscratch2, rscratch1, ASR #63\n\t"
            "b$cmp $labl" %}
  ins_cost(4 * INSN_COST); // Branch is rare so treat as INSN_COST
  ins_encode %{
    Label* L = $labl$$label;
    Assembler::Condition cond = (Assembler::Condition)$cmp$$cmpcode;
    __ mul(rscratch1, $op1$$Register, $op2$$Register);   // Result bits 0..63
    __ smulh(rscratch2, $op1$$Register, $op2$$Register); // Result bits 64..127
    __ cmp(rscratch2, rscratch1, Assembler::ASR, 63);    // Top is pure sign ext
    __ br(cond == Assembler::VS ? Assembler::NE : Assembler::EQ, *L);
  %}

  ins_pipe(pipe_serial);
%}
13019 
13020 // ============================================================================
13021 // Compare Instructions
13022 
// Signed int compare, register-register.
instruct compI_reg_reg(rFlagsReg cr, iRegI op1, iRegI op2)
%{
  match(Set cr (CmpI op1 op2));

  effect(DEF cr, USE op1, USE op2);

  ins_cost(INSN_COST);
  format %{ "cmpw  $op1, $op2" %}

  ins_encode(aarch64_enc_cmpw(op1, op2));

  ins_pipe(icmp_reg_reg);
%}

// Signed int compare against the constant zero.
instruct compI_reg_immI0(rFlagsReg cr, iRegI op1, immI0 zero)
%{
  match(Set cr (CmpI op1 zero));

  effect(DEF cr, USE op1);

  ins_cost(INSN_COST);
  format %{ "cmpw $op1, 0" %}

  ins_encode(aarch64_enc_cmpw_imm_addsub(op1, zero));

  ins_pipe(icmp_reg_imm);
%}

// Signed int compare against an add/sub-encodable immediate (one insn).
instruct compI_reg_immIAddSub(rFlagsReg cr, iRegI op1, immIAddSub op2)
%{
  match(Set cr (CmpI op1 op2));

  effect(DEF cr, USE op1);

  ins_cost(INSN_COST);
  format %{ "cmpw  $op1, $op2" %}

  ins_encode(aarch64_enc_cmpw_imm_addsub(op1, op2));

  ins_pipe(icmp_reg_imm);
%}

// Signed int compare against an arbitrary immediate; costed at two
// instructions since the constant may need materializing first.
instruct compI_reg_immI(rFlagsReg cr, iRegI op1, immI op2)
%{
  match(Set cr (CmpI op1 op2));

  effect(DEF cr, USE op1);

  ins_cost(INSN_COST * 2);
  format %{ "cmpw  $op1, $op2" %}

  ins_encode(aarch64_enc_cmpw_imm(op1, op2));

  ins_pipe(icmp_reg_imm);
%}
13078 
13079 // Unsigned compare Instructions; really, same as signed compare
13080 // except it should only be used to feed an If or a CMovI which takes a
13081 // cmpOpU.
13082 
// Unsigned int compare, register-register; same cmpw as the signed form,
// but defines rFlagsRegU so only unsigned cmpOpU users consume it.
instruct compU_reg_reg(rFlagsRegU cr, iRegI op1, iRegI op2)
%{
  match(Set cr (CmpU op1 op2));

  effect(DEF cr, USE op1, USE op2);

  ins_cost(INSN_COST);
  format %{ "cmpw  $op1, $op2\t# unsigned" %}

  ins_encode(aarch64_enc_cmpw(op1, op2));

  ins_pipe(icmp_reg_reg);
%}

// Unsigned int compare against the constant zero.
instruct compU_reg_immI0(rFlagsRegU cr, iRegI op1, immI0 zero)
%{
  match(Set cr (CmpU op1 zero));

  effect(DEF cr, USE op1);

  ins_cost(INSN_COST);
  format %{ "cmpw $op1, #0\t# unsigned" %}

  ins_encode(aarch64_enc_cmpw_imm_addsub(op1, zero));

  ins_pipe(icmp_reg_imm);
%}

// Unsigned int compare against an add/sub-encodable immediate.
instruct compU_reg_immIAddSub(rFlagsRegU cr, iRegI op1, immIAddSub op2)
%{
  match(Set cr (CmpU op1 op2));

  effect(DEF cr, USE op1);

  ins_cost(INSN_COST);
  format %{ "cmpw  $op1, $op2\t# unsigned" %}

  ins_encode(aarch64_enc_cmpw_imm_addsub(op1, op2));

  ins_pipe(icmp_reg_imm);
%}

// Unsigned int compare against an arbitrary immediate (may need a
// constant-materializing move first, hence double cost).
instruct compU_reg_immI(rFlagsRegU cr, iRegI op1, immI op2)
%{
  match(Set cr (CmpU op1 op2));

  effect(DEF cr, USE op1);

  ins_cost(INSN_COST * 2);
  format %{ "cmpw  $op1, $op2\t# unsigned" %}

  ins_encode(aarch64_enc_cmpw_imm(op1, op2));

  ins_pipe(icmp_reg_imm);
%}
13138 
// Signed long compare, register-register.
instruct compL_reg_reg(rFlagsReg cr, iRegL op1, iRegL op2)
%{
  match(Set cr (CmpL op1 op2));

  effect(DEF cr, USE op1, USE op2);

  ins_cost(INSN_COST);
  format %{ "cmp  $op1, $op2" %}

  ins_encode(aarch64_enc_cmp(op1, op2));

  ins_pipe(icmp_reg_reg);
%}

// Signed long compare against the constant zero.  The format prints
// "tst" but the encoding is a compare-with-zero-immediate.
instruct compL_reg_immL0(rFlagsReg cr, iRegL op1, immL0 zero)
%{
  match(Set cr (CmpL op1 zero));

  effect(DEF cr, USE op1);

  ins_cost(INSN_COST);
  format %{ "tst  $op1" %}

  ins_encode(aarch64_enc_cmp_imm_addsub(op1, zero));

  ins_pipe(icmp_reg_imm);
%}

// Signed long compare against an add/sub-encodable immediate.
instruct compL_reg_immLAddSub(rFlagsReg cr, iRegL op1, immLAddSub op2)
%{
  match(Set cr (CmpL op1 op2));

  effect(DEF cr, USE op1);

  ins_cost(INSN_COST);
  format %{ "cmp  $op1, $op2" %}

  ins_encode(aarch64_enc_cmp_imm_addsub(op1, op2));

  ins_pipe(icmp_reg_imm);
%}

// Signed long compare against an arbitrary immediate (double cost:
// constant may need materializing).
instruct compL_reg_immL(rFlagsReg cr, iRegL op1, immL op2)
%{
  match(Set cr (CmpL op1 op2));

  effect(DEF cr, USE op1);

  ins_cost(INSN_COST * 2);
  format %{ "cmp  $op1, $op2" %}

  ins_encode(aarch64_enc_cmp_imm(op1, op2));

  ins_pipe(icmp_reg_imm);
%}
13194 
// Unsigned long compare, register-register; defines rFlagsRegU so only
// unsigned condition users consume the result.
instruct compUL_reg_reg(rFlagsRegU cr, iRegL op1, iRegL op2)
%{
  match(Set cr (CmpUL op1 op2));

  effect(DEF cr, USE op1, USE op2);

  ins_cost(INSN_COST);
  format %{ "cmp  $op1, $op2" %}

  ins_encode(aarch64_enc_cmp(op1, op2));

  ins_pipe(icmp_reg_reg);
%}

// Unsigned long compare against the constant zero (format prints "tst",
// encoding is compare-with-zero-immediate).
instruct compUL_reg_immL0(rFlagsRegU cr, iRegL op1, immL0 zero)
%{
  match(Set cr (CmpUL op1 zero));

  effect(DEF cr, USE op1);

  ins_cost(INSN_COST);
  format %{ "tst  $op1" %}

  ins_encode(aarch64_enc_cmp_imm_addsub(op1, zero));

  ins_pipe(icmp_reg_imm);
%}

// Unsigned long compare against an add/sub-encodable immediate.
instruct compUL_reg_immLAddSub(rFlagsRegU cr, iRegL op1, immLAddSub op2)
%{
  match(Set cr (CmpUL op1 op2));

  effect(DEF cr, USE op1);

  ins_cost(INSN_COST);
  format %{ "cmp  $op1, $op2" %}

  ins_encode(aarch64_enc_cmp_imm_addsub(op1, op2));

  ins_pipe(icmp_reg_imm);
%}

// Unsigned long compare against an arbitrary immediate (double cost:
// constant may need materializing).
instruct compUL_reg_immL(rFlagsRegU cr, iRegL op1, immL op2)
%{
  match(Set cr (CmpUL op1 op2));

  effect(DEF cr, USE op1);

  ins_cost(INSN_COST * 2);
  format %{ "cmp  $op1, $op2" %}

  ins_encode(aarch64_enc_cmp_imm(op1, op2));

  ins_pipe(icmp_reg_imm);
%}
13250 
// Pointer compare, register-register (unsigned: addresses).
instruct compP_reg_reg(rFlagsRegU cr, iRegP op1, iRegP op2)
%{
  match(Set cr (CmpP op1 op2));

  effect(DEF cr, USE op1, USE op2);

  ins_cost(INSN_COST);
  format %{ "cmp  $op1, $op2\t // ptr" %}

  ins_encode(aarch64_enc_cmpp(op1, op2));

  ins_pipe(icmp_reg_reg);
%}

// Compressed-oop compare, register-register.
instruct compN_reg_reg(rFlagsRegU cr, iRegN op1, iRegN op2)
%{
  match(Set cr (CmpN op1 op2));

  effect(DEF cr, USE op1, USE op2);

  ins_cost(INSN_COST);
  format %{ "cmp  $op1, $op2\t // compressed ptr" %}

  ins_encode(aarch64_enc_cmpn(op1, op2));

  ins_pipe(icmp_reg_reg);
%}

// Pointer null test: compare against the constant null.
instruct testP_reg(rFlagsRegU cr, iRegP op1, immP0 zero)
%{
  match(Set cr (CmpP op1 zero));

  effect(DEF cr, USE op1, USE zero);

  ins_cost(INSN_COST);
  format %{ "cmp  $op1, 0\t // ptr" %}

  ins_encode(aarch64_enc_testp(op1));

  ins_pipe(icmp_reg_imm);
%}

// Compressed-oop null test: compare against the compressed null.
instruct testN_reg(rFlagsRegU cr, iRegN op1, immN0 zero)
%{
  match(Set cr (CmpN op1 zero));

  effect(DEF cr, USE op1, USE zero);

  ins_cost(INSN_COST);
  format %{ "cmp  $op1, 0\t // compressed ptr" %}

  ins_encode(aarch64_enc_testn(op1));

  ins_pipe(icmp_reg_imm);
%}
13306 
13307 // FP comparisons
13308 //
13309 // n.b. CmpF/CmpD set a normal flags reg which then gets compared
13310 // using normal cmpOp. See declaration of rFlagsReg for details.
13311 
// Float compare setting the normal (signed) flags register; consumed by
// ordinary cmpOp users per the rFlagsReg declaration comment above.
instruct compF_reg_reg(rFlagsReg cr, vRegF src1, vRegF src2)
%{
  match(Set cr (CmpF src1 src2));

  ins_cost(3 * INSN_COST);
  format %{ "fcmps $src1, $src2" %}

  ins_encode %{
    __ fcmps(as_FloatRegister($src1$$reg), as_FloatRegister($src2$$reg));
  %}

  ins_pipe(pipe_class_compare);
%}
13325 
// Float compare against the constant 0.0, using fcmps' dedicated
// compare-with-zero form.
instruct compF_reg_zero(rFlagsReg cr, vRegF src1, immF0 src2)
%{
  match(Set cr (CmpF src1 src2));

  ins_cost(3 * INSN_COST);
  format %{ "fcmps $src1, 0.0" %}

  ins_encode %{
    // Standard C++ literal 0.0 — the former "0.0D" suffix is a
    // non-standard extension.
    __ fcmps(as_FloatRegister($src1$$reg), 0.0);
  %}

  ins_pipe(pipe_class_compare);
%}
13339 // FROM HERE
13340 
// Double compare setting the normal (signed) flags register.
instruct compD_reg_reg(rFlagsReg cr, vRegD src1, vRegD src2)
%{
  match(Set cr (CmpD src1 src2));

  ins_cost(3 * INSN_COST);
  format %{ "fcmpd $src1, $src2" %}

  ins_encode %{
    __ fcmpd(as_FloatRegister($src1$$reg), as_FloatRegister($src2$$reg));
  %}

  ins_pipe(pipe_class_compare);
%}
13354 
// Double compare against the constant 0.0, using fcmpd's dedicated
// compare-with-zero form.
instruct compD_reg_zero(rFlagsReg cr, vRegD src1, immD0 src2)
%{
  match(Set cr (CmpD src1 src2));

  ins_cost(3 * INSN_COST);
  format %{ "fcmpd $src1, 0.0" %}

  ins_encode %{
    // Standard C++ literal 0.0 — the former "0.0D" suffix is a
    // non-standard extension.
    __ fcmpd(as_FloatRegister($src1$$reg), 0.0);
  %}

  ins_pipe(pipe_class_compare);
%}
13368 
// Three-way float compare producing -1/0/+1 in an int register:
// csinv yields 0 (EQ) or -1, csneg then keeps -1 for less-or-unordered
// or negates it to +1 otherwise.
instruct compF3_reg_reg(iRegINoSp dst, vRegF src1, vRegF src2, rFlagsReg cr)
%{
  match(Set dst (CmpF3 src1 src2));
  effect(KILL cr);

  ins_cost(5 * INSN_COST);
  format %{ "fcmps $src1, $src2\n\t"
            "csinvw($dst, zr, zr, eq)\n\t"
            "csnegw($dst, $dst, $dst, lt)"
  %}

  ins_encode %{
    // The previous version declared and bound an unused Label; it was
    // never branched to, so it has been dropped.
    FloatRegister s1 = as_FloatRegister($src1$$reg);
    FloatRegister s2 = as_FloatRegister($src2$$reg);
    Register d = as_Register($dst$$reg);
    __ fcmps(s1, s2);
    // installs 0 if EQ else -1
    __ csinvw(d, zr, zr, Assembler::EQ);
    // keeps -1 if less or unordered else installs 1
    __ csnegw(d, d, d, Assembler::LT);
  %}

  ins_pipe(pipe_class_default);

%}
13396 
// Three-way double compare producing -1/0/+1 in an int register;
// same csinv/csneg sequence as compF3_reg_reg.
instruct compD3_reg_reg(iRegINoSp dst, vRegD src1, vRegD src2, rFlagsReg cr)
%{
  match(Set dst (CmpD3 src1 src2));
  effect(KILL cr);

  ins_cost(5 * INSN_COST);
  format %{ "fcmpd $src1, $src2\n\t"
            "csinvw($dst, zr, zr, eq)\n\t"
            "csnegw($dst, $dst, $dst, lt)"
  %}

  ins_encode %{
    // Unused Label/bind removed — it was never branched to.
    FloatRegister s1 = as_FloatRegister($src1$$reg);
    FloatRegister s2 = as_FloatRegister($src2$$reg);
    Register d = as_Register($dst$$reg);
    __ fcmpd(s1, s2);
    // installs 0 if EQ else -1
    __ csinvw(d, zr, zr, Assembler::EQ);
    // keeps -1 if less or unordered else installs 1
    __ csnegw(d, d, d, Assembler::LT);
  %}
  ins_pipe(pipe_class_default);

%}
13423 
// Three-way float compare against 0.0 producing -1/0/+1; zero-compare
// form of compF3_reg_reg.
instruct compF3_reg_immF0(iRegINoSp dst, vRegF src1, immF0 zero, rFlagsReg cr)
%{
  match(Set dst (CmpF3 src1 zero));
  effect(KILL cr);

  ins_cost(5 * INSN_COST);
  format %{ "fcmps $src1, 0.0\n\t"
            "csinvw($dst, zr, zr, eq)\n\t"
            "csnegw($dst, $dst, $dst, lt)"
  %}

  ins_encode %{
    // Unused Label/bind removed; standard 0.0 literal replaces the
    // non-standard "0.0D" suffix.
    FloatRegister s1 = as_FloatRegister($src1$$reg);
    Register d = as_Register($dst$$reg);
    __ fcmps(s1, 0.0);
    // installs 0 if EQ else -1
    __ csinvw(d, zr, zr, Assembler::EQ);
    // keeps -1 if less or unordered else installs 1
    __ csnegw(d, d, d, Assembler::LT);
  %}

  ins_pipe(pipe_class_default);

%}
13450 
// Three-way double compare against 0.0 producing -1/0/+1; zero-compare
// form of compD3_reg_reg.
instruct compD3_reg_immD0(iRegINoSp dst, vRegD src1, immD0 zero, rFlagsReg cr)
%{
  match(Set dst (CmpD3 src1 zero));
  effect(KILL cr);

  ins_cost(5 * INSN_COST);
  format %{ "fcmpd $src1, 0.0\n\t"
            "csinvw($dst, zr, zr, eq)\n\t"
            "csnegw($dst, $dst, $dst, lt)"
  %}

  ins_encode %{
    // Unused Label/bind removed; standard 0.0 literal replaces the
    // non-standard "0.0D" suffix.
    FloatRegister s1 = as_FloatRegister($src1$$reg);
    Register d = as_Register($dst$$reg);
    __ fcmpd(s1, 0.0);
    // installs 0 if EQ else -1
    __ csinvw(d, zr, zr, Assembler::EQ);
    // keeps -1 if less or unordered else installs 1
    __ csnegw(d, d, d, Assembler::LT);
  %}
  ins_pipe(pipe_class_default);

%}
13476 
// CmpLTMask: dst = (p < q) ? -1 : 0.  cset produces 0/1, then
// subw from zr turns 1 into the all-ones mask.
instruct cmpLTMask_reg_reg(iRegINoSp dst, iRegIorL2I p, iRegIorL2I q, rFlagsReg cr)
%{
  match(Set dst (CmpLTMask p q));
  effect(KILL cr);

  ins_cost(3 * INSN_COST);

  format %{ "cmpw $p, $q\t# cmpLTMask\n\t"
            "csetw $dst, lt\n\t"
            "subw $dst, zr, $dst"
  %}

  ins_encode %{
    __ cmpw(as_Register($p$$reg), as_Register($q$$reg));
    __ csetw(as_Register($dst$$reg), Assembler::LT);
    __ subw(as_Register($dst$$reg), zr, as_Register($dst$$reg));
  %}

  ins_pipe(ialu_reg_reg);
%}

// Special case against zero: (src < 0) ? -1 : 0 is just an arithmetic
// shift right by 31, smearing the sign bit — one instruction, no flags.
instruct cmpLTMask_reg_zero(iRegINoSp dst, iRegIorL2I src, immI0 zero, rFlagsReg cr)
%{
  match(Set dst (CmpLTMask src zero));
  effect(KILL cr);

  ins_cost(INSN_COST);

  format %{ "asrw $dst, $src, #31\t# cmpLTMask0" %}

  ins_encode %{
    __ asrw(as_Register($dst$$reg), as_Register($src$$reg), 31);
  %}

  ins_pipe(ialu_reg_shift);
%}
13513 
13514 // ============================================================================
13515 // Max and Min
13516 
// Signed int minimum: compare then conditional-select src1 when LT.
instruct minI_rReg(iRegINoSp dst, iRegI src1, iRegI src2, rFlagsReg cr)
%{
  match(Set dst (MinI src1 src2));

  effect(DEF dst, USE src1, USE src2, KILL cr);
  size(8);

  ins_cost(INSN_COST * 3);
  format %{
    "cmpw $src1 $src2\t signed int\n\t"
    "cselw $dst, $src1, $src2 lt\t"
  %}

  ins_encode %{
    __ cmpw(as_Register($src1$$reg),
            as_Register($src2$$reg));
    __ cselw(as_Register($dst$$reg),
             as_Register($src1$$reg),
             as_Register($src2$$reg),
             Assembler::LT);
  %}

  ins_pipe(ialu_reg_reg);
%}
// FROM HERE

// Signed int maximum: compare then conditional-select src1 when GT.
instruct maxI_rReg(iRegINoSp dst, iRegI src1, iRegI src2, rFlagsReg cr)
%{
  match(Set dst (MaxI src1 src2));

  effect(DEF dst, USE src1, USE src2, KILL cr);
  size(8);

  ins_cost(INSN_COST * 3);
  format %{
    "cmpw $src1 $src2\t signed int\n\t"
    "cselw $dst, $src1, $src2 gt\t"
  %}

  ins_encode %{
    __ cmpw(as_Register($src1$$reg),
            as_Register($src2$$reg));
    __ cselw(as_Register($dst$$reg),
             as_Register($src1$$reg),
             as_Register($src2$$reg),
             Assembler::GT);
  %}

  ins_pipe(ialu_reg_reg);
%}
13567 
13568 // ============================================================================
13569 // Branch Instructions
13570 
13571 // Direct Branch.
// Direct Branch.
instruct branch(label lbl)
%{
  match(Goto);

  effect(USE lbl);

  ins_cost(BRANCH_COST);
  format %{ "b  $lbl" %}

  ins_encode(aarch64_enc_b(lbl));

  ins_pipe(pipe_branch);
%}

// Conditional Near Branch: signed condition codes on the normal flags reg.
instruct branchCon(cmpOp cmp, rFlagsReg cr, label lbl)
%{
  // Same match rule as `branchConFar'.
  match(If cmp cr);

  effect(USE lbl);

  ins_cost(BRANCH_COST);
  // If set to 1 this indicates that the current instruction is a
  // short variant of a long branch. This avoids using this
  // instruction in first-pass matching. It will then only be used in
  // the `Shorten_branches' pass.
  // ins_short_branch(1);
  format %{ "b$cmp  $lbl" %}

  ins_encode(aarch64_enc_br_con(cmp, lbl));

  ins_pipe(pipe_branch_cond);
%}

// Conditional Near Branch Unsigned: same shape, but consumes the unsigned
// flags reg with unsigned condition codes.
instruct branchConU(cmpOpU cmp, rFlagsRegU cr, label lbl)
%{
  // Same match rule as `branchConFar'.
  match(If cmp cr);

  effect(USE lbl);

  ins_cost(BRANCH_COST);
  // If set to 1 this indicates that the current instruction is a
  // short variant of a long branch. This avoids using this
  // instruction in first-pass matching. It will then only be used in
  // the `Shorten_branches' pass.
  // ins_short_branch(1);
  format %{ "b$cmp  $lbl\t# unsigned" %}

  ins_encode(aarch64_enc_br_conU(cmp, lbl));

  ins_pipe(pipe_branch_cond);
%}
13627 
13628 // Make use of CBZ and CBNZ.  These instructions, as well as being
13629 // shorter than (cmp; branch), have the additional benefit of not
13630 // killing the flags.
13631 
// int ==/!= 0 branch fused into cbzw/cbnzw (flags untouched).
instruct cmpI_imm0_branch(cmpOpEqNe cmp, iRegIorL2I op1, immI0 op2, label labl, rFlagsReg cr) %{
  match(If cmp (CmpI op1 op2));
  effect(USE labl);

  ins_cost(BRANCH_COST);
  format %{ "cbw$cmp   $op1, $labl" %}
  ins_encode %{
    Label* L = $labl$$label;
    Assembler::Condition cond = (Assembler::Condition)$cmp$$cmpcode;
    if (cond == Assembler::EQ)
      __ cbzw($op1$$Register, *L);
    else
      __ cbnzw($op1$$Register, *L);
  %}
  ins_pipe(pipe_cmp_branch);
%}

// long ==/!= 0 branch fused into cbz/cbnz.
instruct cmpL_imm0_branch(cmpOpEqNe cmp, iRegL op1, immL0 op2, label labl, rFlagsReg cr) %{
  match(If cmp (CmpL op1 op2));
  effect(USE labl);

  ins_cost(BRANCH_COST);
  format %{ "cb$cmp   $op1, $labl" %}
  ins_encode %{
    Label* L = $labl$$label;
    Assembler::Condition cond = (Assembler::Condition)$cmp$$cmpcode;
    if (cond == Assembler::EQ)
      __ cbz($op1$$Register, *L);
    else
      __ cbnz($op1$$Register, *L);
  %}
  ins_pipe(pipe_cmp_branch);
%}

// pointer ==/!= null branch fused into cbz/cbnz (64-bit test).
instruct cmpP_imm0_branch(cmpOpEqNe cmp, iRegP op1, immP0 op2, label labl, rFlagsReg cr) %{
  match(If cmp (CmpP op1 op2));
  effect(USE labl);

  ins_cost(BRANCH_COST);
  format %{ "cb$cmp   $op1, $labl" %}
  ins_encode %{
    Label* L = $labl$$label;
    Assembler::Condition cond = (Assembler::Condition)$cmp$$cmpcode;
    if (cond == Assembler::EQ)
      __ cbz($op1$$Register, *L);
    else
      __ cbnz($op1$$Register, *L);
  %}
  ins_pipe(pipe_cmp_branch);
%}

// compressed-oop ==/!= 0 branch: 32-bit cbzw/cbnzw on the narrow value.
instruct cmpN_imm0_branch(cmpOpEqNe cmp, iRegN op1, immN0 op2, label labl, rFlagsReg cr) %{
  match(If cmp (CmpN op1 op2));
  effect(USE labl);

  ins_cost(BRANCH_COST);
  format %{ "cbw$cmp   $op1, $labl" %}
  ins_encode %{
    Label* L = $labl$$label;
    Assembler::Condition cond = (Assembler::Condition)$cmp$$cmpcode;
    if (cond == Assembler::EQ)
      __ cbzw($op1$$Register, *L);
    else
      __ cbnzw($op1$$Register, *L);
  %}
  ins_pipe(pipe_cmp_branch);
%}

// (DecodeN oop) ==/!= null branch: the decoded oop is null iff the
// narrow oop is zero, so test the narrow value directly with cbzw/cbnzw.
instruct cmpP_narrowOop_imm0_branch(cmpOpEqNe cmp, iRegN oop, immP0 zero, label labl, rFlagsReg cr) %{
  match(If cmp (CmpP (DecodeN oop) zero));
  effect(USE labl);

  ins_cost(BRANCH_COST);
  format %{ "cb$cmp   $oop, $labl" %}
  ins_encode %{
    Label* L = $labl$$label;
    Assembler::Condition cond = (Assembler::Condition)$cmp$$cmpcode;
    if (cond == Assembler::EQ)
      __ cbzw($oop$$Register, *L);
    else
      __ cbnzw($oop$$Register, *L);
  %}
  ins_pipe(pipe_cmp_branch);
%}

// unsigned int compare-with-zero branch.  For an unsigned value, EQ and
// LS (<= 0) both mean "value is zero", hence the two-condition test.
// NOTE(review): LS arises from the cmpOpUEqNeLtGe encoding — confirm
// against that operand's interface definition.
instruct cmpUI_imm0_branch(cmpOpUEqNeLtGe cmp, iRegIorL2I op1, immI0 op2, label labl, rFlagsRegU cr) %{
  match(If cmp (CmpU op1 op2));
  effect(USE labl);

  ins_cost(BRANCH_COST);
  format %{ "cbw$cmp   $op1, $labl" %}
  ins_encode %{
    Label* L = $labl$$label;
    Assembler::Condition cond = (Assembler::Condition)$cmp$$cmpcode;
    if (cond == Assembler::EQ || cond == Assembler::LS)
      __ cbzw($op1$$Register, *L);
    else
      __ cbnzw($op1$$Register, *L);
  %}
  ins_pipe(pipe_cmp_branch);
%}

// unsigned long compare-with-zero branch; 64-bit form of the above.
instruct cmpUL_imm0_branch(cmpOpUEqNeLtGe cmp, iRegL op1, immL0 op2, label labl, rFlagsRegU cr) %{
  match(If cmp (CmpUL op1 op2));
  effect(USE labl);

  ins_cost(BRANCH_COST);
  format %{ "cb$cmp   $op1, $labl" %}
  ins_encode %{
    Label* L = $labl$$label;
    Assembler::Condition cond = (Assembler::Condition)$cmp$$cmpcode;
    if (cond == Assembler::EQ || cond == Assembler::LS)
      __ cbz($op1$$Register, *L);
    else
      __ cbnz($op1$$Register, *L);
  %}
  ins_pipe(pipe_cmp_branch);
%}
13750 
13751 // Test bit and Branch
13752 
13753 // Patterns for short (< 32KiB) variants
// long </>= 0 branch: a sign test, so fuse into tbnz/tbz on bit 63
// (LT => bit set => NE form of tbr; GE => bit clear => EQ form).
instruct cmpL_branch_sign(cmpOpLtGe cmp, iRegL op1, immL0 op2, label labl) %{
  match(If cmp (CmpL op1 op2));
  effect(USE labl);

  ins_cost(BRANCH_COST);
  format %{ "cb$cmp   $op1, $labl # long" %}
  ins_encode %{
    Label* L = $labl$$label;
    Assembler::Condition cond =
      ((Assembler::Condition)$cmp$$cmpcode == Assembler::LT) ? Assembler::NE : Assembler::EQ;
    __ tbr(cond, $op1$$Register, 63, *L);
  %}
  ins_pipe(pipe_cmp_branch);
  ins_short_branch(1);
%}

// int </>= 0 branch: sign test on bit 31, same mapping as above.
instruct cmpI_branch_sign(cmpOpLtGe cmp, iRegIorL2I op1, immI0 op2, label labl) %{
  match(If cmp (CmpI op1 op2));
  effect(USE labl);

  ins_cost(BRANCH_COST);
  format %{ "cb$cmp   $op1, $labl # int" %}
  ins_encode %{
    Label* L = $labl$$label;
    Assembler::Condition cond =
      ((Assembler::Condition)$cmp$$cmpcode == Assembler::LT) ? Assembler::NE : Assembler::EQ;
    __ tbr(cond, $op1$$Register, 31, *L);
  %}
  ins_pipe(pipe_cmp_branch);
  ins_short_branch(1);
%}

// (op1 & single-bit-mask) ==/!= 0 branch on a long: fuse into tbz/tbnz
// on the masked bit; the predicate requires a power-of-two mask.
instruct cmpL_branch_bit(cmpOpEqNe cmp, iRegL op1, immL op2, immL0 op3, label labl) %{
  match(If cmp (CmpL (AndL op1 op2) op3));
  predicate(is_power_of_2(n->in(2)->in(1)->in(2)->get_long()));
  effect(USE labl);

  ins_cost(BRANCH_COST);
  format %{ "tb$cmp   $op1, $op2, $labl" %}
  ins_encode %{
    Label* L = $labl$$label;
    Assembler::Condition cond = (Assembler::Condition)$cmp$$cmpcode;
    int bit = exact_log2($op2$$constant);
    __ tbr(cond, $op1$$Register, bit, *L);
  %}
  ins_pipe(pipe_cmp_branch);
  ins_short_branch(1);
%}

// (op1 & single-bit-mask) ==/!= 0 branch on an int; same as above.
instruct cmpI_branch_bit(cmpOpEqNe cmp, iRegIorL2I op1, immI op2, immI0 op3, label labl) %{
  match(If cmp (CmpI (AndI op1 op2) op3));
  predicate(is_power_of_2(n->in(2)->in(1)->in(2)->get_int()));
  effect(USE labl);

  ins_cost(BRANCH_COST);
  format %{ "tb$cmp   $op1, $op2, $labl" %}
  ins_encode %{
    Label* L = $labl$$label;
    Assembler::Condition cond = (Assembler::Condition)$cmp$$cmpcode;
    int bit = exact_log2($op2$$constant);
    __ tbr(cond, $op1$$Register, bit, *L);
  %}
  ins_pipe(pipe_cmp_branch);
  ins_short_branch(1);
%}
13819 
// And far variants
// Same as cmpL_branch_sign but for targets out of test-bit branch range;
// tbr's far flag makes it emit an inverted test around an unconditional b.
instruct far_cmpL_branch_sign(cmpOpLtGe cmp, iRegL op1, immL0 op2, label labl) %{
  match(If cmp (CmpL op1 op2));
  effect(USE labl);

  ins_cost(BRANCH_COST);
  format %{ "cb$cmp   $op1, $labl # long" %}
  ins_encode %{
    Label* L = $labl$$label;
    // LT (negative) -> sign bit (63) set -> NE; GE -> EQ.
    Assembler::Condition cond =
      ((Assembler::Condition)$cmp$$cmpcode == Assembler::LT) ? Assembler::NE : Assembler::EQ;
    __ tbr(cond, $op1$$Register, 63, *L, /*far*/true);
  %}
  ins_pipe(pipe_cmp_branch);
%}
13835 
// Far variant of cmpI_branch_sign: sign-bit (bit 31) test with an
// out-of-range branch target.
instruct far_cmpI_branch_sign(cmpOpLtGe cmp, iRegIorL2I op1, immI0 op2, label labl) %{
  match(If cmp (CmpI op1 op2));
  effect(USE labl);

  ins_cost(BRANCH_COST);
  format %{ "cb$cmp   $op1, $labl # int" %}
  ins_encode %{
    Label* L = $labl$$label;
    // LT -> NE (bit set), GE -> EQ (bit clear).
    Assembler::Condition cond =
      ((Assembler::Condition)$cmp$$cmpcode == Assembler::LT) ? Assembler::NE : Assembler::EQ;
    __ tbr(cond, $op1$$Register, 31, *L, /*far*/true);
  %}
  ins_pipe(pipe_cmp_branch);
%}
13850 
// Far variant of cmpL_branch_bit: single-bit test on a long, far target.
instruct far_cmpL_branch_bit(cmpOpEqNe cmp, iRegL op1, immL op2, immL0 op3, label labl) %{
  match(If cmp (CmpL (AndL op1 op2) op3));
  // Mask must be a single bit (power of two).
  predicate(is_power_of_2(n->in(2)->in(1)->in(2)->get_long()));
  effect(USE labl);

  ins_cost(BRANCH_COST);
  format %{ "tb$cmp   $op1, $op2, $labl" %}
  ins_encode %{
    Label* L = $labl$$label;
    Assembler::Condition cond = (Assembler::Condition)$cmp$$cmpcode;
    int bit = exact_log2($op2$$constant);
    __ tbr(cond, $op1$$Register, bit, *L, /*far*/true);
  %}
  ins_pipe(pipe_cmp_branch);
%}
13866 
// Far variant of cmpI_branch_bit: single-bit test on an int, far target.
instruct far_cmpI_branch_bit(cmpOpEqNe cmp, iRegIorL2I op1, immI op2, immI0 op3, label labl) %{
  match(If cmp (CmpI (AndI op1 op2) op3));
  // Mask must be a single bit (power of two).
  predicate(is_power_of_2(n->in(2)->in(1)->in(2)->get_int()));
  effect(USE labl);

  ins_cost(BRANCH_COST);
  format %{ "tb$cmp   $op1, $op2, $labl" %}
  ins_encode %{
    Label* L = $labl$$label;
    Assembler::Condition cond = (Assembler::Condition)$cmp$$cmpcode;
    int bit = exact_log2($op2$$constant);
    __ tbr(cond, $op1$$Register, bit, *L, /*far*/true);
  %}
  ins_pipe(pipe_cmp_branch);
%}
13882 
// Test bits

// Set flags from (long AND immediate) compared with zero, using a single
// tst (ANDS with zr destination) when the mask is encodable as a logical
// immediate.
instruct cmpL_and(cmpOp cmp, iRegL op1, immL op2, immL0 op3, rFlagsReg cr) %{
  match(Set cr (CmpL (AndL op1 op2) op3));
  // Only when the mask fits AArch64's bitmask-immediate encoding.
  predicate(Assembler::operand_valid_for_logical_immediate
            (/*is_32*/false, n->in(1)->in(2)->get_long()));

  ins_cost(INSN_COST);
  format %{ "tst $op1, $op2 # long" %}
  ins_encode %{
    __ tst($op1$$Register, $op2$$constant);
  %}
  ins_pipe(ialu_reg_reg);
%}
13897 
// Set flags from (int AND immediate) compared with zero via tstw, when the
// mask is encodable as a 32-bit logical immediate.
instruct cmpI_and(cmpOp cmp, iRegIorL2I op1, immI op2, immI0 op3, rFlagsReg cr) %{
  match(Set cr (CmpI (AndI op1 op2) op3));
  // Only when the mask fits the 32-bit bitmask-immediate encoding.
  predicate(Assembler::operand_valid_for_logical_immediate
            (/*is_32*/true, n->in(1)->in(2)->get_int()));

  ins_cost(INSN_COST);
  // Fixed: the format previously said "tst" but the encoder emits tstw
  // (consistent with cmpI_and_reg below).
  format %{ "tstw $op1, $op2 # int" %}
  ins_encode %{
    __ tstw($op1$$Register, $op2$$constant);
  %}
  ins_pipe(ialu_reg_reg);
%}
13910 
// Set flags from (long AND register) compared with zero: register-register
// form of cmpL_and, no encodability predicate needed.
instruct cmpL_and_reg(cmpOp cmp, iRegL op1, iRegL op2, immL0 op3, rFlagsReg cr) %{
  match(Set cr (CmpL (AndL op1 op2) op3));

  ins_cost(INSN_COST);
  format %{ "tst $op1, $op2 # long" %}
  ins_encode %{
    __ tst($op1$$Register, $op2$$Register);
  %}
  ins_pipe(ialu_reg_reg);
%}
13921 
// Set flags from (int AND register) compared with zero: register-register
// form of cmpI_and.
instruct cmpI_and_reg(cmpOp cmp, iRegIorL2I op1, iRegIorL2I op2, immI0 op3, rFlagsReg cr) %{
  match(Set cr (CmpI (AndI op1 op2) op3));

  ins_cost(INSN_COST);
  format %{ "tstw $op1, $op2 # int" %}
  ins_encode %{
    __ tstw($op1$$Register, $op2$$Register);
  %}
  ins_pipe(ialu_reg_reg);
%}
13932 
13933 
13934 // Conditional Far Branch
13935 // Conditional Far Branch Unsigned
13936 // TODO: fixme
13937 
// counted loop end branch near
// Signed conditional branch closing a counted loop; uses the shared
// aarch64_enc_br_con encoding.
instruct branchLoopEnd(cmpOp cmp, rFlagsReg cr, label lbl)
%{
  match(CountedLoopEnd cmp cr);

  effect(USE lbl);

  ins_cost(BRANCH_COST);
  // short variant.
  // ins_short_branch(1);
  format %{ "b$cmp $lbl \t// counted loop end" %}

  ins_encode(aarch64_enc_br_con(cmp, lbl));

  ins_pipe(pipe_branch);
%}
13954 
// counted loop end branch near Unsigned
// Unsigned-condition counterpart of branchLoopEnd (uses rFlagsRegU and the
// unsigned branch encoding).
instruct branchLoopEndU(cmpOpU cmp, rFlagsRegU cr, label lbl)
%{
  match(CountedLoopEnd cmp cr);

  effect(USE lbl);

  ins_cost(BRANCH_COST);
  // short variant.
  // ins_short_branch(1);
  format %{ "b$cmp $lbl \t// counted loop end unsigned" %}

  ins_encode(aarch64_enc_br_conU(cmp, lbl));

  ins_pipe(pipe_branch);
%}
13971 
13972 // counted loop end branch far
13973 // counted loop end branch far unsigned
13974 // TODO: fixme
13975 
13976 // ============================================================================
13977 // inlined locking and unlocking
13978 
// Inlined fast-path monitor enter: produces flags consumed by the following
// branch (slow path taken on failure).  tmp/tmp2 are scratch and clobbered.
instruct cmpFastLock(rFlagsReg cr, iRegP object, iRegP box, iRegPNoSp tmp, iRegPNoSp tmp2)
%{
  match(Set cr (FastLock object box));
  effect(TEMP tmp, TEMP tmp2);

  // TODO
  // identify correct cost
  ins_cost(5 * INSN_COST);
  format %{ "fastlock $object,$box\t! kills $tmp,$tmp2" %}

  ins_encode(aarch64_enc_fast_lock(object, box, tmp, tmp2));

  ins_pipe(pipe_serial);
%}
13993 
// Inlined fast-path monitor exit; mirror of cmpFastLock.
instruct cmpFastUnlock(rFlagsReg cr, iRegP object, iRegP box, iRegPNoSp tmp, iRegPNoSp tmp2)
%{
  match(Set cr (FastUnlock object box));
  effect(TEMP tmp, TEMP tmp2);

  ins_cost(5 * INSN_COST);
  format %{ "fastunlock $object,$box\t! kills $tmp, $tmp2" %}

  ins_encode(aarch64_enc_fast_unlock(object, box, tmp, tmp2));

  ins_pipe(pipe_serial);
%}
14006 
14007 
14008 // ============================================================================
14009 // Safepoint Instructions
14010 
14011 // TODO
14012 // provide a near and far version of this code
14013 
// Safepoint poll: load from the polling page; a GC request unmaps the page
// so the load traps and the signal handler enters the safepoint.
instruct safePoint(rFlagsReg cr, iRegP poll)
%{
  match(SafePoint poll);
  // NOTE(review): the ldrw itself does not write flags; KILL cr is
  // presumably conservative — confirm before relying on cr here.
  effect(KILL cr);

  format %{
    "ldrw zr, [$poll]\t# Safepoint: poll for GC"
  %}
  ins_encode %{
    // poll_type relocation lets the runtime identify this as a poll site.
    __ read_polling_page(as_Register($poll$$reg), relocInfo::poll_type);
  %}
  ins_pipe(pipe_serial); // ins_pipe(iload_reg_mem);
%}
14027 
14028 
14029 // ============================================================================
14030 // Procedure Call/Return Instructions
14031 
14032 // Call Java Static Instruction
14033 
// Direct call to a statically-bound Java method; the encoding emits the
// call plus the standard call epilog.
instruct CallStaticJavaDirect(method meth)
%{
  match(CallStaticJava);

  effect(USE meth);

  ins_cost(CALL_COST);

  format %{ "call,static $meth \t// ==> " %}

  ins_encode( aarch64_enc_java_static_call(meth),
              aarch64_enc_call_epilog );

  ins_pipe(pipe_class_call);
%}
14049 
14050 // TO HERE
14051 
// Call Java Dynamic Instruction
// Dynamically-dispatched Java call (inline-cache based); emits call plus
// the standard call epilog.
instruct CallDynamicJavaDirect(method meth)
%{
  match(CallDynamicJava);

  effect(USE meth);

  ins_cost(CALL_COST);

  format %{ "CALL,dynamic $meth \t// ==> " %}

  ins_encode( aarch64_enc_java_dynamic_call(meth),
               aarch64_enc_call_epilog );

  ins_pipe(pipe_class_call);
%}
14068 
// Call Runtime Instruction
// Call from compiled Java code into the VM runtime.
instruct CallRuntimeDirect(method meth)
%{
  match(CallRuntime);

  effect(USE meth);

  ins_cost(CALL_COST);

  format %{ "CALL, runtime $meth" %}

  ins_encode( aarch64_enc_java_to_runtime(meth) );

  ins_pipe(pipe_class_call);
%}
14085 
// Call Runtime Instruction
// Leaf runtime call: no safepoint/deopt state needed; shares the
// java_to_runtime encoding with CallRuntimeDirect.
instruct CallLeafDirect(method meth)
%{
  match(CallLeaf);

  effect(USE meth);

  ins_cost(CALL_COST);

  format %{ "CALL, runtime leaf $meth" %}

  ins_encode( aarch64_enc_java_to_runtime(meth) );

  ins_pipe(pipe_class_call);
%}
14102 
// Call Runtime Instruction
// Leaf runtime call that does not use/preserve FP state; same encoding as
// the other runtime calls on this port.
instruct CallLeafNoFPDirect(method meth)
%{
  match(CallLeafNoFP);

  effect(USE meth);

  ins_cost(CALL_COST);

  format %{ "CALL, runtime leaf nofp $meth" %}

  ins_encode( aarch64_enc_java_to_runtime(meth) );

  ins_pipe(pipe_class_call);
%}
14119 
14120 // Tail Call; Jump from runtime stub to Java code.
14121 // Also known as an 'interprocedural jump'.
14122 // Target of jump will eventually return to caller.
14123 // TailJump below removes the return address.
// Indirect tail call: jump (not call) to jump_target; method_oop must be in
// the inline-cache register for the callee's entry checks.
instruct TailCalljmpInd(iRegPNoSp jump_target, inline_cache_RegP method_oop)
%{
  match(TailCall jump_target method_oop);

  ins_cost(CALL_COST);

  format %{ "br $jump_target\t# $method_oop holds method oop" %}

  ins_encode(aarch64_enc_tail_call(jump_target));

  ins_pipe(pipe_class_call);
%}
14136 
// Indirect tail jump used for exception forwarding; ex_oop is pinned to r0
// (the exception-oop register).
instruct TailjmpInd(iRegPNoSp jump_target, iRegP_R0 ex_oop)
%{
  match(TailJump jump_target ex_oop);

  ins_cost(CALL_COST);

  format %{ "br $jump_target\t# $ex_oop holds exception oop" %}

  ins_encode(aarch64_enc_tail_jmp(jump_target));

  ins_pipe(pipe_class_call);
%}
14149 
14150 // Create exception oop: created by stack-crawling runtime code.
14151 // Created exception is now available to this handler, and is setup
14152 // just prior to jumping to this handler. No code emitted.
14153 // TODO check
14154 // should ex_oop be in r0? intel uses rax, ppc cannot use r0 so uses rarg1
// Materialize the exception oop in r0.  The runtime places it there before
// jumping to the handler, so this node emits no code (size 0).
instruct CreateException(iRegP_R0 ex_oop)
%{
  match(Set ex_oop (CreateEx));

  format %{ " -- \t// exception oop; no code emitted" %}

  size(0);

  ins_encode( /*empty*/ );

  ins_pipe(pipe_class_empty);
%}
14167 
// Rethrow exception: The exception oop will come in the first
// argument position. Then JUMP (not call) to the rethrow stub code.
instruct RethrowException() %{
  match(Rethrow);
  ins_cost(CALL_COST);

  format %{ "b rethrow_stub" %}

  ins_encode( aarch64_enc_rethrow() );

  ins_pipe(pipe_class_call);
%}
14180 
14181 
// Return Instruction
// epilog node loads ret address into lr as part of frame pop
instruct Ret()
%{
  match(Return);

  format %{ "ret\t// return register" %}

  ins_encode( aarch64_enc_ret() );

  ins_pipe(pipe_branch);
%}
14194 
// Die now.
// Emits a trapping instruction for paths that must never execute; executing
// it raises a fault the runtime reports as a fatal error.
instruct ShouldNotReachHere() %{
  match(Halt);

  ins_cost(CALL_COST);
  format %{ "ShouldNotReachHere" %}

  ins_encode %{
    // +1 so NativeInstruction::is_sigill_zombie_not_entrant() doesn't
    // return true
    __ dpcs1(0xdead + 1);
  %}

  ins_pipe(pipe_class_default);
%}
14210 
14211 // ============================================================================
// Partial Subtype Check
//
// Search the secondary-supers (superklass) array for an instance of the
// superklass.  Set a hidden internal cache on a hit (cache is checked with
// exposed code in gen_subtype_check()).  Return NZ for a miss or zero for a
// hit.  The encoding ALSO sets flags.
14218 
// Partial subtype check (value-producing form): result is zero on a hit,
// non-zero on a miss; flags are also set by the encoding.
instruct partialSubtypeCheck(iRegP_R4 sub, iRegP_R0 super, iRegP_R2 temp, iRegP_R5 result, rFlagsReg cr)
%{
  match(Set result (PartialSubtypeCheck sub super));
  effect(KILL cr, KILL temp);

  ins_cost(1100);  // slightly larger than the next version
  format %{ "partialSubtypeCheck $result, $sub, $super" %}

  ins_encode(aarch64_enc_partial_subtype_check(sub, super, temp, result));

  opcode(0x1); // Force zero of result reg on hit

  ins_pipe(pipe_class_memory);
%}
14233 
// Flags-only form: matches (CmpP (PartialSubtypeCheck ...) 0) so only the
// condition codes are consumed; result reg need not be zeroed on hit.
instruct partialSubtypeCheckVsZero(iRegP_R4 sub, iRegP_R0 super, iRegP_R2 temp, iRegP_R5 result, immP0 zero, rFlagsReg cr)
%{
  match(Set cr (CmpP (PartialSubtypeCheck sub super) zero));
  effect(KILL temp, KILL result);

  ins_cost(1100);  // slightly larger than the next version
  format %{ "partialSubtypeCheck $result, $sub, $super == 0" %}

  ins_encode(aarch64_enc_partial_subtype_check(sub, super, temp, result));

  opcode(0x0); // Don't zero result reg on hit

  ins_pipe(pipe_class_memory);
%}
14248 
// String compare, both operands UTF-16 (UU encoding).
instruct string_compareU(iRegP_R1 str1, iRegI_R2 cnt1, iRegP_R3 str2, iRegI_R4 cnt2,
                        iRegI_R0 result, iRegP_R10 tmp1, iRegL_R11 tmp2, rFlagsReg cr)
%{
  predicate(((StrCompNode*)n)->encoding() == StrIntrinsicNode::UU);
  match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
  effect(KILL tmp1, KILL tmp2, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);

  // Fixed: tmp2 is also killed (see effect) but was missing from the format.
  format %{ "String Compare $str1,$cnt1,$str2,$cnt2 -> $result   # KILL $tmp1, $tmp2" %}
  ins_encode %{
    // Count is in 8-bit bytes; non-Compact chars are 16 bits.
    // No FP temps needed for the same-encoding case, hence fnoreg.
    __ string_compare($str1$$Register, $str2$$Register,
                      $cnt1$$Register, $cnt2$$Register, $result$$Register,
                      $tmp1$$Register, $tmp2$$Register,
                      fnoreg, fnoreg, fnoreg, StrIntrinsicNode::UU);
  %}
  ins_pipe(pipe_class_memory);
%}
14266 
// String compare, both operands Latin-1 (LL encoding).
instruct string_compareL(iRegP_R1 str1, iRegI_R2 cnt1, iRegP_R3 str2, iRegI_R4 cnt2,
                        iRegI_R0 result, iRegP_R10 tmp1, iRegL_R11 tmp2, rFlagsReg cr)
%{
  predicate(((StrCompNode*)n)->encoding() == StrIntrinsicNode::LL);
  match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
  effect(KILL tmp1, KILL tmp2, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);

  // Fixed: tmp2 is also killed (see effect) but was missing from the format.
  format %{ "String Compare $str1,$cnt1,$str2,$cnt2 -> $result   # KILL $tmp1, $tmp2" %}
  ins_encode %{
    // No FP temps needed for the same-encoding case, hence fnoreg.
    __ string_compare($str1$$Register, $str2$$Register,
                      $cnt1$$Register, $cnt2$$Register, $result$$Register,
                      $tmp1$$Register, $tmp2$$Register,
                      fnoreg, fnoreg, fnoreg, StrIntrinsicNode::LL);
  %}
  ins_pipe(pipe_class_memory);
%}
14283 
// String compare, str1 UTF-16 vs str2 Latin-1 (UL); the mixed-encoding path
// needs three FP/SIMD temporaries for inflation.
instruct string_compareUL(iRegP_R1 str1, iRegI_R2 cnt1, iRegP_R3 str2, iRegI_R4 cnt2,
                        iRegI_R0 result, iRegP_R10 tmp1, iRegL_R11 tmp2,
                        vRegD_V0 vtmp1, vRegD_V1 vtmp2, vRegD_V2 vtmp3, rFlagsReg cr)
%{
  predicate(((StrCompNode*)n)->encoding() == StrIntrinsicNode::UL);
  match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
  effect(KILL tmp1, KILL tmp2, KILL vtmp1, KILL vtmp2, KILL vtmp3,
         USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);

  format %{ "String Compare $str1,$cnt1,$str2,$cnt2 -> $result   # KILL $tmp1, $tmp2, $vtmp1, $vtmp2, $vtmp3" %}
  ins_encode %{
    __ string_compare($str1$$Register, $str2$$Register,
                      $cnt1$$Register, $cnt2$$Register, $result$$Register,
                      $tmp1$$Register, $tmp2$$Register,
                      $vtmp1$$FloatRegister, $vtmp2$$FloatRegister,
                      $vtmp3$$FloatRegister, StrIntrinsicNode::UL);
  %}
  ins_pipe(pipe_class_memory);
%}
14303 
// String compare, str1 Latin-1 vs str2 UTF-16 (LU); mirror of the UL rule.
instruct string_compareLU(iRegP_R1 str1, iRegI_R2 cnt1, iRegP_R3 str2, iRegI_R4 cnt2,
                        iRegI_R0 result, iRegP_R10 tmp1, iRegL_R11 tmp2,
                        vRegD_V0 vtmp1, vRegD_V1 vtmp2, vRegD_V2 vtmp3, rFlagsReg cr)
%{
  predicate(((StrCompNode*)n)->encoding() == StrIntrinsicNode::LU);
  match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
  effect(KILL tmp1, KILL tmp2, KILL vtmp1, KILL vtmp2, KILL vtmp3,
         USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);

  format %{ "String Compare $str1,$cnt1,$str2,$cnt2 -> $result   # KILL $tmp1, $tmp2, $vtmp1, $vtmp2, $vtmp3" %}
  ins_encode %{
    // Fixed: added the missing space before StrIntrinsicNode::LU for
    // consistency with the UL variant above.
    __ string_compare($str1$$Register, $str2$$Register,
                      $cnt1$$Register, $cnt2$$Register, $result$$Register,
                      $tmp1$$Register, $tmp2$$Register,
                      $vtmp1$$FloatRegister, $vtmp2$$FloatRegister,
                      $vtmp3$$FloatRegister, StrIntrinsicNode::LU);
  %}
  ins_pipe(pipe_class_memory);
%}
14323 
// String.indexOf with a variable-length UTF-16 needle in a UTF-16 haystack.
// The -1 passed as icnt2 means "needle length not known at compile time".
instruct string_indexofUU(iRegP_R1 str1, iRegI_R4 cnt1, iRegP_R3 str2, iRegI_R2 cnt2,
       iRegI_R0 result, iRegINoSp tmp1, iRegINoSp tmp2, iRegINoSp tmp3,
       iRegINoSp tmp4, iRegINoSp tmp5, iRegINoSp tmp6, rFlagsReg cr)
%{
  predicate(((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UU);
  match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 cnt2)));
  effect(USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2,
         TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, TEMP tmp5, TEMP tmp6, KILL cr);
  format %{ "String IndexOf $str1,$cnt1,$str2,$cnt2 -> $result (UU)" %}

  ins_encode %{
    __ string_indexof($str1$$Register, $str2$$Register,
                      $cnt1$$Register, $cnt2$$Register,
                      $tmp1$$Register, $tmp2$$Register,
                      $tmp3$$Register, $tmp4$$Register,
                      $tmp5$$Register, $tmp6$$Register,
                      -1, $result$$Register, StrIntrinsicNode::UU);
  %}
  ins_pipe(pipe_class_memory);
%}
14344 
// String.indexOf, Latin-1 needle in Latin-1 haystack; needle length is
// runtime-variable (icnt2 == -1).
instruct string_indexofLL(iRegP_R1 str1, iRegI_R4 cnt1, iRegP_R3 str2, iRegI_R2 cnt2,
       iRegI_R0 result, iRegINoSp tmp1, iRegINoSp tmp2, iRegINoSp tmp3,
       iRegINoSp tmp4, iRegINoSp tmp5, iRegINoSp tmp6, rFlagsReg cr)
%{
  predicate(((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::LL);
  match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 cnt2)));
  effect(USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2,
         TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, TEMP tmp5, TEMP tmp6, KILL cr);
  format %{ "String IndexOf $str1,$cnt1,$str2,$cnt2 -> $result (LL)" %}

  ins_encode %{
    __ string_indexof($str1$$Register, $str2$$Register,
                      $cnt1$$Register, $cnt2$$Register,
                      $tmp1$$Register, $tmp2$$Register,
                      $tmp3$$Register, $tmp4$$Register,
                      $tmp5$$Register, $tmp6$$Register,
                      -1, $result$$Register, StrIntrinsicNode::LL);
  %}
  ins_pipe(pipe_class_memory);
%}
14365 
// String.indexOf, mixed encodings (UL); needle length is runtime-variable
// (icnt2 == -1).
instruct string_indexofUL(iRegP_R1 str1, iRegI_R4 cnt1, iRegP_R3 str2, iRegI_R2 cnt2,
       iRegI_R0 result, iRegINoSp tmp1, iRegINoSp tmp2, iRegINoSp tmp3,
       iRegINoSp tmp4, iRegINoSp tmp5, iRegINoSp tmp6, rFlagsReg cr)
%{
  predicate(((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UL);
  match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 cnt2)));
  effect(USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2,
         TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, TEMP tmp5, TEMP tmp6, KILL cr);
  format %{ "String IndexOf $str1,$cnt1,$str2,$cnt2 -> $result (UL)" %}

  ins_encode %{
    __ string_indexof($str1$$Register, $str2$$Register,
                      $cnt1$$Register, $cnt2$$Register,
                      $tmp1$$Register, $tmp2$$Register,
                      $tmp3$$Register, $tmp4$$Register,
                      $tmp5$$Register, $tmp6$$Register,
                      -1, $result$$Register, StrIntrinsicNode::UL);
  %}
  ins_pipe(pipe_class_memory);
%}
14386 
// indexOf with a compile-time-constant needle length (<= 4), UU encoding;
// the constant is passed as icnt2 so the stub can specialize, and the
// needle count register / last two temps are not needed (zr).
instruct string_indexof_conUU(iRegP_R1 str1, iRegI_R4 cnt1, iRegP_R3 str2,
                 immI_le_4 int_cnt2, iRegI_R0 result, iRegINoSp tmp1, iRegINoSp tmp2,
                 iRegINoSp tmp3, iRegINoSp tmp4, rFlagsReg cr)
%{
  predicate(((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UU);
  match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 int_cnt2)));
  effect(USE_KILL str1, USE_KILL str2, USE_KILL cnt1,
         TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, KILL cr);
  format %{ "String IndexOf $str1,$cnt1,$str2,$int_cnt2 -> $result (UU)" %}

  ins_encode %{
    int icnt2 = (int)$int_cnt2$$constant;
    __ string_indexof($str1$$Register, $str2$$Register,
                      $cnt1$$Register, zr,
                      $tmp1$$Register, $tmp2$$Register,
                      $tmp3$$Register, $tmp4$$Register, zr, zr,
                      icnt2, $result$$Register, StrIntrinsicNode::UU);
  %}
  ins_pipe(pipe_class_memory);
%}
14407 
// Constant-needle-length (<= 4) indexOf, LL encoding; see conUU above.
instruct string_indexof_conLL(iRegP_R1 str1, iRegI_R4 cnt1, iRegP_R3 str2,
                 immI_le_4 int_cnt2, iRegI_R0 result, iRegINoSp tmp1, iRegINoSp tmp2,
                 iRegINoSp tmp3, iRegINoSp tmp4, rFlagsReg cr)
%{
  predicate(((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::LL);
  match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 int_cnt2)));
  effect(USE_KILL str1, USE_KILL str2, USE_KILL cnt1,
         TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, KILL cr);
  format %{ "String IndexOf $str1,$cnt1,$str2,$int_cnt2 -> $result (LL)" %}

  ins_encode %{
    int icnt2 = (int)$int_cnt2$$constant;
    __ string_indexof($str1$$Register, $str2$$Register,
                      $cnt1$$Register, zr,
                      $tmp1$$Register, $tmp2$$Register,
                      $tmp3$$Register, $tmp4$$Register, zr, zr,
                      icnt2, $result$$Register, StrIntrinsicNode::LL);
  %}
  ins_pipe(pipe_class_memory);
%}
14428 
// Constant-needle-length indexOf, UL encoding.  Note the immI_1 operand:
// only a single-char constant needle is accepted for the mixed case.
instruct string_indexof_conUL(iRegP_R1 str1, iRegI_R4 cnt1, iRegP_R3 str2,
                 immI_1 int_cnt2, iRegI_R0 result, iRegINoSp tmp1, iRegINoSp tmp2,
                 iRegINoSp tmp3, iRegINoSp tmp4, rFlagsReg cr)
%{
  predicate(((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UL);
  match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 int_cnt2)));
  effect(USE_KILL str1, USE_KILL str2, USE_KILL cnt1,
         TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, KILL cr);
  format %{ "String IndexOf $str1,$cnt1,$str2,$int_cnt2 -> $result (UL)" %}

  ins_encode %{
    int icnt2 = (int)$int_cnt2$$constant;
    __ string_indexof($str1$$Register, $str2$$Register,
                      $cnt1$$Register, zr,
                      $tmp1$$Register, $tmp2$$Register,
                      $tmp3$$Register, $tmp4$$Register, zr, zr,
                      icnt2, $result$$Register, StrIntrinsicNode::UL);
  %}
  ins_pipe(pipe_class_memory);
%}
14449 
// indexOf of a single char value in a UTF-16 char[] haystack.
instruct string_indexofU_char(iRegP_R1 str1, iRegI_R2 cnt1, iRegI_R3 ch,
                              iRegI_R0 result, iRegINoSp tmp1, iRegINoSp tmp2,
                              iRegINoSp tmp3, rFlagsReg cr)
%{
  match(Set result (StrIndexOfChar (Binary str1 cnt1) ch));
  effect(USE_KILL str1, USE_KILL cnt1, USE_KILL ch,
         TEMP tmp1, TEMP tmp2, TEMP tmp3, KILL cr);

  format %{ "String IndexOf char[] $str1,$cnt1,$ch -> $result" %}

  ins_encode %{
    __ string_indexof_char($str1$$Register, $cnt1$$Register, $ch$$Register,
                           $result$$Register, $tmp1$$Register, $tmp2$$Register,
                           $tmp3$$Register);
  %}
  ins_pipe(pipe_class_memory);
%}
14467 
// String equality, Latin-1 (element size 1 byte).
instruct string_equalsL(iRegP_R1 str1, iRegP_R3 str2, iRegI_R4 cnt,
                        iRegI_R0 result, rFlagsReg cr)
%{
  predicate(((StrEqualsNode*)n)->encoding() == StrIntrinsicNode::LL);
  match(Set result (StrEquals (Binary str1 str2) cnt));
  effect(USE_KILL str1, USE_KILL str2, USE_KILL cnt, KILL cr);

  format %{ "String Equals $str1,$str2,$cnt -> $result" %}
  ins_encode %{
    // Count is in 8-bit bytes; non-Compact chars are 16 bits.
    // Trailing 1 selects byte-sized elements.
    __ string_equals($str1$$Register, $str2$$Register,
                     $result$$Register, $cnt$$Register, 1);
  %}
  ins_pipe(pipe_class_memory);
%}
14483 
// String equality, UTF-16 (element size 2 bytes).
instruct string_equalsU(iRegP_R1 str1, iRegP_R3 str2, iRegI_R4 cnt,
                        iRegI_R0 result, rFlagsReg cr)
%{
  predicate(((StrEqualsNode*)n)->encoding() == StrIntrinsicNode::UU);
  match(Set result (StrEquals (Binary str1 str2) cnt));
  effect(USE_KILL str1, USE_KILL str2, USE_KILL cnt, KILL cr);

  format %{ "String Equals $str1,$str2,$cnt -> $result" %}
  ins_encode %{
    // Count is in 8-bit bytes; non-Compact chars are 16 bits.
    // Trailing 2 selects char-sized (16-bit) elements.
    __ string_equals($str1$$Register, $str2$$Register,
                     $result$$Register, $cnt$$Register, 2);
  %}
  ins_pipe(pipe_class_memory);
%}
14499 
// Arrays.equals for byte[] (LL encoding); element size 1.
instruct array_equalsB(iRegP_R1 ary1, iRegP_R2 ary2, iRegI_R0 result,
                       iRegP_R3 tmp1, iRegP_R4 tmp2, iRegP_R5 tmp3,
                       iRegP_R10 tmp, rFlagsReg cr)
%{
  predicate(((AryEqNode*)n)->encoding() == StrIntrinsicNode::LL);
  match(Set result (AryEq ary1 ary2));
  effect(KILL tmp, USE_KILL ary1, USE_KILL ary2, TEMP tmp1, TEMP tmp2, TEMP tmp3, KILL cr);

  // Fixed: '$' was missing before ary2, so the format printed the literal
  // text "ary2" instead of the operand.
  format %{ "Array Equals $ary1,$ary2 -> $result    // KILL $tmp" %}
  ins_encode %{
    __ arrays_equals($ary1$$Register, $ary2$$Register,
                     $tmp1$$Register, $tmp2$$Register, $tmp3$$Register,
                     $result$$Register, $tmp$$Register, 1);
  %}
  ins_pipe(pipe_class_memory);
%}
14516 
// Arrays.equals for char[] (UU encoding); element size 2.
instruct array_equalsC(iRegP_R1 ary1, iRegP_R2 ary2, iRegI_R0 result,
                       iRegP_R3 tmp1, iRegP_R4 tmp2, iRegP_R5 tmp3,
                       iRegP_R10 tmp, rFlagsReg cr)
%{
  predicate(((AryEqNode*)n)->encoding() == StrIntrinsicNode::UU);
  match(Set result (AryEq ary1 ary2));
  effect(KILL tmp, USE_KILL ary1, USE_KILL ary2, TEMP tmp1, TEMP tmp2, TEMP tmp3, KILL cr);

  // Fixed: '$' was missing before ary2, so the format printed the literal
  // text "ary2" instead of the operand.
  format %{ "Array Equals $ary1,$ary2 -> $result    // KILL $tmp" %}
  ins_encode %{
    __ arrays_equals($ary1$$Register, $ary2$$Register,
                     $tmp1$$Register, $tmp2$$Register, $tmp3$$Register,
                     $result$$Register, $tmp$$Register, 2);
  %}
  ins_pipe(pipe_class_memory);
%}
14533 
// Scan a byte[] for any element with the sign bit set (non-ASCII byte).
instruct has_negatives(iRegP_R1 ary1, iRegI_R2 len, iRegI_R0 result, rFlagsReg cr)
%{
  match(Set result (HasNegatives ary1 len));
  effect(USE_KILL ary1, USE_KILL len, KILL cr);
  format %{ "has negatives byte[] $ary1,$len -> $result" %}
  ins_encode %{
    __ has_negatives($ary1$$Register, $len$$Register, $result$$Register);
  %}
  ins_pipe( pipe_slow );
%}
14544 
// fast char[] to byte[] compression
// Compact-strings compression; result reports success/position per the
// char_array_compress contract.
instruct string_compress(iRegP_R2 src, iRegP_R1 dst, iRegI_R3 len,
                         vRegD_V0 tmp1, vRegD_V1 tmp2,
                         vRegD_V2 tmp3, vRegD_V3 tmp4,
                         iRegI_R0 result, rFlagsReg cr)
%{
  match(Set result (StrCompressedCopy src (Binary dst len)));
  effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, USE_KILL src, USE_KILL dst, USE_KILL len, KILL cr);

  // NOTE(review): "KILL R1, R2, R3, R4" — R4 does not match any operand
  // above (dst=R1, src=R2, len=R3); looks like a stale comment — confirm.
  format %{ "String Compress $src,$dst -> $result    // KILL R1, R2, R3, R4" %}
  ins_encode %{
    __ char_array_compress($src$$Register, $dst$$Register, $len$$Register,
                           $tmp1$$FloatRegister, $tmp2$$FloatRegister,
                           $tmp3$$FloatRegister, $tmp4$$FloatRegister,
                           $result$$Register);
  %}
  ins_pipe( pipe_slow );
%}
14563 
// fast byte[] to char[] inflation
// Compact-strings inflation; produces no value (Universe dummy result).
instruct string_inflate(Universe dummy, iRegP_R0 src, iRegP_R1 dst, iRegI_R2 len,
                        vRegD_V0 tmp1, vRegD_V1 tmp2, vRegD_V2 tmp3, iRegP_R3 tmp4, rFlagsReg cr)
%{
  match(Set dummy (StrInflatedCopy src (Binary dst len)));
  effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, USE_KILL src, USE_KILL dst, USE_KILL len, KILL cr);

  // NOTE(review): tmp3 and tmp4 are also TEMPs but are not listed in the
  // format's KILL comment — cosmetic omission.
  format %{ "String Inflate $src,$dst    // KILL $tmp1, $tmp2" %}
  ins_encode %{
    __ byte_array_inflate($src$$Register, $dst$$Register, $len$$Register,
                          $tmp1$$FloatRegister, $tmp2$$FloatRegister, $tmp3$$FloatRegister, $tmp4$$Register);
  %}
  ins_pipe(pipe_class_memory);
%}
14578 
// encode char[] to byte[] in ISO_8859_1
// Vectorized ISO-8859-1 encoding; result is per the encode_iso_array
// contract (presumably the number of chars encoded — confirm in the
// MacroAssembler).
instruct encode_iso_array(iRegP_R2 src, iRegP_R1 dst, iRegI_R3 len,
                          vRegD_V0 Vtmp1, vRegD_V1 Vtmp2,
                          vRegD_V2 Vtmp3, vRegD_V3 Vtmp4,
                          iRegI_R0 result, rFlagsReg cr)
%{
  match(Set result (EncodeISOArray src (Binary dst len)));
  effect(USE_KILL src, USE_KILL dst, USE_KILL len,
         KILL Vtmp1, KILL Vtmp2, KILL Vtmp3, KILL Vtmp4, KILL cr);

  format %{ "Encode array $src,$dst,$len -> $result" %}
  ins_encode %{
    __ encode_iso_array($src$$Register, $dst$$Register, $len$$Register,
         $result$$Register, $Vtmp1$$FloatRegister,  $Vtmp2$$FloatRegister,
         $Vtmp3$$FloatRegister,  $Vtmp4$$FloatRegister);
  %}
  ins_pipe( pipe_class_memory );
%}
14597 
14598 // ============================================================================
14599 // This name is KNOWN by the ADLC and cannot be changed.
14600 // The ADLC forces a 'TypeRawPtr::BOTTOM' output type
14601 // for this guy.
// Thread::current(): the thread pointer lives in a dedicated register
// (thread_RegP), so this node is free — no code, size 0.
instruct tlsLoadP(thread_RegP dst)
%{
  match(Set dst (ThreadLocal));

  ins_cost(0);

  format %{ " -- \t// $dst=Thread::current(), empty" %}

  size(0);

  ins_encode( /*empty*/ );

  ins_pipe(pipe_class_empty);
%}
14616 
14617 // ====================VECTOR INSTRUCTIONS=====================================
14618 
// Load vector (32 bits)
// 4-byte vector load into the low part of a D register (ldr s-form).
instruct loadV4(vecD dst, vmem4 mem)
%{
  predicate(n->as_LoadVector()->memory_size() == 4);
  match(Set dst (LoadVector mem));
  ins_cost(4 * INSN_COST);
  format %{ "ldrs   $dst,$mem\t# vector (32 bits)" %}
  ins_encode( aarch64_enc_ldrvS(dst, mem) );
  ins_pipe(vload_reg_mem64);
%}
14629 
// Load vector (64 bits)
instruct loadV8(vecD dst, vmem8 mem)
%{
  // Selected only for 8-byte vector loads.
  predicate(n->as_LoadVector()->memory_size() == 8);
  match(Set dst (LoadVector mem));
  ins_cost(4 * INSN_COST);
  format %{ "ldrd   $dst,$mem\t# vector (64 bits)" %}
  ins_encode( aarch64_enc_ldrvD(dst, mem) );
  ins_pipe(vload_reg_mem64);
%}
14640 
// Load Vector (128 bits)
instruct loadV16(vecX dst, vmem16 mem)
%{
  // Selected only for 16-byte (full Q register) vector loads.
  predicate(n->as_LoadVector()->memory_size() == 16);
  match(Set dst (LoadVector mem));
  ins_cost(4 * INSN_COST);
  format %{ "ldrq   $dst,$mem\t# vector (128 bits)" %}
  ins_encode( aarch64_enc_ldrvQ(dst, mem) );
  ins_pipe(vload_reg_mem128);
%}
14651 
// Store Vector (32 bits)
instruct storeV4(vecD src, vmem4 mem)
%{
  // memory_size() disambiguates storeV4/storeV8/storeV16.
  predicate(n->as_StoreVector()->memory_size() == 4);
  match(Set mem (StoreVector mem src));
  ins_cost(4 * INSN_COST);
  format %{ "strs   $mem,$src\t# vector (32 bits)" %}
  ins_encode( aarch64_enc_strvS(src, mem) );
  ins_pipe(vstore_reg_mem64);
%}
14662 
// Store Vector (64 bits)
instruct storeV8(vecD src, vmem8 mem)
%{
  // Selected only for 8-byte vector stores.
  predicate(n->as_StoreVector()->memory_size() == 8);
  match(Set mem (StoreVector mem src));
  ins_cost(4 * INSN_COST);
  format %{ "strd   $mem,$src\t# vector (64 bits)" %}
  ins_encode( aarch64_enc_strvD(src, mem) );
  ins_pipe(vstore_reg_mem64);
%}
14673 
// Store Vector (128 bits)
instruct storeV16(vecX src, vmem16 mem)
%{
  // Selected only for 16-byte (full Q register) vector stores.
  predicate(n->as_StoreVector()->memory_size() == 16);
  match(Set mem (StoreVector mem src));
  ins_cost(4 * INSN_COST);
  format %{ "strq   $mem,$src\t# vector (128 bits)" %}
  ins_encode( aarch64_enc_strvQ(src, mem) );
  ins_pipe(vstore_reg_mem128);
%}
14684 
instruct replicate8B(vecD dst, iRegIorL2I src)
%{
  // Also matches 4B vectors: both lengths fit in a D register and the
  // 8B dup broadcast is harmless for the unused upper lanes.
  predicate(n->as_Vector()->length() == 4 ||
            n->as_Vector()->length() == 8);
  match(Set dst (ReplicateB src));
  ins_cost(INSN_COST);
  format %{ "dup  $dst, $src\t# vector (8B)" %}
  ins_encode %{
    // Broadcast the low byte of $src into all 8 byte lanes.
    __ dup(as_FloatRegister($dst$$reg), __ T8B, as_Register($src$$reg));
  %}
  ins_pipe(vdup_reg_reg64);
%}
14697 
instruct replicate16B(vecX dst, iRegIorL2I src)
%{
  // 16-byte replicate: broadcast the low byte of $src into all 16 lanes.
  predicate(n->as_Vector()->length() == 16);
  match(Set dst (ReplicateB src));
  ins_cost(INSN_COST);
  format %{ "dup  $dst, $src\t# vector (16B)" %}
  ins_encode %{
    __ dup(as_FloatRegister($dst$$reg), __ T16B, as_Register($src$$reg));
  %}
  ins_pipe(vdup_reg_reg128);
%}
14709 
instruct replicate8B_imm(vecD dst, immI con)
%{
  // Immediate variant of replicate8B; also covers 4B vectors.
  predicate(n->as_Vector()->length() == 4 ||
            n->as_Vector()->length() == 8);
  match(Set dst (ReplicateB con));
  ins_cost(INSN_COST);
  format %{ "movi  $dst, $con\t# vector(8B)" %}
  ins_encode %{
    // Mask to the low byte: only 8 bits of the constant are replicated.
    __ mov(as_FloatRegister($dst$$reg), __ T8B, $con$$constant & 0xff);
  %}
  ins_pipe(vmovi_reg_imm64);
%}
14722 
instruct replicate16B_imm(vecX dst, immI con)
%{
  // Immediate variant of replicate16B.
  predicate(n->as_Vector()->length() == 16);
  match(Set dst (ReplicateB con));
  ins_cost(INSN_COST);
  format %{ "movi  $dst, $con\t# vector(16B)" %}
  ins_encode %{
    // Mask to the low byte before broadcasting to all 16 lanes.
    __ mov(as_FloatRegister($dst$$reg), __ T16B, $con$$constant & 0xff);
  %}
  ins_pipe(vmovi_reg_imm128);
%}
14734 
instruct replicate4S(vecD dst, iRegIorL2I src)
%{
  // Short replicate in a D register; also covers 2S vectors.
  predicate(n->as_Vector()->length() == 2 ||
            n->as_Vector()->length() == 4);
  match(Set dst (ReplicateS src));
  ins_cost(INSN_COST);
  format %{ "dup  $dst, $src\t# vector (4S)" %}
  ins_encode %{
    // Broadcast the low halfword of $src into all 4 H lanes.
    __ dup(as_FloatRegister($dst$$reg), __ T4H, as_Register($src$$reg));
  %}
  ins_pipe(vdup_reg_reg64);
%}
14747 
instruct replicate8S(vecX dst, iRegIorL2I src)
%{
  // Short replicate over a full Q register (8 halfword lanes).
  predicate(n->as_Vector()->length() == 8);
  match(Set dst (ReplicateS src));
  ins_cost(INSN_COST);
  format %{ "dup  $dst, $src\t# vector (8S)" %}
  ins_encode %{
    __ dup(as_FloatRegister($dst$$reg), __ T8H, as_Register($src$$reg));
  %}
  ins_pipe(vdup_reg_reg128);
%}
14759 
instruct replicate4S_imm(vecD dst, immI con)
%{
  // Immediate variant of replicate4S; also covers 2S vectors.
  predicate(n->as_Vector()->length() == 2 ||
            n->as_Vector()->length() == 4);
  match(Set dst (ReplicateS con));
  ins_cost(INSN_COST);
  format %{ "movi  $dst, $con\t# vector(4H)" %}
  ins_encode %{
    // Mask to the low halfword: only 16 bits are replicated.
    __ mov(as_FloatRegister($dst$$reg), __ T4H, $con$$constant & 0xffff);
  %}
  ins_pipe(vmovi_reg_imm64);
%}
14772 
instruct replicate8S_imm(vecX dst, immI con)
%{
  // Immediate variant of replicate8S.
  predicate(n->as_Vector()->length() == 8);
  match(Set dst (ReplicateS con));
  ins_cost(INSN_COST);
  format %{ "movi  $dst, $con\t# vector(8H)" %}
  ins_encode %{
    // Mask to the low halfword before broadcasting to all 8 lanes.
    __ mov(as_FloatRegister($dst$$reg), __ T8H, $con$$constant & 0xffff);
  %}
  ins_pipe(vmovi_reg_imm128);
%}
14784 
instruct replicate2I(vecD dst, iRegIorL2I src)
%{
  // Int replicate into two S lanes of a D register.
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (ReplicateI src));
  ins_cost(INSN_COST);
  format %{ "dup  $dst, $src\t# vector (2I)" %}
  ins_encode %{
    __ dup(as_FloatRegister($dst$$reg), __ T2S, as_Register($src$$reg));
  %}
  ins_pipe(vdup_reg_reg64);
%}
14796 
instruct replicate4I(vecX dst, iRegIorL2I src)
%{
  // Int replicate into four S lanes of a Q register.
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (ReplicateI src));
  ins_cost(INSN_COST);
  format %{ "dup  $dst, $src\t# vector (4I)" %}
  ins_encode %{
    __ dup(as_FloatRegister($dst$$reg), __ T4S, as_Register($src$$reg));
  %}
  ins_pipe(vdup_reg_reg128);
%}
14808 
instruct replicate2I_imm(vecD dst, immI con)
%{
  // Immediate variant of replicate2I; full 32-bit constant, no masking needed.
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (ReplicateI con));
  ins_cost(INSN_COST);
  format %{ "movi  $dst, $con\t# vector(2I)" %}
  ins_encode %{
    __ mov(as_FloatRegister($dst$$reg), __ T2S, $con$$constant);
  %}
  ins_pipe(vmovi_reg_imm64);
%}
14820 
instruct replicate4I_imm(vecX dst, immI con)
%{
  // Immediate variant of replicate4I.
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (ReplicateI con));
  ins_cost(INSN_COST);
  format %{ "movi  $dst, $con\t# vector(4I)" %}
  ins_encode %{
    __ mov(as_FloatRegister($dst$$reg), __ T4S, $con$$constant);
  %}
  ins_pipe(vmovi_reg_imm128);
%}
14832 
instruct replicate2L(vecX dst, iRegL src)
%{
  // Long replicate into both D lanes of a Q register.
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (ReplicateL src));
  ins_cost(INSN_COST);
  format %{ "dup  $dst, $src\t# vector (2L)" %}
  ins_encode %{
    __ dup(as_FloatRegister($dst$$reg), __ T2D, as_Register($src$$reg));
  %}
  ins_pipe(vdup_reg_reg128);
%}
14844 
instruct replicate2L_zero(vecX dst, immI0 zero)
%{
  predicate(n->as_Vector()->length() == 2);
  // Matches ReplicateI (not ReplicateL): the all-zero bit pattern is the
  // same regardless of lane type, so the int-zero form is matched here.
  match(Set dst (ReplicateI zero));
  ins_cost(INSN_COST);
  // NOTE(review): format text says "movi ... vector(4I)" while the encoding
  // below actually zeroes via eor — debug text only; confirm before relying.
  format %{ "movi  $dst, $zero\t# vector(4I)" %}
  ins_encode %{
    // XOR the register with itself: zeroes all 128 bits in one instruction.
    __ eor(as_FloatRegister($dst$$reg), __ T16B,
           as_FloatRegister($dst$$reg),
           as_FloatRegister($dst$$reg));
  %}
  ins_pipe(vmovi_reg_imm128);
%}
14858 
instruct replicate2F(vecD dst, vRegF src)
%{
  // Float replicate: source is already in an FP register, dup lane 0.
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (ReplicateF src));
  ins_cost(INSN_COST);
  format %{ "dup  $dst, $src\t# vector (2F)" %}
  ins_encode %{
    __ dup(as_FloatRegister($dst$$reg), __ T2S,
           as_FloatRegister($src$$reg));
  %}
  ins_pipe(vdup_reg_freg64);
%}
14871 
instruct replicate4F(vecX dst, vRegF src)
%{
  // Float replicate into four S lanes of a Q register.
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (ReplicateF src));
  ins_cost(INSN_COST);
  format %{ "dup  $dst, $src\t# vector (4F)" %}
  ins_encode %{
    __ dup(as_FloatRegister($dst$$reg), __ T4S,
           as_FloatRegister($src$$reg));
  %}
  ins_pipe(vdup_reg_freg128);
%}
14884 
instruct replicate2D(vecX dst, vRegD src)
%{
  // Double replicate into both D lanes of a Q register.
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (ReplicateD src));
  ins_cost(INSN_COST);
  format %{ "dup  $dst, $src\t# vector (2D)" %}
  ins_encode %{
    __ dup(as_FloatRegister($dst$$reg), __ T2D,
           as_FloatRegister($src$$reg));
  %}
  ins_pipe(vdup_reg_dreg128);
%}
14897 
14898 // ====================REDUCTION ARITHMETIC====================================
14899 
instruct reduce_add2I(iRegINoSp dst, iRegIorL2I src1, vecD src2, iRegINoSp tmp, iRegINoSp tmp2)
%{
  match(Set dst (AddReductionVI src1 src2));
  ins_cost(INSN_COST);
  effect(TEMP tmp, TEMP tmp2);
  format %{ "umov  $tmp, $src2, S, 0\n\t"
            "umov  $tmp2, $src2, S, 1\n\t"
            "addw  $dst, $src1, $tmp\n\t"
            "addw  $dst, $dst, $tmp2\t add reduction2i"
  %}
  ins_encode %{
    // Extract both int lanes of src2 into GPRs, then dst = src1 + lane0 + lane1.
    __ umov($tmp$$Register, as_FloatRegister($src2$$reg), __ S, 0);
    __ umov($tmp2$$Register, as_FloatRegister($src2$$reg), __ S, 1);
    __ addw($dst$$Register, $src1$$Register, $tmp$$Register);
    __ addw($dst$$Register, $dst$$Register, $tmp2$$Register);
  %}
  ins_pipe(pipe_class_default);
%}
14918 
instruct reduce_add4I(iRegINoSp dst, iRegIorL2I src1, vecX src2, vecX tmp, iRegINoSp tmp2)
%{
  match(Set dst (AddReductionVI src1 src2));
  ins_cost(INSN_COST);
  effect(TEMP tmp, TEMP tmp2);
  format %{ "addv  $tmp, T4S, $src2\n\t"
            "umov  $tmp2, $tmp, S, 0\n\t"
            "addw  $dst, $tmp2, $src1\t add reduction4i"
  %}
  ins_encode %{
    // Horizontal add of all four int lanes, then fold in the scalar src1.
    __ addv(as_FloatRegister($tmp$$reg), __ T4S,
            as_FloatRegister($src2$$reg));
    __ umov($tmp2$$Register, as_FloatRegister($tmp$$reg), __ S, 0);
    __ addw($dst$$Register, $tmp2$$Register, $src1$$Register);
  %}
  ins_pipe(pipe_class_default);
%}
14936 
instruct reduce_mul2I(iRegINoSp dst, iRegIorL2I src1, vecD src2, iRegINoSp tmp)
%{
  match(Set dst (MulReductionVI src1 src2));
  ins_cost(INSN_COST);
  effect(TEMP tmp, TEMP dst);
  // Fix: dropped the stray trailing "\n\t" from the last format line, which
  // left a dangling newline/tab in the printed assembly comment.
  format %{ "umov  $tmp, $src2, S, 0\n\t"
            "mul   $dst, $tmp, $src1\n\t"
            "umov  $tmp, $src2, S, 1\n\t"
            "mul   $dst, $tmp, $dst\t mul reduction2i"
  %}
  ins_encode %{
    // dst = src1 * lane0(src2) * lane1(src2), via GPR extraction.
    __ umov($tmp$$Register, as_FloatRegister($src2$$reg), __ S, 0);
    __ mul($dst$$Register, $tmp$$Register, $src1$$Register);
    __ umov($tmp$$Register, as_FloatRegister($src2$$reg), __ S, 1);
    __ mul($dst$$Register, $tmp$$Register, $dst$$Register);
  %}
  ins_pipe(pipe_class_default);
%}
14955 
instruct reduce_mul4I(iRegINoSp dst, iRegIorL2I src1, vecX src2, vecX tmp, iRegINoSp tmp2)
%{
  match(Set dst (MulReductionVI src1 src2));
  ins_cost(INSN_COST);
  effect(TEMP tmp, TEMP tmp2, TEMP dst);
  // Fix: dropped the stray trailing "\n\t" from the last format line, which
  // left a dangling newline/tab in the printed assembly comment.
  format %{ "ins   $tmp, $src2, 0, 1\n\t"
            "mul   $tmp, $tmp, $src2\n\t"
            "umov  $tmp2, $tmp, S, 0\n\t"
            "mul   $dst, $tmp2, $src1\n\t"
            "umov  $tmp2, $tmp, S, 1\n\t"
            "mul   $dst, $tmp2, $dst\t mul reduction4i"
  %}
  ins_encode %{
    // Fold the high D half onto the low half, pairwise multiply, then
    // finish the remaining two lanes through GPRs.
    __ ins(as_FloatRegister($tmp$$reg), __ D,
           as_FloatRegister($src2$$reg), 0, 1);
    __ mulv(as_FloatRegister($tmp$$reg), __ T2S,
           as_FloatRegister($tmp$$reg), as_FloatRegister($src2$$reg));
    __ umov($tmp2$$Register, as_FloatRegister($tmp$$reg), __ S, 0);
    __ mul($dst$$Register, $tmp2$$Register, $src1$$Register);
    __ umov($tmp2$$Register, as_FloatRegister($tmp$$reg), __ S, 1);
    __ mul($dst$$Register, $tmp2$$Register, $dst$$Register);
  %}
  ins_pipe(pipe_class_default);
%}
14980 
instruct reduce_add2F(vRegF dst, vRegF src1, vecD src2, vecD tmp)
%{
  match(Set dst (AddReductionVF src1 src2));
  ins_cost(INSN_COST);
  effect(TEMP tmp, TEMP dst);
  format %{ "fadds $dst, $src1, $src2\n\t"
            "ins   $tmp, S, $src2, 0, 1\n\t"
            "fadds $dst, $dst, $tmp\t add reduction2f"
  %}
  ins_encode %{
    // Strictly-ordered scalar adds (required for FP reduction semantics):
    // dst = (src1 + lane0) + lane1.
    __ fadds(as_FloatRegister($dst$$reg),
             as_FloatRegister($src1$$reg), as_FloatRegister($src2$$reg));
    __ ins(as_FloatRegister($tmp$$reg), __ S,
           as_FloatRegister($src2$$reg), 0, 1);
    __ fadds(as_FloatRegister($dst$$reg),
             as_FloatRegister($dst$$reg), as_FloatRegister($tmp$$reg));
  %}
  ins_pipe(pipe_class_default);
%}
15000 
instruct reduce_add4F(vRegF dst, vRegF src1, vecX src2, vecX tmp)
%{
  match(Set dst (AddReductionVF src1 src2));
  ins_cost(INSN_COST);
  effect(TEMP tmp, TEMP dst);
  format %{ "fadds $dst, $src1, $src2\n\t"
            "ins   $tmp, S, $src2, 0, 1\n\t"
            "fadds $dst, $dst, $tmp\n\t"
            "ins   $tmp, S, $src2, 0, 2\n\t"
            "fadds $dst, $dst, $tmp\n\t"
            "ins   $tmp, S, $src2, 0, 3\n\t"
            "fadds $dst, $dst, $tmp\t add reduction4f"
  %}
  ins_encode %{
    // Strictly-ordered scalar adds over lanes 0..3 (FP addition is not
    // associative, so a vector horizontal add cannot be used).
    __ fadds(as_FloatRegister($dst$$reg),
             as_FloatRegister($src1$$reg), as_FloatRegister($src2$$reg));
    __ ins(as_FloatRegister($tmp$$reg), __ S,
           as_FloatRegister($src2$$reg), 0, 1);
    __ fadds(as_FloatRegister($dst$$reg),
             as_FloatRegister($dst$$reg), as_FloatRegister($tmp$$reg));
    __ ins(as_FloatRegister($tmp$$reg), __ S,
           as_FloatRegister($src2$$reg), 0, 2);
    __ fadds(as_FloatRegister($dst$$reg),
             as_FloatRegister($dst$$reg), as_FloatRegister($tmp$$reg));
    __ ins(as_FloatRegister($tmp$$reg), __ S,
           as_FloatRegister($src2$$reg), 0, 3);
    __ fadds(as_FloatRegister($dst$$reg),
             as_FloatRegister($dst$$reg), as_FloatRegister($tmp$$reg));
  %}
  ins_pipe(pipe_class_default);
%}
15032 
instruct reduce_mul2F(vRegF dst, vRegF src1, vecD src2, vecD tmp)
%{
  match(Set dst (MulReductionVF src1 src2));
  ins_cost(INSN_COST);
  effect(TEMP tmp, TEMP dst);
  // Fix: debug comment previously said "add reduction4f" — this is a
  // 2-lane float MULTIPLY reduction.
  format %{ "fmuls $dst, $src1, $src2\n\t"
            "ins   $tmp, S, $src2, 0, 1\n\t"
            "fmuls $dst, $dst, $tmp\t mul reduction2f"
  %}
  ins_encode %{
    // Strictly-ordered scalar multiplies: dst = (src1 * lane0) * lane1.
    __ fmuls(as_FloatRegister($dst$$reg),
             as_FloatRegister($src1$$reg), as_FloatRegister($src2$$reg));
    __ ins(as_FloatRegister($tmp$$reg), __ S,
           as_FloatRegister($src2$$reg), 0, 1);
    __ fmuls(as_FloatRegister($dst$$reg),
             as_FloatRegister($dst$$reg), as_FloatRegister($tmp$$reg));
  %}
  ins_pipe(pipe_class_default);
%}
15052 
instruct reduce_mul4F(vRegF dst, vRegF src1, vecX src2, vecX tmp)
%{
  match(Set dst (MulReductionVF src1 src2));
  ins_cost(INSN_COST);
  effect(TEMP tmp, TEMP dst);
  // Fix: debug comment previously said "add reduction4f" — this is a
  // 4-lane float MULTIPLY reduction.
  format %{ "fmuls $dst, $src1, $src2\n\t"
            "ins   $tmp, S, $src2, 0, 1\n\t"
            "fmuls $dst, $dst, $tmp\n\t"
            "ins   $tmp, S, $src2, 0, 2\n\t"
            "fmuls $dst, $dst, $tmp\n\t"
            "ins   $tmp, S, $src2, 0, 3\n\t"
            "fmuls $dst, $dst, $tmp\t mul reduction4f"
  %}
  ins_encode %{
    // Strictly-ordered scalar multiplies over lanes 0..3.
    __ fmuls(as_FloatRegister($dst$$reg),
             as_FloatRegister($src1$$reg), as_FloatRegister($src2$$reg));
    __ ins(as_FloatRegister($tmp$$reg), __ S,
           as_FloatRegister($src2$$reg), 0, 1);
    __ fmuls(as_FloatRegister($dst$$reg),
             as_FloatRegister($dst$$reg), as_FloatRegister($tmp$$reg));
    __ ins(as_FloatRegister($tmp$$reg), __ S,
           as_FloatRegister($src2$$reg), 0, 2);
    __ fmuls(as_FloatRegister($dst$$reg),
             as_FloatRegister($dst$$reg), as_FloatRegister($tmp$$reg));
    __ ins(as_FloatRegister($tmp$$reg), __ S,
           as_FloatRegister($src2$$reg), 0, 3);
    __ fmuls(as_FloatRegister($dst$$reg),
             as_FloatRegister($dst$$reg), as_FloatRegister($tmp$$reg));
  %}
  ins_pipe(pipe_class_default);
%}
15084 
instruct reduce_add2D(vRegD dst, vRegD src1, vecX src2, vecX tmp)
%{
  match(Set dst (AddReductionVD src1 src2));
  ins_cost(INSN_COST);
  effect(TEMP tmp, TEMP dst);
  format %{ "faddd $dst, $src1, $src2\n\t"
            "ins   $tmp, D, $src2, 0, 1\n\t"
            "faddd $dst, $dst, $tmp\t add reduction2d"
  %}
  ins_encode %{
    // Strictly-ordered scalar adds: dst = (src1 + lane0) + lane1.
    __ faddd(as_FloatRegister($dst$$reg),
             as_FloatRegister($src1$$reg), as_FloatRegister($src2$$reg));
    __ ins(as_FloatRegister($tmp$$reg), __ D,
           as_FloatRegister($src2$$reg), 0, 1);
    __ faddd(as_FloatRegister($dst$$reg),
             as_FloatRegister($dst$$reg), as_FloatRegister($tmp$$reg));
  %}
  ins_pipe(pipe_class_default);
%}
15104 
instruct reduce_mul2D(vRegD dst, vRegD src1, vecX src2, vecX tmp)
%{
  match(Set dst (MulReductionVD src1 src2));
  ins_cost(INSN_COST);
  effect(TEMP tmp, TEMP dst);
  // NOTE(review): trailing comment says "add reduction2d" but this is the
  // multiply reduction — debug text only.
  format %{ "fmuld $dst, $src1, $src2\n\t"
            "ins   $tmp, D, $src2, 0, 1\n\t"
            "fmuld $dst, $dst, $tmp\t add reduction2d"
  %}
  ins_encode %{
    // Strictly-ordered scalar multiplies: dst = (src1 * lane0) * lane1.
    __ fmuld(as_FloatRegister($dst$$reg),
             as_FloatRegister($src1$$reg), as_FloatRegister($src2$$reg));
    __ ins(as_FloatRegister($tmp$$reg), __ D,
           as_FloatRegister($src2$$reg), 0, 1);
    __ fmuld(as_FloatRegister($dst$$reg),
             as_FloatRegister($dst$$reg), as_FloatRegister($tmp$$reg));
  %}
  ins_pipe(pipe_class_default);
%}
15124 
15125 // ====================VECTOR ARITHMETIC=======================================
15126 
15127 // --------------------------------- ADD --------------------------------------
15128 
instruct vadd8B(vecD dst, vecD src1, vecD src2)
%{
  // Also matches 4B vectors: both lengths fit in a D register.
  predicate(n->as_Vector()->length() == 4 ||
            n->as_Vector()->length() == 8);
  match(Set dst (AddVB src1 src2));
  ins_cost(INSN_COST);
  format %{ "addv  $dst,$src1,$src2\t# vector (8B)" %}
  ins_encode %{
    __ addv(as_FloatRegister($dst$$reg), __ T8B,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vdop64);
%}
15143 
instruct vadd16B(vecX dst, vecX src1, vecX src2)
%{
  // Byte vector add over a full Q register.
  predicate(n->as_Vector()->length() == 16);
  match(Set dst (AddVB src1 src2));
  ins_cost(INSN_COST);
  format %{ "addv  $dst,$src1,$src2\t# vector (16B)" %}
  ins_encode %{
    __ addv(as_FloatRegister($dst$$reg), __ T16B,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vdop128);
%}
15157 
instruct vadd4S(vecD dst, vecD src1, vecD src2)
%{
  // Also matches 2S vectors: both lengths fit in a D register.
  predicate(n->as_Vector()->length() == 2 ||
            n->as_Vector()->length() == 4);
  match(Set dst (AddVS src1 src2));
  ins_cost(INSN_COST);
  format %{ "addv  $dst,$src1,$src2\t# vector (4H)" %}
  ins_encode %{
    __ addv(as_FloatRegister($dst$$reg), __ T4H,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vdop64);
%}
15172 
instruct vadd8S(vecX dst, vecX src1, vecX src2)
%{
  // Short vector add over a full Q register (8 halfword lanes).
  predicate(n->as_Vector()->length() == 8);
  match(Set dst (AddVS src1 src2));
  ins_cost(INSN_COST);
  format %{ "addv  $dst,$src1,$src2\t# vector (8H)" %}
  ins_encode %{
    __ addv(as_FloatRegister($dst$$reg), __ T8H,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vdop128);
%}
15186 
instruct vadd2I(vecD dst, vecD src1, vecD src2)
%{
  // Int vector add, two S lanes in D registers.
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (AddVI src1 src2));
  ins_cost(INSN_COST);
  format %{ "addv  $dst,$src1,$src2\t# vector (2S)" %}
  ins_encode %{
    __ addv(as_FloatRegister($dst$$reg), __ T2S,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vdop64);
%}
15200 
instruct vadd4I(vecX dst, vecX src1, vecX src2)
%{
  // Int vector add, four S lanes in Q registers.
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (AddVI src1 src2));
  ins_cost(INSN_COST);
  format %{ "addv  $dst,$src1,$src2\t# vector (4S)" %}
  ins_encode %{
    __ addv(as_FloatRegister($dst$$reg), __ T4S,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vdop128);
%}
15214 
instruct vadd2L(vecX dst, vecX src1, vecX src2)
%{
  // Long vector add, two D lanes in Q registers.
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (AddVL src1 src2));
  ins_cost(INSN_COST);
  format %{ "addv  $dst,$src1,$src2\t# vector (2L)" %}
  ins_encode %{
    __ addv(as_FloatRegister($dst$$reg), __ T2D,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vdop128);
%}
15228 
instruct vadd2F(vecD dst, vecD src1, vecD src2)
%{
  // Float vector add, two S lanes in D registers.
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (AddVF src1 src2));
  ins_cost(INSN_COST);
  format %{ "fadd  $dst,$src1,$src2\t# vector (2S)" %}
  ins_encode %{
    __ fadd(as_FloatRegister($dst$$reg), __ T2S,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vdop_fp64);
%}
15242 
instruct vadd4F(vecX dst, vecX src1, vecX src2)
%{
  // Float vector add, four S lanes in Q registers.
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (AddVF src1 src2));
  ins_cost(INSN_COST);
  format %{ "fadd  $dst,$src1,$src2\t# vector (4S)" %}
  ins_encode %{
    __ fadd(as_FloatRegister($dst$$reg), __ T4S,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vdop_fp128);
%}
15256 
instruct vadd2D(vecX dst, vecX src1, vecX src2)
%{
  // Double vector add, two D lanes in Q registers.
  // Fix: add the length()==2 predicate for consistency with vsub2D/vmul2D
  // (every other 2D rule in this family guards on vector length).
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (AddVD src1 src2));
  ins_cost(INSN_COST);
  format %{ "fadd  $dst,$src1,$src2\t# vector (2D)" %}
  ins_encode %{
    __ fadd(as_FloatRegister($dst$$reg), __ T2D,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vdop_fp128);
%}
15269 
15270 // --------------------------------- SUB --------------------------------------
15271 
instruct vsub8B(vecD dst, vecD src1, vecD src2)
%{
  // Also matches 4B vectors: both lengths fit in a D register.
  predicate(n->as_Vector()->length() == 4 ||
            n->as_Vector()->length() == 8);
  match(Set dst (SubVB src1 src2));
  ins_cost(INSN_COST);
  format %{ "subv  $dst,$src1,$src2\t# vector (8B)" %}
  ins_encode %{
    __ subv(as_FloatRegister($dst$$reg), __ T8B,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vdop64);
%}
15286 
instruct vsub16B(vecX dst, vecX src1, vecX src2)
%{
  // Byte vector subtract over a full Q register.
  predicate(n->as_Vector()->length() == 16);
  match(Set dst (SubVB src1 src2));
  ins_cost(INSN_COST);
  format %{ "subv  $dst,$src1,$src2\t# vector (16B)" %}
  ins_encode %{
    __ subv(as_FloatRegister($dst$$reg), __ T16B,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vdop128);
%}
15300 
instruct vsub4S(vecD dst, vecD src1, vecD src2)
%{
  // Also matches 2S vectors: both lengths fit in a D register.
  predicate(n->as_Vector()->length() == 2 ||
            n->as_Vector()->length() == 4);
  match(Set dst (SubVS src1 src2));
  ins_cost(INSN_COST);
  format %{ "subv  $dst,$src1,$src2\t# vector (4H)" %}
  ins_encode %{
    __ subv(as_FloatRegister($dst$$reg), __ T4H,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vdop64);
%}
15315 
instruct vsub8S(vecX dst, vecX src1, vecX src2)
%{
  // Short vector subtract over a full Q register (8 halfword lanes).
  predicate(n->as_Vector()->length() == 8);
  match(Set dst (SubVS src1 src2));
  ins_cost(INSN_COST);
  format %{ "subv  $dst,$src1,$src2\t# vector (8H)" %}
  ins_encode %{
    __ subv(as_FloatRegister($dst$$reg), __ T8H,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vdop128);
%}
15329 
instruct vsub2I(vecD dst, vecD src1, vecD src2)
%{
  // Int vector subtract, two S lanes in D registers.
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (SubVI src1 src2));
  ins_cost(INSN_COST);
  format %{ "subv  $dst,$src1,$src2\t# vector (2S)" %}
  ins_encode %{
    __ subv(as_FloatRegister($dst$$reg), __ T2S,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vdop64);
%}
15343 
instruct vsub4I(vecX dst, vecX src1, vecX src2)
%{
  // Int vector subtract, four S lanes in Q registers.
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (SubVI src1 src2));
  ins_cost(INSN_COST);
  format %{ "subv  $dst,$src1,$src2\t# vector (4S)" %}
  ins_encode %{
    __ subv(as_FloatRegister($dst$$reg), __ T4S,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vdop128);
%}
15357 
instruct vsub2L(vecX dst, vecX src1, vecX src2)
%{
  // Long vector subtract, two D lanes in Q registers.
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (SubVL src1 src2));
  ins_cost(INSN_COST);
  format %{ "subv  $dst,$src1,$src2\t# vector (2L)" %}
  ins_encode %{
    __ subv(as_FloatRegister($dst$$reg), __ T2D,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vdop128);
%}
15371 
instruct vsub2F(vecD dst, vecD src1, vecD src2)
%{
  // Float vector subtract, two S lanes in D registers.
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (SubVF src1 src2));
  ins_cost(INSN_COST);
  format %{ "fsub  $dst,$src1,$src2\t# vector (2S)" %}
  ins_encode %{
    __ fsub(as_FloatRegister($dst$$reg), __ T2S,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vdop_fp64);
%}
15385 
instruct vsub4F(vecX dst, vecX src1, vecX src2)
%{
  // Float vector subtract, four S lanes in Q registers.
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (SubVF src1 src2));
  ins_cost(INSN_COST);
  format %{ "fsub  $dst,$src1,$src2\t# vector (4S)" %}
  ins_encode %{
    __ fsub(as_FloatRegister($dst$$reg), __ T4S,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vdop_fp128);
%}
15399 
instruct vsub2D(vecX dst, vecX src1, vecX src2)
%{
  // Double vector subtract, two D lanes in Q registers.
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (SubVD src1 src2));
  ins_cost(INSN_COST);
  format %{ "fsub  $dst,$src1,$src2\t# vector (2D)" %}
  ins_encode %{
    __ fsub(as_FloatRegister($dst$$reg), __ T2D,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vdop_fp128);
%}
15413 
15414 // --------------------------------- MUL --------------------------------------
15415 
instruct vmul4S(vecD dst, vecD src1, vecD src2)
%{
  // Also matches 2S vectors: both lengths fit in a D register.
  predicate(n->as_Vector()->length() == 2 ||
            n->as_Vector()->length() == 4);
  match(Set dst (MulVS src1 src2));
  ins_cost(INSN_COST);
  format %{ "mulv  $dst,$src1,$src2\t# vector (4H)" %}
  ins_encode %{
    __ mulv(as_FloatRegister($dst$$reg), __ T4H,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vmul64);
%}
15430 
instruct vmul8S(vecX dst, vecX src1, vecX src2)
%{
  // Short vector multiply over a full Q register (8 halfword lanes).
  predicate(n->as_Vector()->length() == 8);
  match(Set dst (MulVS src1 src2));
  ins_cost(INSN_COST);
  format %{ "mulv  $dst,$src1,$src2\t# vector (8H)" %}
  ins_encode %{
    __ mulv(as_FloatRegister($dst$$reg), __ T8H,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vmul128);
%}
15444 
instruct vmul2I(vecD dst, vecD src1, vecD src2)
%{
  // Int vector multiply, two S lanes in D registers.
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (MulVI src1 src2));
  ins_cost(INSN_COST);
  format %{ "mulv  $dst,$src1,$src2\t# vector (2S)" %}
  ins_encode %{
    __ mulv(as_FloatRegister($dst$$reg), __ T2S,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vmul64);
%}
15458 
instruct vmul4I(vecX dst, vecX src1, vecX src2)
%{
  // Int vector multiply, four S lanes in Q registers.
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (MulVI src1 src2));
  ins_cost(INSN_COST);
  format %{ "mulv  $dst,$src1,$src2\t# vector (4S)" %}
  ins_encode %{
    __ mulv(as_FloatRegister($dst$$reg), __ T4S,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vmul128);
%}
15472 
instruct vmul2F(vecD dst, vecD src1, vecD src2)
%{
  // Float vector multiply, two S lanes in D registers.
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (MulVF src1 src2));
  ins_cost(INSN_COST);
  format %{ "fmul  $dst,$src1,$src2\t# vector (2S)" %}
  ins_encode %{
    __ fmul(as_FloatRegister($dst$$reg), __ T2S,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vmuldiv_fp64);
%}
15486 
instruct vmul4F(vecX dst, vecX src1, vecX src2)
%{
  // Float vector multiply, four S lanes in Q registers.
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (MulVF src1 src2));
  ins_cost(INSN_COST);
  format %{ "fmul  $dst,$src1,$src2\t# vector (4S)" %}
  ins_encode %{
    __ fmul(as_FloatRegister($dst$$reg), __ T4S,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vmuldiv_fp128);
%}
15500 
instruct vmul2D(vecX dst, vecX src1, vecX src2)
%{
  // Double vector multiply, two D lanes in Q registers.
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (MulVD src1 src2));
  ins_cost(INSN_COST);
  format %{ "fmul  $dst,$src1,$src2\t# vector (2D)" %}
  ins_encode %{
    __ fmul(as_FloatRegister($dst$$reg), __ T2D,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vmuldiv_fp128);
%}
15514 
15515 // --------------------------------- MLA --------------------------------------
15516 
15517 instruct vmla4S(vecD dst, vecD src1, vecD src2)
15518 %{
15519   predicate(n->as_Vector()->length() == 2 ||
15520             n->as_Vector()->length() == 4);
15521   match(Set dst (AddVS dst (MulVS src1 src2)));
15522   ins_cost(INSN_COST);
15523   format %{ "mlav  $dst,$src1,$src2\t# vector (4H)" %}
15524   ins_encode %{
15525     __ mlav(as_FloatRegister($dst$$reg), __ T4H,
15526             as_FloatRegister($src1$$reg),
15527             as_FloatRegister($src2$$reg));
15528   %}
15529   ins_pipe(vmla64);
15530 %}
15531 
15532 instruct vmla8S(vecX dst, vecX src1, vecX src2)
15533 %{
15534   predicate(n->as_Vector()->length() == 8);
15535   match(Set dst (AddVS dst (MulVS src1 src2)));
15536   ins_cost(INSN_COST);
15537   format %{ "mlav  $dst,$src1,$src2\t# vector (8H)" %}
15538   ins_encode %{
15539     __ mlav(as_FloatRegister($dst$$reg), __ T8H,
15540             as_FloatRegister($src1$$reg),
15541             as_FloatRegister($src2$$reg));
15542   %}
15543   ins_pipe(vmla128);
15544 %}
15545 
15546 instruct vmla2I(vecD dst, vecD src1, vecD src2)
15547 %{
15548   predicate(n->as_Vector()->length() == 2);
15549   match(Set dst (AddVI dst (MulVI src1 src2)));
15550   ins_cost(INSN_COST);
15551   format %{ "mlav  $dst,$src1,$src2\t# vector (2S)" %}
15552   ins_encode %{
15553     __ mlav(as_FloatRegister($dst$$reg), __ T2S,
15554             as_FloatRegister($src1$$reg),
15555             as_FloatRegister($src2$$reg));
15556   %}
15557   ins_pipe(vmla64);
15558 %}
15559 
15560 instruct vmla4I(vecX dst, vecX src1, vecX src2)
15561 %{
15562   predicate(n->as_Vector()->length() == 4);
15563   match(Set dst (AddVI dst (MulVI src1 src2)));
15564   ins_cost(INSN_COST);
15565   format %{ "mlav  $dst,$src1,$src2\t# vector (4S)" %}
15566   ins_encode %{
15567     __ mlav(as_FloatRegister($dst$$reg), __ T4S,
15568             as_FloatRegister($src1$$reg),
15569             as_FloatRegister($src2$$reg));
15570   %}
15571   ins_pipe(vmla128);
15572 %}
15573 
15574 // dst + src1 * src2
15575 instruct vmla2F(vecD dst, vecD src1, vecD src2) %{
15576   predicate(UseFMA && n->as_Vector()->length() == 2);
15577   match(Set dst (FmaVF  dst (Binary src1 src2)));
15578   format %{ "fmla  $dst,$src1,$src2\t# vector (2S)" %}
15579   ins_cost(INSN_COST);
15580   ins_encode %{
15581     __ fmla(as_FloatRegister($dst$$reg), __ T2S,
15582             as_FloatRegister($src1$$reg),
15583             as_FloatRegister($src2$$reg));
15584   %}
15585   ins_pipe(vmuldiv_fp64);
15586 %}
15587 
15588 // dst + src1 * src2
15589 instruct vmla4F(vecX dst, vecX src1, vecX src2) %{
15590   predicate(UseFMA && n->as_Vector()->length() == 4);
15591   match(Set dst (FmaVF  dst (Binary src1 src2)));
15592   format %{ "fmla  $dst,$src1,$src2\t# vector (4S)" %}
15593   ins_cost(INSN_COST);
15594   ins_encode %{
15595     __ fmla(as_FloatRegister($dst$$reg), __ T4S,
15596             as_FloatRegister($src1$$reg),
15597             as_FloatRegister($src2$$reg));
15598   %}
15599   ins_pipe(vmuldiv_fp128);
15600 %}
15601 
15602 // dst + src1 * src2
15603 instruct vmla2D(vecX dst, vecX src1, vecX src2) %{
15604   predicate(UseFMA && n->as_Vector()->length() == 2);
15605   match(Set dst (FmaVD  dst (Binary src1 src2)));
15606   format %{ "fmla  $dst,$src1,$src2\t# vector (2D)" %}
15607   ins_cost(INSN_COST);
15608   ins_encode %{
15609     __ fmla(as_FloatRegister($dst$$reg), __ T2D,
15610             as_FloatRegister($src1$$reg),
15611             as_FloatRegister($src2$$reg));
15612   %}
15613   ins_pipe(vmuldiv_fp128);
15614 %}
15615 
15616 // --------------------------------- MLS --------------------------------------
15617 
15618 instruct vmls4S(vecD dst, vecD src1, vecD src2)
15619 %{
15620   predicate(n->as_Vector()->length() == 2 ||
15621             n->as_Vector()->length() == 4);
15622   match(Set dst (SubVS dst (MulVS src1 src2)));
15623   ins_cost(INSN_COST);
15624   format %{ "mlsv  $dst,$src1,$src2\t# vector (4H)" %}
15625   ins_encode %{
15626     __ mlsv(as_FloatRegister($dst$$reg), __ T4H,
15627             as_FloatRegister($src1$$reg),
15628             as_FloatRegister($src2$$reg));
15629   %}
15630   ins_pipe(vmla64);
15631 %}
15632 
15633 instruct vmls8S(vecX dst, vecX src1, vecX src2)
15634 %{
15635   predicate(n->as_Vector()->length() == 8);
15636   match(Set dst (SubVS dst (MulVS src1 src2)));
15637   ins_cost(INSN_COST);
15638   format %{ "mlsv  $dst,$src1,$src2\t# vector (8H)" %}
15639   ins_encode %{
15640     __ mlsv(as_FloatRegister($dst$$reg), __ T8H,
15641             as_FloatRegister($src1$$reg),
15642             as_FloatRegister($src2$$reg));
15643   %}
15644   ins_pipe(vmla128);
15645 %}
15646 
15647 instruct vmls2I(vecD dst, vecD src1, vecD src2)
15648 %{
15649   predicate(n->as_Vector()->length() == 2);
15650   match(Set dst (SubVI dst (MulVI src1 src2)));
15651   ins_cost(INSN_COST);
15652   format %{ "mlsv  $dst,$src1,$src2\t# vector (2S)" %}
15653   ins_encode %{
15654     __ mlsv(as_FloatRegister($dst$$reg), __ T2S,
15655             as_FloatRegister($src1$$reg),
15656             as_FloatRegister($src2$$reg));
15657   %}
15658   ins_pipe(vmla64);
15659 %}
15660 
15661 instruct vmls4I(vecX dst, vecX src1, vecX src2)
15662 %{
15663   predicate(n->as_Vector()->length() == 4);
15664   match(Set dst (SubVI dst (MulVI src1 src2)));
15665   ins_cost(INSN_COST);
15666   format %{ "mlsv  $dst,$src1,$src2\t# vector (4S)" %}
15667   ins_encode %{
15668     __ mlsv(as_FloatRegister($dst$$reg), __ T4S,
15669             as_FloatRegister($src1$$reg),
15670             as_FloatRegister($src2$$reg));
15671   %}
15672   ins_pipe(vmla128);
15673 %}
15674 
15675 // dst - src1 * src2
15676 instruct vmls2F(vecD dst, vecD src1, vecD src2) %{
15677   predicate(UseFMA && n->as_Vector()->length() == 2);
15678   match(Set dst (FmaVF  dst (Binary (NegVF src1) src2)));
15679   match(Set dst (FmaVF  dst (Binary src1 (NegVF src2))));
15680   format %{ "fmls  $dst,$src1,$src2\t# vector (2S)" %}
15681   ins_cost(INSN_COST);
15682   ins_encode %{
15683     __ fmls(as_FloatRegister($dst$$reg), __ T2S,
15684             as_FloatRegister($src1$$reg),
15685             as_FloatRegister($src2$$reg));
15686   %}
15687   ins_pipe(vmuldiv_fp64);
15688 %}
15689 
15690 // dst - src1 * src2
15691 instruct vmls4F(vecX dst, vecX src1, vecX src2) %{
15692   predicate(UseFMA && n->as_Vector()->length() == 4);
15693   match(Set dst (FmaVF  dst (Binary (NegVF src1) src2)));
15694   match(Set dst (FmaVF  dst (Binary src1 (NegVF src2))));
15695   format %{ "fmls  $dst,$src1,$src2\t# vector (4S)" %}
15696   ins_cost(INSN_COST);
15697   ins_encode %{
15698     __ fmls(as_FloatRegister($dst$$reg), __ T4S,
15699             as_FloatRegister($src1$$reg),
15700             as_FloatRegister($src2$$reg));
15701   %}
15702   ins_pipe(vmuldiv_fp128);
15703 %}
15704 
15705 // dst - src1 * src2
15706 instruct vmls2D(vecX dst, vecX src1, vecX src2) %{
15707   predicate(UseFMA && n->as_Vector()->length() == 2);
15708   match(Set dst (FmaVD  dst (Binary (NegVD src1) src2)));
15709   match(Set dst (FmaVD  dst (Binary src1 (NegVD src2))));
15710   format %{ "fmls  $dst,$src1,$src2\t# vector (2D)" %}
15711   ins_cost(INSN_COST);
15712   ins_encode %{
15713     __ fmls(as_FloatRegister($dst$$reg), __ T2D,
15714             as_FloatRegister($src1$$reg),
15715             as_FloatRegister($src2$$reg));
15716   %}
15717   ins_pipe(vmuldiv_fp128);
15718 %}
15719 
15720 // --------------------------------- DIV --------------------------------------
15721 
15722 instruct vdiv2F(vecD dst, vecD src1, vecD src2)
15723 %{
15724   predicate(n->as_Vector()->length() == 2);
15725   match(Set dst (DivVF src1 src2));
15726   ins_cost(INSN_COST);
15727   format %{ "fdiv  $dst,$src1,$src2\t# vector (2S)" %}
15728   ins_encode %{
15729     __ fdiv(as_FloatRegister($dst$$reg), __ T2S,
15730             as_FloatRegister($src1$$reg),
15731             as_FloatRegister($src2$$reg));
15732   %}
15733   ins_pipe(vmuldiv_fp64);
15734 %}
15735 
15736 instruct vdiv4F(vecX dst, vecX src1, vecX src2)
15737 %{
15738   predicate(n->as_Vector()->length() == 4);
15739   match(Set dst (DivVF src1 src2));
15740   ins_cost(INSN_COST);
15741   format %{ "fdiv  $dst,$src1,$src2\t# vector (4S)" %}
15742   ins_encode %{
15743     __ fdiv(as_FloatRegister($dst$$reg), __ T4S,
15744             as_FloatRegister($src1$$reg),
15745             as_FloatRegister($src2$$reg));
15746   %}
15747   ins_pipe(vmuldiv_fp128);
15748 %}
15749 
15750 instruct vdiv2D(vecX dst, vecX src1, vecX src2)
15751 %{
15752   predicate(n->as_Vector()->length() == 2);
15753   match(Set dst (DivVD src1 src2));
15754   ins_cost(INSN_COST);
15755   format %{ "fdiv  $dst,$src1,$src2\t# vector (2D)" %}
15756   ins_encode %{
15757     __ fdiv(as_FloatRegister($dst$$reg), __ T2D,
15758             as_FloatRegister($src1$$reg),
15759             as_FloatRegister($src2$$reg));
15760   %}
15761   ins_pipe(vmuldiv_fp128);
15762 %}
15763 
15764 // --------------------------------- SQRT -------------------------------------
15765 
15766 instruct vsqrt2D(vecX dst, vecX src)
15767 %{
15768   predicate(n->as_Vector()->length() == 2);
15769   match(Set dst (SqrtVD src));
15770   format %{ "fsqrt  $dst, $src\t# vector (2D)" %}
15771   ins_encode %{
15772     __ fsqrt(as_FloatRegister($dst$$reg), __ T2D,
15773              as_FloatRegister($src$$reg));
15774   %}
15775   ins_pipe(vsqrt_fp128);
15776 %}
15777 
15778 // --------------------------------- ABS --------------------------------------
15779 
15780 instruct vabs2F(vecD dst, vecD src)
15781 %{
15782   predicate(n->as_Vector()->length() == 2);
15783   match(Set dst (AbsVF src));
15784   ins_cost(INSN_COST * 3);
15785   format %{ "fabs  $dst,$src\t# vector (2S)" %}
15786   ins_encode %{
15787     __ fabs(as_FloatRegister($dst$$reg), __ T2S,
15788             as_FloatRegister($src$$reg));
15789   %}
15790   ins_pipe(vunop_fp64);
15791 %}
15792 
15793 instruct vabs4F(vecX dst, vecX src)
15794 %{
15795   predicate(n->as_Vector()->length() == 4);
15796   match(Set dst (AbsVF src));
15797   ins_cost(INSN_COST * 3);
15798   format %{ "fabs  $dst,$src\t# vector (4S)" %}
15799   ins_encode %{
15800     __ fabs(as_FloatRegister($dst$$reg), __ T4S,
15801             as_FloatRegister($src$$reg));
15802   %}
15803   ins_pipe(vunop_fp128);
15804 %}
15805 
15806 instruct vabs2D(vecX dst, vecX src)
15807 %{
15808   predicate(n->as_Vector()->length() == 2);
15809   match(Set dst (AbsVD src));
15810   ins_cost(INSN_COST * 3);
15811   format %{ "fabs  $dst,$src\t# vector (2D)" %}
15812   ins_encode %{
15813     __ fabs(as_FloatRegister($dst$$reg), __ T2D,
15814             as_FloatRegister($src$$reg));
15815   %}
15816   ins_pipe(vunop_fp128);
15817 %}
15818 
15819 // --------------------------------- NEG --------------------------------------
15820 
15821 instruct vneg2F(vecD dst, vecD src)
15822 %{
15823   predicate(n->as_Vector()->length() == 2);
15824   match(Set dst (NegVF src));
15825   ins_cost(INSN_COST * 3);
15826   format %{ "fneg  $dst,$src\t# vector (2S)" %}
15827   ins_encode %{
15828     __ fneg(as_FloatRegister($dst$$reg), __ T2S,
15829             as_FloatRegister($src$$reg));
15830   %}
15831   ins_pipe(vunop_fp64);
15832 %}
15833 
15834 instruct vneg4F(vecX dst, vecX src)
15835 %{
15836   predicate(n->as_Vector()->length() == 4);
15837   match(Set dst (NegVF src));
15838   ins_cost(INSN_COST * 3);
15839   format %{ "fneg  $dst,$src\t# vector (4S)" %}
15840   ins_encode %{
15841     __ fneg(as_FloatRegister($dst$$reg), __ T4S,
15842             as_FloatRegister($src$$reg));
15843   %}
15844   ins_pipe(vunop_fp128);
15845 %}
15846 
15847 instruct vneg2D(vecX dst, vecX src)
15848 %{
15849   predicate(n->as_Vector()->length() == 2);
15850   match(Set dst (NegVD src));
15851   ins_cost(INSN_COST * 3);
15852   format %{ "fneg  $dst,$src\t# vector (2D)" %}
15853   ins_encode %{
15854     __ fneg(as_FloatRegister($dst$$reg), __ T2D,
15855             as_FloatRegister($src$$reg));
15856   %}
15857   ins_pipe(vunop_fp128);
15858 %}
15859 
15860 // --------------------------------- AND --------------------------------------
15861 
15862 instruct vand8B(vecD dst, vecD src1, vecD src2)
15863 %{
15864   predicate(n->as_Vector()->length_in_bytes() == 4 ||
15865             n->as_Vector()->length_in_bytes() == 8);
15866   match(Set dst (AndV src1 src2));
15867   ins_cost(INSN_COST);
15868   format %{ "and  $dst,$src1,$src2\t# vector (8B)" %}
15869   ins_encode %{
15870     __ andr(as_FloatRegister($dst$$reg), __ T8B,
15871             as_FloatRegister($src1$$reg),
15872             as_FloatRegister($src2$$reg));
15873   %}
15874   ins_pipe(vlogical64);
15875 %}
15876 
15877 instruct vand16B(vecX dst, vecX src1, vecX src2)
15878 %{
15879   predicate(n->as_Vector()->length_in_bytes() == 16);
15880   match(Set dst (AndV src1 src2));
15881   ins_cost(INSN_COST);
15882   format %{ "and  $dst,$src1,$src2\t# vector (16B)" %}
15883   ins_encode %{
15884     __ andr(as_FloatRegister($dst$$reg), __ T16B,
15885             as_FloatRegister($src1$$reg),
15886             as_FloatRegister($src2$$reg));
15887   %}
15888   ins_pipe(vlogical128);
15889 %}
15890 
15891 // --------------------------------- OR ---------------------------------------
15892 
// Bitwise OR, 4- or 8-byte vectors (8B arrangement; upper lanes ignored).
// The format string previously read "and" (copy/paste from vand8B) while
// the emitted instruction is orr; fixed so the disassembly comment matches
// the instruction actually generated (vor16B below already said "orr").
instruct vor8B(vecD dst, vecD src1, vecD src2)
%{
  predicate(n->as_Vector()->length_in_bytes() == 4 ||
            n->as_Vector()->length_in_bytes() == 8);
  match(Set dst (OrV src1 src2));
  ins_cost(INSN_COST);
  format %{ "orr  $dst,$src1,$src2\t# vector (8B)" %}
  ins_encode %{
    __ orr(as_FloatRegister($dst$$reg), __ T8B,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vlogical64);
%}
15907 
// Bitwise OR, 16-byte vectors (16B arrangement).
instruct vor16B(vecX dst, vecX src1, vecX src2)
%{
  predicate(n->as_Vector()->length_in_bytes() == 16);
  match(Set dst (OrV src1 src2));
  ins_cost(INSN_COST);
  format %{ "orr  $dst,$src1,$src2\t# vector (16B)" %}
  ins_encode %{
    __ orr(as_FloatRegister($dst$$reg), __ T16B,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vlogical128);
%}
15921 
15922 // --------------------------------- XOR --------------------------------------
15923 
15924 instruct vxor8B(vecD dst, vecD src1, vecD src2)
15925 %{
15926   predicate(n->as_Vector()->length_in_bytes() == 4 ||
15927             n->as_Vector()->length_in_bytes() == 8);
15928   match(Set dst (XorV src1 src2));
15929   ins_cost(INSN_COST);
15930   format %{ "xor  $dst,$src1,$src2\t# vector (8B)" %}
15931   ins_encode %{
15932     __ eor(as_FloatRegister($dst$$reg), __ T8B,
15933             as_FloatRegister($src1$$reg),
15934             as_FloatRegister($src2$$reg));
15935   %}
15936   ins_pipe(vlogical64);
15937 %}
15938 
15939 instruct vxor16B(vecX dst, vecX src1, vecX src2)
15940 %{
15941   predicate(n->as_Vector()->length_in_bytes() == 16);
15942   match(Set dst (XorV src1 src2));
15943   ins_cost(INSN_COST);
15944   format %{ "xor  $dst,$src1,$src2\t# vector (16B)" %}
15945   ins_encode %{
15946     __ eor(as_FloatRegister($dst$$reg), __ T16B,
15947             as_FloatRegister($src1$$reg),
15948             as_FloatRegister($src2$$reg));
15949   %}
15950   ins_pipe(vlogical128);
15951 %}
15952 
15953 // ------------------------------ Shift ---------------------------------------
15954 instruct vshiftcnt8B(vecD dst, iRegIorL2I cnt) %{
15955   predicate(n->as_Vector()->length_in_bytes() == 8);
15956   match(Set dst (LShiftCntV cnt));
15957   match(Set dst (RShiftCntV cnt));
15958   format %{ "dup  $dst, $cnt\t# shift count vector (8B)" %}
15959   ins_encode %{
15960     __ dup(as_FloatRegister($dst$$reg), __ T8B, as_Register($cnt$$reg));
15961   %}
15962   ins_pipe(vdup_reg_reg64);
15963 %}
15964 
15965 instruct vshiftcnt16B(vecX dst, iRegIorL2I cnt) %{
15966   predicate(n->as_Vector()->length_in_bytes() == 16);
15967   match(Set dst (LShiftCntV cnt));
15968   match(Set dst (RShiftCntV cnt));
15969   format %{ "dup  $dst, $cnt\t# shift count vector (16B)" %}
15970   ins_encode %{
15971     __ dup(as_FloatRegister($dst$$reg), __ T16B, as_Register($cnt$$reg));
15972   %}
15973   ins_pipe(vdup_reg_reg128);
15974 %}
15975 
15976 instruct vsll8B(vecD dst, vecD src, vecD shift) %{
15977   predicate(n->as_Vector()->length() == 4 ||
15978             n->as_Vector()->length() == 8);
15979   match(Set dst (LShiftVB src shift));
15980   ins_cost(INSN_COST);
15981   format %{ "sshl  $dst,$src,$shift\t# vector (8B)" %}
15982   ins_encode %{
15983     __ sshl(as_FloatRegister($dst$$reg), __ T8B,
15984             as_FloatRegister($src$$reg),
15985             as_FloatRegister($shift$$reg));
15986   %}
15987   ins_pipe(vshift64);
15988 %}
15989 
15990 instruct vsll16B(vecX dst, vecX src, vecX shift) %{
15991   predicate(n->as_Vector()->length() == 16);
15992   match(Set dst (LShiftVB src shift));
15993   ins_cost(INSN_COST);
15994   format %{ "sshl  $dst,$src,$shift\t# vector (16B)" %}
15995   ins_encode %{
15996     __ sshl(as_FloatRegister($dst$$reg), __ T16B,
15997             as_FloatRegister($src$$reg),
15998             as_FloatRegister($shift$$reg));
15999   %}
16000   ins_pipe(vshift128);
16001 %}
16002 
16003 // Right shifts with vector shift count on aarch64 SIMD are implemented
16004 // as left shift by negative shift count.
16005 // There are two cases for vector shift count.
16006 //
16007 // Case 1: The vector shift count is from replication.
16008 //        |            |
16009 //    LoadVector  RShiftCntV
16010 //        |       /
16011 //     RShiftVI
16012 // Note: In inner loop, multiple neg instructions are used, which can be
16013 // moved to outer loop and merge into one neg instruction.
16014 //
16015 // Case 2: The vector shift count is from loading.
16016 // This case isn't supported by middle-end now. But it's supported by
16017 // panama/vectorIntrinsics(JEP 338: Vector API).
16018 //        |            |
16019 //    LoadVector  LoadVector
16020 //        |       /
16021 //     RShiftVI
16022 //
16023 
// Arithmetic right shift, 4 or 8 byte lanes: NEON has no right shift by a
// vector count, so negate the count and use sshl (see comment above).
instruct vsra8B(vecD dst, vecD src, vecD shift, vecD tmp) %{
  predicate(n->as_Vector()->length() == 4 ||
            n->as_Vector()->length() == 8);
  match(Set dst (RShiftVB src shift));
  ins_cost(INSN_COST);
  effect(TEMP tmp);
  format %{ "negr  $tmp,$shift\t"
            "sshl  $dst,$src,$tmp\t# vector (8B)" %}
  ins_encode %{
    __ negr(as_FloatRegister($tmp$$reg), __ T8B,
            as_FloatRegister($shift$$reg));
    __ sshl(as_FloatRegister($dst$$reg), __ T8B,
            as_FloatRegister($src$$reg),
            as_FloatRegister($tmp$$reg));
  %}
  ins_pipe(vshift64);
%}

// Arithmetic right shift, 16 byte lanes (negated count + sshl).
instruct vsra16B(vecX dst, vecX src, vecX shift, vecX tmp) %{
  predicate(n->as_Vector()->length() == 16);
  match(Set dst (RShiftVB src shift));
  ins_cost(INSN_COST);
  effect(TEMP tmp);
  format %{ "negr  $tmp,$shift\t"
            "sshl  $dst,$src,$tmp\t# vector (16B)" %}
  ins_encode %{
    __ negr(as_FloatRegister($tmp$$reg), __ T16B,
            as_FloatRegister($shift$$reg));
    __ sshl(as_FloatRegister($dst$$reg), __ T16B,
            as_FloatRegister($src$$reg),
            as_FloatRegister($tmp$$reg));
  %}
  ins_pipe(vshift128);
%}

// Logical right shift, 4 or 8 byte lanes (negated count + ushl).
instruct vsrl8B(vecD dst, vecD src, vecD shift, vecD tmp) %{
  predicate(n->as_Vector()->length() == 4 ||
            n->as_Vector()->length() == 8);
  match(Set dst (URShiftVB src shift));
  ins_cost(INSN_COST);
  effect(TEMP tmp);
  format %{ "negr  $tmp,$shift\t"
            "ushl  $dst,$src,$tmp\t# vector (8B)" %}
  ins_encode %{
    __ negr(as_FloatRegister($tmp$$reg), __ T8B,
            as_FloatRegister($shift$$reg));
    __ ushl(as_FloatRegister($dst$$reg), __ T8B,
            as_FloatRegister($src$$reg),
            as_FloatRegister($tmp$$reg));
  %}
  ins_pipe(vshift64);
%}

// Logical right shift, 16 byte lanes (negated count + ushl).
instruct vsrl16B(vecX dst, vecX src, vecX shift, vecX tmp) %{
  predicate(n->as_Vector()->length() == 16);
  match(Set dst (URShiftVB src shift));
  ins_cost(INSN_COST);
  effect(TEMP tmp);
  format %{ "negr  $tmp,$shift\t"
            "ushl  $dst,$src,$tmp\t# vector (16B)" %}
  ins_encode %{
    __ negr(as_FloatRegister($tmp$$reg), __ T16B,
            as_FloatRegister($shift$$reg));
    __ ushl(as_FloatRegister($dst$$reg), __ T16B,
            as_FloatRegister($src$$reg),
            as_FloatRegister($tmp$$reg));
  %}
  ins_pipe(vshift128);
%}
16093 
// Immediate left shift, 4 or 8 byte lanes. A count >= the 8-bit element
// width must yield zero, which shl cannot encode, so that case clears dst
// with eor(dst, src, src).
instruct vsll8B_imm(vecD dst, vecD src, immI shift) %{
  predicate(n->as_Vector()->length() == 4 ||
            n->as_Vector()->length() == 8);
  match(Set dst (LShiftVB src shift));
  ins_cost(INSN_COST);
  format %{ "shl    $dst, $src, $shift\t# vector (8B)" %}
  ins_encode %{
    int sh = (int)$shift$$constant;
    if (sh >= 8) {
      __ eor(as_FloatRegister($dst$$reg), __ T8B,
             as_FloatRegister($src$$reg),
             as_FloatRegister($src$$reg));
    } else {
      __ shl(as_FloatRegister($dst$$reg), __ T8B,
             as_FloatRegister($src$$reg), sh);
    }
  %}
  ins_pipe(vshift64_imm);
%}

// Immediate left shift, 16 byte lanes (counts >= 8 zero the destination).
instruct vsll16B_imm(vecX dst, vecX src, immI shift) %{
  predicate(n->as_Vector()->length() == 16);
  match(Set dst (LShiftVB src shift));
  ins_cost(INSN_COST);
  format %{ "shl    $dst, $src, $shift\t# vector (16B)" %}
  ins_encode %{
    int sh = (int)$shift$$constant;
    if (sh >= 8) {
      __ eor(as_FloatRegister($dst$$reg), __ T16B,
             as_FloatRegister($src$$reg),
             as_FloatRegister($src$$reg));
    } else {
      __ shl(as_FloatRegister($dst$$reg), __ T16B,
             as_FloatRegister($src$$reg), sh);
    }
  %}
  ins_pipe(vshift128_imm);
%}

// Immediate arithmetic right shift, 4 or 8 byte lanes. Counts >= 8 are
// clamped to 7: shifting all the way still fills the lane with sign bits.
instruct vsra8B_imm(vecD dst, vecD src, immI shift) %{
  predicate(n->as_Vector()->length() == 4 ||
            n->as_Vector()->length() == 8);
  match(Set dst (RShiftVB src shift));
  ins_cost(INSN_COST);
  format %{ "sshr    $dst, $src, $shift\t# vector (8B)" %}
  ins_encode %{
    int sh = (int)$shift$$constant;
    if (sh >= 8) sh = 7;
    __ sshr(as_FloatRegister($dst$$reg), __ T8B,
           as_FloatRegister($src$$reg), sh);
  %}
  ins_pipe(vshift64_imm);
%}

// Immediate arithmetic right shift, 16 byte lanes (counts clamped to 7).
instruct vsra16B_imm(vecX dst, vecX src, immI shift) %{
  predicate(n->as_Vector()->length() == 16);
  match(Set dst (RShiftVB src shift));
  ins_cost(INSN_COST);
  format %{ "sshr    $dst, $src, $shift\t# vector (16B)" %}
  ins_encode %{
    int sh = (int)$shift$$constant;
    if (sh >= 8) sh = 7;
    __ sshr(as_FloatRegister($dst$$reg), __ T16B,
           as_FloatRegister($src$$reg), sh);
  %}
  ins_pipe(vshift128_imm);
%}

// Immediate logical right shift, 4 or 8 byte lanes (counts >= 8 zero dst).
instruct vsrl8B_imm(vecD dst, vecD src, immI shift) %{
  predicate(n->as_Vector()->length() == 4 ||
            n->as_Vector()->length() == 8);
  match(Set dst (URShiftVB src shift));
  ins_cost(INSN_COST);
  format %{ "ushr    $dst, $src, $shift\t# vector (8B)" %}
  ins_encode %{
    int sh = (int)$shift$$constant;
    if (sh >= 8) {
      __ eor(as_FloatRegister($dst$$reg), __ T8B,
             as_FloatRegister($src$$reg),
             as_FloatRegister($src$$reg));
    } else {
      __ ushr(as_FloatRegister($dst$$reg), __ T8B,
             as_FloatRegister($src$$reg), sh);
    }
  %}
  ins_pipe(vshift64_imm);
%}

// Immediate logical right shift, 16 byte lanes (counts >= 8 zero dst).
instruct vsrl16B_imm(vecX dst, vecX src, immI shift) %{
  predicate(n->as_Vector()->length() == 16);
  match(Set dst (URShiftVB src shift));
  ins_cost(INSN_COST);
  format %{ "ushr    $dst, $src, $shift\t# vector (16B)" %}
  ins_encode %{
    int sh = (int)$shift$$constant;
    if (sh >= 8) {
      __ eor(as_FloatRegister($dst$$reg), __ T16B,
             as_FloatRegister($src$$reg),
             as_FloatRegister($src$$reg));
    } else {
      __ ushr(as_FloatRegister($dst$$reg), __ T16B,
             as_FloatRegister($src$$reg), sh);
    }
  %}
  ins_pipe(vshift128_imm);
%}
16200 
// Variable left shift, 2 or 4 short lanes (4H arrangement covers both).
instruct vsll4S(vecD dst, vecD src, vecD shift) %{
  predicate(n->as_Vector()->length() == 2 ||
            n->as_Vector()->length() == 4);
  match(Set dst (LShiftVS src shift));
  ins_cost(INSN_COST);
  format %{ "sshl  $dst,$src,$shift\t# vector (4H)" %}
  ins_encode %{
    __ sshl(as_FloatRegister($dst$$reg), __ T4H,
            as_FloatRegister($src$$reg),
            as_FloatRegister($shift$$reg));
  %}
  ins_pipe(vshift64);
%}

// Variable left shift, 8 short lanes (8H arrangement).
instruct vsll8S(vecX dst, vecX src, vecX shift) %{
  predicate(n->as_Vector()->length() == 8);
  match(Set dst (LShiftVS src shift));
  ins_cost(INSN_COST);
  format %{ "sshl  $dst,$src,$shift\t# vector (8H)" %}
  ins_encode %{
    __ sshl(as_FloatRegister($dst$$reg), __ T8H,
            as_FloatRegister($src$$reg),
            as_FloatRegister($shift$$reg));
  %}
  ins_pipe(vshift128);
%}

// Arithmetic right shift, 2 or 4 short lanes: negate the count (negr on
// the byte arrangement negates every lane of the replicated count) and
// shift left by the negated amount.
instruct vsra4S(vecD dst, vecD src, vecD shift, vecD tmp) %{
  predicate(n->as_Vector()->length() == 2 ||
            n->as_Vector()->length() == 4);
  match(Set dst (RShiftVS src shift));
  ins_cost(INSN_COST);
  effect(TEMP tmp);
  format %{ "negr  $tmp,$shift\t"
            "sshl  $dst,$src,$tmp\t# vector (4H)" %}
  ins_encode %{
    __ negr(as_FloatRegister($tmp$$reg), __ T8B,
            as_FloatRegister($shift$$reg));
    __ sshl(as_FloatRegister($dst$$reg), __ T4H,
            as_FloatRegister($src$$reg),
            as_FloatRegister($tmp$$reg));
  %}
  ins_pipe(vshift64);
%}

// Arithmetic right shift, 8 short lanes (negated count + sshl).
instruct vsra8S(vecX dst, vecX src, vecX shift, vecX tmp) %{
  predicate(n->as_Vector()->length() == 8);
  match(Set dst (RShiftVS src shift));
  ins_cost(INSN_COST);
  effect(TEMP tmp);
  format %{ "negr  $tmp,$shift\t"
            "sshl  $dst,$src,$tmp\t# vector (8H)" %}
  ins_encode %{
    __ negr(as_FloatRegister($tmp$$reg), __ T16B,
            as_FloatRegister($shift$$reg));
    __ sshl(as_FloatRegister($dst$$reg), __ T8H,
            as_FloatRegister($src$$reg),
            as_FloatRegister($tmp$$reg));
  %}
  ins_pipe(vshift128);
%}

// Logical right shift, 2 or 4 short lanes (negated count + ushl).
instruct vsrl4S(vecD dst, vecD src, vecD shift, vecD tmp) %{
  predicate(n->as_Vector()->length() == 2 ||
            n->as_Vector()->length() == 4);
  match(Set dst (URShiftVS src shift));
  ins_cost(INSN_COST);
  effect(TEMP tmp);
  format %{ "negr  $tmp,$shift\t"
            "ushl  $dst,$src,$tmp\t# vector (4H)" %}
  ins_encode %{
    __ negr(as_FloatRegister($tmp$$reg), __ T8B,
            as_FloatRegister($shift$$reg));
    __ ushl(as_FloatRegister($dst$$reg), __ T4H,
            as_FloatRegister($src$$reg),
            as_FloatRegister($tmp$$reg));
  %}
  ins_pipe(vshift64);
%}

// Logical right shift, 8 short lanes (negated count + ushl).
instruct vsrl8S(vecX dst, vecX src, vecX shift, vecX tmp) %{
  predicate(n->as_Vector()->length() == 8);
  match(Set dst (URShiftVS src shift));
  ins_cost(INSN_COST);
  effect(TEMP tmp);
  format %{ "negr  $tmp,$shift\t"
            "ushl  $dst,$src,$tmp\t# vector (8H)" %}
  ins_encode %{
    __ negr(as_FloatRegister($tmp$$reg), __ T16B,
            as_FloatRegister($shift$$reg));
    __ ushl(as_FloatRegister($dst$$reg), __ T8H,
            as_FloatRegister($src$$reg),
            as_FloatRegister($tmp$$reg));
  %}
  ins_pipe(vshift128);
%}
16297 
16298 instruct vsll4S_imm(vecD dst, vecD src, immI shift) %{
16299   predicate(n->as_Vector()->length() == 2 ||
16300             n->as_Vector()->length() == 4);
16301   match(Set dst (LShiftVS src shift));
16302   ins_cost(INSN_COST);
16303   format %{ "shl    $dst, $src, $shift\t# vector (4H)" %}
16304   ins_encode %{
16305     int sh = (int)$shift$$constant;
16306     if (sh >= 16) {
16307       __ eor(as_FloatRegister($dst$$reg), __ T8B,
16308              as_FloatRegister($src$$reg),
16309              as_FloatRegister($src$$reg));
16310     } else {
16311       __ shl(as_FloatRegister($dst$$reg), __ T4H,
16312              as_FloatRegister($src$$reg), sh);
16313     }
16314   %}
16315   ins_pipe(vshift64_imm);
16316 %}
16317 
16318 instruct vsll8S_imm(vecX dst, vecX src, immI shift) %{
16319   predicate(n->as_Vector()->length() == 8);
16320   match(Set dst (LShiftVS src shift));
16321   ins_cost(INSN_COST);
16322   format %{ "shl    $dst, $src, $shift\t# vector (8H)" %}
16323   ins_encode %{
16324     int sh = (int)$shift$$constant;
16325     if (sh >= 16) {
16326       __ eor(as_FloatRegister($dst$$reg), __ T16B,
16327              as_FloatRegister($src$$reg),
16328              as_FloatRegister($src$$reg));
16329     } else {
16330       __ shl(as_FloatRegister($dst$$reg), __ T8H,
16331              as_FloatRegister($src$$reg), sh);
16332     }
16333   %}
16334   ins_pipe(vshift128_imm);
16335 %}
16336 
16337 instruct vsra4S_imm(vecD dst, vecD src, immI shift) %{
16338   predicate(n->as_Vector()->length() == 2 ||
16339             n->as_Vector()->length() == 4);
16340   match(Set dst (RShiftVS src shift));
16341   ins_cost(INSN_COST);
16342   format %{ "sshr    $dst, $src, $shift\t# vector (4H)" %}
16343   ins_encode %{
16344     int sh = (int)$shift$$constant;
16345     if (sh >= 16) sh = 15;
16346     __ sshr(as_FloatRegister($dst$$reg), __ T4H,
16347            as_FloatRegister($src$$reg), sh);
16348   %}
16349   ins_pipe(vshift64_imm);
16350 %}
16351 
16352 instruct vsra8S_imm(vecX dst, vecX src, immI shift) %{
16353   predicate(n->as_Vector()->length() == 8);
16354   match(Set dst (RShiftVS src shift));
16355   ins_cost(INSN_COST);
16356   format %{ "sshr    $dst, $src, $shift\t# vector (8H)" %}
16357   ins_encode %{
16358     int sh = (int)$shift$$constant;
16359     if (sh >= 16) sh = 15;
16360     __ sshr(as_FloatRegister($dst$$reg), __ T8H,
16361            as_FloatRegister($src$$reg), sh);
16362   %}
16363   ins_pipe(vshift128_imm);
16364 %}
16365 
// Logical (unsigned) right shift of 2 or 4 short lanes by an immediate.
instruct vsrl4S_imm(vecD dst, vecD src, immI shift) %{
  predicate(n->as_Vector()->length() == 2 ||
            n->as_Vector()->length() == 4);
  match(Set dst (URShiftVS src shift));
  ins_cost(INSN_COST);
  format %{ "ushr    $dst, $src, $shift\t# vector (4H)" %}
  ins_encode %{
    int sh = (int)$shift$$constant;
    // A logical shift by >= the 16-bit lane width clears every lane, and
    // ushr cannot encode such a distance; zero dst with eor(src, src)
    // instead.
    if (sh >= 16) {
      __ eor(as_FloatRegister($dst$$reg), __ T8B,
             as_FloatRegister($src$$reg),
             as_FloatRegister($src$$reg));
    } else {
      __ ushr(as_FloatRegister($dst$$reg), __ T4H,
             as_FloatRegister($src$$reg), sh);
    }
  %}
  ins_pipe(vshift64_imm);
%}
16385 
// Logical (unsigned) right shift of 8 short lanes by an immediate.
instruct vsrl8S_imm(vecX dst, vecX src, immI shift) %{
  predicate(n->as_Vector()->length() == 8);
  match(Set dst (URShiftVS src shift));
  ins_cost(INSN_COST);
  format %{ "ushr    $dst, $src, $shift\t# vector (8H)" %}
  ins_encode %{
    int sh = (int)$shift$$constant;
    // Shifts >= 16 clear every 16-bit lane and are not encodable in ushr;
    // zero dst with eor(src, src) instead.
    if (sh >= 16) {
      __ eor(as_FloatRegister($dst$$reg), __ T16B,
             as_FloatRegister($src$$reg),
             as_FloatRegister($src$$reg));
    } else {
      __ ushr(as_FloatRegister($dst$$reg), __ T8H,
             as_FloatRegister($src$$reg), sh);
    }
  %}
  ins_pipe(vshift128_imm);
%}
16404 
// Left shift of 2 int lanes by a per-lane (register) shift count.
// sshl shifts left for positive counts, so it serves directly here.
instruct vsll2I(vecD dst, vecD src, vecD shift) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (LShiftVI src shift));
  ins_cost(INSN_COST);
  format %{ "sshl  $dst,$src,$shift\t# vector (2S)" %}
  ins_encode %{
    __ sshl(as_FloatRegister($dst$$reg), __ T2S,
            as_FloatRegister($src$$reg),
            as_FloatRegister($shift$$reg));
  %}
  ins_pipe(vshift64);
%}
16417 
// Left shift of 4 int lanes by a per-lane (register) shift count.
// sshl shifts left for positive counts, so it serves directly here.
instruct vsll4I(vecX dst, vecX src, vecX shift) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (LShiftVI src shift));
  ins_cost(INSN_COST);
  format %{ "sshl  $dst,$src,$shift\t# vector (4S)" %}
  ins_encode %{
    __ sshl(as_FloatRegister($dst$$reg), __ T4S,
            as_FloatRegister($src$$reg),
            as_FloatRegister($shift$$reg));
  %}
  ins_pipe(vshift128);
%}
16430 
// Arithmetic right shift of 2 int lanes by a per-lane shift count.
// AArch64 has no right-shift-by-register: negate the counts into tmp and
// use sshl, which shifts right for negative counts.
instruct vsra2I(vecD dst, vecD src, vecD shift, vecD tmp) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (RShiftVI src shift));
  ins_cost(INSN_COST);
  effect(TEMP tmp);
  format %{ "negr  $tmp,$shift\t"
            "sshl  $dst,$src,$tmp\t# vector (2S)" %}
  ins_encode %{
    __ negr(as_FloatRegister($tmp$$reg), __ T8B,
            as_FloatRegister($shift$$reg));
    __ sshl(as_FloatRegister($dst$$reg), __ T2S,
            as_FloatRegister($src$$reg),
            as_FloatRegister($tmp$$reg));
  %}
  ins_pipe(vshift64);
%}
16447 
// Arithmetic right shift of 4 int lanes by a per-lane shift count.
// Negate the counts into tmp, then sshl shifts right for negative counts.
instruct vsra4I(vecX dst, vecX src, vecX shift, vecX tmp) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (RShiftVI src shift));
  ins_cost(INSN_COST);
  effect(TEMP tmp);
  format %{ "negr  $tmp,$shift\t"
            "sshl  $dst,$src,$tmp\t# vector (4S)" %}
  ins_encode %{
    __ negr(as_FloatRegister($tmp$$reg), __ T16B,
            as_FloatRegister($shift$$reg));
    __ sshl(as_FloatRegister($dst$$reg), __ T4S,
            as_FloatRegister($src$$reg),
            as_FloatRegister($tmp$$reg));
  %}
  ins_pipe(vshift128);
%}
16464 
// Logical right shift of 2 int lanes by a per-lane shift count.
// Negate the counts into tmp, then ushl shifts right for negative counts.
instruct vsrl2I(vecD dst, vecD src, vecD shift, vecD tmp) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (URShiftVI src shift));
  ins_cost(INSN_COST);
  effect(TEMP tmp);
  format %{ "negr  $tmp,$shift\t"
            "ushl  $dst,$src,$tmp\t# vector (2S)" %}
  ins_encode %{
    __ negr(as_FloatRegister($tmp$$reg), __ T8B,
            as_FloatRegister($shift$$reg));
    __ ushl(as_FloatRegister($dst$$reg), __ T2S,
            as_FloatRegister($src$$reg),
            as_FloatRegister($tmp$$reg));
  %}
  ins_pipe(vshift64);
%}
16481 
// Logical right shift of 4 int lanes by a per-lane shift count.
// Negate the counts into tmp, then ushl shifts right for negative counts.
instruct vsrl4I(vecX dst, vecX src, vecX shift, vecX tmp) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (URShiftVI src shift));
  ins_cost(INSN_COST);
  effect(TEMP tmp);
  format %{ "negr  $tmp,$shift\t"
            "ushl  $dst,$src,$tmp\t# vector (4S)" %}
  ins_encode %{
    __ negr(as_FloatRegister($tmp$$reg), __ T16B,
            as_FloatRegister($shift$$reg));
    __ ushl(as_FloatRegister($dst$$reg), __ T4S,
            as_FloatRegister($src$$reg),
            as_FloatRegister($tmp$$reg));
  %}
  ins_pipe(vshift128);
%}
16498 
// Left shift of 2 int lanes by an immediate.
// NOTE(review): no clamp here, unlike the short variants — presumably the
// constant is already < 32 per Java int shift masking; confirm upstream.
instruct vsll2I_imm(vecD dst, vecD src, immI shift) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (LShiftVI src shift));
  ins_cost(INSN_COST);
  format %{ "shl    $dst, $src, $shift\t# vector (2S)" %}
  ins_encode %{
    __ shl(as_FloatRegister($dst$$reg), __ T2S,
           as_FloatRegister($src$$reg),
           (int)$shift$$constant);
  %}
  ins_pipe(vshift64_imm);
%}
16511 
// Left shift of 4 int lanes by an immediate.
instruct vsll4I_imm(vecX dst, vecX src, immI shift) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (LShiftVI src shift));
  ins_cost(INSN_COST);
  format %{ "shl    $dst, $src, $shift\t# vector (4S)" %}
  ins_encode %{
    __ shl(as_FloatRegister($dst$$reg), __ T4S,
           as_FloatRegister($src$$reg),
           (int)$shift$$constant);
  %}
  ins_pipe(vshift128_imm);
%}
16524 
// Arithmetic right shift of 2 int lanes by an immediate.
instruct vsra2I_imm(vecD dst, vecD src, immI shift) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (RShiftVI src shift));
  ins_cost(INSN_COST);
  format %{ "sshr    $dst, $src, $shift\t# vector (2S)" %}
  ins_encode %{
    __ sshr(as_FloatRegister($dst$$reg), __ T2S,
            as_FloatRegister($src$$reg),
            (int)$shift$$constant);
  %}
  ins_pipe(vshift64_imm);
%}
16537 
// Arithmetic right shift of 4 int lanes by an immediate.
instruct vsra4I_imm(vecX dst, vecX src, immI shift) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (RShiftVI src shift));
  ins_cost(INSN_COST);
  format %{ "sshr    $dst, $src, $shift\t# vector (4S)" %}
  ins_encode %{
    __ sshr(as_FloatRegister($dst$$reg), __ T4S,
            as_FloatRegister($src$$reg),
            (int)$shift$$constant);
  %}
  ins_pipe(vshift128_imm);
%}
16550 
// Logical right shift of 2 int lanes by an immediate.
instruct vsrl2I_imm(vecD dst, vecD src, immI shift) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (URShiftVI src shift));
  ins_cost(INSN_COST);
  format %{ "ushr    $dst, $src, $shift\t# vector (2S)" %}
  ins_encode %{
    __ ushr(as_FloatRegister($dst$$reg), __ T2S,
            as_FloatRegister($src$$reg),
            (int)$shift$$constant);
  %}
  ins_pipe(vshift64_imm);
%}
16563 
// Logical right shift of 4 int lanes by an immediate.
instruct vsrl4I_imm(vecX dst, vecX src, immI shift) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (URShiftVI src shift));
  ins_cost(INSN_COST);
  format %{ "ushr    $dst, $src, $shift\t# vector (4S)" %}
  ins_encode %{
    __ ushr(as_FloatRegister($dst$$reg), __ T4S,
            as_FloatRegister($src$$reg),
            (int)$shift$$constant);
  %}
  ins_pipe(vshift128_imm);
%}
16576 
// Left shift of 2 long lanes by a per-lane (register) shift count.
// sshl shifts left for positive counts, so it serves directly here.
instruct vsll2L(vecX dst, vecX src, vecX shift) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (LShiftVL src shift));
  ins_cost(INSN_COST);
  format %{ "sshl  $dst,$src,$shift\t# vector (2D)" %}
  ins_encode %{
    __ sshl(as_FloatRegister($dst$$reg), __ T2D,
            as_FloatRegister($src$$reg),
            as_FloatRegister($shift$$reg));
  %}
  ins_pipe(vshift128);
%}
16589 
// Arithmetic right shift of 2 long lanes by a per-lane shift count.
// Negate the counts into tmp, then sshl shifts right for negative counts.
instruct vsra2L(vecX dst, vecX src, vecX shift, vecX tmp) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (RShiftVL src shift));
  ins_cost(INSN_COST);
  effect(TEMP tmp);
  format %{ "negr  $tmp,$shift\t"
            "sshl  $dst,$src,$tmp\t# vector (2D)" %}
  ins_encode %{
    __ negr(as_FloatRegister($tmp$$reg), __ T16B,
            as_FloatRegister($shift$$reg));
    __ sshl(as_FloatRegister($dst$$reg), __ T2D,
            as_FloatRegister($src$$reg),
            as_FloatRegister($tmp$$reg));
  %}
  ins_pipe(vshift128);
%}
16606 
// Logical right shift of 2 long lanes by a per-lane shift count.
// Negate the counts into tmp, then ushl shifts right for negative counts.
instruct vsrl2L(vecX dst, vecX src, vecX shift, vecX tmp) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (URShiftVL src shift));
  ins_cost(INSN_COST);
  effect(TEMP tmp);
  format %{ "negr  $tmp,$shift\t"
            "ushl  $dst,$src,$tmp\t# vector (2D)" %}
  ins_encode %{
    __ negr(as_FloatRegister($tmp$$reg), __ T16B,
            as_FloatRegister($shift$$reg));
    __ ushl(as_FloatRegister($dst$$reg), __ T2D,
            as_FloatRegister($src$$reg),
            as_FloatRegister($tmp$$reg));
  %}
  ins_pipe(vshift128);
%}
16623 
// Left shift of 2 long lanes by an immediate.
instruct vsll2L_imm(vecX dst, vecX src, immI shift) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (LShiftVL src shift));
  ins_cost(INSN_COST);
  format %{ "shl    $dst, $src, $shift\t# vector (2D)" %}
  ins_encode %{
    __ shl(as_FloatRegister($dst$$reg), __ T2D,
           as_FloatRegister($src$$reg),
           (int)$shift$$constant);
  %}
  ins_pipe(vshift128_imm);
%}
16636 
// Arithmetic right shift of 2 long lanes by an immediate.
instruct vsra2L_imm(vecX dst, vecX src, immI shift) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (RShiftVL src shift));
  ins_cost(INSN_COST);
  format %{ "sshr    $dst, $src, $shift\t# vector (2D)" %}
  ins_encode %{
    __ sshr(as_FloatRegister($dst$$reg), __ T2D,
            as_FloatRegister($src$$reg),
            (int)$shift$$constant);
  %}
  ins_pipe(vshift128_imm);
%}
16649 
// Logical right shift of 2 long lanes by an immediate.
instruct vsrl2L_imm(vecX dst, vecX src, immI shift) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (URShiftVL src shift));
  ins_cost(INSN_COST);
  format %{ "ushr    $dst, $src, $shift\t# vector (2D)" %}
  ins_encode %{
    __ ushr(as_FloatRegister($dst$$reg), __ T2D,
            as_FloatRegister($src$$reg),
            (int)$shift$$constant);
  %}
  ins_pipe(vshift128_imm);
%}
16662 
16663 //----------PEEPHOLE RULES-----------------------------------------------------
16664 // These must follow all instruction definitions as they use the names
16665 // defined in the instructions definitions.
16666 //
16667 // peepmatch ( root_instr_name [preceding_instruction]* );
16668 //
16669 // peepconstraint %{
16670 // (instruction_number.operand_name relational_op instruction_number.operand_name
16671 //  [, ...] );
16672 // // instruction numbers are zero-based using left to right order in peepmatch
16673 //
16674 // peepreplace ( instr_name  ( [instruction_number.operand_name]* ) );
16675 // // provide an instruction_number.operand_name for each operand that appears
16676 // // in the replacement instruction's match rule
16677 //
16678 // ---------VM FLAGS---------------------------------------------------------
16679 //
16680 // All peephole optimizations can be turned off using -XX:-OptoPeephole
16681 //
16682 // Each peephole rule is given an identifying number starting with zero and
16683 // increasing by one in the order seen by the parser.  An individual peephole
16684 // can be enabled, and all others disabled, by using -XX:OptoPeepholeAt=#
16685 // on the command-line.
16686 //
16687 // ---------CURRENT LIMITATIONS----------------------------------------------
16688 //
16689 // Only match adjacent instructions in same basic block
16690 // Only equality constraints
16691 // Only constraints between operands, not (0.dest_reg == RAX_enc)
16692 // Only one replacement instruction
16693 //
16694 // ---------EXAMPLE----------------------------------------------------------
16695 //
16696 // // pertinent parts of existing instructions in architecture description
16697 // instruct movI(iRegINoSp dst, iRegI src)
16698 // %{
16699 //   match(Set dst (CopyI src));
16700 // %}
16701 //
16702 // instruct incI_iReg(iRegINoSp dst, immI1 src, rFlagsReg cr)
16703 // %{
16704 //   match(Set dst (AddI dst src));
16705 //   effect(KILL cr);
16706 // %}
16707 //
16708 // // Change (inc mov) to lea
16709 // peephole %{
//   // increment preceded by register-register move
16711 //   peepmatch ( incI_iReg movI );
16712 //   // require that the destination register of the increment
16713 //   // match the destination register of the move
16714 //   peepconstraint ( 0.dst == 1.dst );
16715 //   // construct a replacement instruction that sets
16716 //   // the destination to ( move's source register + one )
16717 //   peepreplace ( leaI_iReg_immI( 0.dst 1.src 0.src ) );
16718 // %}
16719 //
16720 
16721 // Implementation no longer uses movX instructions since
16722 // machine-independent system no longer uses CopyX nodes.
16723 //
16724 // peephole
16725 // %{
16726 //   peepmatch (incI_iReg movI);
16727 //   peepconstraint (0.dst == 1.dst);
16728 //   peepreplace (leaI_iReg_immI(0.dst 1.src 0.src));
16729 // %}
16730 
16731 // peephole
16732 // %{
16733 //   peepmatch (decI_iReg movI);
16734 //   peepconstraint (0.dst == 1.dst);
16735 //   peepreplace (leaI_iReg_immI(0.dst 1.src 0.src));
16736 // %}
16737 
16738 // peephole
16739 // %{
16740 //   peepmatch (addI_iReg_imm movI);
16741 //   peepconstraint (0.dst == 1.dst);
16742 //   peepreplace (leaI_iReg_immI(0.dst 1.src 0.src));
16743 // %}
16744 
16745 // peephole
16746 // %{
16747 //   peepmatch (incL_iReg movL);
16748 //   peepconstraint (0.dst == 1.dst);
16749 //   peepreplace (leaL_iReg_immL(0.dst 1.src 0.src));
16750 // %}
16751 
16752 // peephole
16753 // %{
16754 //   peepmatch (decL_iReg movL);
16755 //   peepconstraint (0.dst == 1.dst);
16756 //   peepreplace (leaL_iReg_immL(0.dst 1.src 0.src));
16757 // %}
16758 
16759 // peephole
16760 // %{
16761 //   peepmatch (addL_iReg_imm movL);
16762 //   peepconstraint (0.dst == 1.dst);
16763 //   peepreplace (leaL_iReg_immL(0.dst 1.src 0.src));
16764 // %}
16765 
16766 // peephole
16767 // %{
16768 //   peepmatch (addP_iReg_imm movP);
16769 //   peepconstraint (0.dst == 1.dst);
16770 //   peepreplace (leaP_iReg_imm(0.dst 1.src 0.src));
16771 // %}
16772 
16773 // // Change load of spilled value to only a spill
16774 // instruct storeI(memory mem, iRegI src)
16775 // %{
16776 //   match(Set mem (StoreI mem src));
16777 // %}
16778 //
16779 // instruct loadI(iRegINoSp dst, memory mem)
16780 // %{
16781 //   match(Set dst (LoadI mem));
16782 // %}
16783 //
16784 
16785 //----------SMARTSPILL RULES---------------------------------------------------
16786 // These must follow all instruction definitions as they use the names
16787 // defined in the instructions definitions.
16788 
16789 // Local Variables:
16790 // mode: c++
16791 // End: