1 //
   2 // Copyright (c) 2003, 2019, Oracle and/or its affiliates. All rights reserved.
   3 // Copyright (c) 2014, 2019, Red Hat, Inc. All rights reserved.
   4 // DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   5 //
   6 // This code is free software; you can redistribute it and/or modify it
   7 // under the terms of the GNU General Public License version 2 only, as
   8 // published by the Free Software Foundation.
   9 //
  10 // This code is distributed in the hope that it will be useful, but WITHOUT
  11 // ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  12 // FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  13 // version 2 for more details (a copy is included in the LICENSE file that
  14 // accompanied this code).
  15 //
  16 // You should have received a copy of the GNU General Public License version
  17 // 2 along with this work; if not, write to the Free Software Foundation,
  18 // Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  19 //
  20 // Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  21 // or visit www.oracle.com if you need additional information or have any
  22 // questions.
  23 //
  24 //
  25 
  26 // AArch64 Architecture Description File
  27 
  28 //----------REGISTER DEFINITION BLOCK------------------------------------------
  29 // This information is used by the matcher and the register allocator to
  30 // describe individual registers and classes of registers within the target
  31 // architecture.
  32 
  33 register %{
  34 //----------Architecture Description Register Definitions----------------------
  35 // General Registers
  36 // "reg_def"  name ( register save type, C convention save type,
  37 //                   ideal register type, encoding );
  38 // Register Save Types:
  39 //
  40 // NS  = No-Save:       The register allocator assumes that these registers
  41 //                      can be used without saving upon entry to the method, &
  42 //                      that they do not need to be saved at call sites.
  43 //
  44 // SOC = Save-On-Call:  The register allocator assumes that these registers
  45 //                      can be used without saving upon entry to the method,
  46 //                      but that they must be saved at call sites.
  47 //
  48 // SOE = Save-On-Entry: The register allocator assumes that these registers
  49 //                      must be saved before using them upon entry to the
  50 //                      method, but they do not need to be saved at call
  51 //                      sites.
  52 //
  53 // AS  = Always-Save:   The register allocator assumes that these registers
  54 //                      must be saved before using them upon entry to the
  55 //                      method, & that they must be saved at call sites.
  56 //
  57 // Ideal Register Type is used to determine how to save & restore a
  58 // register.  Op_RegI will get spilled with LoadI/StoreI, Op_RegP will get
  59 // spilled with LoadP/StoreP.  If the register supports both, use Op_RegI.
  60 //
  61 // The encoding number is the actual bit-pattern placed into the opcodes.
  62 
  63 // We must define the 64 bit int registers in two 32 bit halves, the
  64 // real lower register and a virtual upper half register. upper halves
  65 // are used by the register allocator but are not actually supplied as
  66 // operands to memory ops.
  67 //
  68 // follow the C1 compiler in making registers
  69 //
  70 //   r0-r7,r10-r26 volatile (caller save)
  71 //   r27-r31 system (no save, no allocate)
  72 //   r8-r9 invisible to the allocator (so we can use them as scratch regs)
  73 //
  74 // as regards Java usage. we don't use any callee save registers
  75 // because this makes it difficult to de-optimise a frame (see comment
  76 // in x86 implementation of Deoptimization::unwind_callee_save_values)
  77 //
  78 
  79 // General Registers
  80 
  81 reg_def R0      ( SOC, SOC, Op_RegI,  0, r0->as_VMReg()         );
  82 reg_def R0_H    ( SOC, SOC, Op_RegI,  0, r0->as_VMReg()->next() );
  83 reg_def R1      ( SOC, SOC, Op_RegI,  1, r1->as_VMReg()         );
  84 reg_def R1_H    ( SOC, SOC, Op_RegI,  1, r1->as_VMReg()->next() );
  85 reg_def R2      ( SOC, SOC, Op_RegI,  2, r2->as_VMReg()         );
  86 reg_def R2_H    ( SOC, SOC, Op_RegI,  2, r2->as_VMReg()->next() );
  87 reg_def R3      ( SOC, SOC, Op_RegI,  3, r3->as_VMReg()         );
  88 reg_def R3_H    ( SOC, SOC, Op_RegI,  3, r3->as_VMReg()->next() );
  89 reg_def R4      ( SOC, SOC, Op_RegI,  4, r4->as_VMReg()         );
  90 reg_def R4_H    ( SOC, SOC, Op_RegI,  4, r4->as_VMReg()->next() );
  91 reg_def R5      ( SOC, SOC, Op_RegI,  5, r5->as_VMReg()         );
  92 reg_def R5_H    ( SOC, SOC, Op_RegI,  5, r5->as_VMReg()->next() );
  93 reg_def R6      ( SOC, SOC, Op_RegI,  6, r6->as_VMReg()         );
  94 reg_def R6_H    ( SOC, SOC, Op_RegI,  6, r6->as_VMReg()->next() );
  95 reg_def R7      ( SOC, SOC, Op_RegI,  7, r7->as_VMReg()         );
  96 reg_def R7_H    ( SOC, SOC, Op_RegI,  7, r7->as_VMReg()->next() );
  97 reg_def R10     ( SOC, SOC, Op_RegI, 10, r10->as_VMReg()        );
  98 reg_def R10_H   ( SOC, SOC, Op_RegI, 10, r10->as_VMReg()->next());
  99 reg_def R11     ( SOC, SOC, Op_RegI, 11, r11->as_VMReg()        );
 100 reg_def R11_H   ( SOC, SOC, Op_RegI, 11, r11->as_VMReg()->next());
 101 reg_def R12     ( SOC, SOC, Op_RegI, 12, r12->as_VMReg()        );
 102 reg_def R12_H   ( SOC, SOC, Op_RegI, 12, r12->as_VMReg()->next());
 103 reg_def R13     ( SOC, SOC, Op_RegI, 13, r13->as_VMReg()        );
 104 reg_def R13_H   ( SOC, SOC, Op_RegI, 13, r13->as_VMReg()->next());
 105 reg_def R14     ( SOC, SOC, Op_RegI, 14, r14->as_VMReg()        );
 106 reg_def R14_H   ( SOC, SOC, Op_RegI, 14, r14->as_VMReg()->next());
 107 reg_def R15     ( SOC, SOC, Op_RegI, 15, r15->as_VMReg()        );
 108 reg_def R15_H   ( SOC, SOC, Op_RegI, 15, r15->as_VMReg()->next());
 109 reg_def R16     ( SOC, SOC, Op_RegI, 16, r16->as_VMReg()        );
 110 reg_def R16_H   ( SOC, SOC, Op_RegI, 16, r16->as_VMReg()->next());
 111 reg_def R17     ( SOC, SOC, Op_RegI, 17, r17->as_VMReg()        );
 112 reg_def R17_H   ( SOC, SOC, Op_RegI, 17, r17->as_VMReg()->next());
 113 reg_def R18     ( SOC, SOC, Op_RegI, 18, r18->as_VMReg()        );
 114 reg_def R18_H   ( SOC, SOC, Op_RegI, 18, r18->as_VMReg()->next());
 115 reg_def R19     ( SOC, SOE, Op_RegI, 19, r19->as_VMReg()        );
 116 reg_def R19_H   ( SOC, SOE, Op_RegI, 19, r19->as_VMReg()->next());
 117 reg_def R20     ( SOC, SOE, Op_RegI, 20, r20->as_VMReg()        ); // caller esp
 118 reg_def R20_H   ( SOC, SOE, Op_RegI, 20, r20->as_VMReg()->next());
 119 reg_def R21     ( SOC, SOE, Op_RegI, 21, r21->as_VMReg()        );
 120 reg_def R21_H   ( SOC, SOE, Op_RegI, 21, r21->as_VMReg()->next());
 121 reg_def R22     ( SOC, SOE, Op_RegI, 22, r22->as_VMReg()        );
 122 reg_def R22_H   ( SOC, SOE, Op_RegI, 22, r22->as_VMReg()->next());
 123 reg_def R23     ( SOC, SOE, Op_RegI, 23, r23->as_VMReg()        );
 124 reg_def R23_H   ( SOC, SOE, Op_RegI, 23, r23->as_VMReg()->next());
 125 reg_def R24     ( SOC, SOE, Op_RegI, 24, r24->as_VMReg()        );
 126 reg_def R24_H   ( SOC, SOE, Op_RegI, 24, r24->as_VMReg()->next());
 127 reg_def R25     ( SOC, SOE, Op_RegI, 25, r25->as_VMReg()        );
 128 reg_def R25_H   ( SOC, SOE, Op_RegI, 25, r25->as_VMReg()->next());
 129 reg_def R26     ( SOC, SOE, Op_RegI, 26, r26->as_VMReg()        );
 130 reg_def R26_H   ( SOC, SOE, Op_RegI, 26, r26->as_VMReg()->next());
 131 reg_def R27     (  NS, SOE, Op_RegI, 27, r27->as_VMReg()        ); // heapbase
 132 reg_def R27_H   (  NS, SOE, Op_RegI, 27, r27->as_VMReg()->next());
 133 reg_def R28     (  NS, SOE, Op_RegI, 28, r28->as_VMReg()        ); // thread
 134 reg_def R28_H   (  NS, SOE, Op_RegI, 28, r28->as_VMReg()->next());
 135 reg_def R29     (  NS,  NS, Op_RegI, 29, r29->as_VMReg()        ); // fp
 136 reg_def R29_H   (  NS,  NS, Op_RegI, 29, r29->as_VMReg()->next());
 137 reg_def R30     (  NS,  NS, Op_RegI, 30, r30->as_VMReg()        ); // lr
 138 reg_def R30_H   (  NS,  NS, Op_RegI, 30, r30->as_VMReg()->next());
 139 reg_def R31     (  NS,  NS, Op_RegI, 31, r31_sp->as_VMReg()     ); // sp
 140 reg_def R31_H   (  NS,  NS, Op_RegI, 31, r31_sp->as_VMReg()->next());
 141 
 142 // ----------------------------
 143 // Float/Double Registers
 144 // ----------------------------
 145 
 146 // Double Registers
 147 
 148 // The rules of ADL require that double registers be defined in pairs.
 149 // Each pair must be two 32-bit values, but not necessarily a pair of
 150 // single float registers. In each pair, ADLC-assigned register numbers
 151 // must be adjacent, with the lower number even. Finally, when the
 152 // CPU stores such a register pair to memory, the word associated with
 153 // the lower ADLC-assigned number must be stored to the lower address.
 154 
 155 // AArch64 has 32 floating-point registers. Each can store a vector of
 156 // single or double precision floating-point values up to 8 * 32
 157 // floats, 4 * 64 bit floats or 2 * 128 bit floats.  We currently only
 158 // use the first float or double element of the vector.
 159 
 160 // For Java use, float registers v0-v15 are always save-on-call
 161 // (whereas the platform ABI treats v8-v15 as callee-save). Float
 162 // registers v16-v31 are SOC as per the platform spec.
 163 
 164   reg_def V0   ( SOC, SOC, Op_RegF,  0, v0->as_VMReg()          );
 165   reg_def V0_H ( SOC, SOC, Op_RegF,  0, v0->as_VMReg()->next()  );
 166   reg_def V0_J ( SOC, SOC, Op_RegF,  0, v0->as_VMReg()->next(2) );
 167   reg_def V0_K ( SOC, SOC, Op_RegF,  0, v0->as_VMReg()->next(3) );
 168 
 169   reg_def V1   ( SOC, SOC, Op_RegF,  1, v1->as_VMReg()          );
 170   reg_def V1_H ( SOC, SOC, Op_RegF,  1, v1->as_VMReg()->next()  );
 171   reg_def V1_J ( SOC, SOC, Op_RegF,  1, v1->as_VMReg()->next(2) );
 172   reg_def V1_K ( SOC, SOC, Op_RegF,  1, v1->as_VMReg()->next(3) );
 173 
 174   reg_def V2   ( SOC, SOC, Op_RegF,  2, v2->as_VMReg()          );
 175   reg_def V2_H ( SOC, SOC, Op_RegF,  2, v2->as_VMReg()->next()  );
 176   reg_def V2_J ( SOC, SOC, Op_RegF,  2, v2->as_VMReg()->next(2) );
 177   reg_def V2_K ( SOC, SOC, Op_RegF,  2, v2->as_VMReg()->next(3) );
 178 
 179   reg_def V3   ( SOC, SOC, Op_RegF,  3, v3->as_VMReg()          );
 180   reg_def V3_H ( SOC, SOC, Op_RegF,  3, v3->as_VMReg()->next()  );
 181   reg_def V3_J ( SOC, SOC, Op_RegF,  3, v3->as_VMReg()->next(2) );
 182   reg_def V3_K ( SOC, SOC, Op_RegF,  3, v3->as_VMReg()->next(3) );
 183 
 184   reg_def V4   ( SOC, SOC, Op_RegF,  4, v4->as_VMReg()          );
 185   reg_def V4_H ( SOC, SOC, Op_RegF,  4, v4->as_VMReg()->next()  );
 186   reg_def V4_J ( SOC, SOC, Op_RegF,  4, v4->as_VMReg()->next(2) );
 187   reg_def V4_K ( SOC, SOC, Op_RegF,  4, v4->as_VMReg()->next(3) );
 188 
 189   reg_def V5   ( SOC, SOC, Op_RegF,  5, v5->as_VMReg()          );
 190   reg_def V5_H ( SOC, SOC, Op_RegF,  5, v5->as_VMReg()->next()  );
 191   reg_def V5_J ( SOC, SOC, Op_RegF,  5, v5->as_VMReg()->next(2) );
 192   reg_def V5_K ( SOC, SOC, Op_RegF,  5, v5->as_VMReg()->next(3) );
 193 
 194   reg_def V6   ( SOC, SOC, Op_RegF,  6, v6->as_VMReg()          );
 195   reg_def V6_H ( SOC, SOC, Op_RegF,  6, v6->as_VMReg()->next()  );
 196   reg_def V6_J ( SOC, SOC, Op_RegF,  6, v6->as_VMReg()->next(2) );
 197   reg_def V6_K ( SOC, SOC, Op_RegF,  6, v6->as_VMReg()->next(3) );
 198 
 199   reg_def V7   ( SOC, SOC, Op_RegF,  7, v7->as_VMReg()          );
 200   reg_def V7_H ( SOC, SOC, Op_RegF,  7, v7->as_VMReg()->next()  );
 201   reg_def V7_J ( SOC, SOC, Op_RegF,  7, v7->as_VMReg()->next(2) );
 202   reg_def V7_K ( SOC, SOC, Op_RegF,  7, v7->as_VMReg()->next(3) );
 203 
 204   reg_def V8   ( SOC, SOC, Op_RegF,  8, v8->as_VMReg()          );
 205   reg_def V8_H ( SOC, SOC, Op_RegF,  8, v8->as_VMReg()->next()  );
 206   reg_def V8_J ( SOC, SOC, Op_RegF,  8, v8->as_VMReg()->next(2) );
 207   reg_def V8_K ( SOC, SOC, Op_RegF,  8, v8->as_VMReg()->next(3) );
 208 
 209   reg_def V9   ( SOC, SOC, Op_RegF,  9, v9->as_VMReg()          );
 210   reg_def V9_H ( SOC, SOC, Op_RegF,  9, v9->as_VMReg()->next()  );
 211   reg_def V9_J ( SOC, SOC, Op_RegF,  9, v9->as_VMReg()->next(2) );
 212   reg_def V9_K ( SOC, SOC, Op_RegF,  9, v9->as_VMReg()->next(3) );
 213 
 214   reg_def V10  ( SOC, SOC, Op_RegF, 10, v10->as_VMReg()         );
 215   reg_def V10_H( SOC, SOC, Op_RegF, 10, v10->as_VMReg()->next() );
 216   reg_def V10_J( SOC, SOC, Op_RegF, 10, v10->as_VMReg()->next(2));
 217   reg_def V10_K( SOC, SOC, Op_RegF, 10, v10->as_VMReg()->next(3));
 218 
 219   reg_def V11  ( SOC, SOC, Op_RegF, 11, v11->as_VMReg()         );
 220   reg_def V11_H( SOC, SOC, Op_RegF, 11, v11->as_VMReg()->next() );
 221   reg_def V11_J( SOC, SOC, Op_RegF, 11, v11->as_VMReg()->next(2));
 222   reg_def V11_K( SOC, SOC, Op_RegF, 11, v11->as_VMReg()->next(3));
 223 
 224   reg_def V12  ( SOC, SOC, Op_RegF, 12, v12->as_VMReg()         );
 225   reg_def V12_H( SOC, SOC, Op_RegF, 12, v12->as_VMReg()->next() );
 226   reg_def V12_J( SOC, SOC, Op_RegF, 12, v12->as_VMReg()->next(2));
 227   reg_def V12_K( SOC, SOC, Op_RegF, 12, v12->as_VMReg()->next(3));
 228 
 229   reg_def V13  ( SOC, SOC, Op_RegF, 13, v13->as_VMReg()         );
 230   reg_def V13_H( SOC, SOC, Op_RegF, 13, v13->as_VMReg()->next() );
 231   reg_def V13_J( SOC, SOC, Op_RegF, 13, v13->as_VMReg()->next(2));
 232   reg_def V13_K( SOC, SOC, Op_RegF, 13, v13->as_VMReg()->next(3));
 233 
 234   reg_def V14  ( SOC, SOC, Op_RegF, 14, v14->as_VMReg()         );
 235   reg_def V14_H( SOC, SOC, Op_RegF, 14, v14->as_VMReg()->next() );
 236   reg_def V14_J( SOC, SOC, Op_RegF, 14, v14->as_VMReg()->next(2));
 237   reg_def V14_K( SOC, SOC, Op_RegF, 14, v14->as_VMReg()->next(3));
 238 
 239   reg_def V15  ( SOC, SOC, Op_RegF, 15, v15->as_VMReg()         );
 240   reg_def V15_H( SOC, SOC, Op_RegF, 15, v15->as_VMReg()->next() );
 241   reg_def V15_J( SOC, SOC, Op_RegF, 15, v15->as_VMReg()->next(2));
 242   reg_def V15_K( SOC, SOC, Op_RegF, 15, v15->as_VMReg()->next(3));
 243 
 244   reg_def V16  ( SOC, SOC, Op_RegF, 16, v16->as_VMReg()         );
 245   reg_def V16_H( SOC, SOC, Op_RegF, 16, v16->as_VMReg()->next() );
 246   reg_def V16_J( SOC, SOC, Op_RegF, 16, v16->as_VMReg()->next(2));
 247   reg_def V16_K( SOC, SOC, Op_RegF, 16, v16->as_VMReg()->next(3));
 248 
 249   reg_def V17  ( SOC, SOC, Op_RegF, 17, v17->as_VMReg()         );
 250   reg_def V17_H( SOC, SOC, Op_RegF, 17, v17->as_VMReg()->next() );
 251   reg_def V17_J( SOC, SOC, Op_RegF, 17, v17->as_VMReg()->next(2));
 252   reg_def V17_K( SOC, SOC, Op_RegF, 17, v17->as_VMReg()->next(3));
 253 
 254   reg_def V18  ( SOC, SOC, Op_RegF, 18, v18->as_VMReg()         );
 255   reg_def V18_H( SOC, SOC, Op_RegF, 18, v18->as_VMReg()->next() );
 256   reg_def V18_J( SOC, SOC, Op_RegF, 18, v18->as_VMReg()->next(2));
 257   reg_def V18_K( SOC, SOC, Op_RegF, 18, v18->as_VMReg()->next(3));
 258 
 259   reg_def V19  ( SOC, SOC, Op_RegF, 19, v19->as_VMReg()         );
 260   reg_def V19_H( SOC, SOC, Op_RegF, 19, v19->as_VMReg()->next() );
 261   reg_def V19_J( SOC, SOC, Op_RegF, 19, v19->as_VMReg()->next(2));
 262   reg_def V19_K( SOC, SOC, Op_RegF, 19, v19->as_VMReg()->next(3));
 263 
 264   reg_def V20  ( SOC, SOC, Op_RegF, 20, v20->as_VMReg()         );
 265   reg_def V20_H( SOC, SOC, Op_RegF, 20, v20->as_VMReg()->next() );
 266   reg_def V20_J( SOC, SOC, Op_RegF, 20, v20->as_VMReg()->next(2));
 267   reg_def V20_K( SOC, SOC, Op_RegF, 20, v20->as_VMReg()->next(3));
 268 
 269   reg_def V21  ( SOC, SOC, Op_RegF, 21, v21->as_VMReg()         );
 270   reg_def V21_H( SOC, SOC, Op_RegF, 21, v21->as_VMReg()->next() );
 271   reg_def V21_J( SOC, SOC, Op_RegF, 21, v21->as_VMReg()->next(2));
 272   reg_def V21_K( SOC, SOC, Op_RegF, 21, v21->as_VMReg()->next(3));
 273 
 274   reg_def V22  ( SOC, SOC, Op_RegF, 22, v22->as_VMReg()         );
 275   reg_def V22_H( SOC, SOC, Op_RegF, 22, v22->as_VMReg()->next() );
 276   reg_def V22_J( SOC, SOC, Op_RegF, 22, v22->as_VMReg()->next(2));
 277   reg_def V22_K( SOC, SOC, Op_RegF, 22, v22->as_VMReg()->next(3));
 278 
 279   reg_def V23  ( SOC, SOC, Op_RegF, 23, v23->as_VMReg()         );
 280   reg_def V23_H( SOC, SOC, Op_RegF, 23, v23->as_VMReg()->next() );
 281   reg_def V23_J( SOC, SOC, Op_RegF, 23, v23->as_VMReg()->next(2));
 282   reg_def V23_K( SOC, SOC, Op_RegF, 23, v23->as_VMReg()->next(3));
 283 
 284   reg_def V24  ( SOC, SOC, Op_RegF, 24, v24->as_VMReg()         );
 285   reg_def V24_H( SOC, SOC, Op_RegF, 24, v24->as_VMReg()->next() );
 286   reg_def V24_J( SOC, SOC, Op_RegF, 24, v24->as_VMReg()->next(2));
 287   reg_def V24_K( SOC, SOC, Op_RegF, 24, v24->as_VMReg()->next(3));
 288 
 289   reg_def V25  ( SOC, SOC, Op_RegF, 25, v25->as_VMReg()         );
 290   reg_def V25_H( SOC, SOC, Op_RegF, 25, v25->as_VMReg()->next() );
 291   reg_def V25_J( SOC, SOC, Op_RegF, 25, v25->as_VMReg()->next(2));
 292   reg_def V25_K( SOC, SOC, Op_RegF, 25, v25->as_VMReg()->next(3));
 293 
 294   reg_def V26  ( SOC, SOC, Op_RegF, 26, v26->as_VMReg()         );
 295   reg_def V26_H( SOC, SOC, Op_RegF, 26, v26->as_VMReg()->next() );
 296   reg_def V26_J( SOC, SOC, Op_RegF, 26, v26->as_VMReg()->next(2));
 297   reg_def V26_K( SOC, SOC, Op_RegF, 26, v26->as_VMReg()->next(3));
 298 
 299   reg_def V27  ( SOC, SOC, Op_RegF, 27, v27->as_VMReg()         );
 300   reg_def V27_H( SOC, SOC, Op_RegF, 27, v27->as_VMReg()->next() );
 301   reg_def V27_J( SOC, SOC, Op_RegF, 27, v27->as_VMReg()->next(2));
 302   reg_def V27_K( SOC, SOC, Op_RegF, 27, v27->as_VMReg()->next(3));
 303 
 304   reg_def V28  ( SOC, SOC, Op_RegF, 28, v28->as_VMReg()         );
 305   reg_def V28_H( SOC, SOC, Op_RegF, 28, v28->as_VMReg()->next() );
 306   reg_def V28_J( SOC, SOC, Op_RegF, 28, v28->as_VMReg()->next(2));
 307   reg_def V28_K( SOC, SOC, Op_RegF, 28, v28->as_VMReg()->next(3));
 308 
 309   reg_def V29  ( SOC, SOC, Op_RegF, 29, v29->as_VMReg()         );
 310   reg_def V29_H( SOC, SOC, Op_RegF, 29, v29->as_VMReg()->next() );
 311   reg_def V29_J( SOC, SOC, Op_RegF, 29, v29->as_VMReg()->next(2));
 312   reg_def V29_K( SOC, SOC, Op_RegF, 29, v29->as_VMReg()->next(3));
 313 
 314   reg_def V30  ( SOC, SOC, Op_RegF, 30, v30->as_VMReg()         );
 315   reg_def V30_H( SOC, SOC, Op_RegF, 30, v30->as_VMReg()->next() );
 316   reg_def V30_J( SOC, SOC, Op_RegF, 30, v30->as_VMReg()->next(2));
 317   reg_def V30_K( SOC, SOC, Op_RegF, 30, v30->as_VMReg()->next(3));
 318 
 319   reg_def V31  ( SOC, SOC, Op_RegF, 31, v31->as_VMReg()         );
 320   reg_def V31_H( SOC, SOC, Op_RegF, 31, v31->as_VMReg()->next() );
 321   reg_def V31_J( SOC, SOC, Op_RegF, 31, v31->as_VMReg()->next(2));
 322   reg_def V31_K( SOC, SOC, Op_RegF, 31, v31->as_VMReg()->next(3));
 323 
 324 // ----------------------------
 325 // Special Registers
 326 // ----------------------------
 327 
 328 // the AArch64 CPSR status flag register is not directly accessible as
 329 // an instruction operand. The FPSR status flag register is a system
 330 // register which can be written/read using MSR/MRS but again does not
 331 // appear as an operand (a code identifying the FPSR occurs as an
 332 // immediate value in the instruction).
 333 
 334 reg_def RFLAGS(SOC, SOC, 0, 32, VMRegImpl::Bad()); // condition flags: no ideal reg type (0), encoding 32 is outside the GP range, and no backing VMReg
 335 
 336 
 337 // Specify priority of register selection within phases of register
 338 // allocation.  Highest priority is first.  A useful heuristic is to
 339 // give registers a low priority when they are required by machine
 340 // instructions, like EAX and EDX on I486, and choose no-save registers
 341 // before save-on-call, & save-on-call before save-on-entry.  Registers
 342 // which participate in fixed calling sequences should come last.
 343 // Registers which are used as pairs must fall on an even boundary.
 344 
 345 alloc_class chunk0(
 346     // volatiles
 347     R10, R10_H,
 348     R11, R11_H,
 349     R12, R12_H,
 350     R13, R13_H,
 351     R14, R14_H,
 352     R15, R15_H,
 353     R16, R16_H,
 354     R17, R17_H,
 355     R18, R18_H,
 356 
 357     // arg registers
 358     R0, R0_H,
 359     R1, R1_H,
 360     R2, R2_H,
 361     R3, R3_H,
 362     R4, R4_H,
 363     R5, R5_H,
 364     R6, R6_H,
 365     R7, R7_H,
 366 
 367     // non-volatiles
 368     R19, R19_H,
 369     R20, R20_H,
 370     R21, R21_H,
 371     R22, R22_H,
 372     R23, R23_H,
 373     R24, R24_H,
 374     R25, R25_H,
 375     R26, R26_H,
 376 
 377     // non-allocatable registers
 378 
 379     R27, R27_H, // heapbase
 380     R28, R28_H, // thread
 381     R29, R29_H, // fp
 382     R30, R30_H, // lr
 383     R31, R31_H, // sp
 384 );
 385 
 386 alloc_class chunk1(
 387 
 388     // no save
 389     V16, V16_H, V16_J, V16_K,
 390     V17, V17_H, V17_J, V17_K,
 391     V18, V18_H, V18_J, V18_K,
 392     V19, V19_H, V19_J, V19_K,
 393     V20, V20_H, V20_J, V20_K,
 394     V21, V21_H, V21_J, V21_K,
 395     V22, V22_H, V22_J, V22_K,
 396     V23, V23_H, V23_J, V23_K,
 397     V24, V24_H, V24_J, V24_K,
 398     V25, V25_H, V25_J, V25_K,
 399     V26, V26_H, V26_J, V26_K,
 400     V27, V27_H, V27_J, V27_K,
 401     V28, V28_H, V28_J, V28_K,
 402     V29, V29_H, V29_J, V29_K,
 403     V30, V30_H, V30_J, V30_K,
 404     V31, V31_H, V31_J, V31_K,
 405 
 406     // arg registers
 407     V0, V0_H, V0_J, V0_K,
 408     V1, V1_H, V1_J, V1_K,
 409     V2, V2_H, V2_J, V2_K,
 410     V3, V3_H, V3_J, V3_K,
 411     V4, V4_H, V4_J, V4_K,
 412     V5, V5_H, V5_J, V5_K,
 413     V6, V6_H, V6_J, V6_K,
 414     V7, V7_H, V7_J, V7_K,
 415 
 416     // non-volatiles
 417     V8, V8_H, V8_J, V8_K,
 418     V9, V9_H, V9_J, V9_K,
 419     V10, V10_H, V10_J, V10_K,
 420     V11, V11_H, V11_J, V11_K,
 421     V12, V12_H, V12_J, V12_K,
 422     V13, V13_H, V13_J, V13_K,
 423     V14, V14_H, V14_J, V14_K,
 424     V15, V15_H, V15_J, V15_K,
 425 );
 426 
 427 alloc_class chunk2(RFLAGS); // condition flags get their own allocation chunk, separate from GP (chunk0) and FP/SIMD (chunk1) registers
 428 
 429 //----------Architecture Description Register Classes--------------------------
 430 // Several register classes are automatically defined based upon information in
 431 // this architecture description.
 432 // 1) reg_class inline_cache_reg           ( /* as def'd in frame section */ )
 433 // 2) reg_class compiler_method_oop_reg    ( /* as def'd in frame section */ )
 434 // 3) reg_class interpreter_method_oop_reg ( /* as def'd in frame section */ )
 435 // 4) reg_class stack_slots( /* one chunk of stack-based "registers" */ )
 436 //
 437 
 438 // Class for all 32 bit integer registers -- excludes SP which will
 439 // never be used as an integer register
 440 reg_class any_reg32(
 441     R0,
 442     R1,
 443     R2,
 444     R3,
 445     R4,
 446     R5,
 447     R6,
 448     R7,
 449     R10,
 450     R11,
 451     R12,
 452     R13,
 453     R14,
 454     R15,
 455     R16,
 456     R17,
 457     R18,
 458     R19,
 459     R20,
 460     R21,
 461     R22,
 462     R23,
 463     R24,
 464     R25,
 465     R26,
 466     R27,
 467     R28,
 468     R29,
 469     R30
 470 );
 471 
 472 // Singleton class for R0 int register
 473 reg_class int_r0_reg(R0);
 474 
 475 // Singleton class for R2 int register
 476 reg_class int_r2_reg(R2);
 477 
 478 // Singleton class for R3 int register
 479 reg_class int_r3_reg(R3);
 480 
 481 // Singleton class for R4 int register
 482 reg_class int_r4_reg(R4);
 483 
 484 // Class for all long integer registers (including RSP)
 485 reg_class any_reg(
 486     R0, R0_H,
 487     R1, R1_H,
 488     R2, R2_H,
 489     R3, R3_H,
 490     R4, R4_H,
 491     R5, R5_H,
 492     R6, R6_H,
 493     R7, R7_H,
 494     R10, R10_H,
 495     R11, R11_H,
 496     R12, R12_H,
 497     R13, R13_H,
 498     R14, R14_H,
 499     R15, R15_H,
 500     R16, R16_H,
 501     R17, R17_H,
 502     R18, R18_H,
 503     R19, R19_H,
 504     R20, R20_H,
 505     R21, R21_H,
 506     R22, R22_H,
 507     R23, R23_H,
 508     R24, R24_H,
 509     R25, R25_H,
 510     R26, R26_H,
 511     R27, R27_H,
 512     R28, R28_H,
 513     R29, R29_H,
 514     R30, R30_H,
 515     R31, R31_H
 516 );
 517 
 518 // Class for all non-special integer registers
 519 reg_class no_special_reg32_no_fp(
 520     R0,
 521     R1,
 522     R2,
 523     R3,
 524     R4,
 525     R5,
 526     R6,
 527     R7,
 528     R10,
 529     R11,
 530     R12,                        // rmethod
 531     R13,
 532     R14,
 533     R15,
 534     R16,
 535     R17,
 536     R18,
 537     R19,
 538     R20,
 539     R21,
 540     R22,
 541     R23,
 542     R24,
 543     R25,
 544     R26
 545  /* R27, */                     // heapbase
 546  /* R28, */                     // thread
 547  /* R29, */                     // fp
 548  /* R30, */                     // lr
 549  /* R31 */                      // sp
 550 );
 551 
 552 reg_class no_special_reg32_with_fp(
 553     R0,
 554     R1,
 555     R2,
 556     R3,
 557     R4,
 558     R5,
 559     R6,
 560     R7,
 561     R10,
 562     R11,
 563     R12,                        // rmethod
 564     R13,
 565     R14,
 566     R15,
 567     R16,
 568     R17,
 569     R18,
 570     R19,
 571     R20,
 572     R21,
 573     R22,
 574     R23,
 575     R24,
 576     R25,
 577     R26
 578  /* R27, */                     // heapbase
 579  /* R28, */                     // thread
 580     R29,                        // fp
 581  /* R30, */                     // lr
 582  /* R31 */                      // sp
 583 );
 584 
 585 reg_class_dynamic no_special_reg32(no_special_reg32_no_fp, no_special_reg32_with_fp, %{ PreserveFramePointer %});
 586 
 587 // Class for all non-special long integer registers
 588 reg_class no_special_reg_no_fp(
 589     R0, R0_H,
 590     R1, R1_H,
 591     R2, R2_H,
 592     R3, R3_H,
 593     R4, R4_H,
 594     R5, R5_H,
 595     R6, R6_H,
 596     R7, R7_H,
 597     R10, R10_H,
 598     R11, R11_H,
 599     R12, R12_H,                 // rmethod
 600     R13, R13_H,
 601     R14, R14_H,
 602     R15, R15_H,
 603     R16, R16_H,
 604     R17, R17_H,
 605     R18, R18_H,
 606     R19, R19_H,
 607     R20, R20_H,
 608     R21, R21_H,
 609     R22, R22_H,
 610     R23, R23_H,
 611     R24, R24_H,
 612     R25, R25_H,
 613     R26, R26_H,
 614  /* R27, R27_H, */              // heapbase
 615  /* R28, R28_H, */              // thread
 616  /* R29, R29_H, */              // fp
 617  /* R30, R30_H, */              // lr
 618  /* R31, R31_H */               // sp
 619 );
 620 
 621 reg_class no_special_reg_with_fp(
 622     R0, R0_H,
 623     R1, R1_H,
 624     R2, R2_H,
 625     R3, R3_H,
 626     R4, R4_H,
 627     R5, R5_H,
 628     R6, R6_H,
 629     R7, R7_H,
 630     R10, R10_H,
 631     R11, R11_H,
 632     R12, R12_H,                 // rmethod
 633     R13, R13_H,
 634     R14, R14_H,
 635     R15, R15_H,
 636     R16, R16_H,
 637     R17, R17_H,
 638     R18, R18_H,
 639     R19, R19_H,
 640     R20, R20_H,
 641     R21, R21_H,
 642     R22, R22_H,
 643     R23, R23_H,
 644     R24, R24_H,
 645     R25, R25_H,
 646     R26, R26_H,
 647  /* R27, R27_H, */              // heapbase
 648  /* R28, R28_H, */              // thread
 649     R29, R29_H,                 // fp
 650  /* R30, R30_H, */              // lr
 651  /* R31, R31_H */               // sp
 652 );
 653 
 654 reg_class_dynamic no_special_reg(no_special_reg_no_fp, no_special_reg_with_fp, %{ PreserveFramePointer %});
 655 
 656 // Class for 64 bit register r0
 657 reg_class r0_reg(
 658     R0, R0_H
 659 );
 660 
 661 // Class for 64 bit register r1
 662 reg_class r1_reg(
 663     R1, R1_H
 664 );
 665 
 666 // Class for 64 bit register r2
 667 reg_class r2_reg(
 668     R2, R2_H
 669 );
 670 
 671 // Class for 64 bit register r3
 672 reg_class r3_reg(
 673     R3, R3_H
 674 );
 675 
 676 // Class for 64 bit register r4
 677 reg_class r4_reg(
 678     R4, R4_H
 679 );
 680 
 681 // Class for 64 bit register r5
 682 reg_class r5_reg(
 683     R5, R5_H
 684 );
 685 
 686 // Class for 64 bit register r10
 687 reg_class r10_reg(
 688     R10, R10_H
 689 );
 690 
 691 // Class for 64 bit register r11
 692 reg_class r11_reg(
 693     R11, R11_H
 694 );
 695 
 696 // Class for method register
 697 reg_class method_reg(
 698     R12, R12_H
 699 );
 700 
 701 // Class for heapbase register
 702 reg_class heapbase_reg(
 703     R27, R27_H
 704 );
 705 
 706 // Class for thread register
 707 reg_class thread_reg(
 708     R28, R28_H
 709 );
 710 
 711 // Class for frame pointer register
 712 reg_class fp_reg(
 713     R29, R29_H
 714 );
 715 
 716 // Class for link register
 717 reg_class lr_reg(
 718     R30, R30_H
 719 );
 720 
 721 // Class for long sp register
 722 reg_class sp_reg(
 723   R31, R31_H
 724 );
 725 
 726 // Class for all pointer registers
 727 reg_class ptr_reg(
 728     R0, R0_H,
 729     R1, R1_H,
 730     R2, R2_H,
 731     R3, R3_H,
 732     R4, R4_H,
 733     R5, R5_H,
 734     R6, R6_H,
 735     R7, R7_H,
 736     R10, R10_H,
 737     R11, R11_H,
 738     R12, R12_H,
 739     R13, R13_H,
 740     R14, R14_H,
 741     R15, R15_H,
 742     R16, R16_H,
 743     R17, R17_H,
 744     R18, R18_H,
 745     R19, R19_H,
 746     R20, R20_H,
 747     R21, R21_H,
 748     R22, R22_H,
 749     R23, R23_H,
 750     R24, R24_H,
 751     R25, R25_H,
 752     R26, R26_H,
 753     R27, R27_H,
 754     R28, R28_H,
 755     R29, R29_H,
 756     R30, R30_H,
 757     R31, R31_H
 758 );
 759 
 760 // Class for all non_special pointer registers
 761 reg_class no_special_ptr_reg(
 762     R0, R0_H,
 763     R1, R1_H,
 764     R2, R2_H,
 765     R3, R3_H,
 766     R4, R4_H,
 767     R5, R5_H,
 768     R6, R6_H,
 769     R7, R7_H,
 770     R10, R10_H,
 771     R11, R11_H,
 772     R12, R12_H,
 773     R13, R13_H,
 774     R14, R14_H,
 775     R15, R15_H,
 776     R16, R16_H,
 777     R17, R17_H,
 778     R18, R18_H,
 779     R19, R19_H,
 780     R20, R20_H,
 781     R21, R21_H,
 782     R22, R22_H,
 783     R23, R23_H,
 784     R24, R24_H,
 785     R25, R25_H,
 786     R26, R26_H,
 787  /* R27, R27_H, */              // heapbase
 788  /* R28, R28_H, */              // thread
 789  /* R29, R29_H, */              // fp
 790  /* R30, R30_H, */              // lr
 791  /* R31, R31_H */               // sp
 792 );
 793 
 794 // Class for all float registers
 795 reg_class float_reg(
 796     V0,
 797     V1,
 798     V2,
 799     V3,
 800     V4,
 801     V5,
 802     V6,
 803     V7,
 804     V8,
 805     V9,
 806     V10,
 807     V11,
 808     V12,
 809     V13,
 810     V14,
 811     V15,
 812     V16,
 813     V17,
 814     V18,
 815     V19,
 816     V20,
 817     V21,
 818     V22,
 819     V23,
 820     V24,
 821     V25,
 822     V26,
 823     V27,
 824     V28,
 825     V29,
 826     V30,
 827     V31
 828 );
 829 
 830 // Double precision float registers have virtual `high halves' that
 831 // are needed by the allocator.
 832 // Class for all double registers
 833 reg_class double_reg(
 834     V0, V0_H,
 835     V1, V1_H,
 836     V2, V2_H,
 837     V3, V3_H,
 838     V4, V4_H,
 839     V5, V5_H,
 840     V6, V6_H,
 841     V7, V7_H,
 842     V8, V8_H,
 843     V9, V9_H,
 844     V10, V10_H,
 845     V11, V11_H,
 846     V12, V12_H,
 847     V13, V13_H,
 848     V14, V14_H,
 849     V15, V15_H,
 850     V16, V16_H,
 851     V17, V17_H,
 852     V18, V18_H,
 853     V19, V19_H,
 854     V20, V20_H,
 855     V21, V21_H,
 856     V22, V22_H,
 857     V23, V23_H,
 858     V24, V24_H,
 859     V25, V25_H,
 860     V26, V26_H,
 861     V27, V27_H,
 862     V28, V28_H,
 863     V29, V29_H,
 864     V30, V30_H,
 865     V31, V31_H
 866 );
 867 
// Class for all 64bit vector registers
// (same two mask slots per register as double_reg above)
reg_class vectord_reg(
    V0, V0_H,
    V1, V1_H,
    V2, V2_H,
    V3, V3_H,
    V4, V4_H,
    V5, V5_H,
    V6, V6_H,
    V7, V7_H,
    V8, V8_H,
    V9, V9_H,
    V10, V10_H,
    V11, V11_H,
    V12, V12_H,
    V13, V13_H,
    V14, V14_H,
    V15, V15_H,
    V16, V16_H,
    V17, V17_H,
    V18, V18_H,
    V19, V19_H,
    V20, V20_H,
    V21, V21_H,
    V22, V22_H,
    V23, V23_H,
    V24, V24_H,
    V25, V25_H,
    V26, V26_H,
    V27, V27_H,
    V28, V28_H,
    V29, V29_H,
    V30, V30_H,
    V31, V31_H
);
 903 
// Class for all 128bit vector registers
// (four mask slots per register: Vn, Vn_H, Vn_J, Vn_K)
reg_class vectorx_reg(
    V0, V0_H, V0_J, V0_K,
    V1, V1_H, V1_J, V1_K,
    V2, V2_H, V2_J, V2_K,
    V3, V3_H, V3_J, V3_K,
    V4, V4_H, V4_J, V4_K,
    V5, V5_H, V5_J, V5_K,
    V6, V6_H, V6_J, V6_K,
    V7, V7_H, V7_J, V7_K,
    V8, V8_H, V8_J, V8_K,
    V9, V9_H, V9_J, V9_K,
    V10, V10_H, V10_J, V10_K,
    V11, V11_H, V11_J, V11_K,
    V12, V12_H, V12_J, V12_K,
    V13, V13_H, V13_J, V13_K,
    V14, V14_H, V14_J, V14_K,
    V15, V15_H, V15_J, V15_K,
    V16, V16_H, V16_J, V16_K,
    V17, V17_H, V17_J, V17_K,
    V18, V18_H, V18_J, V18_K,
    V19, V19_H, V19_J, V19_K,
    V20, V20_H, V20_J, V20_K,
    V21, V21_H, V21_J, V21_K,
    V22, V22_H, V22_J, V22_K,
    V23, V23_H, V23_J, V23_K,
    V24, V24_H, V24_J, V24_K,
    V25, V25_H, V25_J, V25_K,
    V26, V26_H, V26_J, V26_K,
    V27, V27_H, V27_J, V27_K,
    V28, V28_H, V28_J, V28_K,
    V29, V29_H, V29_J, V29_K,
    V30, V30_H, V30_J, V30_K,
    V31, V31_H, V31_J, V31_K
);
 939 
// Class for 128 bit register v0
// n.b. only the low two mask slots (V0, V0_H) are listed, unlike
// vectorx_reg above which also uses V0_J/V0_K -- TODO confirm this
// matches the intended allocator use.
reg_class v0_reg(
    V0, V0_H
);
 944 
// Class for 128 bit register v1
// (lists only the low two mask slots V1, V1_H -- see note on v0_reg)
reg_class v1_reg(
    V1, V1_H
);
 949 
// Class for 128 bit register v2
// (lists only the low two mask slots V2, V2_H -- see note on v0_reg)
reg_class v2_reg(
    V2, V2_H
);
 954 
// Class for 128 bit register v3
// (lists only the low two mask slots V3, V3_H -- see note on v0_reg)
reg_class v3_reg(
    V3, V3_H
);
 959 
// Singleton class for condition codes (the sole flags register)
reg_class int_flags(RFLAGS);
 962 
 963 %}
 964 
 965 //----------DEFINITION BLOCK---------------------------------------------------
 966 // Define name --> value mappings to inform the ADLC of an integer valued name
 967 // Current support includes integer values in the range [0, 0x7FFFFFFF]
 968 // Format:
 969 //        int_def  <name>         ( <int_value>, <expression>);
 970 // Generated Code in ad_<arch>.hpp
 971 //        #define  <name>   (<expression>)
 972 //        // value == <int_value>
 973 // Generated code in ad_<arch>.cpp adlc_verification()
 974 //        assert( <name> == <int_value>, "Expect (<expression>) to equal <int_value>");
 975 //
 976 
 977 // we follow the ppc-aix port in using a simple cost model which ranks
 978 // register operations as cheap, memory ops as more expensive and
 979 // branches as most expensive. the first two have a low as well as a
 980 // normal cost. huge cost appears to be a way of saying don't do
 981 // something
 982 
definitions %{
  // The default cost (of a register move instruction).
  int_def INSN_COST            (    100,     100);
  // Branches and calls cost twice a register op.
  int_def BRANCH_COST          (    200,     2 * INSN_COST);
  int_def CALL_COST            (    200,     2 * INSN_COST);
  // Volatile references (with their associated barriers) are costed
  // an order of magnitude higher than a register op.
  int_def VOLATILE_REF_COST    (   1000,     10 * INSN_COST);
%}
 990 
 991 
 992 //----------SOURCE BLOCK-------------------------------------------------------
 993 // This is a block of C++ code which provides values, functions, and
 994 // definitions necessary in the rest of the architecture description
 995 
 996 source_hpp %{
 997 
 998 #include "asm/macroAssembler.hpp"
 999 #include "gc/shared/cardTable.hpp"
1000 #include "gc/shared/cardTableBarrierSet.hpp"
1001 #include "gc/shared/collectedHeap.hpp"
1002 #include "opto/addnode.hpp"
1003 
class CallStubImpl {

  //--------------------------------------------------------------
  //---<  Used for optimization in Compile::shorten_branches  >---
  //--------------------------------------------------------------

 public:
  // Size of call trampoline stub.
  // This platform does not use call trampolines, so both queries
  // below report zero.
  static uint size_call_trampoline() {
    return 0; // no call trampolines on this platform
  }

  // number of relocations needed by a call trampoline stub
  static uint reloc_call_trampoline() {
    return 0; // no call trampolines on this platform
  }
};
1021 
class HandlerImpl {

 public:

  // emitters for the exception and deopt handler stubs (implemented
  // elsewhere in this file)
  static int emit_exception_handler(CodeBuffer &cbuf);
  static int emit_deopt_handler(CodeBuffer& cbuf);

  static uint size_exception_handler() {
    // the exception handler is a single far branch
    return MacroAssembler::far_branch_size();
  }

  static uint size_deopt_handler() {
    // count one adr and one far branch instruction
    // n.b. this reserves 4 instruction words although only adr + far
    // branch are listed -- presumably the far branch may expand to
    // multiple words; TODO confirm against emit_deopt_handler
    return 4 * NativeInstruction::instruction_size;
  }
};
1038 
1039   bool is_CAS(int opcode);
1040 
1041   // predicates controlling emit of ldr<x>/ldar<x> and associated dmb
1042 
1043   bool unnecessary_acquire(const Node *barrier);
1044   bool needs_acquiring_load(const Node *load);
1045 
1046   // predicates controlling emit of str<x>/stlr<x> and associated dmbs
1047 
1048   bool unnecessary_release(const Node *barrier);
1049   bool unnecessary_volatile(const Node *barrier);
1050   bool needs_releasing_store(const Node *store);
1051 
1052   // predicate controlling translation of CompareAndSwapX
1053   bool needs_acquiring_load_exclusive(const Node *load);
1054 
1055   // predicate controlling translation of StoreCM
1056   bool unnecessary_storestore(const Node *storecm);
1057 
1058   // predicate controlling addressing modes
1059   bool size_fits_all_mem_uses(AddPNode* addp, int shift);
1060 %}
1061 
1062 source %{
1063 
  // Optimization of volatile gets and puts
1065   // -------------------------------------
1066   //
1067   // AArch64 has ldar<x> and stlr<x> instructions which we can safely
1068   // use to implement volatile reads and writes. For a volatile read
1069   // we simply need
1070   //
1071   //   ldar<x>
1072   //
1073   // and for a volatile write we need
1074   //
1075   //   stlr<x>
1076   //
1077   // Alternatively, we can implement them by pairing a normal
1078   // load/store with a memory barrier. For a volatile read we need
1079   //
1080   //   ldr<x>
1081   //   dmb ishld
1082   //
1083   // for a volatile write
1084   //
1085   //   dmb ish
1086   //   str<x>
1087   //   dmb ish
1088   //
1089   // We can also use ldaxr and stlxr to implement compare and swap CAS
1090   // sequences. These are normally translated to an instruction
1091   // sequence like the following
1092   //
1093   //   dmb      ish
1094   // retry:
1095   //   ldxr<x>   rval raddr
1096   //   cmp       rval rold
1097   //   b.ne done
1098   //   stlxr<x>  rval, rnew, rold
1099   //   cbnz      rval retry
1100   // done:
1101   //   cset      r0, eq
1102   //   dmb ishld
1103   //
1104   // Note that the exclusive store is already using an stlxr
1105   // instruction. That is required to ensure visibility to other
1106   // threads of the exclusive write (assuming it succeeds) before that
1107   // of any subsequent writes.
1108   //
1109   // The following instruction sequence is an improvement on the above
1110   //
1111   // retry:
1112   //   ldaxr<x>  rval raddr
1113   //   cmp       rval rold
1114   //   b.ne done
1115   //   stlxr<x>  rval, rnew, rold
1116   //   cbnz      rval retry
1117   // done:
1118   //   cset      r0, eq
1119   //
1120   // We don't need the leading dmb ish since the stlxr guarantees
1121   // visibility of prior writes in the case that the swap is
1122   // successful. Crucially we don't have to worry about the case where
1123   // the swap is not successful since no valid program should be
1124   // relying on visibility of prior changes by the attempting thread
1125   // in the case where the CAS fails.
1126   //
1127   // Similarly, we don't need the trailing dmb ishld if we substitute
1128   // an ldaxr instruction since that will provide all the guarantees we
1129   // require regarding observation of changes made by other threads
1130   // before any change to the CAS address observed by the load.
1131   //
1132   // In order to generate the desired instruction sequence we need to
1133   // be able to identify specific 'signature' ideal graph node
  // sequences which i) occur as a translation of volatile reads or
1135   // writes or CAS operations and ii) do not occur through any other
1136   // translation or graph transformation. We can then provide
  // alternative adlc matching rules which translate these node
1138   // sequences to the desired machine code sequences. Selection of the
1139   // alternative rules can be implemented by predicates which identify
1140   // the relevant node sequences.
1141   //
1142   // The ideal graph generator translates a volatile read to the node
1143   // sequence
1144   //
1145   //   LoadX[mo_acquire]
1146   //   MemBarAcquire
1147   //
1148   // As a special case when using the compressed oops optimization we
1149   // may also see this variant
1150   //
1151   //   LoadN[mo_acquire]
1152   //   DecodeN
1153   //   MemBarAcquire
1154   //
1155   // A volatile write is translated to the node sequence
1156   //
1157   //   MemBarRelease
1158   //   StoreX[mo_release] {CardMark}-optional
1159   //   MemBarVolatile
1160   //
1161   // n.b. the above node patterns are generated with a strict
1162   // 'signature' configuration of input and output dependencies (see
1163   // the predicates below for exact details). The card mark may be as
1164   // simple as a few extra nodes or, in a few GC configurations, may
1165   // include more complex control flow between the leading and
1166   // trailing memory barriers. However, whatever the card mark
1167   // configuration these signatures are unique to translated volatile
1168   // reads/stores -- they will not appear as a result of any other
1169   // bytecode translation or inlining nor as a consequence of
1170   // optimizing transforms.
1171   //
1172   // We also want to catch inlined unsafe volatile gets and puts and
1173   // be able to implement them using either ldar<x>/stlr<x> or some
1174   // combination of ldr<x>/stlr<x> and dmb instructions.
1175   //
1176   // Inlined unsafe volatiles puts manifest as a minor variant of the
1177   // normal volatile put node sequence containing an extra cpuorder
1178   // membar
1179   //
1180   //   MemBarRelease
1181   //   MemBarCPUOrder
1182   //   StoreX[mo_release] {CardMark}-optional
1183   //   MemBarCPUOrder
1184   //   MemBarVolatile
1185   //
1186   // n.b. as an aside, a cpuorder membar is not itself subject to
1187   // matching and translation by adlc rules.  However, the rule
1188   // predicates need to detect its presence in order to correctly
1189   // select the desired adlc rules.
1190   //
1191   // Inlined unsafe volatile gets manifest as a slightly different
1192   // node sequence to a normal volatile get because of the
1193   // introduction of some CPUOrder memory barriers to bracket the
  // Load. However, the same basic skeleton of a LoadX feeding a
  // MemBarAcquire, possibly through an optional DecodeN, is still
1196   // present
1197   //
1198   //   MemBarCPUOrder
1199   //        ||       \\
1200   //   MemBarCPUOrder LoadX[mo_acquire]
1201   //        ||            |
1202   //        ||       {DecodeN} optional
1203   //        ||       /
1204   //     MemBarAcquire
1205   //
1206   // In this case the acquire membar does not directly depend on the
1207   // load. However, we can be sure that the load is generated from an
1208   // inlined unsafe volatile get if we see it dependent on this unique
1209   // sequence of membar nodes. Similarly, given an acquire membar we
1210   // can know that it was added because of an inlined unsafe volatile
1211   // get if it is fed and feeds a cpuorder membar and if its feed
1212   // membar also feeds an acquiring load.
1213   //
1214   // Finally an inlined (Unsafe) CAS operation is translated to the
1215   // following ideal graph
1216   //
1217   //   MemBarRelease
1218   //   MemBarCPUOrder
1219   //   CompareAndSwapX {CardMark}-optional
1220   //   MemBarCPUOrder
1221   //   MemBarAcquire
1222   //
1223   // So, where we can identify these volatile read and write
1224   // signatures we can choose to plant either of the above two code
1225   // sequences. For a volatile read we can simply plant a normal
1226   // ldr<x> and translate the MemBarAcquire to a dmb. However, we can
1227   // also choose to inhibit translation of the MemBarAcquire and
1228   // inhibit planting of the ldr<x>, instead planting an ldar<x>.
1229   //
1230   // When we recognise a volatile store signature we can choose to
  // plant a dmb ish as a translation for the MemBarRelease, a
1232   // normal str<x> and then a dmb ish for the MemBarVolatile.
1233   // Alternatively, we can inhibit translation of the MemBarRelease
1234   // and MemBarVolatile and instead plant a simple stlr<x>
1235   // instruction.
1236   //
1237   // when we recognise a CAS signature we can choose to plant a dmb
1238   // ish as a translation for the MemBarRelease, the conventional
1239   // macro-instruction sequence for the CompareAndSwap node (which
1240   // uses ldxr<x>) and then a dmb ishld for the MemBarAcquire.
1241   // Alternatively, we can elide generation of the dmb instructions
1242   // and plant the alternative CompareAndSwap macro-instruction
1243   // sequence (which uses ldaxr<x>).
1244   //
1245   // Of course, the above only applies when we see these signature
1246   // configurations. We still want to plant dmb instructions in any
1247   // other cases where we may see a MemBarAcquire, MemBarRelease or
1248   // MemBarVolatile. For example, at the end of a constructor which
1249   // writes final/volatile fields we will see a MemBarRelease
1250   // instruction and this needs a 'dmb ish' lest we risk the
1251   // constructed object being visible without making the
1252   // final/volatile field writes visible.
1253   //
1254   // n.b. the translation rules below which rely on detection of the
1255   // volatile signatures and insert ldar<x> or stlr<x> are failsafe.
1256   // If we see anything other than the signature configurations we
1257   // always just translate the loads and stores to ldr<x> and str<x>
1258   // and translate acquire, release and volatile membars to the
1259   // relevant dmb instructions.
1260   //
1261 
1262   // is_CAS(int opcode)
1263   //
1264   // return true if opcode is one of the possible CompareAndSwapX
1265   // values otherwise false.
1266 
1267   bool is_CAS(int opcode)
1268   {
1269     switch(opcode) {
1270       // We handle these
1271     case Op_CompareAndSwapI:
1272     case Op_CompareAndSwapL:
1273     case Op_CompareAndSwapP:
1274     case Op_CompareAndSwapN:
1275  // case Op_CompareAndSwapB:
1276  // case Op_CompareAndSwapS:
1277 #if INCLUDE_SHENANDOAHGC
1278     case Op_ShenandoahCompareAndSwapP:
1279     case Op_ShenandoahCompareAndSwapN:
1280 #endif
1281       return true;
1282       // These are TBD
1283     case Op_WeakCompareAndSwapB:
1284     case Op_WeakCompareAndSwapS:
1285     case Op_WeakCompareAndSwapI:
1286     case Op_WeakCompareAndSwapL:
1287     case Op_WeakCompareAndSwapP:
1288     case Op_WeakCompareAndSwapN:
1289     case Op_CompareAndExchangeB:
1290     case Op_CompareAndExchangeS:
1291     case Op_CompareAndExchangeI:
1292     case Op_CompareAndExchangeL:
1293     case Op_CompareAndExchangeP:
1294     case Op_CompareAndExchangeN:
1295       return false;
1296     default:
1297       return false;
1298     }
1299   }
1300 
1301   // helper to determine the maximum number of Phi nodes we may need to
1302   // traverse when searching from a card mark membar for the merge mem
1303   // feeding a trailing membar or vice versa
1304 
// predicates controlling emit of ldr<x>/ldar<x> and associated dmb

// Returns true if the acquire membar can be elided because the
// preceding load/CAS is translated with acquire semantics
// (ldar<x>/ldaxr<x>); false means a dmb must be planted.
bool unnecessary_acquire(const Node *barrier)
{
  assert(barrier->is_MemBar(), "expecting a membar");

  if (UseBarriersForVolatile) {
    // we need to plant a dmb
    return false;
  }

  MemBarNode* mb = barrier->as_MemBar();

  // trailing membar of a volatile read signature: the load itself is
  // planted as an ldar, making the dmb redundant
  if (mb->trailing_load()) {
    return true;
  }

  // trailing membar of a CAS signature: redundant only for the strong
  // CAS forms (see is_CAS), which are translated using ldaxr
  if (mb->trailing_load_store()) {
    Node* load_store = mb->in(MemBarNode::Precedent);
    assert(load_store->is_LoadStore(), "unexpected graph shape");
    return is_CAS(load_store->Opcode());
  }

  return false;
}
1330 
1331 bool needs_acquiring_load(const Node *n)
1332 {
1333   assert(n->is_Load(), "expecting a load");
1334   if (UseBarriersForVolatile) {
1335     // we use a normal load and a dmb
1336     return false;
1337   }
1338 
1339   LoadNode *ld = n->as_Load();
1340 
1341   return ld->is_acquire();
1342 }
1343 
1344 bool unnecessary_release(const Node *n)
1345 {
1346   assert((n->is_MemBar() &&
1347           n->Opcode() == Op_MemBarRelease),
1348          "expecting a release membar");
1349 
1350   if (UseBarriersForVolatile) {
1351     // we need to plant a dmb
1352     return false;
1353   }
1354 
1355   MemBarNode *barrier = n->as_MemBar();
1356   if (!barrier->leading()) {
1357     return false;
1358   } else {
1359     Node* trailing = barrier->trailing_membar();
1360     MemBarNode* trailing_mb = trailing->as_MemBar();
1361     assert(trailing_mb->trailing(), "Not a trailing membar?");
1362     assert(trailing_mb->leading_membar() == n, "inconsistent leading/trailing membars");
1363 
1364     Node* mem = trailing_mb->in(MemBarNode::Precedent);
1365     if (mem->is_Store()) {
1366       assert(mem->as_Store()->is_release(), "");
1367       assert(trailing_mb->Opcode() == Op_MemBarVolatile, "");
1368       return true;
1369     } else {
1370       assert(mem->is_LoadStore(), "");
1371       assert(trailing_mb->Opcode() == Op_MemBarAcquire, "");
1372       return is_CAS(mem->Opcode());
1373     }
1374   }
1375   return false;
1376 }
1377 
// Returns true if the MemBarVolatile can be elided because it is the
// trailing membar of a volatile store signature (the store is then
// planted as an stlr<x>).
bool unnecessary_volatile(const Node *n)
{
  // assert n->is_MemBar();
  if (UseBarriersForVolatile) {
    // we need to plant a dmb
    return false;
  }

  MemBarNode *mbvol = n->as_MemBar();

  bool release = mbvol->trailing_store();
  assert(!release || (mbvol->in(MemBarNode::Precedent)->is_Store() && mbvol->in(MemBarNode::Precedent)->as_Store()->is_release()), "");
#ifdef ASSERT
  // cross-check the leading/trailing membar pairing recorded in the
  // graph
  if (release) {
    Node* leading = mbvol->leading_membar();
    assert(leading->Opcode() == Op_MemBarRelease, "");
    assert(leading->as_MemBar()->leading_store(), "");
    assert(leading->as_MemBar()->trailing_membar() == mbvol, "");
  }
#endif

  return release;
}
1401 
1402 // predicates controlling emit of str<x>/stlr<x> and associated dmbs
1403 
1404 bool needs_releasing_store(const Node *n)
1405 {
1406   // assert n->is_Store();
1407   if (UseBarriersForVolatile) {
1408     // we use a normal store and dmb combination
1409     return false;
1410   }
1411 
1412   StoreNode *st = n->as_Store();
1413 
1414   return st->trailing_membar() != NULL;
1415 }
1416 
// predicate controlling translation of CAS
//
// returns true if CAS needs to use an acquiring load otherwise false

bool needs_acquiring_load_exclusive(const Node *n)
{
  assert(is_CAS(n->Opcode()), "expecting a compare and swap");
  if (UseBarriersForVolatile) {
    // the conventional (ldxr) CAS sequence bracketed by dmbs is used
    return false;
  }

  // a CAS node always appears with its trailing membar (the
  // leading/trailing signature described above)
  LoadStoreNode* ldst = n->as_LoadStore();
  assert(ldst->trailing_membar() != NULL, "expected trailing membar");

  // so we can just return true here
  return true;
}
1434 
// predicate controlling translation of StoreCM
//
// returns true if the StoreStore barrier that would normally precede
// the card write is unnecessary, otherwise false (i.e. false means a
// dmb ishst must be planted)

bool unnecessary_storestore(const Node *storecm)
{
  assert(storecm->Opcode()  == Op_StoreCM, "expecting a StoreCM");

  // we need to generate a dmb ishst between an object put and the
  // associated card mark when we are using CMS without conditional
  // card marking

  if (UseConcMarkSweepGC && !UseCondCardMark) {
    return false;
  }

  // a storestore is unnecessary in all other cases

  return true;
}
1456 
1457 
1458 #define __ _masm.
1459 
1460 // advance declarations for helper functions to convert register
1461 // indices to register objects
1462 
1463 // the ad file has to provide implementations of certain methods
1464 // expected by the generic code
1465 //
1466 // REQUIRED FUNCTIONALITY
1467 
1468 //=============================================================================
1469 
1470 // !!!!! Special hack to get all types of calls to specify the byte offset
1471 //       from the start of the call to the point where the return address
1472 //       will point.
1473 
1474 int MachCallStaticJavaNode::ret_addr_offset()
1475 {
1476   // call should be a simple bl
1477   int off = 4;
1478   return off;
1479 }
1480 
1481 int MachCallDynamicJavaNode::ret_addr_offset()
1482 {
1483   return 16; // movz, movk, movk, bl
1484 }
1485 
1486 int MachCallRuntimeNode::ret_addr_offset() {
1487   // for generated stubs the call will be
1488   //   far_call(addr)
1489   // for real runtime callouts it will be six instructions
1490   // see aarch64_enc_java_to_runtime
1491   //   adr(rscratch2, retaddr)
1492   //   lea(rscratch1, RuntimeAddress(addr)
1493   //   stp(zr, rscratch2, Address(__ pre(sp, -2 * wordSize)))
1494   //   blr(rscratch1)
1495   CodeBlob *cb = CodeCache::find_blob(_entry_point);
1496   if (cb) {
1497     return MacroAssembler::far_branch_size();
1498   } else {
1499     return 6 * NativeInstruction::instruction_size;
1500   }
1501 }
1502 
// Indicate if the safepoint node needs the polling page as an input

// the shared code plants the oop data at the start of the generated
// code for the safepoint node and that needs to be at the load
// instruction itself. so we cannot plant a mov of the safepoint poll
// address followed by a load. setting this to true means the mov is
// scheduled as a prior instruction. that's better for scheduling
// anyway.

bool SafePointNode::needs_polling_address_input()
{
  return true;
}
1516 
1517 //=============================================================================
1518 
#ifndef PRODUCT
// debug listing for the breakpoint pseudo-instruction
void MachBreakpointNode::format(PhaseRegAlloc *ra_, outputStream *st) const {
  st->print("BREAKPOINT");
}
#endif
1524 
void MachBreakpointNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
  MacroAssembler _masm(&cbuf);
  // a single brk instruction with immediate 0
  __ brk(0);
}
1529 
uint MachBreakpointNode::size(PhaseRegAlloc *ra_) const {
  // defer to the generic size computation
  return MachNode::size(ra_);
}
1533 
1534 //=============================================================================
1535 
#ifndef PRODUCT
  // debug listing for the nop padding pseudo-instruction
  // n.b. _count is the number of nop instructions (see size() below),
  // but the listing labels it "bytes" -- TODO confirm intended units
  void MachNopNode::format(PhaseRegAlloc*, outputStream* st) const {
    st->print("nop \t# %d bytes pad for loops and calls", _count);
  }
#endif
1541 
  // emit _count nop instructions
  void MachNopNode::emit(CodeBuffer &cbuf, PhaseRegAlloc*) const {
    MacroAssembler _masm(&cbuf);
    for (int i = 0; i < _count; i++) {
      __ nop();
    }
  }
1548 
  uint MachNopNode::size(PhaseRegAlloc*) const {
    // one instruction word per emitted nop
    return _count * NativeInstruction::instruction_size;
  }
1552 
1553 //=============================================================================
// the constant table base produces no output register (empty mask)
const RegMask& MachConstantBaseNode::_out_RegMask = RegMask::Empty;
1555 
// offset of the table base within the constant table
int Compile::ConstantTable::calculate_table_base_offset() const {
  return 0;  // absolute addressing, no offset
}
1559 
// MachConstantBaseNode is never expanded after register allocation on
// this platform, so postalloc_expand must never be reached.
bool MachConstantBaseNode::requires_postalloc_expand() const { return false; }
void MachConstantBaseNode::postalloc_expand(GrowableArray <Node *> *nodes, PhaseRegAlloc *ra_) {
  ShouldNotReachHere();
}
1564 
// the constant base node emits no code on this platform
void MachConstantBaseNode::emit(CodeBuffer& cbuf, PhaseRegAlloc* ra_) const {
  // Empty encoding
}
1568 
uint MachConstantBaseNode::size(PhaseRegAlloc* ra_) const {
  // nothing is emitted (see emit above)
  return 0;
}
1572 
#ifndef PRODUCT
// debug listing placeholder for the (empty) constant base node
void MachConstantBaseNode::format(PhaseRegAlloc* ra_, outputStream* st) const {
  st->print("-- \t// MachConstantBaseNode (empty encoding)");
}
#endif
1578 
#ifndef PRODUCT
// debug listing of the prolog; this should mirror the code generated
// by MachPrologNode::emit via build_frame -- TODO confirm against
// build_frame
void MachPrologNode::format(PhaseRegAlloc *ra_, outputStream *st) const {
  Compile* C = ra_->C;

  int framesize = C->frame_slots() << LogBytesPerInt;

  if (C->need_stack_bang(framesize))
    st->print("# stack bang size=%d\n\t", framesize);

  // small frames are claimed with a single immediate sub; larger
  // frames need the size materialized in rscratch1 first
  if (framesize < ((1 << 9) + 2 * wordSize)) {
    st->print("sub  sp, sp, #%d\n\t", framesize);
    st->print("stp  rfp, lr, [sp, #%d]", framesize - 2 * wordSize);
    if (PreserveFramePointer) st->print("\n\tadd  rfp, sp, #%d", framesize - 2 * wordSize);
  } else {
    st->print("stp  lr, rfp, [sp, #%d]!\n\t", -(2 * wordSize));
    if (PreserveFramePointer) st->print("mov  rfp, sp\n\t");
    st->print("mov  rscratch1, #%d\n\t", framesize - 2 * wordSize);
    st->print("sub  sp, sp, rscratch1");
  }
}
#endif
1600 
// emit the method prolog: patchable nop, optional stack bang, frame
// build and constant-table setup
void MachPrologNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
  Compile* C = ra_->C;
  MacroAssembler _masm(&cbuf);

  // n.b. frame size includes space for return pc and rfp
  const long framesize = C->frame_size_in_bytes();
  assert(framesize%(2*wordSize) == 0, "must preserve 2*wordSize alignment");

  // insert a nop at the start of the prolog so we can patch in a
  // branch if we need to invalidate the method later
  __ nop();

  // bang the stack if the worst-case frame is big enough to need it
  int bangsize = C->bang_size_in_bytes();
  if (C->need_stack_bang(bangsize) && UseStackBanging)
    __ generate_stack_overflow_check(bangsize);

  __ build_frame(framesize);

  if (VerifyStackAtCalls) {
    Unimplemented();
  }

  // record the code offset at which the frame is fully set up
  C->set_frame_complete(cbuf.insts_size());

  if (C->has_mach_constant_base_node()) {
    // NOTE: We set the table base offset here because users might be
    // emitted before MachConstantBaseNode.
    Compile::ConstantTable& constant_table = C->constant_table();
    constant_table.set_table_base_offset(constant_table.calculate_table_base_offset());
  }
}
1632 
// size of the prolog code; depends on frame size, stack banging and
// constant table setup, so it is computed generically
uint MachPrologNode::size(PhaseRegAlloc* ra_) const
{
  return MachNode::size(ra_); // too many variables; just compute it
                              // the hard way
}
1638 
// number of relocatable values in the prolog -- none
int MachPrologNode::reloc() const
{
  return 0;
}
1643 
1644 //=============================================================================
1645 
#ifndef PRODUCT
// debug listing of the epilog; this should mirror the code generated
// by MachEpilogNode::emit via remove_frame -- TODO confirm against
// remove_frame
void MachEpilogNode::format(PhaseRegAlloc *ra_, outputStream *st) const {
  Compile* C = ra_->C;
  int framesize = C->frame_slots() << LogBytesPerInt;

  st->print("# pop frame %d\n\t",framesize);

  // three cases: empty frame, a frame small enough for immediate
  // offsets, and a large frame needing rscratch1
  if (framesize == 0) {
    st->print("ldp  lr, rfp, [sp],#%d\n\t", (2 * wordSize));
  } else if (framesize < ((1 << 9) + 2 * wordSize)) {
    st->print("ldp  lr, rfp, [sp,#%d]\n\t", framesize - 2 * wordSize);
    st->print("add  sp, sp, #%d\n\t", framesize);
  } else {
    st->print("mov  rscratch1, #%d\n\t", framesize - 2 * wordSize);
    st->print("add  sp, sp, rscratch1\n\t");
    st->print("ldp  lr, rfp, [sp],#%d\n\t", (2 * wordSize));
  }

  if (do_polling() && C->is_method_compilation()) {
    st->print("# touch polling page\n\t");
    st->print("mov  rscratch1, #0x%lx\n\t", p2i(os::get_polling_page()));
    st->print("ldr zr, [rscratch1]");
  }
}
#endif
1671 
// emit the method epilog: pop the frame, optional reserved-stack
// check, and the return-poll of the safepoint polling page
void MachEpilogNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
  Compile* C = ra_->C;
  MacroAssembler _masm(&cbuf);
  int framesize = C->frame_slots() << LogBytesPerInt;

  __ remove_frame(framesize);

  if (StackReservedPages > 0 && C->has_reserved_stack_access()) {
    __ reserved_stack_check();
  }

  // touch the polling page on return so a safepoint can be requested
  if (do_polling() && C->is_method_compilation()) {
    __ read_polling_page(rscratch1, os::get_polling_page(), relocInfo::poll_return_type);
  }
}
1687 
uint MachEpilogNode::size(PhaseRegAlloc *ra_) const {
  // Variable size (depends on frame size and polling).
  // Determine dynamically.
  return MachNode::size(ra_);
}
1692 
int MachEpilogNode::reloc() const {
  // Return number of relocatable values contained in this instruction.
  return 1; // 1 for polling page (see read_polling_page in emit).
}
1697 
// use the default pipeline class for the epilog
const Pipeline * MachEpilogNode::pipeline() const {
  return MachNode::pipeline_class();
}
1701 
// This method seems to be obsolete. It is declared in machnode.hpp
// and defined in all *.ad files, but it is never called. Should we
// get rid of it?
int MachEpilogNode::safepoint_offset() const {
  assert(do_polling(), "no return for this epilog node");
  return 4; // one instruction word
}
1709 
1710 //=============================================================================
1711 
// Figure out which register class each belongs in: rc_int, rc_float or
// rc_stack.
enum RC { rc_bad, rc_int, rc_float, rc_stack };

// Map an allocator register/slot number to its register class.
static enum RC rc_class(OptoReg::Name reg) {

  if (reg == OptoReg::Bad) {
    return rc_bad;
  }

  // we have 30 int registers * 2 halves
  // (rscratch1 and rscratch2 are omitted)

  if (reg < 60) {
    return rc_int;
  }

  // we have 32 float registers, each with 4 mask slots (Vn, Vn_H,
  // Vn_J, Vn_K -- see vectorx_reg above), hence 128 slots
  if (reg < 60 + 128) {
    return rc_float;
  }

  // Between float regs & stack is the flags regs.
  assert(OptoReg::is_stack(reg), "blow up if spilling flags");

  return rc_stack;
}
1739 
1740 uint MachSpillCopyNode::implementation(CodeBuffer *cbuf, PhaseRegAlloc *ra_, bool do_size, outputStream *st) const {
1741   Compile* C = ra_->C;
1742 
1743   // Get registers to move.
1744   OptoReg::Name src_hi = ra_->get_reg_second(in(1));
1745   OptoReg::Name src_lo = ra_->get_reg_first(in(1));
1746   OptoReg::Name dst_hi = ra_->get_reg_second(this);
1747   OptoReg::Name dst_lo = ra_->get_reg_first(this);
1748 
1749   enum RC src_hi_rc = rc_class(src_hi);
1750   enum RC src_lo_rc = rc_class(src_lo);
1751   enum RC dst_hi_rc = rc_class(dst_hi);
1752   enum RC dst_lo_rc = rc_class(dst_lo);
1753 
1754   assert(src_lo != OptoReg::Bad && dst_lo != OptoReg::Bad, "must move at least 1 register");
1755 
1756   if (src_hi != OptoReg::Bad) {
1757     assert((src_lo&1)==0 && src_lo+1==src_hi &&
1758            (dst_lo&1)==0 && dst_lo+1==dst_hi,
1759            "expected aligned-adjacent pairs");
1760   }
1761 
1762   if (src_lo == dst_lo && src_hi == dst_hi) {
1763     return 0;            // Self copy, no move.
1764   }
1765 
1766   bool is64 = (src_lo & 1) == 0 && src_lo + 1 == src_hi &&
1767               (dst_lo & 1) == 0 && dst_lo + 1 == dst_hi;
1768   int src_offset = ra_->reg2offset(src_lo);
1769   int dst_offset = ra_->reg2offset(dst_lo);
1770 
1771   if (bottom_type()->isa_vect() != NULL) {
1772     uint ireg = ideal_reg();
1773     assert(ireg == Op_VecD || ireg == Op_VecX, "must be 64 bit or 128 bit vector");
1774     if (cbuf) {
1775       MacroAssembler _masm(cbuf);
1776       assert((src_lo_rc != rc_int && dst_lo_rc != rc_int), "sanity");
1777       if (src_lo_rc == rc_stack && dst_lo_rc == rc_stack) {
1778         // stack->stack
1779         assert((src_offset & 7) == 0 && (dst_offset & 7) == 0, "unaligned stack offset");
1780         if (ireg == Op_VecD) {
1781           __ unspill(rscratch1, true, src_offset);
1782           __ spill(rscratch1, true, dst_offset);
1783         } else {
1784           __ spill_copy128(src_offset, dst_offset);
1785         }
1786       } else if (src_lo_rc == rc_float && dst_lo_rc == rc_float) {
1787         __ mov(as_FloatRegister(Matcher::_regEncode[dst_lo]),
1788                ireg == Op_VecD ? __ T8B : __ T16B,
1789                as_FloatRegister(Matcher::_regEncode[src_lo]));
1790       } else if (src_lo_rc == rc_float && dst_lo_rc == rc_stack) {
1791         __ spill(as_FloatRegister(Matcher::_regEncode[src_lo]),
1792                        ireg == Op_VecD ? __ D : __ Q,
1793                        ra_->reg2offset(dst_lo));
1794       } else if (src_lo_rc == rc_stack && dst_lo_rc == rc_float) {
1795         __ unspill(as_FloatRegister(Matcher::_regEncode[dst_lo]),
1796                        ireg == Op_VecD ? __ D : __ Q,
1797                        ra_->reg2offset(src_lo));
1798       } else {
1799         ShouldNotReachHere();
1800       }
1801     }
1802   } else if (cbuf) {
1803     MacroAssembler _masm(cbuf);
1804     switch (src_lo_rc) {
1805     case rc_int:
1806       if (dst_lo_rc == rc_int) {  // gpr --> gpr copy
1807         if (is64) {
1808             __ mov(as_Register(Matcher::_regEncode[dst_lo]),
1809                    as_Register(Matcher::_regEncode[src_lo]));
1810         } else {
1811             MacroAssembler _masm(cbuf);
1812             __ movw(as_Register(Matcher::_regEncode[dst_lo]),
1813                     as_Register(Matcher::_regEncode[src_lo]));
1814         }
1815       } else if (dst_lo_rc == rc_float) { // gpr --> fpr copy
1816         if (is64) {
1817             __ fmovd(as_FloatRegister(Matcher::_regEncode[dst_lo]),
1818                      as_Register(Matcher::_regEncode[src_lo]));
1819         } else {
1820             __ fmovs(as_FloatRegister(Matcher::_regEncode[dst_lo]),
1821                      as_Register(Matcher::_regEncode[src_lo]));
1822         }
1823       } else {                    // gpr --> stack spill
1824         assert(dst_lo_rc == rc_stack, "spill to bad register class");
1825         __ spill(as_Register(Matcher::_regEncode[src_lo]), is64, dst_offset);
1826       }
1827       break;
1828     case rc_float:
1829       if (dst_lo_rc == rc_int) {  // fpr --> gpr copy
1830         if (is64) {
1831             __ fmovd(as_Register(Matcher::_regEncode[dst_lo]),
1832                      as_FloatRegister(Matcher::_regEncode[src_lo]));
1833         } else {
1834             __ fmovs(as_Register(Matcher::_regEncode[dst_lo]),
1835                      as_FloatRegister(Matcher::_regEncode[src_lo]));
1836         }
1837       } else if (dst_lo_rc == rc_float) { // fpr --> fpr copy
1838           if (cbuf) {
1839             __ fmovd(as_FloatRegister(Matcher::_regEncode[dst_lo]),
1840                      as_FloatRegister(Matcher::_regEncode[src_lo]));
1841         } else {
1842             __ fmovs(as_FloatRegister(Matcher::_regEncode[dst_lo]),
1843                      as_FloatRegister(Matcher::_regEncode[src_lo]));
1844         }
1845       } else {                    // fpr --> stack spill
1846         assert(dst_lo_rc == rc_stack, "spill to bad register class");
1847         __ spill(as_FloatRegister(Matcher::_regEncode[src_lo]),
1848                  is64 ? __ D : __ S, dst_offset);
1849       }
1850       break;
1851     case rc_stack:
1852       if (dst_lo_rc == rc_int) {  // stack --> gpr load
1853         __ unspill(as_Register(Matcher::_regEncode[dst_lo]), is64, src_offset);
1854       } else if (dst_lo_rc == rc_float) { // stack --> fpr load
1855         __ unspill(as_FloatRegister(Matcher::_regEncode[dst_lo]),
1856                    is64 ? __ D : __ S, src_offset);
1857       } else {                    // stack --> stack copy
1858         assert(dst_lo_rc == rc_stack, "spill to bad register class");
1859         __ unspill(rscratch1, is64, src_offset);
1860         __ spill(rscratch1, is64, dst_offset);
1861       }
1862       break;
1863     default:
1864       assert(false, "bad rc_class for spill");
1865       ShouldNotReachHere();
1866     }
1867   }
1868 
1869   if (st) {
1870     st->print("spill ");
1871     if (src_lo_rc == rc_stack) {
1872       st->print("[sp, #%d] -> ", ra_->reg2offset(src_lo));
1873     } else {
1874       st->print("%s -> ", Matcher::regName[src_lo]);
1875     }
1876     if (dst_lo_rc == rc_stack) {
1877       st->print("[sp, #%d]", ra_->reg2offset(dst_lo));
1878     } else {
1879       st->print("%s", Matcher::regName[dst_lo]);
1880     }
1881     if (bottom_type()->isa_vect() != NULL) {
1882       st->print("\t# vector spill size = %d", ideal_reg()==Op_VecD ? 64:128);
1883     } else {
1884       st->print("\t# spill size = %d", is64 ? 64:32);
1885     }
1886   }
1887 
1888   return 0;
1889 
1890 }
1891 
1892 #ifndef PRODUCT
1893 void MachSpillCopyNode::format(PhaseRegAlloc *ra_, outputStream *st) const {
1894   if (!ra_)
1895     st->print("N%d = SpillCopy(N%d)", _idx, in(1)->_idx);
1896   else
1897     implementation(NULL, ra_, false, st);
1898 }
1899 #endif
1900 
// Emit the spill-copy code into the code buffer (no debug printing).
void MachSpillCopyNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
  implementation(&cbuf, ra_, false, NULL);
}

// Size of the emitted spill copy; delegates to the generic MachNode sizing.
uint MachSpillCopyNode::size(PhaseRegAlloc *ra_) const {
  return MachNode::size(ra_);
}
1908 
1909 //=============================================================================
1910 
1911 #ifndef PRODUCT
1912 void BoxLockNode::format(PhaseRegAlloc *ra_, outputStream *st) const {
1913   int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
1914   int reg = ra_->get_reg_first(this);
1915   st->print("add %s, rsp, #%d]\t# box lock",
1916             Matcher::regName[reg], offset);
1917 }
1918 #endif
1919 
1920 void BoxLockNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
1921   MacroAssembler _masm(&cbuf);
1922 
1923   int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
1924   int reg    = ra_->get_encode(this);
1925 
1926   if (Assembler::operand_valid_for_add_sub_immediate(offset)) {
1927     __ add(as_Register(reg), sp, offset);
1928   } else {
1929     ShouldNotReachHere();
1930   }
1931 }
1932 
uint BoxLockNode::size(PhaseRegAlloc *ra_) const {
  // BoxLockNode is not a MachNode, so we can't just call MachNode::size(ra_).
  // emit() above produces exactly one instruction (the add), hence 4 bytes.
  return 4;
}
1937 
1938 //=============================================================================
1939 
1940 #ifndef PRODUCT
1941 void MachUEPNode::format(PhaseRegAlloc* ra_, outputStream* st) const
1942 {
1943   st->print_cr("# MachUEPNode");
1944   if (UseCompressedClassPointers) {
1945     st->print_cr("\tldrw rscratch1, j_rarg0 + oopDesc::klass_offset_in_bytes()]\t# compressed klass");
1946     if (Universe::narrow_klass_shift() != 0) {
1947       st->print_cr("\tdecode_klass_not_null rscratch1, rscratch1");
1948     }
1949   } else {
1950    st->print_cr("\tldr rscratch1, j_rarg0 + oopDesc::klass_offset_in_bytes()]\t# compressed klass");
1951   }
1952   st->print_cr("\tcmp r0, rscratch1\t # Inline cache check");
1953   st->print_cr("\tbne, SharedRuntime::_ic_miss_stub");
1954 }
1955 #endif
1956 
void MachUEPNode::emit(CodeBuffer& cbuf, PhaseRegAlloc* ra_) const
{
  // This is the unverified entry point.
  MacroAssembler _masm(&cbuf);

  // Compare the receiver's klass (loaded from j_rarg0) against the
  // inline-cache klass; on mismatch, jump to the ic-miss stub.
  __ cmp_klass(j_rarg0, rscratch2, rscratch1);
  Label skip;
  // TODO
  // can we avoid this skip and still use a reloc?
  __ br(Assembler::EQ, skip);
  __ far_jump(RuntimeAddress(SharedRuntime::get_ic_miss_stub()));
  __ bind(skip);
}

// Size of the UEP code; delegates to the generic MachNode sizing.
uint MachUEPNode::size(PhaseRegAlloc* ra_) const
{
  return MachNode::size(ra_);
}
1975 
1976 // REQUIRED EMIT CODE
1977 
1978 //=============================================================================
1979 
1980 // Emit exception handler code.
1981 int HandlerImpl::emit_exception_handler(CodeBuffer& cbuf)
1982 {
1983   // mov rscratch1 #exception_blob_entry_point
1984   // br rscratch1
1985   // Note that the code buffer's insts_mark is always relative to insts.
1986   // That's why we must use the macroassembler to generate a handler.
1987   MacroAssembler _masm(&cbuf);
1988   address base = __ start_a_stub(size_exception_handler());
1989   if (base == NULL) {
1990     ciEnv::current()->record_failure("CodeCache is full");
1991     return 0;  // CodeBuffer::expand failed
1992   }
1993   int offset = __ offset();
1994   __ far_jump(RuntimeAddress(OptoRuntime::exception_blob()->entry_point()));
1995   assert(__ offset() - offset <= (int) size_exception_handler(), "overflow");
1996   __ end_a_stub();
1997   return offset;
1998 }
1999 
2000 // Emit deopt handler code.
2001 int HandlerImpl::emit_deopt_handler(CodeBuffer& cbuf)
2002 {
2003   // Note that the code buffer's insts_mark is always relative to insts.
2004   // That's why we must use the macroassembler to generate a handler.
2005   MacroAssembler _masm(&cbuf);
2006   address base = __ start_a_stub(size_deopt_handler());
2007   if (base == NULL) {
2008     ciEnv::current()->record_failure("CodeCache is full");
2009     return 0;  // CodeBuffer::expand failed
2010   }
2011   int offset = __ offset();
2012 
2013   __ adr(lr, __ pc());
2014   __ far_jump(RuntimeAddress(SharedRuntime::deopt_blob()->unpack()));
2015 
2016   assert(__ offset() - offset <= (int) size_deopt_handler(), "overflow");
2017   __ end_a_stub();
2018   return offset;
2019 }
2020 
2021 // REQUIRED MATCHER CODE
2022 
2023 //=============================================================================
2024 
2025 const bool Matcher::match_rule_supported(int opcode) {
2026 
2027   switch (opcode) {
2028   default:
2029     break;
2030   }
2031 
2032   if (!has_match_rule(opcode)) {
2033     return false;
2034   }
2035 
2036   return true;  // Per default match rules are supported.
2037 }
2038 
2039 const bool Matcher::match_rule_supported_vector(int opcode, int vlen) {
2040 
2041   // TODO
2042   // identify extra cases that we might want to provide match rules for
2043   // e.g. Op_ vector nodes and other intrinsics while guarding with vlen
2044   bool ret_value = match_rule_supported(opcode);
2045   // Add rules here.
2046 
2047   return ret_value;  // Per default match rules are supported.
2048 }
2049 
// No predicated (masked) vector instruction support on this port.
const bool Matcher::has_predicated_vectors(void) {
  return false;
}

// Use the default float register pressure threshold unchanged.
const int Matcher::float_pressure(int default_pressure_threshold) {
  return default_pressure_threshold;
}

// Not used on aarch64; Unimplemented() aborts if this is ever called.
int Matcher::regnum_to_fpu_offset(int regnum)
{
  Unimplemented();
  return 0;
}
2063 
// Is this branch offset short enough that a short branch can be used?
//
// NOTE: If the platform does not provide any short branch variants, then
//       this method should return false for offset 0.
bool Matcher::is_short_branch_offset(int rule, int br_size, int offset) {
  // The passed offset is relative to address of the branch.

  // Accept anything within a signed 16-bit byte range (+/-32KB).
  return (-32768 <= offset && offset < 32768);
}

const bool Matcher::isSimpleConstant64(jlong value) {
  // Will one (StoreL ConL) be cheaper than two (StoreI ConI)?.
  // Probably always true, even if a temp register is required.
  return true;
}

// true just means we have fast l2f conversion
const bool Matcher::convL2FSupported(void) {
  return true;
}
2084 
2085 // Vector width in bytes.
2086 const int Matcher::vector_width_in_bytes(BasicType bt) {
2087   int size = MIN2(16,(int)MaxVectorSize);
2088   // Minimum 2 values in vector
2089   if (size < 2*type2aelembytes(bt)) size = 0;
2090   // But never < 4
2091   if (size < 4) size = 0;
2092   return size;
2093 }
2094 
// Limits on vector size (number of elements) loaded into vector.
const int Matcher::max_vector_size(const BasicType bt) {
  // Maximum element count = vector width in bytes / element size.
  return vector_width_in_bytes(bt)/type2aelembytes(bt);
}
2099 const int Matcher::min_vector_size(const BasicType bt) {
2100 //  For the moment limit the vector size to 8 bytes
2101     int size = 8 / type2aelembytes(bt);
2102     if (size < 2) size = 2;
2103     return size;
2104 }
2105 
2106 // Vector ideal reg.
2107 const uint Matcher::vector_ideal_reg(int len) {
2108   switch(len) {
2109     case  8: return Op_VecD;
2110     case 16: return Op_VecX;
2111   }
2112   ShouldNotReachHere();
2113   return 0;
2114 }
2115 
2116 const uint Matcher::vector_shift_count_ideal_reg(int size) {
2117   switch(size) {
2118     case  8: return Op_VecD;
2119     case 16: return Op_VecX;
2120   }
2121   ShouldNotReachHere();
2122   return 0;
2123 }
2124 
// AES support not yet implemented
const bool Matcher::pass_original_key_for_aes() {
  return false;
}

// aarch64 supports misaligned vectors store/load.
const bool Matcher::misaligned_vectors_ok() {
  return true;
}

// false => size gets scaled to BytesPerLong, ok.
const bool Matcher::init_array_count_is_in_bytes = false;

// Use conditional move (CMOVL)
const int Matcher::long_cmove_cost() {
  // long cmoves are no more expensive than int cmoves
  return 0;
}

// Use conditional move for floats as well.
const int Matcher::float_cmove_cost() {
  // float cmoves are no more expensive than int cmoves
  return 0;
}

// Does the CPU require late expand (see block.cpp for description of late expand)?
const bool Matcher::require_postalloc_expand = false;

// Do we need to mask the count passed to shift instructions or does
// the cpu only look at the lower 5/6 bits anyway?
const bool Matcher::need_masked_shift_count = false;
2155 
// This affects two different things:
//  - how Decode nodes are matched
//  - how ImplicitNullCheck opportunities are recognized
// If true, the matcher will try to remove all Decodes and match them
// (as operands) into nodes. NullChecks are not prepared to deal with
// Decodes by final_graph_reshaping().
// If false, final_graph_reshaping() forces the decode behind the Cmp
// for a NullCheck. The matcher matches the Decode node into a register.
// Implicit_null_check optimization moves the Decode along with the
// memory operation back up before the NullCheck.
bool Matcher::narrow_oop_use_complex_address() {
  // Only usable when decoding needs no shift.
  return Universe::narrow_oop_shift() == 0;
}

bool Matcher::narrow_klass_use_complex_address() {
// TODO
// decide whether we need to set this to true
  return false;
}

bool Matcher::const_oop_prefer_decode() {
  // Prefer ConN+DecodeN over ConP in simple compressed oops mode.
  return Universe::narrow_oop_base() == NULL;
}

bool Matcher::const_klass_prefer_decode() {
  // Prefer ConNKlass+DecodeNKlass over ConP in simple compressed klass mode.
  return Universe::narrow_klass_base() == NULL;
}

// Is it better to copy float constants, or load them directly from
// memory?  Intel can load a float constant from a direct address,
// requiring no extra registers.  Most RISCs will have to materialize
// an address into a register first, so they would do better to copy
// the constant from stack.
const bool Matcher::rematerialize_float_constants = false;

// If CPU can load and store mis-aligned doubles directly then no
// fixup is needed.  Else we split the double into 2 integer pieces
// and move it piece-by-piece.  Only happens when passing doubles into
// C code as the Java calling convention forces doubles to be aligned.
const bool Matcher::misaligned_doubles_ok = true;

// Not used on aarch64; Unimplemented() aborts if this is ever called.
// (The previous "No-op on amd64" comment was stale/misleading.)
void Matcher::pd_implicit_null_fixup(MachNode *node, uint idx) {
  Unimplemented();
}

// Advertise here if the CPU requires explicit rounding operations to
// implement the UseStrictFP mode.
const bool Matcher::strict_fp_requires_explicit_rounding = false;

// Are floats converted to double when stored to stack during
// deoptimization?
bool Matcher::float_in_double() { return false; }

// Do ints take an entire long register or just half?
// The relevant question is how the int is callee-saved:
// the whole long is written but de-opt'ing will have to extract
// the relevant 32 bits.
const bool Matcher::int_in_long = true;
2217 
2218 // Return whether or not this register is ever used as an argument.
2219 // This function is used on startup to build the trampoline stubs in
2220 // generateOptoStub.  Registers not mentioned will be killed by the VM
2221 // call in the trampoline, and arguments in those registers not be
2222 // available to the callee.
2223 bool Matcher::can_be_java_arg(int reg)
2224 {
2225   return
2226     reg ==  R0_num || reg == R0_H_num ||
2227     reg ==  R1_num || reg == R1_H_num ||
2228     reg ==  R2_num || reg == R2_H_num ||
2229     reg ==  R3_num || reg == R3_H_num ||
2230     reg ==  R4_num || reg == R4_H_num ||
2231     reg ==  R5_num || reg == R5_H_num ||
2232     reg ==  R6_num || reg == R6_H_num ||
2233     reg ==  R7_num || reg == R7_H_num ||
2234     reg ==  V0_num || reg == V0_H_num ||
2235     reg ==  V1_num || reg == V1_H_num ||
2236     reg ==  V2_num || reg == V2_H_num ||
2237     reg ==  V3_num || reg == V3_H_num ||
2238     reg ==  V4_num || reg == V4_H_num ||
2239     reg ==  V5_num || reg == V5_H_num ||
2240     reg ==  V6_num || reg == V6_H_num ||
2241     reg ==  V7_num || reg == V7_H_num;
2242 }
2243 
// Any register that can carry a Java argument may be spilled.
bool Matcher::is_spillable_arg(int reg)
{
  return can_be_java_arg(reg);
}

// Never use a hand-written assembly sequence for long division by a constant.
bool Matcher::use_asm_for_ldiv_by_con(jlong divisor) {
  return false;
}

// Register for DIVI projection of divmodI.
// Unused on this port; aborts if called.
RegMask Matcher::divI_proj_mask() {
  ShouldNotReachHere();
  return RegMask();
}

// Register for MODI projection of divmodI.
// Unused on this port; aborts if called.
RegMask Matcher::modI_proj_mask() {
  ShouldNotReachHere();
  return RegMask();
}

// Register for DIVL projection of divmodL.
// Unused on this port; aborts if called.
RegMask Matcher::divL_proj_mask() {
  ShouldNotReachHere();
  return RegMask();
}

// Register for MODL projection of divmodL.
// Unused on this port; aborts if called.
RegMask Matcher::modL_proj_mask() {
  ShouldNotReachHere();
  return RegMask();
}

// SP is preserved across method-handle invokes via the FP register mask.
const RegMask Matcher::method_handle_invoke_SP_save_mask() {
  return FP_REG_mask();
}
2279 
2280 bool size_fits_all_mem_uses(AddPNode* addp, int shift) {
2281   for (DUIterator_Fast imax, i = addp->fast_outs(imax); i < imax; i++) {
2282     Node* u = addp->fast_out(i);
2283     if (u->is_Mem()) {
2284       int opsize = u->as_Mem()->memory_size();
2285       assert(opsize > 0, "unexpected memory operand size");
2286       if (u->as_Mem()->memory_size() != (1<<shift)) {
2287         return false;
2288       }
2289     }
2290   }
2291   return true;
2292 }
2293 
const bool Matcher::convi2l_type_required = false;

// Should the Matcher clone shifts on addressing modes, expecting them
// to be subsumed into complex addressing expressions or compute them
// into registers?
bool Matcher::clone_address_expressions(AddPNode* m, Matcher::MStack& mstack, VectorSet& address_visited) {
  // First try the common base + constant offset form.
  if (clone_base_plus_offset_address(m, mstack, address_visited)) {
    return true;
  }

  Node *off = m->in(AddPNode::Offset);
  // (AddP base (LShiftL conv con)): fold the scaled index into the
  // address, provided every memory user accesses (1 << con) bytes.
  if (off->Opcode() == Op_LShiftL && off->in(2)->is_Con() &&
      size_fits_all_mem_uses(m, off->in(2)->get_int()) &&
      // Are there other uses besides address expressions?
      !is_visited(off)) {
    address_visited.set(off->_idx); // Flag as address_visited
    mstack.push(off->in(2), Visit);
    Node *conv = off->in(1);
    // If the shifted value is (ConvI2L x), subsume the conversion too
    // (the encoder will sign-extend the index).
    if (conv->Opcode() == Op_ConvI2L &&
        // Are there other uses besides address expressions?
        !is_visited(conv)) {
      address_visited.set(conv->_idx); // Flag as address_visited
      mstack.push(conv->in(1), Pre_Visit);
    } else {
      mstack.push(conv, Pre_Visit);
    }
    address_visited.test_set(m->_idx); // Flag as address_visited
    mstack.push(m->in(AddPNode::Address), Pre_Visit);
    mstack.push(m->in(AddPNode::Base), Pre_Visit);
    return true;
  } else if (off->Opcode() == Op_ConvI2L &&
             // Are there other uses besides address expressions?
             !is_visited(off)) {
    // (AddP base (ConvI2L x)): fold the sign-extended index directly.
    address_visited.test_set(m->_idx); // Flag as address_visited
    address_visited.set(off->_idx); // Flag as address_visited
    mstack.push(off->in(1), Pre_Visit);
    mstack.push(m->in(AddPNode::Address), Pre_Visit);
    mstack.push(m->in(AddPNode::Base), Pre_Visit);
    return true;
  }
  return false;
}
2336 
// Nothing to do on aarch64.
void Compile::reshape_address(AddPNode* addp) {
}


// Emit a volatile access.  Only the plain [base] addressing mode is
// legal for volatile accesses; the guarantees below enforce that.
#define MOV_VOLATILE(REG, BASE, INDEX, SCALE, DISP, SCRATCH, INSN)      \
  MacroAssembler _masm(&cbuf);                                          \
  {                                                                     \
    guarantee(INDEX == -1, "mode not permitted for volatile");          \
    guarantee(DISP == 0, "mode not permitted for volatile");            \
    guarantee(SCALE == 0, "mode not permitted for volatile");           \
    __ INSN(REG, as_Register(BASE));                                    \
  }

// Member-function-pointer types for the loadStore() helpers below.
typedef void (MacroAssembler::* mem_insn)(Register Rt, const Address &adr);
typedef void (MacroAssembler::* mem_float_insn)(FloatRegister Rt, const Address &adr);
typedef void (MacroAssembler::* mem_vector_insn)(FloatRegister Rt,
                                  MacroAssembler::SIMD_RegVariant T, const Address &adr);
2354 
2355   // Used for all non-volatile memory accesses.  The use of
2356   // $mem->opcode() to discover whether this pattern uses sign-extended
2357   // offsets is something of a kludge.
2358   static void loadStore(MacroAssembler masm, mem_insn insn,
2359                          Register reg, int opcode,
2360                          Register base, int index, int size, int disp)
2361   {
2362     Address::extend scale;
2363 
2364     // Hooboy, this is fugly.  We need a way to communicate to the
2365     // encoder that the index needs to be sign extended, so we have to
2366     // enumerate all the cases.
2367     switch (opcode) {
2368     case INDINDEXSCALEDI2L:
2369     case INDINDEXSCALEDI2LN:
2370     case INDINDEXI2L:
2371     case INDINDEXI2LN:
2372       scale = Address::sxtw(size);
2373       break;
2374     default:
2375       scale = Address::lsl(size);
2376     }
2377 
2378     if (index == -1) {
2379       (masm.*insn)(reg, Address(base, disp));
2380     } else {
2381       assert(disp == 0, "unsupported address mode: disp = %d", disp);
2382       (masm.*insn)(reg, Address(base, as_Register(index), scale));
2383     }
2384   }
2385 
2386   static void loadStore(MacroAssembler masm, mem_float_insn insn,
2387                          FloatRegister reg, int opcode,
2388                          Register base, int index, int size, int disp)
2389   {
2390     Address::extend scale;
2391 
2392     switch (opcode) {
2393     case INDINDEXSCALEDI2L:
2394     case INDINDEXSCALEDI2LN:
2395       scale = Address::sxtw(size);
2396       break;
2397     default:
2398       scale = Address::lsl(size);
2399     }
2400 
2401      if (index == -1) {
2402       (masm.*insn)(reg, Address(base, disp));
2403     } else {
2404       assert(disp == 0, "unsupported address mode: disp = %d", disp);
2405       (masm.*insn)(reg, Address(base, as_Register(index), scale));
2406     }
2407   }
2408 
2409   static void loadStore(MacroAssembler masm, mem_vector_insn insn,
2410                          FloatRegister reg, MacroAssembler::SIMD_RegVariant T,
2411                          int opcode, Register base, int index, int size, int disp)
2412   {
2413     if (index == -1) {
2414       (masm.*insn)(reg, T, Address(base, disp));
2415     } else {
2416       assert(disp == 0, "unsupported address mode");
2417       (masm.*insn)(reg, T, Address(base, as_Register(index), Address::lsl(size)));
2418     }
2419   }
2420 
2421 %}
2422 
2423 
2424 
2425 //----------ENCODING BLOCK-----------------------------------------------------
2426 // This block specifies the encoding classes used by the compiler to
2427 // output byte streams.  Encoding classes are parameterized macros
2428 // used by Machine Instruction Nodes in order to generate the bit
2429 // encoding of the instruction.  Operands specify their base encoding
// interface with the interface keyword.  Four interfaces are
// currently supported: REG_INTER, CONST_INTER, MEMORY_INTER, &
2432 // COND_INTER.  REG_INTER causes an operand to generate a function
2433 // which returns its register number when queried.  CONST_INTER causes
2434 // an operand to generate a function which returns the value of the
2435 // constant when queried.  MEMORY_INTER causes an operand to generate
2436 // four functions which return the Base Register, the Index Register,
2437 // the Scale Value, and the Offset Value of the operand when queried.
2438 // COND_INTER causes an operand to generate six functions which return
2439 // the encoding code (ie - encoding bits for the instruction)
2440 // associated with each basic boolean condition for a conditional
2441 // instruction.
2442 //
2443 // Instructions specify two basic values for encoding.  Again, a
2444 // function is available to check if the constant displacement is an
2445 // oop. They use the ins_encode keyword to specify their encoding
2446 // classes (which must be a sequence of enc_class names, and their
2447 // parameters, specified in the encoding block), and they use the
2448 // opcode keyword to specify, in order, their primary, secondary, and
2449 // tertiary opcode.  Only the opcode sections which a particular
2450 // instruction needs for encoding need to be specified.
2451 encode %{
2452   // Build emit functions for each basic byte or larger field in the
2453   // intel encoding scheme (opcode, rm, sib, immediate), and call them
2454   // from C++ code in the enc_class source block.  Emit functions will
2455   // live in the main source block for now.  In future, we can
2456   // generalize this by adding a syntax that specifies the sizes of
2457   // fields in an order, so that the adlc can build the emit functions
2458   // automagically
2459 
  // catch all for unimplemented encodings
  enc_class enc_unimplemented %{
    MacroAssembler _masm(&cbuf);
    __ unimplemented("C2 catch all");
  %}

  // BEGIN Non-volatile memory access
  //
  // Each enc_class below emits one load through the loadStore() helper,
  // which derives the addressing mode (and any index sign-extension)
  // from $mem->opcode().  Enc_classes sharing a name but differing in
  // operand signature (iRegI vs iRegL) are distinct ADLC encodings.

  // ldrsbw: load byte, sign-extend into a 32-bit register
  enc_class aarch64_enc_ldrsbw(iRegI dst, memory mem) %{
    Register dst_reg = as_Register($dst$$reg);
    loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldrsbw, dst_reg, $mem->opcode(),
               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}

  // ldrsb: load byte, sign-extend into a 64-bit register
  enc_class aarch64_enc_ldrsb(iRegI dst, memory mem) %{
    Register dst_reg = as_Register($dst$$reg);
    loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldrsb, dst_reg, $mem->opcode(),
               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}

  // ldrb: load byte, zero-extend (int destination)
  enc_class aarch64_enc_ldrb(iRegI dst, memory mem) %{
    Register dst_reg = as_Register($dst$$reg);
    loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldrb, dst_reg, $mem->opcode(),
               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}

  // ldrb: load byte, zero-extend (long destination)
  enc_class aarch64_enc_ldrb(iRegL dst, memory mem) %{
    Register dst_reg = as_Register($dst$$reg);
    loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldrb, dst_reg, $mem->opcode(),
               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}

  // ldrshw: load halfword, sign-extend into a 32-bit register
  enc_class aarch64_enc_ldrshw(iRegI dst, memory mem) %{
    Register dst_reg = as_Register($dst$$reg);
    loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldrshw, dst_reg, $mem->opcode(),
               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}

  // ldrsh: load halfword, sign-extend into a 64-bit register
  enc_class aarch64_enc_ldrsh(iRegI dst, memory mem) %{
    Register dst_reg = as_Register($dst$$reg);
    loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldrsh, dst_reg, $mem->opcode(),
               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}

  // ldrh: load halfword, zero-extend (int destination)
  enc_class aarch64_enc_ldrh(iRegI dst, memory mem) %{
    Register dst_reg = as_Register($dst$$reg);
    loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldrh, dst_reg, $mem->opcode(),
               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}

  // ldrh: load halfword, zero-extend (long destination)
  enc_class aarch64_enc_ldrh(iRegL dst, memory mem) %{
    Register dst_reg = as_Register($dst$$reg);
    loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldrh, dst_reg, $mem->opcode(),
               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}

  // ldrw: load 32-bit word (int destination)
  enc_class aarch64_enc_ldrw(iRegI dst, memory mem) %{
    Register dst_reg = as_Register($dst$$reg);
    loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldrw, dst_reg, $mem->opcode(),
               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}

  // ldrw: load 32-bit word, zero-extend (long destination)
  enc_class aarch64_enc_ldrw(iRegL dst, memory mem) %{
    Register dst_reg = as_Register($dst$$reg);
    loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldrw, dst_reg, $mem->opcode(),
               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}

  // ldrsw: load 32-bit word, sign-extend into a 64-bit register
  enc_class aarch64_enc_ldrsw(iRegL dst, memory mem) %{
    Register dst_reg = as_Register($dst$$reg);
    loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldrsw, dst_reg, $mem->opcode(),
               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}

  // ldr: load 64-bit doubleword
  enc_class aarch64_enc_ldr(iRegL dst, memory mem) %{
    Register dst_reg = as_Register($dst$$reg);
    loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldr, dst_reg, $mem->opcode(),
               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}
2539 
  // ldrs: load 32-bit float into an FP register
  enc_class aarch64_enc_ldrs(vRegF dst, memory mem) %{
    FloatRegister dst_reg = as_FloatRegister($dst$$reg);
    loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldrs, dst_reg, $mem->opcode(),
               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}

  // ldrd: load 64-bit double into an FP register
  enc_class aarch64_enc_ldrd(vRegD dst, memory mem) %{
    FloatRegister dst_reg = as_FloatRegister($dst$$reg);
    loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldrd, dst_reg, $mem->opcode(),
               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}

  // Vector loads: the SIMD_RegVariant (S/D/Q) selects the 32/64/128-bit
  // form of the SIMD ldr.
  enc_class aarch64_enc_ldrvS(vecD dst, memory mem) %{
    FloatRegister dst_reg = as_FloatRegister($dst$$reg);
    loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldr, dst_reg, MacroAssembler::S,
       $mem->opcode(), as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}

  enc_class aarch64_enc_ldrvD(vecD dst, memory mem) %{
    FloatRegister dst_reg = as_FloatRegister($dst$$reg);
    loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldr, dst_reg, MacroAssembler::D,
       $mem->opcode(), as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}

  enc_class aarch64_enc_ldrvQ(vecX dst, memory mem) %{
    FloatRegister dst_reg = as_FloatRegister($dst$$reg);
    loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldr, dst_reg, MacroAssembler::Q,
       $mem->opcode(), as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}
2569 
  // Non-volatile store encodings. Each emits one store instruction via the
  // loadStore() helper. The *0 variants store the zero register (zr)
  // instead of consuming a source operand.

  // Store the low byte of a 32-bit register.
  enc_class aarch64_enc_strb(iRegI src, memory mem) %{
    Register src_reg = as_Register($src$$reg);
    loadStore(MacroAssembler(&cbuf), &MacroAssembler::strb, src_reg, $mem->opcode(),
               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}

  // Store a zero byte.
  enc_class aarch64_enc_strb0(memory mem) %{
    MacroAssembler _masm(&cbuf);
    loadStore(_masm, &MacroAssembler::strb, zr, $mem->opcode(),
               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}

  // Store a zero byte preceded by a StoreStore barrier, so earlier stores
  // are ordered before this one.
  enc_class aarch64_enc_strb0_ordered(memory mem) %{
    MacroAssembler _masm(&cbuf);
    __ membar(Assembler::StoreStore);
    loadStore(_masm, &MacroAssembler::strb, zr, $mem->opcode(),
               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}

  // Store the low halfword of a 32-bit register.
  enc_class aarch64_enc_strh(iRegI src, memory mem) %{
    Register src_reg = as_Register($src$$reg);
    loadStore(MacroAssembler(&cbuf), &MacroAssembler::strh, src_reg, $mem->opcode(),
               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}

  // Store a zero halfword.
  enc_class aarch64_enc_strh0(memory mem) %{
    MacroAssembler _masm(&cbuf);
    loadStore(_masm, &MacroAssembler::strh, zr, $mem->opcode(),
               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}

  // Store a 32-bit word.
  enc_class aarch64_enc_strw(iRegI src, memory mem) %{
    Register src_reg = as_Register($src$$reg);
    loadStore(MacroAssembler(&cbuf), &MacroAssembler::strw, src_reg, $mem->opcode(),
               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}

  // Store a zero word.
  enc_class aarch64_enc_strw0(memory mem) %{
    MacroAssembler _masm(&cbuf);
    loadStore(_masm, &MacroAssembler::strw, zr, $mem->opcode(),
               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}

  // Store a 64-bit value. r31 names both sp and zr; a store of sp cannot
  // be encoded directly, so it is copied through rscratch2 first.
  enc_class aarch64_enc_str(iRegL src, memory mem) %{
    Register src_reg = as_Register($src$$reg);
    // we sometimes get asked to store the stack pointer into the
    // current thread -- we cannot do that directly on AArch64
    if (src_reg == r31_sp) {
      MacroAssembler _masm(&cbuf);
      assert(as_Register($mem$$base) == rthread, "unexpected store for sp");
      __ mov(rscratch2, sp);
      src_reg = rscratch2;
    }
    loadStore(MacroAssembler(&cbuf), &MacroAssembler::str, src_reg, $mem->opcode(),
               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}

  // Store a 64-bit zero.
  enc_class aarch64_enc_str0(memory mem) %{
    MacroAssembler _masm(&cbuf);
    loadStore(_masm, &MacroAssembler::str, zr, $mem->opcode(),
               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}

  // Store a 32-bit float from an FP/SIMD register.
  enc_class aarch64_enc_strs(vRegF src, memory mem) %{
    FloatRegister src_reg = as_FloatRegister($src$$reg);
    loadStore(MacroAssembler(&cbuf), &MacroAssembler::strs, src_reg, $mem->opcode(),
               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}

  // Store a 64-bit double from an FP/SIMD register.
  enc_class aarch64_enc_strd(vRegD src, memory mem) %{
    FloatRegister src_reg = as_FloatRegister($src$$reg);
    loadStore(MacroAssembler(&cbuf), &MacroAssembler::strd, src_reg, $mem->opcode(),
               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}

  // Vector stores: MacroAssembler::S/D/Q selects the SIMD access size.
  enc_class aarch64_enc_strvS(vecD src, memory mem) %{
    FloatRegister src_reg = as_FloatRegister($src$$reg);
    loadStore(MacroAssembler(&cbuf), &MacroAssembler::str, src_reg, MacroAssembler::S,
       $mem->opcode(), as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}

  enc_class aarch64_enc_strvD(vecD src, memory mem) %{
    FloatRegister src_reg = as_FloatRegister($src$$reg);
    loadStore(MacroAssembler(&cbuf), &MacroAssembler::str, src_reg, MacroAssembler::D,
       $mem->opcode(), as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}

  enc_class aarch64_enc_strvQ(vecX src, memory mem) %{
    FloatRegister src_reg = as_FloatRegister($src$$reg);
    loadStore(MacroAssembler(&cbuf), &MacroAssembler::str, src_reg, MacroAssembler::Q,
       $mem->opcode(), as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}
2662 
  // END Non-volatile memory access

  // volatile loads and stores
  //
  // These use the MOV_VOLATILE macro, which resolves the memory operand's
  // base/index/scale/disp into an address (using the given scratch
  // register when needed) and emits the named acquire/release instruction
  // (ldar*/stlr*).

  // Release-store of the low byte.
  enc_class aarch64_enc_stlrb(iRegI src, memory mem) %{
    MOV_VOLATILE(as_Register($src$$reg), $mem$$base, $mem$$index, $mem$$scale, $mem$$disp,
                 rscratch1, stlrb);
  %}

  // Release-store of the low halfword.
  enc_class aarch64_enc_stlrh(iRegI src, memory mem) %{
    MOV_VOLATILE(as_Register($src$$reg), $mem$$base, $mem$$index, $mem$$scale, $mem$$disp,
                 rscratch1, stlrh);
  %}

  // Release-store of a 32-bit word.
  enc_class aarch64_enc_stlrw(iRegI src, memory mem) %{
    MOV_VOLATILE(as_Register($src$$reg), $mem$$base, $mem$$index, $mem$$scale, $mem$$disp,
                 rscratch1, stlrw);
  %}


  // Acquire-load of a byte, then sign-extend to 32 bits in place.
  enc_class aarch64_enc_ldarsbw(iRegI dst, memory mem) %{
    Register dst_reg = as_Register($dst$$reg);
    MOV_VOLATILE(dst_reg, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp,
             rscratch1, ldarb);
    __ sxtbw(dst_reg, dst_reg);
  %}

  // Acquire-load of a byte, then sign-extend to 64 bits in place.
  enc_class aarch64_enc_ldarsb(iRegL dst, memory mem) %{
    Register dst_reg = as_Register($dst$$reg);
    MOV_VOLATILE(dst_reg, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp,
             rscratch1, ldarb);
    __ sxtb(dst_reg, dst_reg);
  %}

  // Acquire-load of a byte, zero-extended (32-bit destination).
  enc_class aarch64_enc_ldarbw(iRegI dst, memory mem) %{
    MOV_VOLATILE(as_Register($dst$$reg), $mem$$base, $mem$$index, $mem$$scale, $mem$$disp,
             rscratch1, ldarb);
  %}

  // Acquire-load of a byte, zero-extended (64-bit destination).
  enc_class aarch64_enc_ldarb(iRegL dst, memory mem) %{
    MOV_VOLATILE(as_Register($dst$$reg), $mem$$base, $mem$$index, $mem$$scale, $mem$$disp,
             rscratch1, ldarb);
  %}

  // Acquire-load of a halfword, then sign-extend to 32 bits in place.
  enc_class aarch64_enc_ldarshw(iRegI dst, memory mem) %{
    Register dst_reg = as_Register($dst$$reg);
    MOV_VOLATILE(dst_reg, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp,
             rscratch1, ldarh);
    __ sxthw(dst_reg, dst_reg);
  %}

  // Acquire-load of a halfword, then sign-extend to 64 bits in place.
  enc_class aarch64_enc_ldarsh(iRegL dst, memory mem) %{
    Register dst_reg = as_Register($dst$$reg);
    MOV_VOLATILE(dst_reg, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp,
             rscratch1, ldarh);
    __ sxth(dst_reg, dst_reg);
  %}

  // Acquire-load of a halfword, zero-extended (32-bit destination).
  enc_class aarch64_enc_ldarhw(iRegI dst, memory mem) %{
    MOV_VOLATILE(as_Register($dst$$reg), $mem$$base, $mem$$index, $mem$$scale, $mem$$disp,
             rscratch1, ldarh);
  %}

  // Acquire-load of a halfword, zero-extended (64-bit destination).
  enc_class aarch64_enc_ldarh(iRegL dst, memory mem) %{
    MOV_VOLATILE(as_Register($dst$$reg), $mem$$base, $mem$$index, $mem$$scale, $mem$$disp,
             rscratch1, ldarh);
  %}

  // Acquire-load of a 32-bit word.
  enc_class aarch64_enc_ldarw(iRegI dst, memory mem) %{
    MOV_VOLATILE(as_Register($dst$$reg), $mem$$base, $mem$$index, $mem$$scale, $mem$$disp,
             rscratch1, ldarw);
  %}

  // Acquire-load of a 32-bit word into a long register.
  // NOTE(review): same enc_class name as the iRegI variant above; the two
  // differ only in operand types -- confirm ADLC accepts this overload.
  enc_class aarch64_enc_ldarw(iRegL dst, memory mem) %{
    MOV_VOLATILE(as_Register($dst$$reg), $mem$$base, $mem$$index, $mem$$scale, $mem$$disp,
             rscratch1, ldarw);
  %}

  // Acquire-load of a 64-bit value.
  enc_class aarch64_enc_ldar(iRegL dst, memory mem) %{
    MOV_VOLATILE(as_Register($dst$$reg), $mem$$base, $mem$$index, $mem$$scale, $mem$$disp,
             rscratch1, ldar);
  %}

  // Acquire-load of a float: load the 32-bit pattern into rscratch1,
  // then move it into the FP register.
  enc_class aarch64_enc_fldars(vRegF dst, memory mem) %{
    MOV_VOLATILE(rscratch1, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp,
             rscratch1, ldarw);
    __ fmovs(as_FloatRegister($dst$$reg), rscratch1);
  %}

  // Acquire-load of a double: load the 64-bit pattern into rscratch1,
  // then move it into the FP register.
  enc_class aarch64_enc_fldard(vRegD dst, memory mem) %{
    MOV_VOLATILE(rscratch1, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp,
             rscratch1, ldar);
    __ fmovd(as_FloatRegister($dst$$reg), rscratch1);
  %}

  // Release-store of a 64-bit value; like aarch64_enc_str, a store of sp
  // must be routed through rscratch2.
  enc_class aarch64_enc_stlr(iRegL src, memory mem) %{
    Register src_reg = as_Register($src$$reg);
    // we sometimes get asked to store the stack pointer into the
    // current thread -- we cannot do that directly on AArch64
    if (src_reg == r31_sp) {
        MacroAssembler _masm(&cbuf);
      assert(as_Register($mem$$base) == rthread, "unexpected store for sp");
      __ mov(rscratch2, sp);
      src_reg = rscratch2;
    }
    MOV_VOLATILE(src_reg, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp,
                 rscratch1, stlr);
  %}

  // Release-store of a float: move the bit pattern to rscratch2 (inner
  // scope so _masm does not clash with the one inside MOV_VOLATILE),
  // then release-store the word.
  enc_class aarch64_enc_fstlrs(vRegF src, memory mem) %{
    {
      MacroAssembler _masm(&cbuf);
      FloatRegister src_reg = as_FloatRegister($src$$reg);
      __ fmovs(rscratch2, src_reg);
    }
    MOV_VOLATILE(rscratch2, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp,
                 rscratch1, stlrw);
  %}

  // Release-store of a double, via rscratch2 as above.
  enc_class aarch64_enc_fstlrd(vRegD src, memory mem) %{
    {
      MacroAssembler _masm(&cbuf);
      FloatRegister src_reg = as_FloatRegister($src$$reg);
      __ fmovd(rscratch2, src_reg);
    }
    MOV_VOLATILE(rscratch2, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp,
                 rscratch1, stlr);
  %}
2791 
  // synchronized read/update encodings

  // Load-acquire exclusive of a 64-bit value. ldaxr only accepts a plain
  // base register, so any index/displacement is first materialized into
  // rscratch1 with lea.
  enc_class aarch64_enc_ldaxr(iRegL dst, memory mem) %{
    MacroAssembler _masm(&cbuf);
    Register dst_reg = as_Register($dst$$reg);
    Register base = as_Register($mem$$base);
    int index = $mem$$index;
    int scale = $mem$$scale;
    int disp = $mem$$disp;
    if (index == -1) {
       if (disp != 0) {
        __ lea(rscratch1, Address(base, disp));
        __ ldaxr(dst_reg, rscratch1);
      } else {
        // TODO
        // should we ever get anything other than this case?
        __ ldaxr(dst_reg, base);
      }
    } else {
      Register index_reg = as_Register(index);
      if (disp == 0) {
        __ lea(rscratch1, Address(base, index_reg, Address::lsl(scale)));
        __ ldaxr(dst_reg, rscratch1);
      } else {
        // base + disp first, then add the scaled index.
        __ lea(rscratch1, Address(base, disp));
        __ lea(rscratch1, Address(rscratch1, index_reg, Address::lsl(scale)));
        __ ldaxr(dst_reg, rscratch1);
      }
    }
  %}

  // Store-release exclusive of a 64-bit value; same addressing strategy
  // as ldaxr above but using rscratch2 for the address, since rscratch1
  // receives the stlxr status result. The trailing cmpw sets the flags
  // from that status (0 == success).
  enc_class aarch64_enc_stlxr(iRegLNoSp src, memory mem) %{
    MacroAssembler _masm(&cbuf);
    Register src_reg = as_Register($src$$reg);
    Register base = as_Register($mem$$base);
    int index = $mem$$index;
    int scale = $mem$$scale;
    int disp = $mem$$disp;
    if (index == -1) {
       if (disp != 0) {
        __ lea(rscratch2, Address(base, disp));
        __ stlxr(rscratch1, src_reg, rscratch2);
      } else {
        // TODO
        // should we ever get anything other than this case?
        __ stlxr(rscratch1, src_reg, base);
      }
    } else {
      Register index_reg = as_Register(index);
      if (disp == 0) {
        __ lea(rscratch2, Address(base, index_reg, Address::lsl(scale)));
        __ stlxr(rscratch1, src_reg, rscratch2);
      } else {
        __ lea(rscratch2, Address(base, disp));
        __ lea(rscratch2, Address(rscratch2, index_reg, Address::lsl(scale)));
        __ stlxr(rscratch1, src_reg, rscratch2);
      }
    }
    __ cmpw(rscratch1, zr);
  %}
2852 
  // Compare-and-swap encodings. Each delegates to MacroAssembler::cmpxchg
  // with the operand size given by the Assembler size constant; the memory
  // operand must be a bare base register (no index, no displacement).
  // All are release-only (acquire == false) except the _acq variants below.

  // 64-bit CAS.
  enc_class aarch64_enc_cmpxchg(memory mem, iRegLNoSp oldval, iRegLNoSp newval) %{
    MacroAssembler _masm(&cbuf);
    guarantee($mem$$index == -1 && $mem$$disp == 0, "impossible encoding");
    __ cmpxchg($mem$$base$$Register, $oldval$$Register, $newval$$Register,
               Assembler::xword, /*acquire*/ false, /*release*/ true,
               /*weak*/ false, noreg);
  %}

  // 32-bit CAS.
  enc_class aarch64_enc_cmpxchgw(memory mem, iRegINoSp oldval, iRegINoSp newval) %{
    MacroAssembler _masm(&cbuf);
    guarantee($mem$$index == -1 && $mem$$disp == 0, "impossible encoding");
    __ cmpxchg($mem$$base$$Register, $oldval$$Register, $newval$$Register,
               Assembler::word, /*acquire*/ false, /*release*/ true,
               /*weak*/ false, noreg);
  %}

  // 16-bit CAS.
  enc_class aarch64_enc_cmpxchgs(memory mem, iRegINoSp oldval, iRegINoSp newval) %{
    MacroAssembler _masm(&cbuf);
    guarantee($mem$$index == -1 && $mem$$disp == 0, "impossible encoding");
    __ cmpxchg($mem$$base$$Register, $oldval$$Register, $newval$$Register,
               Assembler::halfword, /*acquire*/ false, /*release*/ true,
               /*weak*/ false, noreg);
  %}

  // 8-bit CAS.
  enc_class aarch64_enc_cmpxchgb(memory mem, iRegINoSp oldval, iRegINoSp newval) %{
    MacroAssembler _masm(&cbuf);
    guarantee($mem$$index == -1 && $mem$$disp == 0, "impossible encoding");
    __ cmpxchg($mem$$base$$Register, $oldval$$Register, $newval$$Register,
               Assembler::byte, /*acquire*/ false, /*release*/ true,
               /*weak*/ false, noreg);
  %}


  // The only difference between aarch64_enc_cmpxchg and
  // aarch64_enc_cmpxchg_acq is that we use load-acquire in the
  // CompareAndSwap sequence to serve as a barrier on acquiring a
  // lock.
  enc_class aarch64_enc_cmpxchg_acq(memory mem, iRegLNoSp oldval, iRegLNoSp newval) %{
    MacroAssembler _masm(&cbuf);
    guarantee($mem$$index == -1 && $mem$$disp == 0, "impossible encoding");
    __ cmpxchg($mem$$base$$Register, $oldval$$Register, $newval$$Register,
               Assembler::xword, /*acquire*/ true, /*release*/ true,
               /*weak*/ false, noreg);
  %}

  // 32-bit acquiring CAS.
  enc_class aarch64_enc_cmpxchgw_acq(memory mem, iRegINoSp oldval, iRegINoSp newval) %{
    MacroAssembler _masm(&cbuf);
    guarantee($mem$$index == -1 && $mem$$disp == 0, "impossible encoding");
    __ cmpxchg($mem$$base$$Register, $oldval$$Register, $newval$$Register,
               Assembler::word, /*acquire*/ true, /*release*/ true,
               /*weak*/ false, noreg);
  %}


  // auxiliary used for CompareAndSwapX to set result register
  // (res = 1 if flags say EQ, else 0).
  enc_class aarch64_enc_cset_eq(iRegINoSp res) %{
    MacroAssembler _masm(&cbuf);
    Register res_reg = as_Register($res$$reg);
    __ cset(res_reg, Assembler::EQ);
  %}
2913 
  // prefetch encodings

  // Prefetch for store (PSTL1KEEP hint). prfm handles base+disp and
  // base+scaled-index directly; the combined form needs the displacement
  // folded into rscratch1 first.
  enc_class aarch64_enc_prefetchw(memory mem) %{
    MacroAssembler _masm(&cbuf);
    Register base = as_Register($mem$$base);
    int index = $mem$$index;
    int scale = $mem$$scale;
    int disp = $mem$$disp;
    if (index == -1) {
      __ prfm(Address(base, disp), PSTL1KEEP);
    } else {
      Register index_reg = as_Register(index);
      if (disp == 0) {
        __ prfm(Address(base, index_reg, Address::lsl(scale)), PSTL1KEEP);
      } else {
        __ lea(rscratch1, Address(base, disp));
        __ prfm(Address(rscratch1, index_reg, Address::lsl(scale)), PSTL1KEEP);
      }
    }
  %}
2934 
  /// mov encodings

  // Move a 32-bit immediate into a register; zero goes through zr.
  enc_class aarch64_enc_movw_imm(iRegI dst, immI src) %{
    MacroAssembler _masm(&cbuf);
    u_int32_t con = (u_int32_t)$src$$constant;
    Register dst_reg = as_Register($dst$$reg);
    if (con == 0) {
      __ movw(dst_reg, zr);
    } else {
      __ movw(dst_reg, con);
    }
  %}

  // Move a 64-bit immediate into a register; zero goes through zr.
  enc_class aarch64_enc_mov_imm(iRegL dst, immL src) %{
    MacroAssembler _masm(&cbuf);
    Register dst_reg = as_Register($dst$$reg);
    u_int64_t con = (u_int64_t)$src$$constant;
    if (con == 0) {
      __ mov(dst_reg, zr);
    } else {
      __ mov(dst_reg, con);
    }
  %}

  // Move a pointer constant into a register, dispatching on its
  // relocation type: oop, metadata, or a plain (non-relocated) address.
  // NULL and 1 are handled by separate encodings (mov_p0 / mov_p1).
  enc_class aarch64_enc_mov_p(iRegP dst, immP src) %{
    MacroAssembler _masm(&cbuf);
    Register dst_reg = as_Register($dst$$reg);
    address con = (address)$src$$constant;
    if (con == NULL || con == (address)1) {
      ShouldNotReachHere();
    } else {
      relocInfo::relocType rtype = $src->constant_reloc();
      if (rtype == relocInfo::oop_type) {
        __ movoop(dst_reg, (jobject)con, /*immediate*/true);
      } else if (rtype == relocInfo::metadata_type) {
        __ mov_metadata(dst_reg, (Metadata*)con);
      } else {
        assert(rtype == relocInfo::none, "unexpected reloc type");
        if (con < (address)(uintptr_t)os::vm_page_size()) {
          // Small addresses fit a plain mov.
          __ mov(dst_reg, con);
        } else {
          // Larger addresses are built page-relative via adrp + add.
          unsigned long offset;
          __ adrp(dst_reg, con, offset);
          __ add(dst_reg, dst_reg, offset);
        }
      }
    }
  %}

  // Null pointer constant.
  enc_class aarch64_enc_mov_p0(iRegP dst, immP0 src) %{
    MacroAssembler _masm(&cbuf);
    Register dst_reg = as_Register($dst$$reg);
    __ mov(dst_reg, zr);
  %}

  // Pointer constant one.
  enc_class aarch64_enc_mov_p1(iRegP dst, immP_1 src) %{
    MacroAssembler _masm(&cbuf);
    Register dst_reg = as_Register($dst$$reg);
    __ mov(dst_reg, (u_int64_t)1);
  %}

  // Load the polling page address with a poll_type relocation; the page
  // is expected to be aligned so adrp leaves no residual offset.
  enc_class aarch64_enc_mov_poll_page(iRegP dst, immPollPage src) %{
    MacroAssembler _masm(&cbuf);
    address page = (address)$src$$constant;
    Register dst_reg = as_Register($dst$$reg);
    unsigned long off;
    __ adrp(dst_reg, Address(page, relocInfo::poll_type), off);
    assert(off == 0, "assumed offset == 0");
  %}

  // Load the card table's byte map base.
  enc_class aarch64_enc_mov_byte_map_base(iRegP dst, immByteMapBase src) %{
    MacroAssembler _masm(&cbuf);
    __ load_byte_map_base($dst$$Register);
  %}

  // Set a narrow (compressed) oop constant.
  enc_class aarch64_enc_mov_n(iRegN dst, immN src) %{
    MacroAssembler _masm(&cbuf);
    Register dst_reg = as_Register($dst$$reg);
    address con = (address)$src$$constant;
    if (con == NULL) {
      ShouldNotReachHere();
    } else {
      relocInfo::relocType rtype = $src->constant_reloc();
      assert(rtype == relocInfo::oop_type, "unexpected reloc type");
      __ set_narrow_oop(dst_reg, (jobject)con);
    }
  %}

  // Narrow oop zero.
  enc_class aarch64_enc_mov_n0(iRegN dst, immN0 src) %{
    MacroAssembler _masm(&cbuf);
    Register dst_reg = as_Register($dst$$reg);
    __ mov(dst_reg, zr);
  %}

  // Set a narrow (compressed) klass constant.
  enc_class aarch64_enc_mov_nk(iRegN dst, immNKlass src) %{
    MacroAssembler _masm(&cbuf);
    Register dst_reg = as_Register($dst$$reg);
    address con = (address)$src$$constant;
    if (con == NULL) {
      ShouldNotReachHere();
    } else {
      relocInfo::relocType rtype = $src->constant_reloc();
      assert(rtype == relocInfo::metadata_type, "unexpected reloc type");
      __ set_narrow_klass(dst_reg, (Klass *)con);
    }
  %}
3041 
  // arithmetic encodings

  // 32-bit add/subtract of an immediate. One encoding serves both ops:
  // $primary (0 = add, 1 = subtract) flips the sign, and a negative
  // result is emitted as the opposite instruction with a positive
  // immediate so it still fits the add/sub immediate field.
  enc_class aarch64_enc_addsubw_imm(iRegI dst, iRegI src1, immIAddSub src2) %{
    MacroAssembler _masm(&cbuf);
    Register dst_reg = as_Register($dst$$reg);
    Register src_reg = as_Register($src1$$reg);
    int32_t con = (int32_t)$src2$$constant;
    // add has primary == 0, subtract has primary == 1
    if ($primary) { con = -con; }
    if (con < 0) {
      __ subw(dst_reg, src_reg, -con);
    } else {
      __ addw(dst_reg, src_reg, con);
    }
  %}

  // 64-bit add/subtract of an immediate, same $primary convention.
  enc_class aarch64_enc_addsub_imm(iRegL dst, iRegL src1, immLAddSub src2) %{
    MacroAssembler _masm(&cbuf);
    Register dst_reg = as_Register($dst$$reg);
    Register src_reg = as_Register($src1$$reg);
    int32_t con = (int32_t)$src2$$constant;
    // add has primary == 0, subtract has primary == 1
    if ($primary) { con = -con; }
    if (con < 0) {
      __ sub(dst_reg, src_reg, -con);
    } else {
      __ add(dst_reg, src_reg, con);
    }
  %}

  // 32-bit signed division with the Java-mandated corrections
  // (MIN_VALUE / -1); the final flag selects quotient (false) vs
  // remainder (true) in corrected_idivl/idivq.
  enc_class aarch64_enc_divw(iRegI dst, iRegI src1, iRegI src2) %{
    MacroAssembler _masm(&cbuf);
   Register dst_reg = as_Register($dst$$reg);
   Register src1_reg = as_Register($src1$$reg);
   Register src2_reg = as_Register($src2$$reg);
    __ corrected_idivl(dst_reg, src1_reg, src2_reg, false, rscratch1);
  %}

  // 64-bit signed division (corrected_idivq).
  // NOTE(review): operands are declared iRegI though this emits the
  // 64-bit idivq -- presumably the enc_class parameter types are
  // informational only; confirm against the matching instruct rules.
  enc_class aarch64_enc_div(iRegI dst, iRegI src1, iRegI src2) %{
    MacroAssembler _masm(&cbuf);
   Register dst_reg = as_Register($dst$$reg);
   Register src1_reg = as_Register($src1$$reg);
   Register src2_reg = as_Register($src2$$reg);
    __ corrected_idivq(dst_reg, src1_reg, src2_reg, false, rscratch1);
  %}

  // 32-bit signed remainder.
  enc_class aarch64_enc_modw(iRegI dst, iRegI src1, iRegI src2) %{
    MacroAssembler _masm(&cbuf);
   Register dst_reg = as_Register($dst$$reg);
   Register src1_reg = as_Register($src1$$reg);
   Register src2_reg = as_Register($src2$$reg);
    __ corrected_idivl(dst_reg, src1_reg, src2_reg, true, rscratch1);
  %}

  // 64-bit signed remainder.
  enc_class aarch64_enc_mod(iRegI dst, iRegI src1, iRegI src2) %{
    MacroAssembler _masm(&cbuf);
   Register dst_reg = as_Register($dst$$reg);
   Register src1_reg = as_Register($src1$$reg);
   Register src2_reg = as_Register($src2$$reg);
    __ corrected_idivq(dst_reg, src1_reg, src2_reg, true, rscratch1);
  %}
3103 
  // compare instruction encodings

  // 32-bit register-register compare.
  enc_class aarch64_enc_cmpw(iRegI src1, iRegI src2) %{
    MacroAssembler _masm(&cbuf);
    Register reg1 = as_Register($src1$$reg);
    Register reg2 = as_Register($src2$$reg);
    __ cmpw(reg1, reg2);
  %}

  // 32-bit compare against an add/sub-range immediate: emitted as a
  // flag-setting subtract (or add, for a negative immediate) into zr.
  enc_class aarch64_enc_cmpw_imm_addsub(iRegI src1, immIAddSub src2) %{
    MacroAssembler _masm(&cbuf);
    Register reg = as_Register($src1$$reg);
    int32_t val = $src2$$constant;
    if (val >= 0) {
      __ subsw(zr, reg, val);
    } else {
      __ addsw(zr, reg, -val);
    }
  %}

  // 32-bit compare against an arbitrary immediate, materialized into
  // rscratch1 first.
  enc_class aarch64_enc_cmpw_imm(iRegI src1, immI src2) %{
    MacroAssembler _masm(&cbuf);
    Register reg1 = as_Register($src1$$reg);
    u_int32_t val = (u_int32_t)$src2$$constant;
    __ movw(rscratch1, val);
    __ cmpw(reg1, rscratch1);
  %}

  // 64-bit register-register compare.
  enc_class aarch64_enc_cmp(iRegL src1, iRegL src2) %{
    MacroAssembler _masm(&cbuf);
    Register reg1 = as_Register($src1$$reg);
    Register reg2 = as_Register($src2$$reg);
    __ cmp(reg1, reg2);
  %}

  // 64-bit compare against a 12-bit-range immediate. val == -val only
  // for Long.MIN_VALUE, which cannot be negated and so is built in
  // rscratch1 instead.
  enc_class aarch64_enc_cmp_imm_addsub(iRegL src1, immL12 src2) %{
    MacroAssembler _masm(&cbuf);
    Register reg = as_Register($src1$$reg);
    int64_t val = $src2$$constant;
    if (val >= 0) {
      __ subs(zr, reg, val);
    } else if (val != -val) {
      __ adds(zr, reg, -val);
    } else {
    // aargh, Long.MIN_VALUE is a special case
      __ orr(rscratch1, zr, (u_int64_t)val);
      __ subs(zr, reg, rscratch1);
    }
  %}

  // 64-bit compare against an arbitrary immediate, via rscratch1.
  enc_class aarch64_enc_cmp_imm(iRegL src1, immL src2) %{
    MacroAssembler _masm(&cbuf);
    Register reg1 = as_Register($src1$$reg);
    u_int64_t val = (u_int64_t)$src2$$constant;
    __ mov(rscratch1, val);
    __ cmp(reg1, rscratch1);
  %}

  // Pointer compare (full 64-bit).
  enc_class aarch64_enc_cmpp(iRegP src1, iRegP src2) %{
    MacroAssembler _masm(&cbuf);
    Register reg1 = as_Register($src1$$reg);
    Register reg2 = as_Register($src2$$reg);
    __ cmp(reg1, reg2);
  %}

  // Narrow oop compare (32-bit).
  enc_class aarch64_enc_cmpn(iRegN src1, iRegN src2) %{
    MacroAssembler _masm(&cbuf);
    Register reg1 = as_Register($src1$$reg);
    Register reg2 = as_Register($src2$$reg);
    __ cmpw(reg1, reg2);
  %}

  // Pointer null test: compare against zr.
  enc_class aarch64_enc_testp(iRegP src) %{
    MacroAssembler _masm(&cbuf);
    Register reg = as_Register($src$$reg);
    __ cmp(reg, zr);
  %}

  // Narrow oop null test: 32-bit compare against zr.
  enc_class aarch64_enc_testn(iRegN src) %{
    MacroAssembler _masm(&cbuf);
    Register reg = as_Register($src$$reg);
    __ cmpw(reg, zr);
  %}
3187 
  // Unconditional branch to a label.
  enc_class aarch64_enc_b(label lbl) %{
    MacroAssembler _masm(&cbuf);
    Label *L = $lbl$$label;
    __ b(*L);
  %}

  // Conditional branch; the condition code comes from the cmpOp operand.
  enc_class aarch64_enc_br_con(cmpOp cmp, label lbl) %{
    MacroAssembler _masm(&cbuf);
    Label *L = $lbl$$label;
    __ br ((Assembler::Condition)$cmp$$cmpcode, *L);
  %}

  // Conditional branch with an unsigned condition operand (same emission).
  enc_class aarch64_enc_br_conU(cmpOpU cmp, label lbl) %{
    MacroAssembler _masm(&cbuf);
    Label *L = $lbl$$label;
    __ br ((Assembler::Condition)$cmp$$cmpcode, *L);
  %}

  // Slow-path subtype check via check_klass_subtype_slow_path; falls
  // through on success. When $primary is set, result is zeroed on the
  // success path before the miss label is bound.
  enc_class aarch64_enc_partial_subtype_check(iRegP sub, iRegP super, iRegP temp, iRegP result)
  %{
     Register sub_reg = as_Register($sub$$reg);
     Register super_reg = as_Register($super$$reg);
     Register temp_reg = as_Register($temp$$reg);
     Register result_reg = as_Register($result$$reg);

     Label miss;
     MacroAssembler _masm(&cbuf);
     __ check_klass_subtype_slow_path(sub_reg, super_reg, temp_reg, result_reg,
                                     NULL, &miss,
                                     /*set_cond_codes:*/ true);
     if ($primary) {
       __ mov(result_reg, zr);
     }
     __ bind(miss);
  %}
3223 
  // Static Java call. Runtime stubs (no _method) get a plain runtime-call
  // trampoline; real Java targets get an (optimized-)virtual or static
  // call relocation plus a to-interpreter stub. Either path bails out
  // with a "CodeCache is full" failure if emission fails.
  enc_class aarch64_enc_java_static_call(method meth) %{
    MacroAssembler _masm(&cbuf);

    address addr = (address)$meth$$method;
    address call;
    if (!_method) {
      // A call to a runtime wrapper, e.g. new, new_typeArray_Java, uncommon_trap.
      call = __ trampoline_call(Address(addr, relocInfo::runtime_call_type), &cbuf);
    } else {
      int method_index = resolved_method_index(cbuf);
      RelocationHolder rspec = _optimized_virtual ? opt_virtual_call_Relocation::spec(method_index)
                                                  : static_call_Relocation::spec(method_index);
      call = __ trampoline_call(Address(addr, rspec), &cbuf);

      // Emit stub for static call
      address stub = CompiledStaticCall::emit_to_interp_stub(cbuf);
      if (stub == NULL) {
        ciEnv::current()->record_failure("CodeCache is full");
        return;
      }
    }
    if (call == NULL) {
      ciEnv::current()->record_failure("CodeCache is full");
      return;
    }
  %}

  // Dynamic (inline-cache) Java call.
  enc_class aarch64_enc_java_dynamic_call(method meth) %{
    MacroAssembler _masm(&cbuf);
    int method_index = resolved_method_index(cbuf);
    address call = __ ic_call((address)$meth$$method, method_index);
    if (call == NULL) {
      ciEnv::current()->record_failure("CodeCache is full");
      return;
    }
  %}

  // Post-call epilog; stack-depth verification is not implemented here.
  enc_class aarch64_enc_call_epilog() %{
    MacroAssembler _masm(&cbuf);
    if (VerifyStackAtCalls) {
      // Check that stack depth is unchanged: find majik cookie on stack
      __ call_Unimplemented();
    }
  %}
3268 
  // Call from compiled Java code into the runtime.
  enc_class aarch64_enc_java_to_runtime(method meth) %{
    MacroAssembler _masm(&cbuf);

    // some calls to generated routines (arraycopy code) are scheduled
    // by C2 as runtime calls. if so we can call them using a br (they
    // will be in a reachable segment) otherwise we have to use a blr
    // which loads the absolute address into a register.
    address entry = (address)$meth$$method;
    CodeBlob *cb = CodeCache::find_blob(entry);
    if (cb) {
      // Target lives in the code cache: reachable via a trampoline call.
      address call = __ trampoline_call(Address(entry, relocInfo::runtime_call_type));
      if (call == NULL) {
        ciEnv::current()->record_failure("CodeCache is full");
        return;
      }
    } else {
      // Target outside the code cache: load absolute address and blr,
      // pushing the return address so the stack walker can find it.
      Label retaddr;
      __ adr(rscratch2, retaddr);
      __ lea(rscratch1, RuntimeAddress(entry));
      // Leave a breadcrumb for JavaFrameAnchor::capture_last_Java_pc()
      __ stp(zr, rscratch2, Address(__ pre(sp, -2 * wordSize)));
      __ blr(rscratch1);
      __ bind(retaddr);
      __ add(sp, sp, 2 * wordSize);
    }
  %}

  // Jump to the rethrow stub (may be out of branch range, hence far_jump).
  enc_class aarch64_enc_rethrow() %{
    MacroAssembler _masm(&cbuf);
    __ far_jump(RuntimeAddress(OptoRuntime::rethrow_stub()));
  %}

  // Plain method return.
  enc_class aarch64_enc_ret() %{
    MacroAssembler _masm(&cbuf);
    __ ret(lr);
  %}

  // Tail call: indirect jump to the target register.
  enc_class aarch64_enc_tail_call(iRegP jump_target) %{
    MacroAssembler _masm(&cbuf);
    Register target_reg = as_Register($jump_target$$reg);
    __ br(target_reg);
  %}

  // Tail jump for exception forwarding: pass the return address in r3,
  // then jump to the target.
  enc_class aarch64_enc_tail_jmp(iRegP jump_target) %{
    MacroAssembler _masm(&cbuf);
    Register target_reg = as_Register($jump_target$$reg);
    // exception oop should be in r0
    // ret addr has been popped into lr
    // callee expects it in r3
    __ mov(r3, lr);
    __ br(target_reg);
  %}
3321 
3322   enc_class aarch64_enc_fast_lock(iRegP object, iRegP box, iRegP tmp, iRegP tmp2) %{
3323     MacroAssembler _masm(&cbuf);
3324     Register oop = as_Register($object$$reg);
3325     Register box = as_Register($box$$reg);
3326     Register disp_hdr = as_Register($tmp$$reg);
3327     Register tmp = as_Register($tmp2$$reg);
3328     Label cont;
3329     Label object_has_monitor;
3330     Label cas_failed;
3331 
3332     assert_different_registers(oop, box, tmp, disp_hdr);
3333 
3334     // Load markOop from object into displaced_header.
3335     __ ldr(disp_hdr, Address(oop, oopDesc::mark_offset_in_bytes()));
3336 
3337     // Always do locking in runtime.
3338     if (EmitSync & 0x01) {
3339       __ cmp(oop, zr);
3340       return;
3341     }
3342 
3343     if (UseBiasedLocking && !UseOptoBiasInlining) {
3344       __ biased_locking_enter(box, oop, disp_hdr, tmp, true, cont);
3345     }
3346 
3347     // Check for existing monitor
3348     if ((EmitSync & 0x02) == 0) {
3349       __ tbnz(disp_hdr, exact_log2(markOopDesc::monitor_value), object_has_monitor);
3350     }
3351 
3352     // Set tmp to be (markOop of object | UNLOCK_VALUE).
3353     __ orr(tmp, disp_hdr, markOopDesc::unlocked_value);
3354 
3355     // Initialize the box. (Must happen before we update the object mark!)
3356     __ str(tmp, Address(box, BasicLock::displaced_header_offset_in_bytes()));
3357 
3358     // Compare object markOop with an unlocked value (tmp) and if
3359     // equal exchange the stack address of our box with object markOop.
3360     // On failure disp_hdr contains the possibly locked markOop.
3361     __ cmpxchg(oop, tmp, box, Assembler::xword, /*acquire*/ true,
3362                /*release*/ true, /*weak*/ false, disp_hdr);
3363     __ br(Assembler::EQ, cont);
3364 
3365     assert(oopDesc::mark_offset_in_bytes() == 0, "offset of _mark is not 0");
3366 
3367     // If the compare-and-exchange succeeded, then we found an unlocked
3368     // object, will have now locked it will continue at label cont
3369 
3370     __ bind(cas_failed);
3371     // We did not see an unlocked object so try the fast recursive case.
3372 
3373     // Check if the owner is self by comparing the value in the
3374     // markOop of object (disp_hdr) with the stack pointer.
3375     __ mov(rscratch1, sp);
3376     __ sub(disp_hdr, disp_hdr, rscratch1);
3377     __ mov(tmp, (address) (~(os::vm_page_size()-1) | (uintptr_t)markOopDesc::lock_mask_in_place));
3378     // If condition is true we are cont and hence we can store 0 as the
3379     // displaced header in the box, which indicates that it is a recursive lock.
3380     __ ands(tmp/*==0?*/, disp_hdr, tmp);   // Sets flags for result
3381     __ str(tmp/*==0, perhaps*/, Address(box, BasicLock::displaced_header_offset_in_bytes()));
3382 
3383     if ((EmitSync & 0x02) == 0) {
3384       __ b(cont);
3385 
3386       // Handle existing monitor.
3387       __ bind(object_has_monitor);
3388       // The object's monitor m is unlocked iff m->owner == NULL,
3389       // otherwise m->owner may contain a thread or a stack address.
3390       //
3391       // Try to CAS m->owner from NULL to current thread.
3392       __ add(tmp, disp_hdr, (ObjectMonitor::owner_offset_in_bytes()-markOopDesc::monitor_value));
3393     __ cmpxchg(tmp, zr, rthread, Assembler::xword, /*acquire*/ true,
3394                /*release*/ true, /*weak*/ false, noreg); // Sets flags for result
3395 
3396       // Store a non-null value into the box to avoid looking like a re-entrant
3397       // lock. The fast-path monitor unlock code checks for
3398       // markOopDesc::monitor_value so use markOopDesc::unused_mark which has the
3399       // relevant bit set, and also matches ObjectSynchronizer::slow_enter.
3400       __ mov(tmp, (address)markOopDesc::unused_mark());
3401       __ str(tmp, Address(box, BasicLock::displaced_header_offset_in_bytes()));
3402     }
3403 
3404     __ bind(cont);
3405     // flag == EQ indicates success
3406     // flag == NE indicates failure
3407   %}
3408 
  // Fast-path monitor exit. On exit the condition flags encode the result:
  // EQ = unlocked on the fast path, NE = must call the runtime.
  enc_class aarch64_enc_fast_unlock(iRegP object, iRegP box, iRegP tmp, iRegP tmp2) %{
    MacroAssembler _masm(&cbuf);
    Register oop = as_Register($object$$reg);
    Register box = as_Register($box$$reg);
    Register disp_hdr = as_Register($tmp$$reg);
    Register tmp = as_Register($tmp2$$reg);
    Label cont;
    Label object_has_monitor;

    assert_different_registers(oop, box, tmp, disp_hdr);

    // Always do locking in runtime.
    if (EmitSync & 0x01) {
      __ cmp(oop, zr); // Oop can't be 0 here => always false.
      return;
    }

    if (UseBiasedLocking && !UseOptoBiasInlining) {
      __ biased_locking_exit(oop, tmp, cont);
    }

    // Find the lock address and load the displaced header from the stack.
    __ ldr(disp_hdr, Address(box, BasicLock::displaced_header_offset_in_bytes()));

    // If the displaced header is 0, we have a recursive unlock.
    // (cmp sets EQ so cont reports success.)
    __ cmp(disp_hdr, zr);
    __ br(Assembler::EQ, cont);

    // Handle existing monitor.
    if ((EmitSync & 0x02) == 0) {
      __ ldr(tmp, Address(oop, oopDesc::mark_offset_in_bytes()));
      __ tbnz(disp_hdr, exact_log2(markOopDesc::monitor_value), object_has_monitor);
    }

    // Check if it is still a light weight lock, this is true if we
    // see the stack address of the basicLock in the markOop of the
    // object.

    __ cmpxchg(oop, box, disp_hdr, Assembler::xword, /*acquire*/ false,
               /*release*/ true, /*weak*/ false, tmp);
    __ b(cont);

    assert(oopDesc::mark_offset_in_bytes() == 0, "offset of _mark is not 0");

    // Handle existing monitor.
    if ((EmitSync & 0x02) == 0) {
      __ bind(object_has_monitor);
      // Strip the monitor tag bit to get the ObjectMonitor address.
      __ add(tmp, tmp, -markOopDesc::monitor_value); // monitor
      __ ldr(rscratch1, Address(tmp, ObjectMonitor::owner_offset_in_bytes()));
      __ ldr(disp_hdr, Address(tmp, ObjectMonitor::recursions_offset_in_bytes()));
      __ eor(rscratch1, rscratch1, rthread); // Will be 0 if we are the owner.
      __ orr(rscratch1, rscratch1, disp_hdr); // Will be 0 if there are 0 recursions
      __ cmp(rscratch1, zr); // Sets flags for result
      __ br(Assembler::NE, cont);

      __ ldr(rscratch1, Address(tmp, ObjectMonitor::EntryList_offset_in_bytes()));
      __ ldr(disp_hdr, Address(tmp, ObjectMonitor::cxq_offset_in_bytes()));
      __ orr(rscratch1, rscratch1, disp_hdr); // Will be 0 if both are 0.
      // cmp establishes the flags reported at cont; cbnz does the branch itself.
      __ cmp(rscratch1, zr); // Sets flags for result
      __ cbnz(rscratch1, cont);
      // need a release store here
      __ lea(tmp, Address(tmp, ObjectMonitor::owner_offset_in_bytes()));
      __ stlr(zr, tmp); // set unowned
    }

    __ bind(cont);
    // flag == EQ indicates success
    // flag == NE indicates failure
  %}
3478 
3479 %}
3480 
3481 //----------FRAME--------------------------------------------------------------
3482 // Definition of frame structure and management information.
3483 //
3484 //  S T A C K   L A Y O U T    Allocators stack-slot number
3485 //                             |   (to get allocators register number
3486 //  G  Owned by    |        |  v    add OptoReg::stack0())
3487 //  r   CALLER     |        |
3488 //  o     |        +--------+      pad to even-align allocators stack-slot
3489 //  w     V        |  pad0  |        numbers; owned by CALLER
3490 //  t   -----------+--------+----> Matcher::_in_arg_limit, unaligned
3491 //  h     ^        |   in   |  5
3492 //        |        |  args  |  4   Holes in incoming args owned by SELF
3493 //  |     |        |        |  3
3494 //  |     |        +--------+
3495 //  V     |        | old out|      Empty on Intel, window on Sparc
3496 //        |    old |preserve|      Must be even aligned.
3497 //        |     SP-+--------+----> Matcher::_old_SP, even aligned
3498 //        |        |   in   |  3   area for Intel ret address
3499 //     Owned by    |preserve|      Empty on Sparc.
3500 //       SELF      +--------+
3501 //        |        |  pad2  |  2   pad to align old SP
3502 //        |        +--------+  1
3503 //        |        | locks  |  0
3504 //        |        +--------+----> OptoReg::stack0(), even aligned
3505 //        |        |  pad1  | 11   pad to align new SP
3506 //        |        +--------+
3507 //        |        |        | 10
3508 //        |        | spills |  9   spills
3509 //        V        |        |  8   (pad0 slot for callee)
3510 //      -----------+--------+----> Matcher::_out_arg_limit, unaligned
3511 //        ^        |  out   |  7
3512 //        |        |  args  |  6   Holes in outgoing args owned by CALLEE
3513 //     Owned by    +--------+
3514 //      CALLEE     | new out|  6   Empty on Intel, window on Sparc
3515 //        |    new |preserve|      Must be even-aligned.
3516 //        |     SP-+--------+----> Matcher::_new_SP, even aligned
3517 //        |        |        |
3518 //
3519 // Note 1: Only region 8-11 is determined by the allocator.  Region 0-5 is
3520 //         known from SELF's arguments and the Java calling convention.
3521 //         Region 6-7 is determined per call site.
3522 // Note 2: If the calling convention leaves holes in the incoming argument
3523 //         area, those holes are owned by SELF.  Holes in the outgoing area
//         are owned by the CALLEE.  Holes should not be necessary in the
3525 //         incoming area, as the Java calling convention is completely under
3526 //         the control of the AD file.  Doubles can be sorted and packed to
//         avoid holes.  Holes in the outgoing arguments may be necessary for
3528 //         varargs C calling conventions.
3529 // Note 3: Region 0-3 is even aligned, with pad2 as needed.  Region 3-5 is
3530 //         even aligned with pad0 as needed.
3531 //         Region 6 is even aligned.  Region 6-7 is NOT even aligned;
3532 //           (the latter is true on Intel but is it false on AArch64?)
3533 //         region 6-11 is even aligned; it may be padded out more so that
3534 //         the region from SP to FP meets the minimum stack alignment.
3535 // Note 4: For I2C adapters, the incoming FP may not meet the minimum stack
3536 //         alignment.  Region 11, pad1, may be dynamically extended so that
3537 //         SP meets the minimum alignment.
3538 
frame %{
  // What direction does stack grow in (assumed to be same for C & Java)
  stack_direction(TOWARDS_LOW);

  // These three registers define part of the calling convention
  // between compiled code and the interpreter.

  // Inline Cache Register or methodOop for I2C.
  inline_cache_reg(R12);

  // Method Oop Register when calling interpreter.
  interpreter_method_oop_reg(R12);

  // Number of stack slots consumed by locking an object
  sync_stack_slots(2);

  // Compiled code's Frame Pointer
  frame_pointer(R31);

  // Interpreter stores its frame pointer in a register which is
  // stored to the stack by I2CAdaptors.
  // I2CAdaptors convert from interpreted java to compiled java.
  interpreter_frame_pointer(R29);

  // Stack alignment requirement
  stack_alignment(StackAlignmentInBytes); // Alignment size in bytes (128-bit -> 16 bytes)

  // Number of stack slots between incoming argument block and the start of
  // a new frame.  The PROLOG must add this many slots to the stack.  The
  // EPILOG must remove this many slots. aarch64 needs two slots for
  // return address and fp.
  // TODO think this is correct but check
  in_preserve_stack_slots(4);

  // Number of outgoing stack slots killed above the out_preserve_stack_slots
  // for calls to C.  Supports the var-args backing area for register parms.
  varargs_C_out_slots_killed(frame::arg_reg_save_area_bytes/BytesPerInt);

  // The after-PROLOG location of the return address.  Location of
  // return address specifies a type (REG or STACK) and a number
  // representing the register number (i.e. - use a register name) or
  // stack slot.
  // Ret Addr is on stack in slot 0 if no locks or verification or alignment.
  // Otherwise, it is above the locks and verification slot and alignment word
  // TODO this may well be correct but need to check why that - 2 is there
  // ppc port uses 0 but we definitely need to allow for fixed_slots
  // which folds in the space used for monitors
  return_addr(STACK - 2 +
              align_up((Compile::current()->in_preserve_stack_slots() +
                        Compile::current()->fixed_slots()),
                       stack_alignment_in_slots()));

  // Body of function which returns an integer array locating
  // arguments either in registers or in stack slots.  Passed an array
  // of ideal registers called "sig" and a "length" count.  Stack-slot
  // offsets are based on outgoing arguments, i.e. a CALLER setting up
  // arguments for a CALLEE.  Incoming stack arguments are
  // automatically biased by the preserve_stack_slots field above.

  calling_convention
  %{
    // No difference between ingoing/outgoing just pass false
    SharedRuntime::java_calling_convention(sig_bt, regs, length, false);
  %}

  c_calling_convention
  %{
    // This is obviously always outgoing
    (void) SharedRuntime::c_calling_convention(sig_bt, regs, NULL, length);
  %}

  // Location of compiled Java return values.  Same as C for now.
  return_value
  %{
    // TODO do we allow ideal_reg == Op_RegN???
    assert(ideal_reg >= Op_RegI && ideal_reg <= Op_RegL,
           "only return normal values");

    // Both tables are indexed directly by the ideal register opcode
    // (Op_Node .. Op_RegL); lo holds the low-half register, hi the
    // high half (OptoReg::Bad where the value fits a single slot).
    static const int lo[Op_RegL + 1] = { // enum name
      0,                                 // Op_Node
      0,                                 // Op_Set
      R0_num,                            // Op_RegN
      R0_num,                            // Op_RegI
      R0_num,                            // Op_RegP
      V0_num,                            // Op_RegF
      V0_num,                            // Op_RegD
      R0_num                             // Op_RegL
    };

    static const int hi[Op_RegL + 1] = { // enum name
      0,                                 // Op_Node
      0,                                 // Op_Set
      OptoReg::Bad,                       // Op_RegN
      OptoReg::Bad,                      // Op_RegI
      R0_H_num,                          // Op_RegP
      OptoReg::Bad,                      // Op_RegF
      V0_H_num,                          // Op_RegD
      R0_H_num                           // Op_RegL
    };

    return OptoRegPair(hi[ideal_reg], lo[ideal_reg]);
  %}
%}
3642 
3643 //----------ATTRIBUTES---------------------------------------------------------
3644 //----------Operand Attributes-------------------------------------------------
// Default attribute values; individual operands/instructions override these.
op_attrib op_cost(1);        // Required cost attribute

//----------Instruction Attributes---------------------------------------------
ins_attrib ins_cost(INSN_COST); // Required cost attribute
ins_attrib ins_size(32);        // Required size attribute (in bits)
ins_attrib ins_short_branch(0); // Required flag: is this instruction
                                // a non-matching short branch variant
                                // of some long branch?
ins_attrib ins_alignment(4);    // Required alignment attribute (must
                                // be a power of 2) specifies the
                                // alignment that some part of the
                                // instruction (not necessarily the
                                // start) requires.  If > 1, a
                                // compute_padding() function must be
                                // provided for the instruction
3660 
3661 //----------OPERANDS-----------------------------------------------------------
3662 // Operand definitions must precede instruction definitions for correct parsing
3663 // in the ADLC because operands constitute user defined types which are used in
3664 // instruction definitions.
3665 
3666 //----------Simple Operands----------------------------------------------------
3667 
// Integer operands 32 bit
// 32 bit immediate (any value)
operand immI()
%{
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// 32 bit zero
operand immI0()
%{
  predicate(n->get_int() == 0);
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// 32 bit unit increment
operand immI_1()
%{
  predicate(n->get_int() == 1);
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// 32 bit unit decrement
operand immI_M1()
%{
  predicate(n->get_int() == -1);
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Shift values for add/sub extension shift (0..4 inclusive)
operand immIExt()
%{
  predicate(0 <= n->get_int() && (n->get_int() <= 4));
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// 32 bit constant <= 4 (no lower bound, unlike immIExt)
operand immI_le_4()
%{
  predicate(n->get_int() <= 4);
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// 32 bit constant 31
operand immI_31()
%{
  predicate(n->get_int() == 31);
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// 32 bit constant 8
operand immI_8()
%{
  predicate(n->get_int() == 8);
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// 32 bit constant 16
operand immI_16()
%{
  predicate(n->get_int() == 16);
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// 32 bit constant 24
operand immI_24()
%{
  predicate(n->get_int() == 24);
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// 32 bit constant 32
operand immI_32()
%{
  predicate(n->get_int() == 32);
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// 32 bit constant 48
operand immI_48()
%{
  predicate(n->get_int() == 48);
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// 32 bit constant 56
operand immI_56()
%{
  predicate(n->get_int() == 56);
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}
3802 
// 32 bit constant 63
operand immI_63()
%{
  predicate(n->get_int() == 63);
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// 32 bit constant 64
operand immI_64()
%{
  predicate(n->get_int() == 64);
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// 32 bit constant 255 (low-byte mask)
operand immI_255()
%{
  predicate(n->get_int() == 255);
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// 32 bit constant 65535 (low-halfword mask)
operand immI_65535()
%{
  predicate(n->get_int() == 65535);
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// 64 bit constant 255 (low-byte mask)
operand immL_255()
%{
  predicate(n->get_long() == 255L);
  match(ConL);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// 64 bit constant 65535 (low-halfword mask)
operand immL_65535()
%{
  predicate(n->get_long() == 65535L);
  match(ConL);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// 64 bit constant 4294967295 (low-word mask)
operand immL_4294967295()
%{
  predicate(n->get_long() == 4294967295L);
  match(ConL);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// 64 bit contiguous low-order bit mask (2^k - 1) with the top two bits clear
operand immL_bitmask()
%{
  predicate(((n->get_long() & 0xc000000000000000l) == 0)
            && is_power_of_2(n->get_long() + 1));
  match(ConL);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// 32 bit contiguous low-order bit mask (2^k - 1) with the top two bits clear
operand immI_bitmask()
%{
  predicate(((n->get_int() & 0xc0000000) == 0)
            && is_power_of_2(n->get_int() + 1));
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}
3894 
// Scale values for scaled offset addressing modes (up to long but not quad)
operand immIScale()
%{
  predicate(0 <= n->get_int() && (n->get_int() <= 3));
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// 26 bit signed offset -- for pc-relative branches
operand immI26()
%{
  predicate(((-(1 << 25)) <= n->get_int()) && (n->get_int() < (1 << 25)));
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// 19 bit signed offset -- for pc-relative loads
operand immI19()
%{
  predicate(((-(1 << 18)) <= n->get_int()) && (n->get_int() < (1 << 18)));
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// 12 bit unsigned offset -- for base plus immediate loads
operand immIU12()
%{
  predicate((0 <= n->get_int()) && (n->get_int() < (1 << 12)));
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// 12 bit unsigned long offset -- for base plus immediate loads
operand immLU12()
%{
  predicate((0 <= n->get_long()) && (n->get_long() < (1 << 12)));
  match(ConL);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}
3948 
// Offset for scaled or unscaled immediate loads and stores
operand immIOffset()
%{
  predicate(Address::offset_ok_for_immed(n->get_int()));
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Offset valid for a 4-byte-scaled access (shift = 2)
operand immIOffset4()
%{
  predicate(Address::offset_ok_for_immed(n->get_int(), 2));
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Offset valid for an 8-byte-scaled access (shift = 3)
operand immIOffset8()
%{
  predicate(Address::offset_ok_for_immed(n->get_int(), 3));
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Offset valid for a 16-byte-scaled access (shift = 4)
operand immIOffset16()
%{
  predicate(Address::offset_ok_for_immed(n->get_int(), 4));
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Long offset for scaled or unscaled immediate loads and stores
operand immLoffset()
%{
  predicate(Address::offset_ok_for_immed(n->get_long()));
  match(ConL);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Long offset valid for a 4-byte-scaled access (shift = 2)
operand immLoffset4()
%{
  predicate(Address::offset_ok_for_immed(n->get_long(), 2));
  match(ConL);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Long offset valid for an 8-byte-scaled access (shift = 3)
operand immLoffset8()
%{
  predicate(Address::offset_ok_for_immed(n->get_long(), 3));
  match(ConL);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Long offset valid for a 16-byte-scaled access (shift = 4)
operand immLoffset16()
%{
  predicate(Address::offset_ok_for_immed(n->get_long(), 4));
  match(ConL);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}
4029 
// 32 bit integer valid for add sub immediate
operand immIAddSub()
%{
  predicate(Assembler::operand_valid_for_add_sub_immediate((long)n->get_int()));
  match(ConI);
  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// 32 bit unsigned integer valid for logical immediate
// TODO -- check this is right when e.g the mask is 0x80000000
operand immILog()
%{
  predicate(Assembler::operand_valid_for_logical_immediate(/*is32*/true, (unsigned long)n->get_int()));
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}
4051 
// Integer operands 64 bit
// 64 bit immediate (any value)
operand immL()
%{
  match(ConL);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// 64 bit zero
operand immL0()
%{
  predicate(n->get_long() == 0);
  match(ConL);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// 64 bit unit increment
operand immL_1()
%{
  predicate(n->get_long() == 1);
  match(ConL);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// 64 bit unit decrement
operand immL_M1()
%{
  predicate(n->get_long() == -1);
  match(ConL);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// 32 bit offset of pc in thread anchor
// (matches only the constant equal to the last_Java_pc byte offset)

operand immL_pc_off()
%{
  predicate(n->get_long() == in_bytes(JavaThread::frame_anchor_offset()) +
                             in_bytes(JavaFrameAnchor::last_Java_pc_offset()));
  match(ConL);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// 64 bit integer valid for add sub immediate
operand immLAddSub()
%{
  predicate(Assembler::operand_valid_for_add_sub_immediate(n->get_long()));
  match(ConL);
  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// 64 bit integer valid for logical immediate
operand immLLog()
%{
  predicate(Assembler::operand_valid_for_logical_immediate(/*is32*/false, (unsigned long)n->get_long()));
  match(ConL);
  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Long Immediate: low 32-bit mask
operand immL_32bits()
%{
  predicate(n->get_long() == 0xFFFFFFFFL);
  match(ConL);
  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}
4138 
// Pointer operands
// Pointer Immediate (any value)
operand immP()
%{
  match(ConP);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// NULL Pointer Immediate
operand immP0()
%{
  predicate(n->get_ptr() == 0);
  match(ConP);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Pointer Immediate One
// this is used in object initialization (initial object header)
operand immP_1()
%{
  predicate(n->get_ptr() == 1);
  match(ConP);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Polling Page Pointer Immediate
operand immPollPage()
%{
  predicate((address)n->get_ptr() == os::get_polling_page());
  match(ConP);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Card Table Byte Map Base
operand immByteMapBase()
%{
  // Get base of card map
  predicate(BarrierSet::barrier_set()->is_a(BarrierSet::CardTableBarrierSet) &&
            (jbyte*)n->get_ptr() == ((CardTableBarrierSet*)(BarrierSet::barrier_set()))->card_table()->byte_map_base());
  match(ConP);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Pointer Immediate Minus One
// this is used when we want to write the current PC to the thread anchor
operand immP_M1()
%{
  predicate(n->get_ptr() == -1);
  match(ConP);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Pointer Immediate Minus Two
// this is used when we want to write the current PC to the thread anchor
operand immP_M2()
%{
  predicate(n->get_ptr() == -2);
  match(ConP);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}
4220 
// Float and Double operands
// Double Immediate (any value)
operand immD()
%{
  match(ConD);
  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Double Immediate: +0.0d
operand immD0()
%{
  predicate(jlong_cast(n->getd()) == 0);
  match(ConD);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Double constant encodable as an FP immediate (FMOV imm8 form).
operand immDPacked()
%{
  predicate(Assembler::operand_valid_for_float_immediate(n->getd()));
  match(ConD);
  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Float Immediate (any value)
operand immF()
%{
  match(ConF);
  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Float Immediate: +0.0f.
operand immF0()
%{
  predicate(jint_cast(n->getf()) == 0);
  match(ConF);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Float constant encodable as an FP immediate (FMOV imm8 form).
operand immFPacked()
%{
  predicate(Assembler::operand_valid_for_float_immediate((double)n->getf()));
  match(ConF);
  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Narrow pointer operands
// Narrow Pointer Immediate
operand immN()
%{
  match(ConN);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Narrow NULL Pointer Immediate
operand immN0()
%{
  predicate(n->get_narrowcon() == 0);
  match(ConN);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Narrow Klass Pointer Immediate
operand immNKlass()
%{
  match(ConNKlass);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}
4312 
// Integer 32 bit Register Operands
// Integer 32 bit Register (excludes SP)
operand iRegI()
%{
  constraint(ALLOC_IN_RC(any_reg32));
  match(RegI);
  match(iRegINoSp);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}

// Integer 32 bit Register not Special
operand iRegINoSp()
%{
  constraint(ALLOC_IN_RC(no_special_reg32));
  match(RegI);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}

// Integer 64 bit Register Operands
// Integer 64 bit Register (includes SP)
operand iRegL()
%{
  constraint(ALLOC_IN_RC(any_reg));
  match(RegL);
  match(iRegLNoSp);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}

// Integer 64 bit Register not Special
// NOTE(review): no op_cost() here unlike the sibling operands — the ADLC
// default applies; confirm this is intentional.
operand iRegLNoSp()
%{
  constraint(ALLOC_IN_RC(no_special_reg));
  match(RegL);
  match(iRegL_R0);
  format %{ %}
  interface(REG_INTER);
%}
4356 
// Pointer Register Operands
// Pointer Register
operand iRegP()
%{
  constraint(ALLOC_IN_RC(ptr_reg));
  match(RegP);
  match(iRegPNoSp);
  match(iRegP_R0);
  //match(iRegP_R2);
  //match(iRegP_R4);
  //match(iRegP_R5);
  match(thread_RegP);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}

// Pointer 64 bit Register not Special
operand iRegPNoSp()
%{
  constraint(ALLOC_IN_RC(no_special_ptr_reg));
  match(RegP);
  // match(iRegP);
  // match(iRegP_R0);
  // match(iRegP_R2);
  // match(iRegP_R4);
  // match(iRegP_R5);
  // match(thread_RegP);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}

// Pointer 64 bit Register R0 only
operand iRegP_R0()
%{
  constraint(ALLOC_IN_RC(r0_reg));
  match(RegP);
  // match(iRegP);
  match(iRegPNoSp);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}

// Pointer 64 bit Register R1 only
operand iRegP_R1()
%{
  constraint(ALLOC_IN_RC(r1_reg));
  match(RegP);
  // match(iRegP);
  match(iRegPNoSp);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}

// Pointer 64 bit Register R2 only
operand iRegP_R2()
%{
  constraint(ALLOC_IN_RC(r2_reg));
  match(RegP);
  // match(iRegP);
  match(iRegPNoSp);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}

// Pointer 64 bit Register R3 only
operand iRegP_R3()
%{
  constraint(ALLOC_IN_RC(r3_reg));
  match(RegP);
  // match(iRegP);
  match(iRegPNoSp);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}

// Pointer 64 bit Register R4 only
operand iRegP_R4()
%{
  constraint(ALLOC_IN_RC(r4_reg));
  match(RegP);
  // match(iRegP);
  match(iRegPNoSp);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}

// Pointer 64 bit Register R5 only
operand iRegP_R5()
%{
  constraint(ALLOC_IN_RC(r5_reg));
  match(RegP);
  // match(iRegP);
  match(iRegPNoSp);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}

// Pointer 64 bit Register R10 only
operand iRegP_R10()
%{
  constraint(ALLOC_IN_RC(r10_reg));
  match(RegP);
  // match(iRegP);
  match(iRegPNoSp);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}
4473 
// Fixed-register long operands, used where a specific physical
// register carries a 64-bit integral value (e.g. stub/intrinsic
// argument registers).

// Long 64 bit Register R0 only
operand iRegL_R0()
%{
  constraint(ALLOC_IN_RC(r0_reg));
  match(RegL);
  match(iRegLNoSp);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}

// Long 64 bit Register R2 only
operand iRegL_R2()
%{
  constraint(ALLOC_IN_RC(r2_reg));
  match(RegL);
  match(iRegLNoSp);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}

// Long 64 bit Register R3 only
operand iRegL_R3()
%{
  constraint(ALLOC_IN_RC(r3_reg));
  match(RegL);
  match(iRegLNoSp);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}

// Long 64 bit Register R11 only
operand iRegL_R11()
%{
  constraint(ALLOC_IN_RC(r11_reg));
  match(RegL);
  match(iRegLNoSp);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}

// Pointer 64 bit Register FP only (the frame pointer)
operand iRegP_FP()
%{
  constraint(ALLOC_IN_RC(fp_reg));
  match(RegP);
  // match(iRegP);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}
4528 
// Fixed-register 32-bit int operands (int_rN_reg register classes).

// Register R0 only
operand iRegI_R0()
%{
  constraint(ALLOC_IN_RC(int_r0_reg));
  match(RegI);
  match(iRegINoSp);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}

// Register R2 only
operand iRegI_R2()
%{
  constraint(ALLOC_IN_RC(int_r2_reg));
  match(RegI);
  match(iRegINoSp);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}

// Register R3 only
operand iRegI_R3()
%{
  constraint(ALLOC_IN_RC(int_r3_reg));
  match(RegI);
  match(iRegINoSp);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}


// Register R4 only
operand iRegI_R4()
%{
  constraint(ALLOC_IN_RC(int_r4_reg));
  match(RegI);
  match(iRegINoSp);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}
4573 
4574 
// Pointer Register Operands
// Narrow Pointer Register (compressed oop, 32 bits)
operand iRegN()
%{
  constraint(ALLOC_IN_RC(any_reg32));
  match(RegN);
  match(iRegNNoSp);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}

// Narrow pointer pinned to R0
operand iRegN_R0()
%{
  constraint(ALLOC_IN_RC(r0_reg));
  match(iRegN);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}

// Narrow pointer pinned to R2
operand iRegN_R2()
%{
  constraint(ALLOC_IN_RC(r2_reg));
  match(iRegN);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}

// Narrow pointer pinned to R3
operand iRegN_R3()
%{
  constraint(ALLOC_IN_RC(r3_reg));
  match(iRegN);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}

// Narrow pointer 32 bit Register not Special
// (header previously said "Integer 64 bit", which did not match the
// 32-bit narrow-oop register class used below)
operand iRegNNoSp()
%{
  constraint(ALLOC_IN_RC(no_special_reg32));
  match(RegN);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}

// heap base register -- used for encoding immN0

operand iRegIHeapbase()
%{
  constraint(ALLOC_IN_RC(heapbase_reg));
  match(RegI);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}
4634 
// Float Register
// Float register operands
operand vRegF()
%{
  constraint(ALLOC_IN_RC(float_reg));
  match(RegF);

  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}

// Double Register
// Double register operands
operand vRegD()
%{
  constraint(ALLOC_IN_RC(double_reg));
  match(RegD);

  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}

// 64-bit (D-form) SIMD vector register
operand vecD()
%{
  constraint(ALLOC_IN_RC(vectord_reg));
  match(VecD);

  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}

// 128-bit (Q-form) SIMD vector register
operand vecX()
%{
  constraint(ALLOC_IN_RC(vectorx_reg));
  match(VecX);

  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}

// Double operands pinned to specific FP/SIMD registers V0..V3,
// for instructs/stubs that require fixed vector registers.
operand vRegD_V0()
%{
  constraint(ALLOC_IN_RC(v0_reg));
  match(RegD);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}

operand vRegD_V1()
%{
  constraint(ALLOC_IN_RC(v1_reg));
  match(RegD);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}

operand vRegD_V2()
%{
  constraint(ALLOC_IN_RC(v2_reg));
  match(RegD);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}

operand vRegD_V3()
%{
  constraint(ALLOC_IN_RC(v3_reg));
  match(RegD);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}
4714 
// Flags register, used as output of signed compare instructions

// note that on AArch64 we also use this register as the output for
// floating point compare instructions (CmpF CmpD). this ensures
// that ordered inequality tests use GT, GE, LT or LE none of which
// pass through cases where the result is unordered i.e. one or both
// inputs to the compare is a NaN. this means that the ideal code can
// replace e.g. a GT with an LE and not end up capturing the NaN case
// (where the comparison should always fail). EQ and NE tests are
// always generated in ideal code so that unordered folds into the NE
// case, matching the behaviour of AArch64 NE.
//
// This differs from x86 where the outputs of FP compares use a
// special FP flags registers and where compares based on this
// register are distinguished into ordered inequalities (cmpOpUCF) and
// EQ/NEQ tests (cmpOpUCF2). x86 has to special case the latter tests
// to explicitly handle the unordered case in branches. x86 also has
// to include extra CMoveX rules to accept a cmpOpUCF input.

operand rFlagsReg()
%{
  constraint(ALLOC_IN_RC(int_flags));
  match(RegFlags);

  op_cost(0);
  format %{ "RFLAGS" %}
  interface(REG_INTER);
%}

// Flags register, used as output of unsigned compare instructions
// (same physical NZCV flags; the distinct operand type selects the
// unsigned condition-code encodings in matching rules)
operand rFlagsRegU()
%{
  constraint(ALLOC_IN_RC(int_flags));
  match(RegFlags);

  op_cost(0);
  format %{ "RFLAGSU" %}
  interface(REG_INTER);
%}
4754 
// Special Registers

// Method Register
operand inline_cache_RegP(iRegP reg)
%{
  constraint(ALLOC_IN_RC(method_reg)); // inline_cache_reg
  match(reg);
  match(iRegPNoSp);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}

operand interpreter_method_oop_RegP(iRegP reg)
%{
  constraint(ALLOC_IN_RC(method_reg)); // interpreter_method_oop_reg
  match(reg);
  match(iRegPNoSp);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}

// Thread Register
operand thread_RegP(iRegP reg)
%{
  constraint(ALLOC_IN_RC(thread_reg)); // thread_reg (stale comment said link_reg)
  match(reg);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}

// Link register (LR)
operand lr_RegP(iRegP reg)
%{
  constraint(ALLOC_IN_RC(lr_reg)); // link_reg
  match(reg);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}
4796 
//----------Memory Operands----------------------------------------------------
// In the MEMORY_INTER descriptions below, index(0xffffffff) is the
// ADLC convention for "no index register".

// [reg] -- simple register-indirect addressing
operand indirect(iRegP reg)
%{
  constraint(ALLOC_IN_RC(ptr_reg));
  match(reg);
  op_cost(0);
  format %{ "[$reg]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index(0xffffffff);
    scale(0x0);
    disp(0x0);
  %}
%}

// [reg + (sxtw(ireg) << scale)] -- scaled, sign-extended 32-bit index.
// The predicate only accepts the address if the shift amount suits
// every memory access that uses this AddP.
operand indIndexScaledI2L(iRegP reg, iRegI ireg, immIScale scale)
%{
  constraint(ALLOC_IN_RC(ptr_reg));
  predicate(size_fits_all_mem_uses(n->as_AddP(), n->in(AddPNode::Offset)->in(2)->get_int()));
  match(AddP reg (LShiftL (ConvI2L ireg) scale));
  op_cost(0);
  format %{ "$reg, $ireg sxtw($scale), 0, I2L" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index($ireg);
    scale($scale);
    disp(0x0);
  %}
%}

// [reg + (lreg << scale)] -- scaled 64-bit index
operand indIndexScaled(iRegP reg, iRegL lreg, immIScale scale)
%{
  constraint(ALLOC_IN_RC(ptr_reg));
  predicate(size_fits_all_mem_uses(n->as_AddP(), n->in(AddPNode::Offset)->in(2)->get_int()));
  match(AddP reg (LShiftL lreg scale));
  op_cost(0);
  format %{ "$reg, $lreg lsl($scale)" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index($lreg);
    scale($scale);
    disp(0x0);
  %}
%}

// [reg + sxtw(ireg)] -- unscaled, sign-extended 32-bit index
operand indIndexI2L(iRegP reg, iRegI ireg)
%{
  constraint(ALLOC_IN_RC(ptr_reg));
  match(AddP reg (ConvI2L ireg));
  op_cost(0);
  format %{ "$reg, $ireg, 0, I2L" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index($ireg);
    scale(0x0);
    disp(0x0);
  %}
%}

// [reg + lreg] -- unscaled 64-bit index
operand indIndex(iRegP reg, iRegL lreg)
%{
  constraint(ALLOC_IN_RC(ptr_reg));
  match(AddP reg lreg);
  op_cost(0);
  format %{ "$reg, $lreg" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index($lreg);
    scale(0x0);
    disp(0x0);
  %}
%}

// [reg + off] -- immediate-offset forms. The 4/8/16 variants accept
// offsets aligned for the corresponding access size (see the
// immIOffset4/8/16 and immLoffset4/8/16 operand definitions).
operand indOffI(iRegP reg, immIOffset off)
%{
  constraint(ALLOC_IN_RC(ptr_reg));
  match(AddP reg off);
  op_cost(0);
  format %{ "[$reg, $off]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index(0xffffffff);
    scale(0x0);
    disp($off);
  %}
%}

operand indOffI4(iRegP reg, immIOffset4 off)
%{
  constraint(ALLOC_IN_RC(ptr_reg));
  match(AddP reg off);
  op_cost(0);
  format %{ "[$reg, $off]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index(0xffffffff);
    scale(0x0);
    disp($off);
  %}
%}

operand indOffI8(iRegP reg, immIOffset8 off)
%{
  constraint(ALLOC_IN_RC(ptr_reg));
  match(AddP reg off);
  op_cost(0);
  format %{ "[$reg, $off]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index(0xffffffff);
    scale(0x0);
    disp($off);
  %}
%}

operand indOffI16(iRegP reg, immIOffset16 off)
%{
  constraint(ALLOC_IN_RC(ptr_reg));
  match(AddP reg off);
  op_cost(0);
  format %{ "[$reg, $off]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index(0xffffffff);
    scale(0x0);
    disp($off);
  %}
%}

operand indOffL(iRegP reg, immLoffset off)
%{
  constraint(ALLOC_IN_RC(ptr_reg));
  match(AddP reg off);
  op_cost(0);
  format %{ "[$reg, $off]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index(0xffffffff);
    scale(0x0);
    disp($off);
  %}
%}

operand indOffL4(iRegP reg, immLoffset4 off)
%{
  constraint(ALLOC_IN_RC(ptr_reg));
  match(AddP reg off);
  op_cost(0);
  format %{ "[$reg, $off]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index(0xffffffff);
    scale(0x0);
    disp($off);
  %}
%}

operand indOffL8(iRegP reg, immLoffset8 off)
%{
  constraint(ALLOC_IN_RC(ptr_reg));
  match(AddP reg off);
  op_cost(0);
  format %{ "[$reg, $off]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index(0xffffffff);
    scale(0x0);
    disp($off);
  %}
%}

operand indOffL16(iRegP reg, immLoffset16 off)
%{
  constraint(ALLOC_IN_RC(ptr_reg));
  match(AddP reg off);
  op_cost(0);
  format %{ "[$reg, $off]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index(0xffffffff);
    scale(0x0);
    disp($off);
  %}
%}
4982 
// Narrow-oop (DecodeN) based memory operands. These mirror the plain
// pointer forms above but take a compressed-oop base, and are only
// legal when the heap needs no shift to decode (narrow_oop_shift() == 0)
// so the 32-bit register value can serve directly as a base address.
operand indirectN(iRegN reg)
%{
  predicate(Universe::narrow_oop_shift() == 0);
  constraint(ALLOC_IN_RC(ptr_reg));
  match(DecodeN reg);
  op_cost(0);
  format %{ "[$reg]\t# narrow" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index(0xffffffff);
    scale(0x0);
    disp(0x0);
  %}
%}

operand indIndexScaledI2LN(iRegN reg, iRegI ireg, immIScale scale)
%{
  predicate(Universe::narrow_oop_shift() == 0 && size_fits_all_mem_uses(n->as_AddP(), n->in(AddPNode::Offset)->in(2)->get_int()));
  constraint(ALLOC_IN_RC(ptr_reg));
  match(AddP (DecodeN reg) (LShiftL (ConvI2L ireg) scale));
  op_cost(0);
  format %{ "$reg, $ireg sxtw($scale), 0, I2L\t# narrow" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index($ireg);
    scale($scale);
    disp(0x0);
  %}
%}

operand indIndexScaledN(iRegN reg, iRegL lreg, immIScale scale)
%{
  predicate(Universe::narrow_oop_shift() == 0 && size_fits_all_mem_uses(n->as_AddP(), n->in(AddPNode::Offset)->in(2)->get_int()));
  constraint(ALLOC_IN_RC(ptr_reg));
  match(AddP (DecodeN reg) (LShiftL lreg scale));
  op_cost(0);
  format %{ "$reg, $lreg lsl($scale)\t# narrow" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index($lreg);
    scale($scale);
    disp(0x0);
  %}
%}

operand indIndexI2LN(iRegN reg, iRegI ireg)
%{
  predicate(Universe::narrow_oop_shift() == 0);
  constraint(ALLOC_IN_RC(ptr_reg));
  match(AddP (DecodeN reg) (ConvI2L ireg));
  op_cost(0);
  format %{ "$reg, $ireg, 0, I2L\t# narrow" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index($ireg);
    scale(0x0);
    disp(0x0);
  %}
%}

operand indIndexN(iRegN reg, iRegL lreg)
%{
  predicate(Universe::narrow_oop_shift() == 0);
  constraint(ALLOC_IN_RC(ptr_reg));
  match(AddP (DecodeN reg) lreg);
  op_cost(0);
  format %{ "$reg, $lreg\t# narrow" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index($lreg);
    scale(0x0);
    disp(0x0);
  %}
%}

operand indOffIN(iRegN reg, immIOffset off)
%{
  predicate(Universe::narrow_oop_shift() == 0);
  constraint(ALLOC_IN_RC(ptr_reg));
  match(AddP (DecodeN reg) off);
  op_cost(0);
  format %{ "[$reg, $off]\t# narrow" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index(0xffffffff);
    scale(0x0);
    disp($off);
  %}
%}

operand indOffLN(iRegN reg, immLoffset off)
%{
  predicate(Universe::narrow_oop_shift() == 0);
  constraint(ALLOC_IN_RC(ptr_reg));
  match(AddP (DecodeN reg) off);
  op_cost(0);
  format %{ "[$reg, $off]\t# narrow" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index(0xffffffff);
    scale(0x0);
    disp($off);
  %}
%}
5087 
5088 
5089 
// AArch64 opto stubs need to write to the pc slot in the thread anchor
// Address of the last_Java_pc slot: [thread_reg + immL_pc_off]
operand thread_anchor_pc(thread_RegP reg, immL_pc_off off)
%{
  constraint(ALLOC_IN_RC(ptr_reg));
  match(AddP reg off);
  op_cost(0);
  format %{ "[$reg, $off]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index(0xffffffff);
    scale(0x0);
    disp($off);
  %}
%}
5104 
//----------Special Memory Operands--------------------------------------------
// Stack Slot Operand - This operand is used for loading and storing temporary
//                      values on the stack where a match requires a value to
//                      flow through memory.
// All stack slot operands address [SP + slot offset]; base(0x1e) is
// the encoding of SP here.
operand stackSlotP(sRegP reg)
%{
  constraint(ALLOC_IN_RC(stack_slots));
  op_cost(100);
  // No match rule because this operand is only generated in matching
  // match(RegP);
  format %{ "[$reg]" %}
  interface(MEMORY_INTER) %{
    base(0x1e);  // RSP
    index(0x0);  // No Index
    scale(0x0);  // No Scale
    disp($reg);  // Stack Offset
  %}
%}

operand stackSlotI(sRegI reg)
%{
  constraint(ALLOC_IN_RC(stack_slots));
  // No match rule because this operand is only generated in matching
  // match(RegI);
  format %{ "[$reg]" %}
  interface(MEMORY_INTER) %{
    base(0x1e);  // RSP
    index(0x0);  // No Index
    scale(0x0);  // No Scale
    disp($reg);  // Stack Offset
  %}
%}

operand stackSlotF(sRegF reg)
%{
  constraint(ALLOC_IN_RC(stack_slots));
  // No match rule because this operand is only generated in matching
  // match(RegF);
  format %{ "[$reg]" %}
  interface(MEMORY_INTER) %{
    base(0x1e);  // RSP
    index(0x0);  // No Index
    scale(0x0);  // No Scale
    disp($reg);  // Stack Offset
  %}
%}

operand stackSlotD(sRegD reg)
%{
  constraint(ALLOC_IN_RC(stack_slots));
  // No match rule because this operand is only generated in matching
  // match(RegD);
  format %{ "[$reg]" %}
  interface(MEMORY_INTER) %{
    base(0x1e);  // RSP
    index(0x0);  // No Index
    scale(0x0);  // No Scale
    disp($reg);  // Stack Offset
  %}
%}

operand stackSlotL(sRegL reg)
%{
  constraint(ALLOC_IN_RC(stack_slots));
  // No match rule because this operand is only generated in matching
  // match(RegL);
  format %{ "[$reg]" %}
  interface(MEMORY_INTER) %{
    base(0x1e);  // RSP
    index(0x0);  // No Index
    scale(0x0);  // No Scale
    disp($reg);  // Stack Offset
  %}
%}
5179 
// Operands for expressing Control Flow
// NOTE: Label is a predefined operand which should not be redefined in
//       the AD file. It is generically handled within the ADLC.

//----------Conditional Branch Operands----------------------------------------
// Comparison Op  - This is the operation of the comparison, and is limited to
//                  the following set of codes:
//                  L (<), LE (<=), G (>), GE (>=), E (==), NE (!=)
//
// Other attributes of the comparison, such as unsignedness, are specified
// by the comparison instruction that sets a condition code flags register.
// That result is represented by a flags operand whose subtype is appropriate
// to the unsignedness (etc.) of the comparison.
//
// Later, the instruction which matches both the Comparison Op (a Bool) and
// the flags (produced by the Cmp) specifies the coding of the comparison op
// by matching a specific subtype of Bool operand below, such as cmpOpU.

// used for signed integral comparisons and fp comparisons
// (encodings are the AArch64 condition codes: eq/ne/lt/ge/le/gt/vs/vc)

operand cmpOp()
%{
  match(Bool);

  format %{ "" %}
  interface(COND_INTER) %{
    equal(0x0, "eq");
    not_equal(0x1, "ne");
    less(0xb, "lt");
    greater_equal(0xa, "ge");
    less_equal(0xd, "le");
    greater(0xc, "gt");
    overflow(0x6, "vs");
    no_overflow(0x7, "vc");
  %}
%}

// used for unsigned integral comparisons
// (uses the unsigned condition codes lo/hs/ls/hi for the orderings)

operand cmpOpU()
%{
  match(Bool);

  format %{ "" %}
  interface(COND_INTER) %{
    equal(0x0, "eq");
    not_equal(0x1, "ne");
    less(0x3, "lo");
    greater_equal(0x2, "hs");
    less_equal(0x9, "ls");
    greater(0x8, "hi");
    overflow(0x6, "vs");
    no_overflow(0x7, "vc");
  %}
%}

// used for certain integral comparisons which can be
// converted to cbxx or tbxx instructions
// (restricted by predicate to eq/ne tests only)

operand cmpOpEqNe()
%{
  match(Bool);
  match(CmpOp);
  op_cost(0);
  predicate(n->as_Bool()->_test._test == BoolTest::ne
            || n->as_Bool()->_test._test == BoolTest::eq);

  format %{ "" %}
  interface(COND_INTER) %{
    equal(0x0, "eq");
    not_equal(0x1, "ne");
    less(0xb, "lt");
    greater_equal(0xa, "ge");
    less_equal(0xd, "le");
    greater(0xc, "gt");
    overflow(0x6, "vs");
    no_overflow(0x7, "vc");
  %}
%}

// used for certain integral comparisons which can be
// converted to cbxx or tbxx instructions
// (restricted by predicate to lt/ge tests only)

operand cmpOpLtGe()
%{
  match(Bool);
  match(CmpOp);
  op_cost(0);

  predicate(n->as_Bool()->_test._test == BoolTest::lt
            || n->as_Bool()->_test._test == BoolTest::ge);

  format %{ "" %}
  interface(COND_INTER) %{
    equal(0x0, "eq");
    not_equal(0x1, "ne");
    less(0xb, "lt");
    greater_equal(0xa, "ge");
    less_equal(0xd, "le");
    greater(0xc, "gt");
    overflow(0x6, "vs");
    no_overflow(0x7, "vc");
  %}
%}

// used for certain unsigned integral comparisons which can be
// converted to cbxx or tbxx instructions
// (restricted by predicate to eq/ne/lt/ge tests)

operand cmpOpUEqNeLtGe()
%{
  match(Bool);
  match(CmpOp);
  op_cost(0);

  predicate(n->as_Bool()->_test._test == BoolTest::eq
            || n->as_Bool()->_test._test == BoolTest::ne
            || n->as_Bool()->_test._test == BoolTest::lt
            || n->as_Bool()->_test._test == BoolTest::ge);

  format %{ "" %}
  interface(COND_INTER) %{
    equal(0x0, "eq");
    not_equal(0x1, "ne");
    less(0xb, "lt");
    greater_equal(0xa, "ge");
    less_equal(0xd, "le");
    greater(0xc, "gt");
    overflow(0x6, "vs");
    no_overflow(0x7, "vc");
  %}
%}
5311 
// Special operand allowing long args to int ops to be truncated for free

operand iRegL2I(iRegL reg) %{

  op_cost(0);

  match(ConvL2I reg);

  format %{ "l2i($reg)" %}

  interface(REG_INTER)
%}

// Memory opclasses restricted to addressing modes legal for vector
// loads/stores of the given access size (4, 8, or 16 bytes).
opclass vmem4(indirect, indIndex, indOffI4, indOffL4);
opclass vmem8(indirect, indIndex, indOffI8, indOffL8);
opclass vmem16(indirect, indIndex, indOffI16, indOffL16);
5328 
//----------OPERAND CLASSES----------------------------------------------------
// Operand Classes are groups of operands that are used as to simplify
// instruction definitions by not requiring the AD writer to specify
// separate instructions for every form of operand when the
// instruction accepts multiple operand types with the same basic
// encoding and format. The classic case of this is memory operands.

// memory is used to define read/write location for load/store
// instruction defs. we can turn a memory op into an Address

opclass memory(indirect, indIndexScaled, indIndexScaledI2L, indIndexI2L, indIndex, indOffI, indOffL,
               indirectN, indIndexScaledN, indIndexScaledI2LN, indIndexI2LN, indIndexN, indOffIN, indOffLN);

// iRegIorL2I is used for src inputs in rules for 32 bit int (I)
// operations. it allows the src to be either an iRegI or a (ConvL2I
// iRegL). in the latter case the l2i normally planted for a ConvL2I
// can be elided because the 32-bit instruction will just employ the
// lower 32 bits anyway.
//
// n.b. this does not elide all L2I conversions. if the truncated
// value is consumed by more than one operation then the ConvL2I
// cannot be bundled into the consuming nodes so an l2i gets planted
// (actually a movw $dst $src) and the downstream instructions consume
// the result of the l2i as an iRegI input. That's a shame since the
// movw is actually redundant but its not too costly.

opclass iRegIorL2I(iRegI, iRegL2I);

//----------PIPELINE-----------------------------------------------------------
// Rules which define the behavior of the target architectures pipeline.

// For specific pipelines, eg A53, define the stages of that pipeline
//pipe_desc(ISS, EX1, EX2, WR);
// Map the named A53-style stages onto the generic S0..S5 stages
// declared by pipe_desc below.
#define ISS S0
#define EX1 S1
#define EX2 S2
#define WR  S3
5366 
5367 // Integer ALU reg operation
5368 pipeline %{
5369 
attributes %{
  // ARM instructions are of fixed length
  fixed_size_instructions;        // All instructions are the same size (4 bytes)
  max_instructions_per_bundle = 2;   // A53 = 2, A57 = 4
  // ARM instructions come in 32-bit word units
  instruction_unit_size = 4;         // An instruction is 4 bytes long
  instruction_fetch_unit_size = 64;  // The processor fetches one line
  instruction_fetch_units = 1;       // of 64 bytes

  // List of nop instructions
  nops( MachNop );
%}
5382 
// We don't use an actual pipeline model so don't care about resources
// or description. we do use pipeline classes to introduce fixed
// latencies

//----------RESOURCES----------------------------------------------------------
// Resources are the functional units available to the machine

// INS01 means "either issue slot"; ALU means "either ALU".
resources( INS0, INS1, INS01 = INS0 | INS1,
           ALU0, ALU1, ALU = ALU0 | ALU1,
           MAC,
           DIV,
           BRANCH,
           LDST,
           NEON_FP);

//----------PIPELINE DESCRIPTION-----------------------------------------------
// Pipeline Description specifies the stages in the machine's pipeline

// Define the pipeline as a generic 6 stage pipeline
pipe_desc(S0, S1, S2, S3, S4, S5);
5403 
5404 //----------PIPELINE CLASSES---------------------------------------------------
5405 // Pipeline Classes describe the stages in which input and output are
5406 // referenced by the hardware pipeline.
5407 
5408 pipe_class fp_dop_reg_reg_s(vRegF dst, vRegF src1, vRegF src2)
5409 %{
5410   single_instruction;
5411   src1   : S1(read);
5412   src2   : S2(read);
5413   dst    : S5(write);
5414   INS01  : ISS;
5415   NEON_FP : S5;
5416 %}
5417 
5418 pipe_class fp_dop_reg_reg_d(vRegD dst, vRegD src1, vRegD src2)
5419 %{
5420   single_instruction;
5421   src1   : S1(read);
5422   src2   : S2(read);
5423   dst    : S5(write);
5424   INS01  : ISS;
5425   NEON_FP : S5;
5426 %}
5427 
5428 pipe_class fp_uop_s(vRegF dst, vRegF src)
5429 %{
5430   single_instruction;
5431   src    : S1(read);
5432   dst    : S5(write);
5433   INS01  : ISS;
5434   NEON_FP : S5;
5435 %}
5436 
5437 pipe_class fp_uop_d(vRegD dst, vRegD src)
5438 %{
5439   single_instruction;
5440   src    : S1(read);
5441   dst    : S5(write);
5442   INS01  : ISS;
5443   NEON_FP : S5;
5444 %}
5445 
5446 pipe_class fp_d2f(vRegF dst, vRegD src)
5447 %{
5448   single_instruction;
5449   src    : S1(read);
5450   dst    : S5(write);
5451   INS01  : ISS;
5452   NEON_FP : S5;
5453 %}
5454 
5455 pipe_class fp_f2d(vRegD dst, vRegF src)
5456 %{
5457   single_instruction;
5458   src    : S1(read);
5459   dst    : S5(write);
5460   INS01  : ISS;
5461   NEON_FP : S5;
5462 %}
5463 
5464 pipe_class fp_f2i(iRegINoSp dst, vRegF src)
5465 %{
5466   single_instruction;
5467   src    : S1(read);
5468   dst    : S5(write);
5469   INS01  : ISS;
5470   NEON_FP : S5;
5471 %}
5472 
5473 pipe_class fp_f2l(iRegLNoSp dst, vRegF src)
5474 %{
5475   single_instruction;
5476   src    : S1(read);
5477   dst    : S5(write);
5478   INS01  : ISS;
5479   NEON_FP : S5;
5480 %}
5481 
5482 pipe_class fp_i2f(vRegF dst, iRegIorL2I src)
5483 %{
5484   single_instruction;
5485   src    : S1(read);
5486   dst    : S5(write);
5487   INS01  : ISS;
5488   NEON_FP : S5;
5489 %}
5490 
5491 pipe_class fp_l2f(vRegF dst, iRegL src)
5492 %{
5493   single_instruction;
5494   src    : S1(read);
5495   dst    : S5(write);
5496   INS01  : ISS;
5497   NEON_FP : S5;
5498 %}
5499 
5500 pipe_class fp_d2i(iRegINoSp dst, vRegD src)
5501 %{
5502   single_instruction;
5503   src    : S1(read);
5504   dst    : S5(write);
5505   INS01  : ISS;
5506   NEON_FP : S5;
5507 %}
5508 
5509 pipe_class fp_d2l(iRegLNoSp dst, vRegD src)
5510 %{
5511   single_instruction;
5512   src    : S1(read);
5513   dst    : S5(write);
5514   INS01  : ISS;
5515   NEON_FP : S5;
5516 %}
5517 
5518 pipe_class fp_i2d(vRegD dst, iRegIorL2I src)
5519 %{
5520   single_instruction;
5521   src    : S1(read);
5522   dst    : S5(write);
5523   INS01  : ISS;
5524   NEON_FP : S5;
5525 %}
5526 
5527 pipe_class fp_l2d(vRegD dst, iRegIorL2I src)
5528 %{
5529   single_instruction;
5530   src    : S1(read);
5531   dst    : S5(write);
5532   INS01  : ISS;
5533   NEON_FP : S5;
5534 %}
5535 
5536 pipe_class fp_div_s(vRegF dst, vRegF src1, vRegF src2)
5537 %{
5538   single_instruction;
5539   src1   : S1(read);
5540   src2   : S2(read);
5541   dst    : S5(write);
5542   INS0   : ISS;
5543   NEON_FP : S5;
5544 %}
5545 
5546 pipe_class fp_div_d(vRegD dst, vRegD src1, vRegD src2)
5547 %{
5548   single_instruction;
5549   src1   : S1(read);
5550   src2   : S2(read);
5551   dst    : S5(write);
5552   INS0   : ISS;
5553   NEON_FP : S5;
5554 %}
5555 
5556 pipe_class fp_cond_reg_reg_s(vRegF dst, vRegF src1, vRegF src2, rFlagsReg cr)
5557 %{
5558   single_instruction;
5559   cr     : S1(read);
5560   src1   : S1(read);
5561   src2   : S1(read);
5562   dst    : S3(write);
5563   INS01  : ISS;
5564   NEON_FP : S3;
5565 %}
5566 
5567 pipe_class fp_cond_reg_reg_d(vRegD dst, vRegD src1, vRegD src2, rFlagsReg cr)
5568 %{
5569   single_instruction;
5570   cr     : S1(read);
5571   src1   : S1(read);
5572   src2   : S1(read);
5573   dst    : S3(write);
5574   INS01  : ISS;
5575   NEON_FP : S3;
5576 %}
5577 
5578 pipe_class fp_imm_s(vRegF dst)
5579 %{
5580   single_instruction;
5581   dst    : S3(write);
5582   INS01  : ISS;
5583   NEON_FP : S3;
5584 %}
5585 
5586 pipe_class fp_imm_d(vRegD dst)
5587 %{
5588   single_instruction;
5589   dst    : S3(write);
5590   INS01  : ISS;
5591   NEON_FP : S3;
5592 %}
5593 
5594 pipe_class fp_load_constant_s(vRegF dst)
5595 %{
5596   single_instruction;
5597   dst    : S4(write);
5598   INS01  : ISS;
5599   NEON_FP : S4;
5600 %}
5601 
5602 pipe_class fp_load_constant_d(vRegD dst)
5603 %{
5604   single_instruction;
5605   dst    : S4(write);
5606   INS01  : ISS;
5607   NEON_FP : S4;
5608 %}
5609 
5610 pipe_class vmul64(vecD dst, vecD src1, vecD src2)
5611 %{
5612   single_instruction;
5613   dst    : S5(write);
5614   src1   : S1(read);
5615   src2   : S1(read);
5616   INS01  : ISS;
5617   NEON_FP : S5;
5618 %}
5619 
5620 pipe_class vmul128(vecX dst, vecX src1, vecX src2)
5621 %{
5622   single_instruction;
5623   dst    : S5(write);
5624   src1   : S1(read);
5625   src2   : S1(read);
5626   INS0   : ISS;
5627   NEON_FP : S5;
5628 %}
5629 
5630 pipe_class vmla64(vecD dst, vecD src1, vecD src2)
5631 %{
5632   single_instruction;
5633   dst    : S5(write);
5634   src1   : S1(read);
5635   src2   : S1(read);
5636   dst    : S1(read);
5637   INS01  : ISS;
5638   NEON_FP : S5;
5639 %}
5640 
5641 pipe_class vmla128(vecX dst, vecX src1, vecX src2)
5642 %{
5643   single_instruction;
5644   dst    : S5(write);
5645   src1   : S1(read);
5646   src2   : S1(read);
5647   dst    : S1(read);
5648   INS0   : ISS;
5649   NEON_FP : S5;
5650 %}
5651 
// 64-bit vector integer data op: sources read in S2, result in S4.
pipe_class vdop64(vecD dst, vecD src1, vecD src2)
%{
  single_instruction;
  dst    : S4(write);
  src1   : S2(read);
  src2   : S2(read);
  INS01  : ISS;
  NEON_FP : S4;
%}

// 128-bit vector integer data op; slot 0 only.
pipe_class vdop128(vecX dst, vecX src1, vecX src2)
%{
  single_instruction;
  dst    : S4(write);
  src1   : S2(read);
  src2   : S2(read);
  INS0   : ISS;
  NEON_FP : S4;
%}

// 64-bit vector logical op: shorter latency than vdop (result in S3).
pipe_class vlogical64(vecD dst, vecD src1, vecD src2)
%{
  single_instruction;
  dst    : S3(write);
  src1   : S2(read);
  src2   : S2(read);
  INS01  : ISS;
  NEON_FP : S3;
%}

// 128-bit vector logical op; slot 0 only.
pipe_class vlogical128(vecX dst, vecX src1, vecX src2)
%{
  single_instruction;
  dst    : S3(write);
  src1   : S2(read);
  src2   : S2(read);
  INS0   : ISS;
  NEON_FP : S3;
%}
5691 
// 64-bit vector shift by register: operands read in S1, result in S3.
// NOTE(review): the shift-amount operand is declared vecX even in the
// 64-bit form — confirm this matches the register-shift instruction forms.
pipe_class vshift64(vecD dst, vecD src, vecX shift)
%{
  single_instruction;
  dst    : S3(write);
  src    : S1(read);
  shift  : S1(read);
  INS01  : ISS;
  NEON_FP : S3;
%}

// 128-bit vector shift by register; slot 0 only.
pipe_class vshift128(vecX dst, vecX src, vecX shift)
%{
  single_instruction;
  dst    : S3(write);
  src    : S1(read);
  shift  : S1(read);
  INS0   : ISS;
  NEON_FP : S3;
%}

// 64-bit vector shift by immediate: the immI shift needs no pipeline stage.
pipe_class vshift64_imm(vecD dst, vecD src, immI shift)
%{
  single_instruction;
  dst    : S3(write);
  src    : S1(read);
  INS01  : ISS;
  NEON_FP : S3;
%}

// 128-bit vector shift by immediate; slot 0 only.
pipe_class vshift128_imm(vecX dst, vecX src, immI shift)
%{
  single_instruction;
  dst    : S3(write);
  src    : S1(read);
  INS0   : ISS;
  NEON_FP : S3;
%}
5729 
// 64-bit vector FP data op: sources read in S1, result in S5.
pipe_class vdop_fp64(vecD dst, vecD src1, vecD src2)
%{
  single_instruction;
  dst    : S5(write);
  src1   : S1(read);
  src2   : S1(read);
  INS01  : ISS;
  NEON_FP : S5;
%}

// 128-bit vector FP data op; slot 0 only.
pipe_class vdop_fp128(vecX dst, vecX src1, vecX src2)
%{
  single_instruction;
  dst    : S5(write);
  src1   : S1(read);
  src2   : S1(read);
  INS0   : ISS;
  NEON_FP : S5;
%}

// 64-bit vector FP multiply/divide.
// NOTE(review): unlike the other 64-bit classes this one uses INS0 (slot 0
// only) — confirm that restriction is intentional for the 64-bit form.
pipe_class vmuldiv_fp64(vecD dst, vecD src1, vecD src2)
%{
  single_instruction;
  dst    : S5(write);
  src1   : S1(read);
  src2   : S1(read);
  INS0   : ISS;
  NEON_FP : S5;
%}

// 128-bit vector FP multiply/divide; slot 0 only.
pipe_class vmuldiv_fp128(vecX dst, vecX src1, vecX src2)
%{
  single_instruction;
  dst    : S5(write);
  src1   : S1(read);
  src2   : S1(read);
  INS0   : ISS;
  NEON_FP : S5;
%}

// 128-bit vector FP square root; slot 0 only.
pipe_class vsqrt_fp128(vecX dst, vecX src)
%{
  single_instruction;
  dst    : S5(write);
  src    : S1(read);
  INS0   : ISS;
  NEON_FP : S5;
%}
5778 
// 64-bit vector FP unary op: single source read in S1, result in S5.
pipe_class vunop_fp64(vecD dst, vecD src)
%{
  single_instruction;
  dst    : S5(write);
  src    : S1(read);
  INS01  : ISS;
  NEON_FP : S5;
%}

// 128-bit vector FP unary op; slot 0 only.
pipe_class vunop_fp128(vecX dst, vecX src)
%{
  single_instruction;
  dst    : S5(write);
  src    : S1(read);
  INS0   : ISS;
  NEON_FP : S5;
%}

// Duplicate a general-purpose register into a 64-bit vector.
pipe_class vdup_reg_reg64(vecD dst, iRegI src)
%{
  single_instruction;
  dst    : S3(write);
  src    : S1(read);
  INS01  : ISS;
  NEON_FP : S3;
%}

// Duplicate a general-purpose register into a 128-bit vector.
pipe_class vdup_reg_reg128(vecX dst, iRegI src)
%{
  single_instruction;
  dst    : S3(write);
  src    : S1(read);
  INS01  : ISS;
  NEON_FP : S3;
%}

// Duplicate a float register into a 64-bit vector.
pipe_class vdup_reg_freg64(vecD dst, vRegF src)
%{
  single_instruction;
  dst    : S3(write);
  src    : S1(read);
  INS01  : ISS;
  NEON_FP : S3;
%}

// Duplicate a float register into a 128-bit vector.
pipe_class vdup_reg_freg128(vecX dst, vRegF src)
%{
  single_instruction;
  dst    : S3(write);
  src    : S1(read);
  INS01  : ISS;
  NEON_FP : S3;
%}

// Duplicate a double register into a 128-bit vector.
pipe_class vdup_reg_dreg128(vecX dst, vRegD src)
%{
  single_instruction;
  dst    : S3(write);
  src    : S1(read);
  INS01  : ISS;
  NEON_FP : S3;
%}
5841 
// Vector move-immediate, 64-bit destination: no source operands.
pipe_class vmovi_reg_imm64(vecD dst)
%{
  single_instruction;
  dst    : S3(write);
  INS01  : ISS;
  NEON_FP : S3;
%}

// Vector move-immediate, 128-bit destination; slot 0 only.
pipe_class vmovi_reg_imm128(vecX dst)
%{
  single_instruction;
  dst    : S3(write);
  INS0   : ISS;
  NEON_FP : S3;
%}

// Vector load, 64-bit destination: address consumed at issue (ISS),
// result written in S5.
// NOTE(review): dst is written at S5 while NEON_FP is held only to S3 —
// presumably modelling load latency; confirm against the pipeline model.
pipe_class vload_reg_mem64(vecD dst, vmem8 mem)
%{
  single_instruction;
  dst    : S5(write);
  mem    : ISS(read);
  INS01  : ISS;
  NEON_FP : S3;
%}

// Vector load, 128-bit destination.
pipe_class vload_reg_mem128(vecX dst, vmem16 mem)
%{
  single_instruction;
  dst    : S5(write);
  mem    : ISS(read);
  INS01  : ISS;
  NEON_FP : S3;
%}
5875 
// Vector store, 64-bit source: address consumed at issue, data read in S2.
pipe_class vstore_reg_mem64(vecD src, vmem8 mem)
%{
  single_instruction;
  mem    : ISS(read);
  src    : S2(read);
  INS01  : ISS;
  NEON_FP : S3;
%}
5884 
// Vector store, 128-bit source: address consumed at issue, data read in S2.
// Fixed: the source operand was declared vecD (copy-paste from the 64-bit
// variant); the 128-bit form takes vecX, mirroring vload_reg_mem128 above.
pipe_class vstore_reg_mem128(vecX src, vmem16 mem)
%{
  single_instruction;
  mem    : ISS(read);
  src    : S2(read);
  INS01  : ISS;
  NEON_FP : S3;
%}
5893 
//------- Integer ALU operations --------------------------

// Integer ALU reg-reg operation
// Operands needed in EX1, result generated in EX2
// Eg.  ADD     x0, x1, x2
pipe_class ialu_reg_reg(iRegI dst, iRegI src1, iRegI src2)
%{
  single_instruction;
  dst    : EX2(write);
  src1   : EX1(read);
  src2   : EX1(read);
  INS01  : ISS; // Dual issue as instruction 0 or 1
  ALU    : EX2;
%}

// Integer ALU reg-reg operation with constant shift
// Shifted register must be available in LATE_ISS instead of EX1
// Eg.  ADD     x0, x1, x2, LSL #2
pipe_class ialu_reg_reg_shift(iRegI dst, iRegI src1, iRegI src2, immI shift)
%{
  single_instruction;
  dst    : EX2(write);
  src1   : EX1(read);
  src2   : ISS(read); // shifted operand needed earlier than src1
  INS01  : ISS;
  ALU    : EX2;
%}

// Integer ALU reg operation with constant shift
// Eg.  LSL     x0, x1, #shift
pipe_class ialu_reg_shift(iRegI dst, iRegI src1)
%{
  single_instruction;
  dst    : EX2(write);
  src1   : ISS(read);
  INS01  : ISS;
  ALU    : EX2;
%}

// Integer ALU reg-reg operation with variable shift
// Both operands must be available in LATE_ISS instead of EX1
// Result is available in EX1 instead of EX2
// Eg.  LSLV    x0, x1, x2
pipe_class ialu_reg_reg_vshift(iRegI dst, iRegI src1, iRegI src2)
%{
  single_instruction;
  dst    : EX1(write);
  src1   : ISS(read);
  src2   : ISS(read);
  INS01  : ISS;
  ALU    : EX1;
%}

// Integer ALU reg-reg operation with extract
// As for _vshift above, but result generated in EX2
// Eg.  EXTR    x0, x1, x2, #N
// NOTE(review): dst is written in EX2 but the ALU resource is held only in
// EX1 — confirm this asymmetry is intentional in the pipeline model.
pipe_class ialu_reg_reg_extr(iRegI dst, iRegI src1, iRegI src2)
%{
  single_instruction;
  dst    : EX2(write);
  src1   : ISS(read);
  src2   : ISS(read);
  INS1   : ISS; // Can only dual issue as Instruction 1
  ALU    : EX1;
%}

// Integer ALU reg operation
// Eg.  NEG     x0, x1
pipe_class ialu_reg(iRegI dst, iRegI src)
%{
  single_instruction;
  dst    : EX2(write);
  src    : EX1(read);
  INS01  : ISS;
  ALU    : EX2;
%}

// Integer ALU reg-immediate operation
// Eg.  ADD     x0, x1, #N
pipe_class ialu_reg_imm(iRegI dst, iRegI src1)
%{
  single_instruction;
  dst    : EX2(write);
  src1   : EX1(read);
  INS01  : ISS;
  ALU    : EX2;
%}

// Integer ALU immediate operation (no source operands)
// Eg.  MOV     x0, #N
pipe_class ialu_imm(iRegI dst)
%{
  single_instruction;
  dst    : EX1(write);
  INS01  : ISS;
  ALU    : EX1;
%}
5991 
//------- Compare operation -------------------------------

// Compare reg-reg
// Eg.  CMP     x0, x1
pipe_class icmp_reg_reg(rFlagsReg cr, iRegI op1, iRegI op2)
%{
  single_instruction;
//  fixed_latency(16);
  cr     : EX2(write);
  op1    : EX1(read);
  op2    : EX1(read);
  INS01  : ISS;
  ALU    : EX2;
%}

// Compare reg-immediate
// Eg.  CMP     x0, #N
pipe_class icmp_reg_imm(rFlagsReg cr, iRegI op1)
%{
  single_instruction;
//  fixed_latency(16);
  cr     : EX2(write);
  op1    : EX1(read);
  INS01  : ISS;
  ALU    : EX2;
%}
6018 
//------- Conditional instructions ------------------------

// Conditional no operands
// Eg.  CSINC   x0, zr, zr, <cond>
pipe_class icond_none(iRegI dst, rFlagsReg cr)
%{
  single_instruction;
  cr     : EX1(read);
  dst    : EX2(write);
  INS01  : ISS;
  ALU    : EX2;
%}

// Conditional 2 operand
// EG.  CSEL    X0, X1, X2, <cond>
pipe_class icond_reg_reg(iRegI dst, iRegI src1, iRegI src2, rFlagsReg cr)
%{
  single_instruction;
  cr     : EX1(read);
  src1   : EX1(read);
  src2   : EX1(read);
  dst    : EX2(write);
  INS01  : ISS;
  ALU    : EX2;
%}

// Conditional 1 operand
// Eg.  CSINC   x0, x1, x1, <cond>
pipe_class icond_reg(iRegI dst, iRegI src, rFlagsReg cr)
%{
  single_instruction;
  cr     : EX1(read);
  src    : EX1(read);
  dst    : EX2(write);
  INS01  : ISS;
  ALU    : EX2;
%}
6056 
//------- Multiply pipeline operations --------------------

// Multiply reg-reg
// Eg.  MUL     w0, w1, w2
pipe_class imul_reg_reg(iRegI dst, iRegI src1, iRegI src2)
%{
  single_instruction;
  dst    : WR(write);
  src1   : ISS(read);
  src2   : ISS(read);
  INS01  : ISS;
  MAC    : WR;
%}

// Multiply accumulate
// Eg.  MADD    w0, w1, w2, w3
pipe_class imac_reg_reg(iRegI dst, iRegI src1, iRegI src2, iRegI src3)
%{
  single_instruction;
  dst    : WR(write);
  src1   : ISS(read);
  src2   : ISS(read);
  src3   : ISS(read);
  INS01  : ISS;
  MAC    : WR;
%}

// Long (64 bit) multiply reg-reg
// Eg.  MUL     x0, x1, x2
pipe_class lmul_reg_reg(iRegI dst, iRegI src1, iRegI src2)
%{
  single_instruction;
  fixed_latency(3); // Maximum latency for 64 bit mul
  dst    : WR(write);
  src1   : ISS(read);
  src2   : ISS(read);
  INS01  : ISS;
  MAC    : WR;
%}

// Long (64 bit) multiply accumulate
// Eg.  MADD    x0, x1, x2, x3
pipe_class lmac_reg_reg(iRegI dst, iRegI src1, iRegI src2, iRegI src3)
%{
  single_instruction;
  fixed_latency(3); // Maximum latency for 64 bit mul
  dst    : WR(write);
  src1   : ISS(read);
  src2   : ISS(read);
  src3   : ISS(read);
  INS01  : ISS;
  MAC    : WR;
%}
6109 
//------- Divide pipeline operations --------------------

// 32 bit divide
// Eg.  SDIV    w0, w1, w2
pipe_class idiv_reg_reg(iRegI dst, iRegI src1, iRegI src2)
%{
  single_instruction;
  fixed_latency(8); // Maximum latency for 32 bit divide
  dst    : WR(write);
  src1   : ISS(read);
  src2   : ISS(read);
  INS0   : ISS; // Can only dual issue as instruction 0
  DIV    : WR;
%}

// 64 bit divide
// Eg.  SDIV    x0, x1, x2
pipe_class ldiv_reg_reg(iRegI dst, iRegI src1, iRegI src2)
%{
  single_instruction;
  fixed_latency(16); // Maximum latency for 64 bit divide
  dst    : WR(write);
  src1   : ISS(read);
  src2   : ISS(read);
  INS0   : ISS; // Can only dual issue as instruction 0
  DIV    : WR;
%}
6135 
//------- Load pipeline operations ------------------------

// Load - prefetch
// Eg.  PFRM    <mem>
pipe_class iload_prefetch(memory mem)
%{
  single_instruction;
  mem    : ISS(read);
  INS01  : ISS;
  LDST   : WR;
%}

// Load - reg, mem
// Eg.  LDR     x0, <mem>
pipe_class iload_reg_mem(iRegI dst, memory mem)
%{
  single_instruction;
  dst    : WR(write);
  mem    : ISS(read);
  INS01  : ISS;
  LDST   : WR;
%}

// Load - reg, reg (register-offset addressing)
// Eg.  LDR     x0, [sp, x1]
pipe_class iload_reg_reg(iRegI dst, iRegI src)
%{
  single_instruction;
  dst    : WR(write);
  src    : ISS(read);
  INS01  : ISS;
  LDST   : WR;
%}
6169 
//------- Store pipeline operations -----------------------

// Store - zr, mem
// Eg.  STR     zr, <mem>
pipe_class istore_mem(memory mem)
%{
  single_instruction;
  mem    : ISS(read);
  INS01  : ISS;
  LDST   : WR;
%}

// Store - reg, mem
// Eg.  STR     x0, <mem>
pipe_class istore_reg_mem(iRegI src, memory mem)
%{
  single_instruction;
  mem    : ISS(read);
  src    : EX2(read); // store data needed later than the address
  INS01  : ISS;
  LDST   : WR;
%}

// Store - reg, reg (register-offset addressing)
// Eg. STR      x0, [sp, x1]
pipe_class istore_reg_reg(iRegI dst, iRegI src)
%{
  single_instruction;
  dst    : ISS(read); // dst here is the index register, read at issue
  src    : EX2(read);
  INS01  : ISS;
  LDST   : WR;
%}
6203 
//------- Branch pipeline operations ----------------------

// Branch
pipe_class pipe_branch()
%{
  single_instruction;
  INS01  : ISS;
  BRANCH : EX1;
%}

// Conditional branch
pipe_class pipe_branch_cond(rFlagsReg cr)
%{
  single_instruction;
  cr     : EX1(read);
  INS01  : ISS;
  BRANCH : EX1;
%}

// Compare & Branch
// EG.  CBZ/CBNZ
pipe_class pipe_cmp_branch(iRegI op1)
%{
  single_instruction;
  op1    : EX1(read);
  INS01  : ISS;
  BRANCH : EX1;
%}
6232 
//------- Synchronisation operations ----------------------

// Any operation requiring serialization.
// EG.  DMB/Atomic Ops/Load Acquire/Str Release
pipe_class pipe_serial()
%{
  single_instruction;
  force_serialization;
  fixed_latency(16);
  INS01  : ISS(2); // Cannot dual issue with any other instruction
  LDST   : WR;
%}

// Generic big/slow expanded idiom - also serialized
pipe_class pipe_slow()
%{
  instruction_count(10);
  multiple_bundles;
  force_serialization;
  fixed_latency(16);
  INS01  : ISS(2); // Cannot dual issue with any other instruction
  LDST   : WR;
%}

// Empty pipeline class
pipe_class pipe_class_empty()
%{
  single_instruction;
  fixed_latency(0);
%}

// Default pipeline class.
pipe_class pipe_class_default()
%{
  single_instruction;
  fixed_latency(2);
%}

// Pipeline class for compares.
pipe_class pipe_class_compare()
%{
  single_instruction;
  fixed_latency(16);
%}

// Pipeline class for memory operations.
pipe_class pipe_class_memory()
%{
  single_instruction;
  fixed_latency(16);
%}

// Pipeline class for call.
pipe_class pipe_class_call()
%{
  single_instruction;
  fixed_latency(100);
%}

// Define the class for the Nop node.
define %{
   MachNop = pipe_class_empty;
%}
6296 
6297 %}
6298 //----------INSTRUCTIONS-------------------------------------------------------
6299 //
6300 // match      -- States which machine-independent subtree may be replaced
6301 //               by this instruction.
6302 // ins_cost   -- The estimated cost of this instruction is used by instruction
6303 //               selection to identify a minimum cost tree of machine
6304 //               instructions that matches a tree of machine-independent
6305 //               instructions.
6306 // format     -- A string providing the disassembly for this instruction.
6307 //               The value of an instruction's operand may be inserted
6308 //               by referring to it with a '$' prefix.
6309 // opcode     -- Three instruction opcodes may be provided.  These are referred
6310 //               to within an encode class as $primary, $secondary, and $tertiary
//               respectively.  The primary opcode is commonly used to
6312 //               indicate the type of machine instruction, while secondary
6313 //               and tertiary are often used for prefix options or addressing
6314 //               modes.
6315 // ins_encode -- A list of encode classes with parameters. The encode class
6316 //               name must have been defined in an 'enc_class' specification
6317 //               in the encode section of the architecture description.
6318 
6319 // ============================================================================
6320 // Memory (Load/Store) Instructions
6321 
6322 // Load Instructions
6323 
// NOTE(review): the !needs_acquiring_load(n) predicates below exclude loads
// that require acquire semantics; those are presumably matched by separate
// load-acquire rules elsewhere in this file — confirm when editing.

// Load Byte (8 bit signed)
instruct loadB(iRegINoSp dst, memory mem)
%{
  match(Set dst (LoadB mem));
  predicate(!needs_acquiring_load(n));

  ins_cost(4 * INSN_COST);
  format %{ "ldrsbw  $dst, $mem\t# byte" %}

  ins_encode(aarch64_enc_ldrsbw(dst, mem));

  ins_pipe(iload_reg_mem);
%}

// Load Byte (8 bit signed) into long
instruct loadB2L(iRegLNoSp dst, memory mem)
%{
  match(Set dst (ConvI2L (LoadB mem)));
  // predicate looks through the ConvI2L to the underlying load node
  predicate(!needs_acquiring_load(n->in(1)));

  ins_cost(4 * INSN_COST);
  format %{ "ldrsb  $dst, $mem\t# byte" %}

  ins_encode(aarch64_enc_ldrsb(dst, mem));

  ins_pipe(iload_reg_mem);
%}

// Load Byte (8 bit unsigned)
instruct loadUB(iRegINoSp dst, memory mem)
%{
  match(Set dst (LoadUB mem));
  predicate(!needs_acquiring_load(n));

  ins_cost(4 * INSN_COST);
  format %{ "ldrbw  $dst, $mem\t# byte" %}

  ins_encode(aarch64_enc_ldrb(dst, mem));

  ins_pipe(iload_reg_mem);
%}

// Load Byte (8 bit unsigned) into long
instruct loadUB2L(iRegLNoSp dst, memory mem)
%{
  match(Set dst (ConvI2L (LoadUB mem)));
  predicate(!needs_acquiring_load(n->in(1)));

  ins_cost(4 * INSN_COST);
  format %{ "ldrb  $dst, $mem\t# byte" %}

  ins_encode(aarch64_enc_ldrb(dst, mem));

  ins_pipe(iload_reg_mem);
%}

// Load Short (16 bit signed)
instruct loadS(iRegINoSp dst, memory mem)
%{
  match(Set dst (LoadS mem));
  predicate(!needs_acquiring_load(n));

  ins_cost(4 * INSN_COST);
  format %{ "ldrshw  $dst, $mem\t# short" %}

  ins_encode(aarch64_enc_ldrshw(dst, mem));

  ins_pipe(iload_reg_mem);
%}

// Load Short (16 bit signed) into long
instruct loadS2L(iRegLNoSp dst, memory mem)
%{
  match(Set dst (ConvI2L (LoadS mem)));
  predicate(!needs_acquiring_load(n->in(1)));

  ins_cost(4 * INSN_COST);
  format %{ "ldrsh  $dst, $mem\t# short" %}

  ins_encode(aarch64_enc_ldrsh(dst, mem));

  ins_pipe(iload_reg_mem);
%}

// Load Char (16 bit unsigned)
instruct loadUS(iRegINoSp dst, memory mem)
%{
  match(Set dst (LoadUS mem));
  predicate(!needs_acquiring_load(n));

  ins_cost(4 * INSN_COST);
  format %{ "ldrh  $dst, $mem\t# short" %}

  ins_encode(aarch64_enc_ldrh(dst, mem));

  ins_pipe(iload_reg_mem);
%}

// Load Short/Char (16 bit unsigned) into long
instruct loadUS2L(iRegLNoSp dst, memory mem)
%{
  match(Set dst (ConvI2L (LoadUS mem)));
  predicate(!needs_acquiring_load(n->in(1)));

  ins_cost(4 * INSN_COST);
  format %{ "ldrh  $dst, $mem\t# short" %}

  ins_encode(aarch64_enc_ldrh(dst, mem));

  ins_pipe(iload_reg_mem);
%}
6435 
// Load Integer (32 bit signed)
instruct loadI(iRegINoSp dst, memory mem)
%{
  match(Set dst (LoadI mem));
  predicate(!needs_acquiring_load(n));

  ins_cost(4 * INSN_COST);
  format %{ "ldrw  $dst, $mem\t# int" %}

  ins_encode(aarch64_enc_ldrw(dst, mem));

  ins_pipe(iload_reg_mem);
%}

// Load Integer (32 bit signed) into long
instruct loadI2L(iRegLNoSp dst, memory mem)
%{
  match(Set dst (ConvI2L (LoadI mem)));
  predicate(!needs_acquiring_load(n->in(1)));

  ins_cost(4 * INSN_COST);
  format %{ "ldrsw  $dst, $mem\t# int" %}

  ins_encode(aarch64_enc_ldrsw(dst, mem));

  ins_pipe(iload_reg_mem);
%}

// Load Integer (32 bit unsigned) into long
// Matches (AndL (ConvI2L (LoadI)) 0xFFFFFFFF), which an unsigned 32-bit
// load (ldrw zero-extends) implements in one instruction.
instruct loadUI2L(iRegLNoSp dst, memory mem, immL_32bits mask)
%{
  match(Set dst (AndL (ConvI2L (LoadI mem)) mask));
  // predicate digs through AndL and ConvI2L to the underlying LoadI
  predicate(!needs_acquiring_load(n->in(1)->in(1)->as_Load()));

  ins_cost(4 * INSN_COST);
  format %{ "ldrw  $dst, $mem\t# int" %}

  ins_encode(aarch64_enc_ldrw(dst, mem));

  ins_pipe(iload_reg_mem);
%}
6477 
// Load Long (64 bit signed)
instruct loadL(iRegLNoSp dst, memory mem)
%{
  match(Set dst (LoadL mem));
  predicate(!needs_acquiring_load(n));

  ins_cost(4 * INSN_COST);
  // Fixed the disassembly comment: this is a 64-bit (long) load, not int.
  format %{ "ldr  $dst, $mem\t# long" %}

  ins_encode(aarch64_enc_ldr(dst, mem));

  ins_pipe(iload_reg_mem);
%}
6491 
// Load Range (array length)
// NOTE(review): no acquiring-load predicate here, unlike the other loads —
// presumably range loads never need acquire semantics; confirm.
instruct loadRange(iRegINoSp dst, memory mem)
%{
  match(Set dst (LoadRange mem));

  ins_cost(4 * INSN_COST);
  format %{ "ldrw  $dst, $mem\t# range" %}

  ins_encode(aarch64_enc_ldrw(dst, mem));

  ins_pipe(iload_reg_mem);
%}

// Load Pointer
instruct loadP(iRegPNoSp dst, memory mem)
%{
  match(Set dst (LoadP mem));
  predicate(!needs_acquiring_load(n));

  ins_cost(4 * INSN_COST);
  format %{ "ldr  $dst, $mem\t# ptr" %}

  ins_encode(aarch64_enc_ldr(dst, mem));

  ins_pipe(iload_reg_mem);
%}

// Load Compressed Pointer
instruct loadN(iRegNNoSp dst, memory mem)
%{
  match(Set dst (LoadN mem));
  predicate(!needs_acquiring_load(n));

  ins_cost(4 * INSN_COST);
  format %{ "ldrw  $dst, $mem\t# compressed ptr" %}

  ins_encode(aarch64_enc_ldrw(dst, mem));

  ins_pipe(iload_reg_mem);
%}

// Load Klass Pointer
instruct loadKlass(iRegPNoSp dst, memory mem)
%{
  match(Set dst (LoadKlass mem));
  predicate(!needs_acquiring_load(n));

  ins_cost(4 * INSN_COST);
  format %{ "ldr  $dst, $mem\t# class" %}

  ins_encode(aarch64_enc_ldr(dst, mem));

  ins_pipe(iload_reg_mem);
%}

// Load Narrow Klass Pointer
instruct loadNKlass(iRegNNoSp dst, memory mem)
%{
  match(Set dst (LoadNKlass mem));
  predicate(!needs_acquiring_load(n));

  ins_cost(4 * INSN_COST);
  format %{ "ldrw  $dst, $mem\t# compressed class ptr" %}

  ins_encode(aarch64_enc_ldrw(dst, mem));

  ins_pipe(iload_reg_mem);
%}
6560 
// Load Float
// NOTE(review): FP loads use pipe_class_memory rather than iload_reg_mem —
// confirm that the FP load latency isn't better modelled by a NEON class.
instruct loadF(vRegF dst, memory mem)
%{
  match(Set dst (LoadF mem));
  predicate(!needs_acquiring_load(n));

  ins_cost(4 * INSN_COST);
  format %{ "ldrs  $dst, $mem\t# float" %}

  ins_encode( aarch64_enc_ldrs(dst, mem) );

  ins_pipe(pipe_class_memory);
%}

// Load Double
instruct loadD(vRegD dst, memory mem)
%{
  match(Set dst (LoadD mem));
  predicate(!needs_acquiring_load(n));

  ins_cost(4 * INSN_COST);
  format %{ "ldrd  $dst, $mem\t# double" %}

  ins_encode( aarch64_enc_ldrd(dst, mem) );

  ins_pipe(pipe_class_memory);
%}
6588 
6589 
// Load Int Constant
instruct loadConI(iRegINoSp dst, immI src)
%{
  match(Set dst src);

  ins_cost(INSN_COST);
  format %{ "mov $dst, $src\t# int" %}

  ins_encode( aarch64_enc_movw_imm(dst, src) );

  ins_pipe(ialu_imm);
%}

// Load Long Constant
instruct loadConL(iRegLNoSp dst, immL src)
%{
  match(Set dst src);

  ins_cost(INSN_COST);
  format %{ "mov $dst, $src\t# long" %}

  ins_encode( aarch64_enc_mov_imm(dst, src) );

  ins_pipe(ialu_imm);
%}
6615 
// Load Pointer Constant
// Costed at 4 instructions: materializing an arbitrary pointer may need a
// multi-instruction mov sequence.

instruct loadConP(iRegPNoSp dst, immP con)
%{
  match(Set dst con);

  ins_cost(INSN_COST * 4);
  format %{
    "mov  $dst, $con\t# ptr\n\t"
  %}

  ins_encode(aarch64_enc_mov_p(dst, con));

  ins_pipe(ialu_imm);
%}

// Load Null Pointer Constant

instruct loadConP0(iRegPNoSp dst, immP0 con)
%{
  match(Set dst con);

  ins_cost(INSN_COST);
  format %{ "mov  $dst, $con\t# NULL ptr" %}

  ins_encode(aarch64_enc_mov_p0(dst, con));

  ins_pipe(ialu_imm);
%}
6645 
// Load Pointer Constant One

instruct loadConP1(iRegPNoSp dst, immP_1 con)
%{
  match(Set dst con);

  ins_cost(INSN_COST);
  // Fixed the disassembly comment: this loads pointer constant one
  // (immP_1), not NULL — the old text was copy-pasted from loadConP0.
  format %{ "mov  $dst, $con\t# ptr 1" %}

  ins_encode(aarch64_enc_mov_p1(dst, con));

  ins_pipe(ialu_imm);
%}
6659 
// Load Poll Page Constant (materialized with a PC-relative adr)

instruct loadConPollPage(iRegPNoSp dst, immPollPage con)
%{
  match(Set dst con);

  ins_cost(INSN_COST);
  format %{ "adr  $dst, $con\t# Poll Page Ptr" %}

  ins_encode(aarch64_enc_mov_poll_page(dst, con));

  ins_pipe(ialu_imm);
%}

// Load Byte Map Base Constant (card table base, PC-relative adr)

instruct loadByteMapBase(iRegPNoSp dst, immByteMapBase con)
%{
  match(Set dst con);

  ins_cost(INSN_COST);
  format %{ "adr  $dst, $con\t# Byte Map Base" %}

  ins_encode(aarch64_enc_mov_byte_map_base(dst, con));

  ins_pipe(ialu_imm);
%}

// Load Narrow Pointer Constant

instruct loadConN(iRegNNoSp dst, immN con)
%{
  match(Set dst con);

  ins_cost(INSN_COST * 4);
  format %{ "mov  $dst, $con\t# compressed ptr" %}

  ins_encode(aarch64_enc_mov_n(dst, con));

  ins_pipe(ialu_imm);
%}
6701 
// Load Narrow Null Pointer Constant

instruct loadConN0(iRegNNoSp dst, immN0 con)
%{
  match(Set dst con);

  ins_cost(INSN_COST);
  format %{ "mov  $dst, $con\t# compressed NULL ptr" %}

  ins_encode(aarch64_enc_mov_n0(dst, con));

  ins_pipe(ialu_imm);
%}

// Load Narrow Klass Constant

instruct loadConNKlass(iRegNNoSp dst, immNKlass con)
%{
  match(Set dst con);

  ins_cost(INSN_COST);
  format %{ "mov  $dst, $con\t# compressed klass ptr" %}

  ins_encode(aarch64_enc_mov_nk(dst, con));

  ins_pipe(ialu_imm);
%}
6729 
// Load Packed Float Constant
// immFPacked values are encodable directly in an fmov immediate field.

instruct loadConF_packed(vRegF dst, immFPacked con) %{
  match(Set dst con);
  ins_cost(INSN_COST * 4);
  format %{ "fmovs  $dst, $con"%}
  ins_encode %{
    // fmovs takes the immediate as a double, hence the cast
    __ fmovs(as_FloatRegister($dst$$reg), (double)$con$$constant);
  %}

  ins_pipe(fp_imm_s);
%}

// Load Float Constant (not fmov-encodable: loaded from the constant table)

instruct loadConF(vRegF dst, immF con) %{
  match(Set dst con);

  ins_cost(INSN_COST * 4);

  format %{
    "ldrs $dst, [$constantaddress]\t# load from constant table: float=$con\n\t"
  %}

  ins_encode %{
    __ ldrs(as_FloatRegister($dst$$reg), $constantaddress($con));
  %}

  ins_pipe(fp_load_constant_s);
%}
6760 
// Load Packed Double Constant (fmov-encodable immediate)
// NOTE(review): cost is INSN_COST here but INSN_COST * 4 for the float
// variant above — confirm the asymmetry is intentional.

instruct loadConD_packed(vRegD dst, immDPacked con) %{
  match(Set dst con);
  ins_cost(INSN_COST);
  format %{ "fmovd  $dst, $con"%}
  ins_encode %{
    __ fmovd(as_FloatRegister($dst$$reg), $con$$constant);
  %}

  ins_pipe(fp_imm_d);
%}
6773 
// Load Double Constant (not fmov-encodable: loaded from the constant table)

instruct loadConD(vRegD dst, immD con) %{
  match(Set dst con);

  ins_cost(INSN_COST * 5);
  // Fixed the disassembly comment: this loads a double, not a float.
  format %{
    "ldrd $dst, [$constantaddress]\t# load from constant table: double=$con\n\t"
  %}

  ins_encode %{
    __ ldrd(as_FloatRegister($dst$$reg), $constantaddress($con));
  %}

  ins_pipe(fp_load_constant_d);
%}
6790 
// Store Instructions

// Store CMS card-mark Immediate
// Used when the preceding membar can be elided (unnecessary_storestore).
instruct storeimmCM0(immI0 zero, memory mem)
%{
  match(Set mem (StoreCM mem zero));
  predicate(unnecessary_storestore(n));

  ins_cost(INSN_COST);
  format %{ "storestore (elided)\n\t"
            "strb zr, $mem\t# byte" %}

  ins_encode(aarch64_enc_strb0(mem));

  ins_pipe(istore_mem);
%}

// Store CMS card-mark Immediate with intervening StoreStore
// needed when using CMS with no conditional card marking
instruct storeimmCM0_ordered(immI0 zero, memory mem)
%{
  match(Set mem (StoreCM mem zero));

  ins_cost(INSN_COST * 2);
  format %{ "storestore\n\t"
            "dmb ishst"
            "\n\tstrb zr, $mem\t# byte" %}

  ins_encode(aarch64_enc_strb0_ordered(mem));

  ins_pipe(istore_mem);
%}
6823 
// Store Byte
// NOTE(review): the !needs_releasing_store(n) predicates on the stores
// below exclude stores requiring release semantics; those are presumably
// matched by separate store-release rules elsewhere in this file — confirm.
instruct storeB(iRegIorL2I src, memory mem)
%{
  match(Set mem (StoreB mem src));
  predicate(!needs_releasing_store(n));

  ins_cost(INSN_COST);
  format %{ "strb  $src, $mem\t# byte" %}

  ins_encode(aarch64_enc_strb(src, mem));

  ins_pipe(istore_reg_mem);
%}
6837 
6838 
// Store Byte zero
instruct storeimmB0(immI0 zero, memory mem)
%{
  match(Set mem (StoreB mem zero));
  predicate(!needs_releasing_store(n));

  ins_cost(INSN_COST);
  // Fixed the disassembly comment: the encoding (aarch64_enc_strb0) stores
  // the zero register; the old text named a misspelled "rscractch2".
  format %{ "strb  zr, $mem\t# byte" %}

  ins_encode(aarch64_enc_strb0(mem));

  ins_pipe(istore_mem);
%}
6851 
// Store Char/Short
instruct storeC(iRegIorL2I src, memory mem)
%{
  match(Set mem (StoreC mem src));
  predicate(!needs_releasing_store(n));

  ins_cost(INSN_COST);
  format %{ "strh  $src, $mem\t# short" %}

  ins_encode(aarch64_enc_strh(src, mem));

  ins_pipe(istore_reg_mem);
%}

// Store Char/Short zero
instruct storeimmC0(immI0 zero, memory mem)
%{
  match(Set mem (StoreC mem zero));
  predicate(!needs_releasing_store(n));

  ins_cost(INSN_COST);
  format %{ "strh  zr, $mem\t# short" %}

  ins_encode(aarch64_enc_strh0(mem));

  ins_pipe(istore_mem);
%}
6878 
// Store Integer

instruct storeI(iRegIorL2I src, memory mem)
%{
  match(Set mem(StoreI mem src));
  predicate(!needs_releasing_store(n));

  ins_cost(INSN_COST);
  format %{ "strw  $src, $mem\t# int" %}

  ins_encode(aarch64_enc_strw(src, mem));

  ins_pipe(istore_reg_mem);
%}

// Store Integer zero
instruct storeimmI0(immI0 zero, memory mem)
%{
  match(Set mem(StoreI mem zero));
  predicate(!needs_releasing_store(n));

  ins_cost(INSN_COST);
  format %{ "strw  zr, $mem\t# int" %}

  ins_encode(aarch64_enc_strw0(mem));

  ins_pipe(istore_mem);
%}
6906 
// Store Long (64 bit signed)
instruct storeL(iRegL src, memory mem)
%{
  match(Set mem (StoreL mem src));
  predicate(!needs_releasing_store(n));

  ins_cost(INSN_COST);
  // Fixed the disassembly comment: this is a 64-bit (long) store, not int.
  format %{ "str  $src, $mem\t# long" %}

  ins_encode(aarch64_enc_str(src, mem));

  ins_pipe(istore_reg_mem);
%}
6920 
// Store Long (64 bit signed, immediate zero) -- stores zr directly.
instruct storeimmL0(immL0 zero, memory mem)
%{
  match(Set mem (StoreL mem zero));
  predicate(!needs_releasing_store(n));

  ins_cost(INSN_COST);
  // Fixed format comment: 64-bit store of zero ("# int" was incorrect).
  format %{ "str  zr, $mem\t# long" %}

  ins_encode(aarch64_enc_str0(mem));

  ins_pipe(istore_mem);
%}
6934 
// Store Pointer
instruct storeP(iRegP src, memory mem)
%{
  match(Set mem (StoreP mem src));
  // Plain store only; releasing stores are matched by storeP_volatile.
  predicate(!needs_releasing_store(n));

  ins_cost(INSN_COST);
  format %{ "str  $src, $mem\t# ptr" %}

  ins_encode(aarch64_enc_str(src, mem));

  ins_pipe(istore_reg_mem);
%}

// Store Pointer (immediate NULL) -- stores zr directly.
instruct storeimmP0(immP0 zero, memory mem)
%{
  match(Set mem (StoreP mem zero));
  predicate(!needs_releasing_store(n));

  ins_cost(INSN_COST);
  format %{ "str zr, $mem\t# ptr" %}

  ins_encode(aarch64_enc_str0(mem));

  ins_pipe(istore_mem);
%}

// Store Compressed Pointer
instruct storeN(iRegN src, memory mem)
%{
  match(Set mem (StoreN mem src));
  predicate(!needs_releasing_store(n));

  ins_cost(INSN_COST);
  format %{ "strw  $src, $mem\t# compressed ptr" %}

  ins_encode(aarch64_enc_strw(src, mem));

  ins_pipe(istore_reg_mem);
%}

// Store a compressed NULL by reusing rheapbase, which is known to hold
// zero when both the oop and klass encoding bases are NULL (see predicate).
instruct storeImmN0(iRegIHeapbase heapbase, immN0 zero, memory mem)
%{
  match(Set mem (StoreN mem zero));
  predicate(Universe::narrow_oop_base() == NULL &&
            Universe::narrow_klass_base() == NULL &&
            (!needs_releasing_store(n)));

  ins_cost(INSN_COST);
  format %{ "strw  rheapbase, $mem\t# compressed ptr (rheapbase==0)" %}

  ins_encode(aarch64_enc_strw(heapbase, mem));

  ins_pipe(istore_reg_mem);
%}

// Store Float
instruct storeF(vRegF src, memory mem)
%{
  match(Set mem (StoreF mem src));
  predicate(!needs_releasing_store(n));

  ins_cost(INSN_COST);
  format %{ "strs  $src, $mem\t# float" %}

  ins_encode( aarch64_enc_strs(src, mem) );

  ins_pipe(pipe_class_memory);
%}

// TODO
// implement storeImmF0 and storeFImmPacked

// Store Double
instruct storeD(vRegD src, memory mem)
%{
  match(Set mem (StoreD mem src));
  predicate(!needs_releasing_store(n));

  ins_cost(INSN_COST);
  format %{ "strd  $src, $mem\t# double" %}

  ins_encode( aarch64_enc_strd(src, mem) );

  ins_pipe(pipe_class_memory);
%}

// Store Compressed Klass Pointer
instruct storeNKlass(iRegN src, memory mem)
%{
  predicate(!needs_releasing_store(n));
  match(Set mem (StoreNKlass mem src));

  ins_cost(INSN_COST);
  format %{ "strw  $src, $mem\t# compressed klass ptr" %}

  ins_encode(aarch64_enc_strw(src, mem));

  ins_pipe(istore_reg_mem);
%}

// TODO
// implement storeImmD0 and storeDImmPacked

// prefetch instructions
// Must be safe to execute with invalid address (cannot fault).

instruct prefetchalloc( memory mem ) %{
  match(PrefetchAllocation mem);

  ins_cost(INSN_COST);
  format %{ "prfm $mem, PSTL1KEEP\t# Prefetch into level 1 cache write keep" %}

  ins_encode( aarch64_enc_prefetchw(mem) );

  ins_pipe(iload_prefetch);
%}
7053 
7054 //  ---------------- volatile loads and stores ----------------
7055 
// Volatile loads use the AArch64 load-acquire (ldar*) instructions and
// only accept an indirect (base-register) addressing mode -- ldar has no
// offset/index forms.

// Load Byte (8 bit signed)
instruct loadB_volatile(iRegINoSp dst, /* sync_memory*/indirect mem)
%{
  match(Set dst (LoadB mem));

  ins_cost(VOLATILE_REF_COST);
  format %{ "ldarsb  $dst, $mem\t# byte" %}

  ins_encode(aarch64_enc_ldarsb(dst, mem));

  ins_pipe(pipe_serial);
%}

// Load Byte (8 bit signed) into long
instruct loadB2L_volatile(iRegLNoSp dst, /* sync_memory*/indirect mem)
%{
  match(Set dst (ConvI2L (LoadB mem)));

  ins_cost(VOLATILE_REF_COST);
  format %{ "ldarsb  $dst, $mem\t# byte" %}

  ins_encode(aarch64_enc_ldarsb(dst, mem));

  ins_pipe(pipe_serial);
%}

// Load Byte (8 bit unsigned)
instruct loadUB_volatile(iRegINoSp dst, /* sync_memory*/indirect mem)
%{
  match(Set dst (LoadUB mem));

  ins_cost(VOLATILE_REF_COST);
  format %{ "ldarb  $dst, $mem\t# byte" %}

  ins_encode(aarch64_enc_ldarb(dst, mem));

  ins_pipe(pipe_serial);
%}

// Load Byte (8 bit unsigned) into long
instruct loadUB2L_volatile(iRegLNoSp dst, /* sync_memory*/indirect mem)
%{
  match(Set dst (ConvI2L (LoadUB mem)));

  ins_cost(VOLATILE_REF_COST);
  format %{ "ldarb  $dst, $mem\t# byte" %}

  ins_encode(aarch64_enc_ldarb(dst, mem));

  ins_pipe(pipe_serial);
%}

// Load Short (16 bit signed)
instruct loadS_volatile(iRegINoSp dst, /* sync_memory*/indirect mem)
%{
  match(Set dst (LoadS mem));

  ins_cost(VOLATILE_REF_COST);
  format %{ "ldarshw  $dst, $mem\t# short" %}

  ins_encode(aarch64_enc_ldarshw(dst, mem));

  ins_pipe(pipe_serial);
%}

// Load Char/Short (16 bit unsigned)
instruct loadUS_volatile(iRegINoSp dst, /* sync_memory*/indirect mem)
%{
  match(Set dst (LoadUS mem));

  ins_cost(VOLATILE_REF_COST);
  format %{ "ldarhw  $dst, $mem\t# short" %}

  ins_encode(aarch64_enc_ldarhw(dst, mem));

  ins_pipe(pipe_serial);
%}

// Load Short/Char (16 bit unsigned) into long
instruct loadUS2L_volatile(iRegLNoSp dst, /* sync_memory*/indirect mem)
%{
  match(Set dst (ConvI2L (LoadUS mem)));

  ins_cost(VOLATILE_REF_COST);
  format %{ "ldarh  $dst, $mem\t# short" %}

  ins_encode(aarch64_enc_ldarh(dst, mem));

  ins_pipe(pipe_serial);
%}
7145 
// Load Short/Char (16 bit signed) into long
instruct loadS2L_volatile(iRegLNoSp dst, /* sync_memory*/indirect mem)
%{
  match(Set dst (ConvI2L (LoadS mem)));

  ins_cost(VOLATILE_REF_COST);
  // Fixed format text: the encoding is aarch64_enc_ldarsh (sign-extending
  // load-acquire halfword); the format previously claimed plain "ldarh".
  format %{ "ldarsh  $dst, $mem\t# short" %}

  ins_encode(aarch64_enc_ldarsh(dst, mem));

  ins_pipe(pipe_serial);
%}
7158 
// Load Integer (32 bit signed)
instruct loadI_volatile(iRegINoSp dst, /* sync_memory*/indirect mem)
%{
  match(Set dst (LoadI mem));

  ins_cost(VOLATILE_REF_COST);
  format %{ "ldarw  $dst, $mem\t# int" %}

  ins_encode(aarch64_enc_ldarw(dst, mem));

  ins_pipe(pipe_serial);
%}

// Load Integer (32 bit unsigned) into long
// The AndL with the 0xFFFFFFFF mask is absorbed: ldarw already
// zero-extends the 32-bit result into the 64-bit register.
instruct loadUI2L_volatile(iRegLNoSp dst, /* sync_memory*/indirect mem, immL_32bits mask)
%{
  match(Set dst (AndL (ConvI2L (LoadI mem)) mask));

  ins_cost(VOLATILE_REF_COST);
  format %{ "ldarw  $dst, $mem\t# int" %}

  ins_encode(aarch64_enc_ldarw(dst, mem));

  ins_pipe(pipe_serial);
%}
7184 
// Load Long (64 bit signed)
instruct loadL_volatile(iRegLNoSp dst, /* sync_memory*/indirect mem)
%{
  match(Set dst (LoadL mem));

  ins_cost(VOLATILE_REF_COST);
  // Fixed format comment: 64-bit long load ("# int" was incorrect).
  format %{ "ldar  $dst, $mem\t# long" %}

  ins_encode(aarch64_enc_ldar(dst, mem));

  ins_pipe(pipe_serial);
%}
7197 
// Load Pointer
instruct loadP_volatile(iRegPNoSp dst, /* sync_memory*/indirect mem)
%{
  match(Set dst (LoadP mem));

  ins_cost(VOLATILE_REF_COST);
  format %{ "ldar  $dst, $mem\t# ptr" %}

  ins_encode(aarch64_enc_ldar(dst, mem));

  ins_pipe(pipe_serial);
%}

// Load Compressed Pointer
instruct loadN_volatile(iRegNNoSp dst, /* sync_memory*/indirect mem)
%{
  match(Set dst (LoadN mem));

  ins_cost(VOLATILE_REF_COST);
  format %{ "ldarw  $dst, $mem\t# compressed ptr" %}

  ins_encode(aarch64_enc_ldarw(dst, mem));

  ins_pipe(pipe_serial);
%}

// Load Float
instruct loadF_volatile(vRegF dst, /* sync_memory*/indirect mem)
%{
  match(Set dst (LoadF mem));

  ins_cost(VOLATILE_REF_COST);
  format %{ "ldars  $dst, $mem\t# float" %}

  ins_encode( aarch64_enc_fldars(dst, mem) );

  ins_pipe(pipe_serial);
%}

// Load Double
instruct loadD_volatile(vRegD dst, /* sync_memory*/indirect mem)
%{
  match(Set dst (LoadD mem));

  ins_cost(VOLATILE_REF_COST);
  format %{ "ldard  $dst, $mem\t# double" %}

  ins_encode( aarch64_enc_fldard(dst, mem) );

  ins_pipe(pipe_serial);
%}

// Volatile stores use the AArch64 store-release (stlr*) instructions,
// again restricted to indirect addressing.

// Store Byte
instruct storeB_volatile(iRegIorL2I src, /* sync_memory*/indirect mem)
%{
  match(Set mem (StoreB mem src));

  ins_cost(VOLATILE_REF_COST);
  format %{ "stlrb  $src, $mem\t# byte" %}

  ins_encode(aarch64_enc_stlrb(src, mem));

  ins_pipe(pipe_class_memory);
%}

// Store Char/Short
instruct storeC_volatile(iRegIorL2I src, /* sync_memory*/indirect mem)
%{
  match(Set mem (StoreC mem src));

  ins_cost(VOLATILE_REF_COST);
  format %{ "stlrh  $src, $mem\t# short" %}

  ins_encode(aarch64_enc_stlrh(src, mem));

  ins_pipe(pipe_class_memory);
%}

// Store Integer

instruct storeI_volatile(iRegIorL2I src, /* sync_memory*/indirect mem)
%{
  match(Set mem(StoreI mem src));

  ins_cost(VOLATILE_REF_COST);
  format %{ "stlrw  $src, $mem\t# int" %}

  ins_encode(aarch64_enc_stlrw(src, mem));

  ins_pipe(pipe_class_memory);
%}
7289 
// Store Long (64 bit signed)
instruct storeL_volatile(iRegL src, /* sync_memory*/indirect mem)
%{
  match(Set mem (StoreL mem src));

  ins_cost(VOLATILE_REF_COST);
  // Fixed format comment: 64-bit long store ("# int" was incorrect).
  format %{ "stlr  $src, $mem\t# long" %}

  ins_encode(aarch64_enc_stlr(src, mem));

  ins_pipe(pipe_class_memory);
%}
7302 
// Store Pointer
instruct storeP_volatile(iRegP src, /* sync_memory*/indirect mem)
%{
  match(Set mem (StoreP mem src));

  ins_cost(VOLATILE_REF_COST);
  format %{ "stlr  $src, $mem\t# ptr" %}

  ins_encode(aarch64_enc_stlr(src, mem));

  ins_pipe(pipe_class_memory);
%}

// Store Compressed Pointer
instruct storeN_volatile(iRegN src, /* sync_memory*/indirect mem)
%{
  match(Set mem (StoreN mem src));

  ins_cost(VOLATILE_REF_COST);
  format %{ "stlrw  $src, $mem\t# compressed ptr" %}

  ins_encode(aarch64_enc_stlrw(src, mem));

  ins_pipe(pipe_class_memory);
%}

// Store Float
instruct storeF_volatile(vRegF src, /* sync_memory*/indirect mem)
%{
  match(Set mem (StoreF mem src));

  ins_cost(VOLATILE_REF_COST);
  format %{ "stlrs  $src, $mem\t# float" %}

  ins_encode( aarch64_enc_fstlrs(src, mem) );

  ins_pipe(pipe_class_memory);
%}

// TODO
// implement storeImmF0 and storeFImmPacked

// Store Double
instruct storeD_volatile(vRegD src, /* sync_memory*/indirect mem)
%{
  match(Set mem (StoreD mem src));

  ins_cost(VOLATILE_REF_COST);
  format %{ "stlrd  $src, $mem\t# double" %}

  ins_encode( aarch64_enc_fstlrd(src, mem) );

  ins_pipe(pipe_class_memory);
%}
7357 
7358 //  ---------------- end of volatile loads and stores ----------------
7359 
7360 // ============================================================================
7361 // BSWAP Instructions
7362 
// Reverse bytes in a 32-bit value (Integer.reverseBytes).
instruct bytes_reverse_int(iRegINoSp dst, iRegIorL2I src) %{
  match(Set dst (ReverseBytesI src));

  ins_cost(INSN_COST);
  format %{ "revw  $dst, $src" %}

  ins_encode %{
    __ revw(as_Register($dst$$reg), as_Register($src$$reg));
  %}

  ins_pipe(ialu_reg);
%}

// Reverse bytes in a 64-bit value (Long.reverseBytes).
instruct bytes_reverse_long(iRegLNoSp dst, iRegL src) %{
  match(Set dst (ReverseBytesL src));

  ins_cost(INSN_COST);
  format %{ "rev  $dst, $src" %}

  ins_encode %{
    __ rev(as_Register($dst$$reg), as_Register($src$$reg));
  %}

  ins_pipe(ialu_reg);
%}

// Reverse bytes in an unsigned 16-bit value (Character.reverseBytes).
instruct bytes_reverse_unsigned_short(iRegINoSp dst, iRegIorL2I src) %{
  match(Set dst (ReverseBytesUS src));

  ins_cost(INSN_COST);
  format %{ "rev16w  $dst, $src" %}

  ins_encode %{
    __ rev16w(as_Register($dst$$reg), as_Register($src$$reg));
  %}

  ins_pipe(ialu_reg);
%}

// Reverse bytes in a signed 16-bit value (Short.reverseBytes);
// the sbfmw re-sign-extends the low 16 bits after the byte swap.
instruct bytes_reverse_short(iRegINoSp dst, iRegIorL2I src) %{
  match(Set dst (ReverseBytesS src));

  ins_cost(INSN_COST);
  format %{ "rev16w  $dst, $src\n\t"
            "sbfmw $dst, $dst, #0, #15" %}

  ins_encode %{
    __ rev16w(as_Register($dst$$reg), as_Register($src$$reg));
    __ sbfmw(as_Register($dst$$reg), as_Register($dst$$reg), 0U, 15U);
  %}

  ins_pipe(ialu_reg);
%}
7416 
7417 // ============================================================================
7418 // Zero Count Instructions
7419 
// Count leading zeros, 32-bit (Integer.numberOfLeadingZeros).
instruct countLeadingZerosI(iRegINoSp dst, iRegIorL2I src) %{
  match(Set dst (CountLeadingZerosI src));

  ins_cost(INSN_COST);
  format %{ "clzw  $dst, $src" %}
  ins_encode %{
    __ clzw(as_Register($dst$$reg), as_Register($src$$reg));
  %}

  ins_pipe(ialu_reg);
%}

// Count leading zeros, 64-bit (Long.numberOfLeadingZeros).
instruct countLeadingZerosL(iRegINoSp dst, iRegL src) %{
  match(Set dst (CountLeadingZerosL src));

  ins_cost(INSN_COST);
  format %{ "clz   $dst, $src" %}
  ins_encode %{
    __ clz(as_Register($dst$$reg), as_Register($src$$reg));
  %}

  ins_pipe(ialu_reg);
%}

// Count trailing zeros, 32-bit: AArch64 has no ctz instruction, so
// bit-reverse (rbitw) then count leading zeros.
instruct countTrailingZerosI(iRegINoSp dst, iRegIorL2I src) %{
  match(Set dst (CountTrailingZerosI src));

  ins_cost(INSN_COST * 2);
  format %{ "rbitw  $dst, $src\n\t"
            "clzw   $dst, $dst" %}
  ins_encode %{
    __ rbitw(as_Register($dst$$reg), as_Register($src$$reg));
    __ clzw(as_Register($dst$$reg), as_Register($dst$$reg));
  %}

  ins_pipe(ialu_reg);
%}

// Count trailing zeros, 64-bit: bit-reverse then clz, as above.
instruct countTrailingZerosL(iRegINoSp dst, iRegL src) %{
  match(Set dst (CountTrailingZerosL src));

  ins_cost(INSN_COST * 2);
  format %{ "rbit   $dst, $src\n\t"
            "clz    $dst, $dst" %}
  ins_encode %{
    __ rbit(as_Register($dst$$reg), as_Register($src$$reg));
    __ clz(as_Register($dst$$reg), as_Register($dst$$reg));
  %}

  ins_pipe(ialu_reg);
%}
7471 
7472 //---------- Population Count Instructions -------------------------------------
7473 //
7474 
// Population count, 32-bit (Integer.bitCount), via the SIMD cnt/addv
// sequence: move to a vector register, count bits per byte, sum the bytes.
instruct popCountI(iRegINoSp dst, iRegIorL2I src, vRegF tmp) %{
  predicate(UsePopCountInstruction);
  match(Set dst (PopCountI src));
  effect(TEMP tmp);
  ins_cost(INSN_COST * 13);

  format %{ "movw   $src, $src\n\t"
            "mov    $tmp, $src\t# vector (1D)\n\t"
            "cnt    $tmp, $tmp\t# vector (8B)\n\t"
            "addv   $tmp, $tmp\t# vector (8B)\n\t"
            "mov    $dst, $tmp\t# vector (1D)" %}
  ins_encode %{
    // NOTE(review): this writes to $src while the rule only declares
    // TEMP tmp -- verify the register allocator is informed that src is
    // modified (e.g. via USE_KILL), or that the zero-extension is benign.
    __ movw($src$$Register, $src$$Register); // ensure top 32 bits 0
    __ mov($tmp$$FloatRegister, __ T1D, 0, $src$$Register);
    __ cnt($tmp$$FloatRegister, __ T8B, $tmp$$FloatRegister);
    __ addv($tmp$$FloatRegister, __ T8B, $tmp$$FloatRegister);
    __ mov($dst$$Register, $tmp$$FloatRegister, __ T1D, 0);
  %}

  ins_pipe(pipe_class_default);
%}
7496 
// Population count of a 32-bit value loaded from memory: loads straight
// into the vector register (ldrs), skipping the GPR round-trip.
instruct popCountI_mem(iRegINoSp dst, memory mem, vRegF tmp) %{
  predicate(UsePopCountInstruction);
  match(Set dst (PopCountI (LoadI mem)));
  effect(TEMP tmp);
  ins_cost(INSN_COST * 13);

  format %{ "ldrs   $tmp, $mem\n\t"
            "cnt    $tmp, $tmp\t# vector (8B)\n\t"
            "addv   $tmp, $tmp\t# vector (8B)\n\t"
            "mov    $dst, $tmp\t# vector (1D)" %}
  ins_encode %{
    FloatRegister tmp_reg = as_FloatRegister($tmp$$reg);
    loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldrs, tmp_reg, $mem->opcode(),
               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
    __ cnt($tmp$$FloatRegister, __ T8B, $tmp$$FloatRegister);
    __ addv($tmp$$FloatRegister, __ T8B, $tmp$$FloatRegister);
    __ mov($dst$$Register, $tmp$$FloatRegister, __ T1D, 0);
  %}

  ins_pipe(pipe_class_default);
%}

// Note: Long.bitCount(long) returns an int.
instruct popCountL(iRegINoSp dst, iRegL src, vRegD tmp) %{
  predicate(UsePopCountInstruction);
  match(Set dst (PopCountL src));
  effect(TEMP tmp);
  ins_cost(INSN_COST * 13);

  format %{ "mov    $tmp, $src\t# vector (1D)\n\t"
            "cnt    $tmp, $tmp\t# vector (8B)\n\t"
            "addv   $tmp, $tmp\t# vector (8B)\n\t"
            "mov    $dst, $tmp\t# vector (1D)" %}
  ins_encode %{
    __ mov($tmp$$FloatRegister, __ T1D, 0, $src$$Register);
    __ cnt($tmp$$FloatRegister, __ T8B, $tmp$$FloatRegister);
    __ addv($tmp$$FloatRegister, __ T8B, $tmp$$FloatRegister);
    __ mov($dst$$Register, $tmp$$FloatRegister, __ T1D, 0);
  %}

  ins_pipe(pipe_class_default);
%}

// Population count of a 64-bit value loaded from memory (ldrd into the
// vector register, then cnt/addv as above).
instruct popCountL_mem(iRegINoSp dst, memory mem, vRegD tmp) %{
  predicate(UsePopCountInstruction);
  match(Set dst (PopCountL (LoadL mem)));
  effect(TEMP tmp);
  ins_cost(INSN_COST * 13);

  format %{ "ldrd   $tmp, $mem\n\t"
            "cnt    $tmp, $tmp\t# vector (8B)\n\t"
            "addv   $tmp, $tmp\t# vector (8B)\n\t"
            "mov    $dst, $tmp\t# vector (1D)" %}
  ins_encode %{
    FloatRegister tmp_reg = as_FloatRegister($tmp$$reg);
    loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldrd, tmp_reg, $mem->opcode(),
               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
    __ cnt($tmp$$FloatRegister, __ T8B, $tmp$$FloatRegister);
    __ addv($tmp$$FloatRegister, __ T8B, $tmp$$FloatRegister);
    __ mov($dst$$Register, $tmp$$FloatRegister, __ T1D, 0);
  %}

  ins_pipe(pipe_class_default);
%}
7561 
7562 // ============================================================================
7563 // MemBar Instruction
7564 
// LoadFence: orders subsequent loads/stores after prior loads.
instruct load_fence() %{
  match(LoadFence);
  ins_cost(VOLATILE_REF_COST);

  format %{ "load_fence" %}

  ins_encode %{
    __ membar(Assembler::LoadLoad|Assembler::LoadStore);
  %}
  ins_pipe(pipe_serial);
%}

// Acquire barrier elided when unnecessary_acquire(n) (defined elsewhere
// in this file) determines a preceding load-acquire already provides the
// required ordering. Only a block comment is emitted.
instruct unnecessary_membar_acquire() %{
  predicate(unnecessary_acquire(n));
  match(MemBarAcquire);
  ins_cost(0);

  format %{ "membar_acquire (elided)" %}

  ins_encode %{
    __ block_comment("membar_acquire (elided)");
  %}

  ins_pipe(pipe_class_empty);
%}

instruct membar_acquire() %{
  match(MemBarAcquire);
  ins_cost(VOLATILE_REF_COST);

  // NOTE(review): format shows "dmb ish" but the encoding requests
  // LoadLoad|LoadStore, which presumably emits a load-only barrier
  // (dmb ishld) -- confirm against MacroAssembler::membar.
  format %{ "membar_acquire\n\t"
            "dmb ish" %}

  ins_encode %{
    __ block_comment("membar_acquire");
    __ membar(Assembler::LoadLoad|Assembler::LoadStore);
  %}

  ins_pipe(pipe_serial);
%}


// Lock-acquire barrier: always elided here (the CAS used for locking
// already provides the required ordering); emits only a comment.
instruct membar_acquire_lock() %{
  match(MemBarAcquireLock);
  ins_cost(VOLATILE_REF_COST);

  format %{ "membar_acquire_lock (elided)" %}

  ins_encode %{
    __ block_comment("membar_acquire_lock (elided)");
  %}

  ins_pipe(pipe_serial);
%}

// StoreFence: orders subsequent stores after prior loads/stores.
instruct store_fence() %{
  match(StoreFence);
  ins_cost(VOLATILE_REF_COST);

  format %{ "store_fence" %}

  ins_encode %{
    __ membar(Assembler::LoadStore|Assembler::StoreStore);
  %}
  ins_pipe(pipe_serial);
%}

// Release barrier elided when unnecessary_release(n) determines a
// following store-release already provides the ordering.
instruct unnecessary_membar_release() %{
  predicate(unnecessary_release(n));
  match(MemBarRelease);
  ins_cost(0);

  format %{ "membar_release (elided)" %}

  ins_encode %{
    __ block_comment("membar_release (elided)");
  %}
  ins_pipe(pipe_serial);
%}

instruct membar_release() %{
  match(MemBarRelease);
  ins_cost(VOLATILE_REF_COST);

  format %{ "membar_release\n\t"
            "dmb ish" %}

  ins_encode %{
    __ block_comment("membar_release");
    __ membar(Assembler::LoadStore|Assembler::StoreStore);
  %}
  ins_pipe(pipe_serial);
%}

instruct membar_storestore() %{
  match(MemBarStoreStore);
  ins_cost(VOLATILE_REF_COST);

  format %{ "MEMBAR-store-store" %}

  ins_encode %{
    __ membar(Assembler::StoreStore);
  %}
  ins_pipe(pipe_serial);
%}

// Lock-release barrier: always elided (the releasing store/CAS of the
// unlock sequence provides the ordering).
instruct membar_release_lock() %{
  match(MemBarReleaseLock);
  ins_cost(VOLATILE_REF_COST);

  format %{ "membar_release_lock (elided)" %}

  ins_encode %{
    __ block_comment("membar_release_lock (elided)");
  %}

  ins_pipe(pipe_serial);
%}

// Full (volatile) barrier elided when unnecessary_volatile(n) shows the
// surrounding ldar/stlr sequence already provides the ordering.
instruct unnecessary_membar_volatile() %{
  predicate(unnecessary_volatile(n));
  match(MemBarVolatile);
  ins_cost(0);

  format %{ "membar_volatile (elided)" %}

  ins_encode %{
    __ block_comment("membar_volatile (elided)");
  %}

  ins_pipe(pipe_serial);
%}

// High cost discourages the matcher from choosing this over the elided
// form above when the latter's predicate holds.
instruct membar_volatile() %{
  match(MemBarVolatile);
  ins_cost(VOLATILE_REF_COST*100);

  format %{ "membar_volatile\n\t"
             "dmb ish"%}

  ins_encode %{
    __ block_comment("membar_volatile");
    __ membar(Assembler::StoreLoad);
  %}

  ins_pipe(pipe_serial);
%}
7712 
7713 // ============================================================================
7714 // Cast/Convert Instructions
7715 
// CastX2P: reinterpret a long as a pointer; a plain register move,
// elided entirely when dst and src were allocated to the same register.
instruct castX2P(iRegPNoSp dst, iRegL src) %{
  match(Set dst (CastX2P src));

  ins_cost(INSN_COST);
  format %{ "mov $dst, $src\t# long -> ptr" %}

  ins_encode %{
    if ($dst$$reg != $src$$reg) {
      __ mov(as_Register($dst$$reg), as_Register($src$$reg));
    }
  %}

  ins_pipe(ialu_reg);
%}

// CastP2X: reinterpret a pointer as a long; same move-or-nothing pattern.
instruct castP2X(iRegLNoSp dst, iRegP src) %{
  match(Set dst (CastP2X src));

  ins_cost(INSN_COST);
  format %{ "mov $dst, $src\t# ptr -> long" %}

  ins_encode %{
    if ($dst$$reg != $src$$reg) {
      __ mov(as_Register($dst$$reg), as_Register($src$$reg));
    }
  %}

  ins_pipe(ialu_reg);
%}

// Convert oop into int for vectors alignment masking
instruct convP2I(iRegINoSp dst, iRegP src) %{
  match(Set dst (ConvL2I (CastP2X src)));

  ins_cost(INSN_COST);
  // movw zero-extends, discarding the upper 32 bits of the pointer.
  format %{ "movw $dst, $src\t# ptr -> int" %}
  ins_encode %{
    __ movw($dst$$Register, $src$$Register);
  %}

  ins_pipe(ialu_reg);
%}
7758 
7759 // Convert compressed oop into int for vectors alignment masking
7760 // in case of 32bit oops (heap < 4Gb).
// Convert a compressed oop directly to int: with a zero narrow-oop shift
// the compressed bits are the low 32 bits of the address, so a movw of
// the narrow oop suffices (no decode needed).
instruct convN2I(iRegINoSp dst, iRegN src)
%{
  predicate(Universe::narrow_oop_shift() == 0);
  match(Set dst (ConvL2I (CastP2X (DecodeN src))));

  ins_cost(INSN_COST);
  // Fixed format text: was "mov dst, $src" -- missing the '$' on dst and
  // naming "mov" where the encoding emits movw.
  format %{ "movw  $dst, $src\t# compressed ptr -> int" %}
  ins_encode %{
    __ movw($dst$$Register, $src$$Register);
  %}

  ins_pipe(ialu_reg);
%}
7774 
7775 
// Convert oop pointer into compressed form
instruct encodeHeapOop(iRegNNoSp dst, iRegP src, rFlagsReg cr) %{
  predicate(n->bottom_type()->make_ptr()->ptr() != TypePtr::NotNull);
  match(Set dst (EncodeP src));
  effect(KILL cr);
  ins_cost(INSN_COST * 3);
  format %{ "encode_heap_oop $dst, $src" %}
  ins_encode %{
    Register s = $src$$Register;
    Register d = $dst$$Register;
    __ encode_heap_oop(d, s);
  %}
  ins_pipe(ialu_reg);
%}

// Encode a pointer statically known to be non-null: skips the null check
// and (unlike the rule above) declares no KILL of the flags.
// NOTE(review): the cr operand is listed but appears in neither match nor
// effect -- confirm this is intentional.
instruct encodeHeapOop_not_null(iRegNNoSp dst, iRegP src, rFlagsReg cr) %{
  predicate(n->bottom_type()->make_ptr()->ptr() == TypePtr::NotNull);
  match(Set dst (EncodeP src));
  ins_cost(INSN_COST * 3);
  format %{ "encode_heap_oop_not_null $dst, $src" %}
  ins_encode %{
    __ encode_heap_oop_not_null($dst$$Register, $src$$Register);
  %}
  ins_pipe(ialu_reg);
%}

// Decode a compressed oop that may be null (and is not a constant).
instruct decodeHeapOop(iRegPNoSp dst, iRegN src, rFlagsReg cr) %{
  predicate(n->bottom_type()->is_ptr()->ptr() != TypePtr::NotNull &&
            n->bottom_type()->is_ptr()->ptr() != TypePtr::Constant);
  match(Set dst (DecodeN src));
  ins_cost(INSN_COST * 3);
  format %{ "decode_heap_oop $dst, $src" %}
  ins_encode %{
    Register s = $src$$Register;
    Register d = $dst$$Register;
    __ decode_heap_oop(d, s);
  %}
  ins_pipe(ialu_reg);
%}

// Decode a compressed oop known to be non-null (or constant): skips the
// null check performed by the general rule above.
instruct decodeHeapOop_not_null(iRegPNoSp dst, iRegN src, rFlagsReg cr) %{
  predicate(n->bottom_type()->is_ptr()->ptr() == TypePtr::NotNull ||
            n->bottom_type()->is_ptr()->ptr() == TypePtr::Constant);
  match(Set dst (DecodeN src));
  ins_cost(INSN_COST * 3);
  format %{ "decode_heap_oop_not_null $dst, $src" %}
  ins_encode %{
    Register s = $src$$Register;
    Register d = $dst$$Register;
    __ decode_heap_oop_not_null(d, s);
  %}
  ins_pipe(ialu_reg);
%}
7829 
7830 // n.b. AArch64 implementations of encode_klass_not_null and
7831 // decode_klass_not_null do not modify the flags register so, unlike
7832 // Intel, we don't kill CR as a side effect here
7833 
// Compress a klass pointer known to be non-null.
instruct encodeKlass_not_null(iRegNNoSp dst, iRegP src) %{
  match(Set dst (EncodePKlass src));

  ins_cost(INSN_COST * 3);
  format %{ "encode_klass_not_null $dst,$src" %}

  ins_encode %{
    Register src_reg = as_Register($src$$reg);
    Register dst_reg = as_Register($dst$$reg);
    __ encode_klass_not_null(dst_reg, src_reg);
  %}

   ins_pipe(ialu_reg);
%}

// Decompress a klass pointer known to be non-null. The one-register
// MacroAssembler variant is used when dst and src were allocated to the
// same register.
instruct decodeKlass_not_null(iRegPNoSp dst, iRegN src) %{
  match(Set dst (DecodeNKlass src));

  ins_cost(INSN_COST * 3);
  format %{ "decode_klass_not_null $dst,$src" %}

  ins_encode %{
    Register src_reg = as_Register($src$$reg);
    Register dst_reg = as_Register($dst$$reg);
    if (dst_reg != src_reg) {
      __ decode_klass_not_null(dst_reg, src_reg);
    } else {
      __ decode_klass_not_null(dst_reg);
    }
  %}

   ins_pipe(ialu_reg);
%}
7867 
// CheckCastPP is a compile-time type assertion only; no code is emitted
// (size(0)) -- dst is matched to itself.
instruct checkCastPP(iRegPNoSp dst)
%{
  match(Set dst (CheckCastPP dst));

  size(0);
  format %{ "# checkcastPP of $dst" %}
  ins_encode(/* empty encoding */);
  ins_pipe(pipe_class_empty);
%}

// CastPP likewise emits no code; it exists to carry type information.
instruct castPP(iRegPNoSp dst)
%{
  match(Set dst (CastPP dst));

  size(0);
  format %{ "# castPP of $dst" %}
  ins_encode(/* empty encoding */);
  ins_pipe(pipe_class_empty);
%}

// CastII likewise emits no code; zero cost so it never blocks matching.
instruct castII(iRegI dst)
%{
  match(Set dst (CastII dst));

  size(0);
  format %{ "# castII of $dst" %}
  ins_encode(/* empty encoding */);
  ins_cost(0);
  ins_pipe(pipe_class_empty);
%}
7898 
7899 // ============================================================================
7900 // Atomic operation instructions
7901 //
7902 // Intel and SPARC both implement Ideal Node LoadPLocked and
7903 // Store{PIL}Conditional instructions using a normal load for the
7904 // LoadPLocked and a CAS for the Store{PIL}Conditional.
7905 //
7906 // The ideal code appears only to use LoadPLocked/StorePLocked as a
7907 // pair to lock object allocations from Eden space when not using
7908 // TLABs.
7909 //
7910 // There does not appear to be a Load{IL}Locked Ideal Node and the
7911 // Ideal code appears to use Store{IL}Conditional as an alias for CAS
7912 // and to use StoreIConditional only for 32-bit and StoreLConditional
7913 // only for 64-bit.
7914 //
7915 // We implement LoadPLocked and StorePLocked instructions using,
7916 // respectively the AArch64 hw load-exclusive and store-conditional
7917 // instructions. Whereas we must implement each of
7918 // Store{IL}Conditional using a CAS which employs a pair of
7919 // instructions comprising a load-exclusive followed by a
7920 // store-conditional.
7921 
7922 
7923 // Locked-load (linked load) of the current heap-top
7924 // used when updating the eden heap top
7925 // implemented using ldaxr on AArch64
7926 
// LoadPLocked: load-exclusive-acquire (ldaxr) of the heap top; pairs with
// storePConditional below.
instruct loadPLocked(iRegPNoSp dst, indirect mem)
%{
  match(Set dst (LoadPLocked mem));

  ins_cost(VOLATILE_REF_COST);

  format %{ "ldaxr $dst, $mem\t# ptr linked acquire" %}

  ins_encode(aarch64_enc_ldaxr(dst, mem));

  ins_pipe(pipe_serial);
%}

// StorePConditional: store-exclusive-release (stlxr) paired with the
// ldaxr above. Sets EQ in the flags on success; oldval is implicit in
// the exclusive monitor established by the ldaxr, so only newval is
// passed to the encoding.
instruct storePConditional(memory heap_top_ptr, iRegP oldval, iRegP newval, rFlagsReg cr)
%{
  match(Set cr (StorePConditional heap_top_ptr (Binary oldval newval)));

  ins_cost(VOLATILE_REF_COST);

 // TODO
 // do we need to do a store-conditional release or can we just use a
 // plain store-conditional?

  format %{
    "stlxr rscratch1, $newval, $heap_top_ptr\t# ptr cond release"
    "cmpw rscratch1, zr\t# EQ on successful write"
  %}

  ins_encode(aarch64_enc_stlxr(newval, heap_top_ptr));

  ins_pipe(pipe_serial);
%}
7964 
7965 
7966 // storeLConditional is used by PhaseMacroExpand::expand_lock_node
7967 // when attempting to rebias a lock towards the current thread.  We
7968 // must use the acquire form of cmpxchg in order to guarantee acquire
7969 // semantics in this case.
// StoreLConditional: implemented as a 64-bit CAS with acquire semantics
// (see the comment above); flags end up EQ on success.
instruct storeLConditional(indirect mem, iRegLNoSp oldval, iRegLNoSp newval, rFlagsReg cr)
%{
  match(Set cr (StoreLConditional mem (Binary oldval newval)));

  ins_cost(VOLATILE_REF_COST);

  format %{
    "cmpxchg rscratch1, $mem, $oldval, $newval, $mem\t# if $mem == $oldval then $mem <-- $newval"
    "cmpw rscratch1, zr\t# EQ on successful write"
  %}

  ins_encode(aarch64_enc_cmpxchg_acq(mem, oldval, newval));

  ins_pipe(pipe_slow);
%}

// storeIConditional also has acquire semantics, for no better reason
// than matching storeLConditional.  At the time of writing this
// comment storeIConditional was not used anywhere by AArch64.
instruct storeIConditional(indirect mem, iRegINoSp oldval, iRegINoSp newval, rFlagsReg cr)
%{
  match(Set cr (StoreIConditional mem (Binary oldval newval)));

  ins_cost(VOLATILE_REF_COST);

  format %{
    "cmpxchgw rscratch1, $mem, $oldval, $newval, $mem\t# if $mem == $oldval then $mem <-- $newval"
    "cmpw rscratch1, zr\t# EQ on successful write"
  %}

  ins_encode(aarch64_enc_cmpxchgw_acq(mem, oldval, newval));

  ins_pipe(pipe_slow);
%}
8004 
8005 // standard CompareAndSwapX when we are using barriers
8006 // these have higher priority than the rules selected by a predicate
8007 
8008 // XXX No flag versions for CompareAndSwap{I,L,P,N} because matcher
8009 // can't match them
8010 
// Strong byte CAS: $res <- 1 on success, 0 on failure (cset on the EQ
// flag left by the cmpxchgb encoding).  Flags are clobbered.
instruct compareAndSwapB(iRegINoSp res, indirect mem, iRegINoSp oldval, iRegINoSp newval, rFlagsReg cr) %{

  match(Set res (CompareAndSwapB mem (Binary oldval newval)));
  ins_cost(2 * VOLATILE_REF_COST);

  effect(KILL cr);

  format %{
    "cmpxchgb $mem, $oldval, $newval\t# (int) if $mem == $oldval then $mem <-- $newval"
    "cset $res, EQ\t# $res <-- (EQ ? 1 : 0)"
  %}

  ins_encode(aarch64_enc_cmpxchgb(mem, oldval, newval),
            aarch64_enc_cset_eq(res));

  ins_pipe(pipe_slow);
%}
8028 
// Strong short (halfword) CAS: $res <- 1 on success, 0 on failure.
instruct compareAndSwapS(iRegINoSp res, indirect mem, iRegINoSp oldval, iRegINoSp newval, rFlagsReg cr) %{

  match(Set res (CompareAndSwapS mem (Binary oldval newval)));
  ins_cost(2 * VOLATILE_REF_COST);

  effect(KILL cr);

  format %{
    "cmpxchgs $mem, $oldval, $newval\t# (int) if $mem == $oldval then $mem <-- $newval"
    "cset $res, EQ\t# $res <-- (EQ ? 1 : 0)"
  %}

  ins_encode(aarch64_enc_cmpxchgs(mem, oldval, newval),
            aarch64_enc_cset_eq(res));

  ins_pipe(pipe_slow);
%}
8046 
// Strong int CAS: $res <- 1 on success, 0 on failure.
instruct compareAndSwapI(iRegINoSp res, indirect mem, iRegINoSp oldval, iRegINoSp newval, rFlagsReg cr) %{

  match(Set res (CompareAndSwapI mem (Binary oldval newval)));
  ins_cost(2 * VOLATILE_REF_COST);

  effect(KILL cr);

 format %{
    "cmpxchgw $mem, $oldval, $newval\t# (int) if $mem == $oldval then $mem <-- $newval"
    "cset $res, EQ\t# $res <-- (EQ ? 1 : 0)"
 %}

 ins_encode(aarch64_enc_cmpxchgw(mem, oldval, newval),
            aarch64_enc_cset_eq(res));

  ins_pipe(pipe_slow);
%}
8064 
// Strong long CAS: $res (an int) <- 1 on success, 0 on failure.
instruct compareAndSwapL(iRegINoSp res, indirect mem, iRegLNoSp oldval, iRegLNoSp newval, rFlagsReg cr) %{

  match(Set res (CompareAndSwapL mem (Binary oldval newval)));
  ins_cost(2 * VOLATILE_REF_COST);

  effect(KILL cr);

 format %{
    "cmpxchg $mem, $oldval, $newval\t# (long) if $mem == $oldval then $mem <-- $newval"
    "cset $res, EQ\t# $res <-- (EQ ? 1 : 0)"
 %}

 ins_encode(aarch64_enc_cmpxchg(mem, oldval, newval),
            aarch64_enc_cset_eq(res));

  ins_pipe(pipe_slow);
%}
8082 
// Strong pointer CAS: $res (an int) <- 1 on success, 0 on failure.
instruct compareAndSwapP(iRegINoSp res, indirect mem, iRegP oldval, iRegP newval, rFlagsReg cr) %{

  match(Set res (CompareAndSwapP mem (Binary oldval newval)));
  ins_cost(2 * VOLATILE_REF_COST);

  effect(KILL cr);

 format %{
    "cmpxchg $mem, $oldval, $newval\t# (ptr) if $mem == $oldval then $mem <-- $newval"
    "cset $res, EQ\t# $res <-- (EQ ? 1 : 0)"
 %}

 ins_encode(aarch64_enc_cmpxchg(mem, oldval, newval),
            aarch64_enc_cset_eq(res));

  ins_pipe(pipe_slow);
%}
8100 
// Strong narrow-oop CAS (word-sized): $res <- 1 on success, 0 on failure.
instruct compareAndSwapN(iRegINoSp res, indirect mem, iRegNNoSp oldval, iRegNNoSp newval, rFlagsReg cr) %{

  match(Set res (CompareAndSwapN mem (Binary oldval newval)));
  ins_cost(2 * VOLATILE_REF_COST);

  effect(KILL cr);

 format %{
    "cmpxchgw $mem, $oldval, $newval\t# (narrow oop) if $mem == $oldval then $mem <-- $newval"
    "cset $res, EQ\t# $res <-- (EQ ? 1 : 0)"
 %}

 ins_encode(aarch64_enc_cmpxchgw(mem, oldval, newval),
            aarch64_enc_cset_eq(res));

  ins_pipe(pipe_slow);
%}
8118 
8119 // alternative CompareAndSwapX when we are eliding barriers
8120 
// Acquiring variant of compareAndSwapI, selected (via the predicate)
// when barrier elision requires the exclusive load itself to acquire.
// Note the lower cost so it wins over the plain rule when applicable.
instruct compareAndSwapIAcq(iRegINoSp res, indirect mem, iRegINoSp oldval, iRegINoSp newval, rFlagsReg cr) %{

  predicate(needs_acquiring_load_exclusive(n));
  match(Set res (CompareAndSwapI mem (Binary oldval newval)));
  ins_cost(VOLATILE_REF_COST);

  effect(KILL cr);

 format %{
    "cmpxchgw_acq $mem, $oldval, $newval\t# (int) if $mem == $oldval then $mem <-- $newval"
    "cset $res, EQ\t# $res <-- (EQ ? 1 : 0)"
 %}

 ins_encode(aarch64_enc_cmpxchgw_acq(mem, oldval, newval),
            aarch64_enc_cset_eq(res));

  ins_pipe(pipe_slow);
%}
8139 
// Acquiring variant of compareAndSwapL (see compareAndSwapIAcq).
instruct compareAndSwapLAcq(iRegINoSp res, indirect mem, iRegLNoSp oldval, iRegLNoSp newval, rFlagsReg cr) %{

  predicate(needs_acquiring_load_exclusive(n));
  match(Set res (CompareAndSwapL mem (Binary oldval newval)));
  ins_cost(VOLATILE_REF_COST);

  effect(KILL cr);

 format %{
    "cmpxchg_acq $mem, $oldval, $newval\t# (long) if $mem == $oldval then $mem <-- $newval"
    "cset $res, EQ\t# $res <-- (EQ ? 1 : 0)"
 %}

 ins_encode(aarch64_enc_cmpxchg_acq(mem, oldval, newval),
            aarch64_enc_cset_eq(res));

  ins_pipe(pipe_slow);
%}
8158 
// Acquiring variant of compareAndSwapP (see compareAndSwapIAcq).
instruct compareAndSwapPAcq(iRegINoSp res, indirect mem, iRegP oldval, iRegP newval, rFlagsReg cr) %{

  predicate(needs_acquiring_load_exclusive(n));
  match(Set res (CompareAndSwapP mem (Binary oldval newval)));
  ins_cost(VOLATILE_REF_COST);

  effect(KILL cr);

 format %{
    "cmpxchg_acq $mem, $oldval, $newval\t# (ptr) if $mem == $oldval then $mem <-- $newval"
    "cset $res, EQ\t# $res <-- (EQ ? 1 : 0)"
 %}

 ins_encode(aarch64_enc_cmpxchg_acq(mem, oldval, newval),
            aarch64_enc_cset_eq(res));

  ins_pipe(pipe_slow);
%}
8177 
// Acquiring variant of compareAndSwapN (see compareAndSwapIAcq).
instruct compareAndSwapNAcq(iRegINoSp res, indirect mem, iRegNNoSp oldval, iRegNNoSp newval, rFlagsReg cr) %{

  predicate(needs_acquiring_load_exclusive(n));
  match(Set res (CompareAndSwapN mem (Binary oldval newval)));
  ins_cost(VOLATILE_REF_COST);

  effect(KILL cr);

 format %{
    "cmpxchgw_acq $mem, $oldval, $newval\t# (narrow oop) if $mem == $oldval then $mem <-- $newval"
    "cset $res, EQ\t# $res <-- (EQ ? 1 : 0)"
 %}

 ins_encode(aarch64_enc_cmpxchgw_acq(mem, oldval, newval),
            aarch64_enc_cset_eq(res));

  ins_pipe(pipe_slow);
%}
8196 
8197 
8198 // ---------------------------------------------------------------------
8199 
8200 
8201 // BEGIN This section of the file is automatically generated. Do not edit --------------
8202 
8203 // Sundry CAS operations.  Note that release is always true,
8204 // regardless of the memory ordering of the CAS.  This is because we
8205 // need the volatile case to be sequentially consistent but there is
8206 // no trailing StoreLoad barrier emitted by C2.  Unfortunately we
8207 // can't check the type of memory ordering here, so we always emit a
8208 // STLXR.
8209 
8210 // This section is generated from aarch64_ad_cas.m4
8211 
8212 
8213 
// Strong byte compare-and-exchange: old value returned in $res.
// NOTE(review): the format string claimed "weak" while the encoding
// passes /*weak*/ false (strong CAS); fixed here.  The same fix
// belongs in aarch64_ad_cas.m4, from which this section is generated.
instruct compareAndExchangeB(iRegINoSp res, indirect mem, iRegI oldval, iRegI newval, rFlagsReg cr) %{
  match(Set res (CompareAndExchangeB mem (Binary oldval newval)));
  ins_cost(2 * VOLATILE_REF_COST);
  effect(TEMP_DEF res, KILL cr);
  format %{
    "cmpxchg $res = $mem, $oldval, $newval\t# (byte) if $mem == $oldval then $mem <-- $newval"
  %}
  ins_encode %{
    __ cmpxchg($mem$$Register, $oldval$$Register, $newval$$Register,
               Assembler::byte, /*acquire*/ false, /*release*/ true,
               /*weak*/ false, $res$$Register);
    // Byte exclusive loads zero-extend; sign-extend to produce the
    // int-valued result CompareAndExchangeB expects.
    __ sxtbw($res$$Register, $res$$Register);
  %}
  ins_pipe(pipe_slow);
%}
8229 
// Strong short compare-and-exchange: old value returned in $res.
// NOTE(review): format string corrected — this is a strong CAS
// (/*weak*/ false), not weak; fix aarch64_ad_cas.m4 to match.
instruct compareAndExchangeS(iRegINoSp res, indirect mem, iRegI oldval, iRegI newval, rFlagsReg cr) %{
  match(Set res (CompareAndExchangeS mem (Binary oldval newval)));
  ins_cost(2 * VOLATILE_REF_COST);
  effect(TEMP_DEF res, KILL cr);
  format %{
    "cmpxchg $res = $mem, $oldval, $newval\t# (short) if $mem == $oldval then $mem <-- $newval"
  %}
  ins_encode %{
    __ cmpxchg($mem$$Register, $oldval$$Register, $newval$$Register,
               Assembler::halfword, /*acquire*/ false, /*release*/ true,
               /*weak*/ false, $res$$Register);
    // Halfword exclusive loads zero-extend; sign-extend for the
    // int-valued result.
    __ sxthw($res$$Register, $res$$Register);
  %}
  ins_pipe(pipe_slow);
%}
8245 
// Strong int compare-and-exchange: old value returned in $res.
// NOTE(review): format string corrected — this is a strong CAS
// (/*weak*/ false), not weak; fix aarch64_ad_cas.m4 to match.
instruct compareAndExchangeI(iRegINoSp res, indirect mem, iRegI oldval, iRegI newval, rFlagsReg cr) %{
  match(Set res (CompareAndExchangeI mem (Binary oldval newval)));
  ins_cost(2 * VOLATILE_REF_COST);
  effect(TEMP_DEF res, KILL cr);
  format %{
    "cmpxchg $res = $mem, $oldval, $newval\t# (int) if $mem == $oldval then $mem <-- $newval"
  %}
  ins_encode %{
    __ cmpxchg($mem$$Register, $oldval$$Register, $newval$$Register,
               Assembler::word, /*acquire*/ false, /*release*/ true,
               /*weak*/ false, $res$$Register);
  %}
  ins_pipe(pipe_slow);
%}
8260 
// Strong long compare-and-exchange: old value returned in $res.
// NOTE(review): format string corrected — this is a strong CAS
// (/*weak*/ false), not weak; fix aarch64_ad_cas.m4 to match.
instruct compareAndExchangeL(iRegLNoSp res, indirect mem, iRegL oldval, iRegL newval, rFlagsReg cr) %{
  match(Set res (CompareAndExchangeL mem (Binary oldval newval)));
  ins_cost(2 * VOLATILE_REF_COST);
  effect(TEMP_DEF res, KILL cr);
  format %{
    "cmpxchg $res = $mem, $oldval, $newval\t# (long) if $mem == $oldval then $mem <-- $newval"
  %}
  ins_encode %{
    __ cmpxchg($mem$$Register, $oldval$$Register, $newval$$Register,
               Assembler::xword, /*acquire*/ false, /*release*/ true,
               /*weak*/ false, $res$$Register);
  %}
  ins_pipe(pipe_slow);
%}
8275 
// Strong narrow-oop compare-and-exchange: old value returned in $res.
// NOTE(review): format string corrected — this is a strong CAS
// (/*weak*/ false), not weak; fix aarch64_ad_cas.m4 to match.
instruct compareAndExchangeN(iRegNNoSp res, indirect mem, iRegN oldval, iRegN newval, rFlagsReg cr) %{
  match(Set res (CompareAndExchangeN mem (Binary oldval newval)));
  ins_cost(2 * VOLATILE_REF_COST);
  effect(TEMP_DEF res, KILL cr);
  format %{
    "cmpxchg $res = $mem, $oldval, $newval\t# (narrow oop) if $mem == $oldval then $mem <-- $newval"
  %}
  ins_encode %{
    __ cmpxchg($mem$$Register, $oldval$$Register, $newval$$Register,
               Assembler::word, /*acquire*/ false, /*release*/ true,
               /*weak*/ false, $res$$Register);
  %}
  ins_pipe(pipe_slow);
%}
8290 
// Strong pointer compare-and-exchange: old value returned in $res.
// NOTE(review): format string corrected — this is a strong CAS
// (/*weak*/ false), not weak; fix aarch64_ad_cas.m4 to match.
instruct compareAndExchangeP(iRegPNoSp res, indirect mem, iRegP oldval, iRegP newval, rFlagsReg cr) %{
  match(Set res (CompareAndExchangeP mem (Binary oldval newval)));
  ins_cost(2 * VOLATILE_REF_COST);
  effect(TEMP_DEF res, KILL cr);
  format %{
    "cmpxchg $res = $mem, $oldval, $newval\t# (ptr) if $mem == $oldval then $mem <-- $newval"
  %}
  ins_encode %{
    __ cmpxchg($mem$$Register, $oldval$$Register, $newval$$Register,
               Assembler::xword, /*acquire*/ false, /*release*/ true,
               /*weak*/ false, $res$$Register);
  %}
  ins_pipe(pipe_slow);
%}
8305 
// Weak byte CAS (may fail spuriously): $res <- 1 on success, 0 on
// failure.  The exchanged value is discarded (noreg).
instruct weakCompareAndSwapB(iRegINoSp res, indirect mem, iRegI oldval, iRegI newval, rFlagsReg cr) %{
  match(Set res (WeakCompareAndSwapB mem (Binary oldval newval)));
  ins_cost(2 * VOLATILE_REF_COST);
  effect(KILL cr);
  format %{
    "cmpxchg $res = $mem, $oldval, $newval\t# (byte, weak) if $mem == $oldval then $mem <-- $newval"
    "csetw $res, EQ\t# $res <-- (EQ ? 1 : 0)"
  %}
  ins_encode %{
    __ cmpxchg($mem$$Register, $oldval$$Register, $newval$$Register,
               Assembler::byte, /*acquire*/ false, /*release*/ true,
               /*weak*/ true, noreg);
    __ csetw($res$$Register, Assembler::EQ);
  %}
  ins_pipe(pipe_slow);
%}
8322 
// Weak short CAS (may fail spuriously): $res <- 1 on success, 0 on failure.
instruct weakCompareAndSwapS(iRegINoSp res, indirect mem, iRegI oldval, iRegI newval, rFlagsReg cr) %{
  match(Set res (WeakCompareAndSwapS mem (Binary oldval newval)));
  ins_cost(2 * VOLATILE_REF_COST);
  effect(KILL cr);
  format %{
    "cmpxchg $res = $mem, $oldval, $newval\t# (short, weak) if $mem == $oldval then $mem <-- $newval"
    "csetw $res, EQ\t# $res <-- (EQ ? 1 : 0)"
  %}
  ins_encode %{
    __ cmpxchg($mem$$Register, $oldval$$Register, $newval$$Register,
               Assembler::halfword, /*acquire*/ false, /*release*/ true,
               /*weak*/ true, noreg);
    __ csetw($res$$Register, Assembler::EQ);
  %}
  ins_pipe(pipe_slow);
%}
8339 
// Weak int CAS (may fail spuriously): $res <- 1 on success, 0 on failure.
instruct weakCompareAndSwapI(iRegINoSp res, indirect mem, iRegI oldval, iRegI newval, rFlagsReg cr) %{
  match(Set res (WeakCompareAndSwapI mem (Binary oldval newval)));
  ins_cost(2 * VOLATILE_REF_COST);
  effect(KILL cr);
  format %{
    "cmpxchg $res = $mem, $oldval, $newval\t# (int, weak) if $mem == $oldval then $mem <-- $newval"
    "csetw $res, EQ\t# $res <-- (EQ ? 1 : 0)"
  %}
  ins_encode %{
    __ cmpxchg($mem$$Register, $oldval$$Register, $newval$$Register,
               Assembler::word, /*acquire*/ false, /*release*/ true,
               /*weak*/ true, noreg);
    __ csetw($res$$Register, Assembler::EQ);
  %}
  ins_pipe(pipe_slow);
%}
8356 
// Weak long CAS (may fail spuriously): $res (an int) <- 1 on success,
// 0 on failure.
instruct weakCompareAndSwapL(iRegINoSp res, indirect mem, iRegL oldval, iRegL newval, rFlagsReg cr) %{
  match(Set res (WeakCompareAndSwapL mem (Binary oldval newval)));
  ins_cost(2 * VOLATILE_REF_COST);
  effect(KILL cr);
  format %{
    "cmpxchg $res = $mem, $oldval, $newval\t# (long, weak) if $mem == $oldval then $mem <-- $newval"
    "csetw $res, EQ\t# $res <-- (EQ ? 1 : 0)"
  %}
  ins_encode %{
    __ cmpxchg($mem$$Register, $oldval$$Register, $newval$$Register,
               Assembler::xword, /*acquire*/ false, /*release*/ true,
               /*weak*/ true, noreg);
    __ csetw($res$$Register, Assembler::EQ);
  %}
  ins_pipe(pipe_slow);
%}
8373 
// Weak narrow-oop CAS (may fail spuriously): $res <- 1 on success,
// 0 on failure.
instruct weakCompareAndSwapN(iRegINoSp res, indirect mem, iRegN oldval, iRegN newval, rFlagsReg cr) %{
  match(Set res (WeakCompareAndSwapN mem (Binary oldval newval)));
  ins_cost(2 * VOLATILE_REF_COST);
  effect(KILL cr);
  format %{
    "cmpxchg $res = $mem, $oldval, $newval\t# (narrow oop, weak) if $mem == $oldval then $mem <-- $newval"
    "csetw $res, EQ\t# $res <-- (EQ ? 1 : 0)"
  %}
  ins_encode %{
    __ cmpxchg($mem$$Register, $oldval$$Register, $newval$$Register,
               Assembler::word, /*acquire*/ false, /*release*/ true,
               /*weak*/ true, noreg);
    __ csetw($res$$Register, Assembler::EQ);
  %}
  ins_pipe(pipe_slow);
%}
8390 
// Weak pointer CAS (may fail spuriously): $res (an int) <- 1 on
// success, 0 on failure.
instruct weakCompareAndSwapP(iRegINoSp res, indirect mem, iRegP oldval, iRegP newval, rFlagsReg cr) %{
  match(Set res (WeakCompareAndSwapP mem (Binary oldval newval)));
  ins_cost(2 * VOLATILE_REF_COST);
  effect(KILL cr);
  format %{
    "cmpxchg $res = $mem, $oldval, $newval\t# (ptr, weak) if $mem == $oldval then $mem <-- $newval"
    "csetw $res, EQ\t# $res <-- (EQ ? 1 : 0)"
  %}
  ins_encode %{
    __ cmpxchg($mem$$Register, $oldval$$Register, $newval$$Register,
               Assembler::xword, /*acquire*/ false, /*release*/ true,
               /*weak*/ true, noreg);
    __ csetw($res$$Register, Assembler::EQ);
  %}
  ins_pipe(pipe_slow);
%}
8407 
8408 // END This section of the file is automatically generated. Do not edit --------------
8409 // ---------------------------------------------------------------------
8410 
// Atomic int exchange: old value of *$mem in $prev, *$mem <- $newv.
instruct get_and_setI(indirect mem, iRegI newv, iRegINoSp prev) %{
  match(Set prev (GetAndSetI mem newv));
  format %{ "atomic_xchgw  $prev, $newv, [$mem]" %}
  ins_encode %{
    __ atomic_xchgw($prev$$Register, $newv$$Register, as_Register($mem$$base));
  %}
  ins_pipe(pipe_serial);
%}
8419 
// Atomic long exchange: old value of *$mem in $prev, *$mem <- $newv.
instruct get_and_setL(indirect mem, iRegL newv, iRegLNoSp prev) %{
  match(Set prev (GetAndSetL mem newv));
  format %{ "atomic_xchg  $prev, $newv, [$mem]" %}
  ins_encode %{
    __ atomic_xchg($prev$$Register, $newv$$Register, as_Register($mem$$base));
  %}
  ins_pipe(pipe_serial);
%}
8428 
// Atomic narrow-oop exchange (word-sized): old value in $prev.
instruct get_and_setN(indirect mem, iRegN newv, iRegINoSp prev) %{
  match(Set prev (GetAndSetN mem newv));
  format %{ "atomic_xchgw $prev, $newv, [$mem]" %}
  ins_encode %{
    __ atomic_xchgw($prev$$Register, $newv$$Register, as_Register($mem$$base));
  %}
  ins_pipe(pipe_serial);
%}
8437 
// Atomic pointer exchange: old value of *$mem in $prev, *$mem <- $newv.
instruct get_and_setP(indirect mem, iRegP newv, iRegPNoSp prev) %{
  match(Set prev (GetAndSetP mem newv));
  format %{ "atomic_xchg  $prev, $newv, [$mem]" %}
  ins_encode %{
    __ atomic_xchg($prev$$Register, $newv$$Register, as_Register($mem$$base));
  %}
  ins_pipe(pipe_serial);
%}
8446 
8447 
// Atomic long fetch-and-add (register increment): $newval receives the
// value of *$mem from before the add.
instruct get_and_addL(indirect mem, iRegLNoSp newval, iRegL incr) %{
  match(Set newval (GetAndAddL mem incr));
  ins_cost(INSN_COST * 10);
  format %{ "get_and_addL $newval, [$mem], $incr" %}
  ins_encode %{
    __ atomic_add($newval$$Register, $incr$$Register, as_Register($mem$$base));
  %}
  ins_pipe(pipe_serial);
%}
8457 
// As get_and_addL but the old value is unused (result_not_used), so it
// is discarded via noreg; slightly cheaper cost makes this rule win.
instruct get_and_addL_no_res(indirect mem, Universe dummy, iRegL incr) %{
  predicate(n->as_LoadStore()->result_not_used());
  match(Set dummy (GetAndAddL mem incr));
  ins_cost(INSN_COST * 9);
  format %{ "get_and_addL [$mem], $incr" %}
  ins_encode %{
    __ atomic_add(noreg, $incr$$Register, as_Register($mem$$base));
  %}
  ins_pipe(pipe_serial);
%}
8468 
// Atomic long fetch-and-add with an immediate increment (add/sub range).
instruct get_and_addLi(indirect mem, iRegLNoSp newval, immLAddSub incr) %{
  match(Set newval (GetAndAddL mem incr));
  ins_cost(INSN_COST * 10);
  format %{ "get_and_addL $newval, [$mem], $incr" %}
  ins_encode %{
    __ atomic_add($newval$$Register, $incr$$constant, as_Register($mem$$base));
  %}
  ins_pipe(pipe_serial);
%}
8478 
// Immediate-increment variant of get_and_addL_no_res (old value unused).
instruct get_and_addLi_no_res(indirect mem, Universe dummy, immLAddSub incr) %{
  predicate(n->as_LoadStore()->result_not_used());
  match(Set dummy (GetAndAddL mem incr));
  ins_cost(INSN_COST * 9);
  format %{ "get_and_addL [$mem], $incr" %}
  ins_encode %{
    __ atomic_add(noreg, $incr$$constant, as_Register($mem$$base));
  %}
  ins_pipe(pipe_serial);
%}
8489 
// Atomic int fetch-and-add (register increment): $newval receives the
// pre-add value of *$mem.
instruct get_and_addI(indirect mem, iRegINoSp newval, iRegIorL2I incr) %{
  match(Set newval (GetAndAddI mem incr));
  ins_cost(INSN_COST * 10);
  format %{ "get_and_addI $newval, [$mem], $incr" %}
  ins_encode %{
    __ atomic_addw($newval$$Register, $incr$$Register, as_Register($mem$$base));
  %}
  ins_pipe(pipe_serial);
%}
8499 
// As get_and_addI but the old value is unused (result_not_used).
instruct get_and_addI_no_res(indirect mem, Universe dummy, iRegIorL2I incr) %{
  predicate(n->as_LoadStore()->result_not_used());
  match(Set dummy (GetAndAddI mem incr));
  ins_cost(INSN_COST * 9);
  format %{ "get_and_addI [$mem], $incr" %}
  ins_encode %{
    __ atomic_addw(noreg, $incr$$Register, as_Register($mem$$base));
  %}
  ins_pipe(pipe_serial);
%}
8510 
// Atomic int fetch-and-add with an immediate increment (add/sub range).
instruct get_and_addIi(indirect mem, iRegINoSp newval, immIAddSub incr) %{
  match(Set newval (GetAndAddI mem incr));
  ins_cost(INSN_COST * 10);
  format %{ "get_and_addI $newval, [$mem], $incr" %}
  ins_encode %{
    __ atomic_addw($newval$$Register, $incr$$constant, as_Register($mem$$base));
  %}
  ins_pipe(pipe_serial);
%}
8520 
// Immediate-increment variant of get_and_addI_no_res (old value unused).
instruct get_and_addIi_no_res(indirect mem, Universe dummy, immIAddSub incr) %{
  predicate(n->as_LoadStore()->result_not_used());
  match(Set dummy (GetAndAddI mem incr));
  ins_cost(INSN_COST * 9);
  format %{ "get_and_addI [$mem], $incr" %}
  ins_encode %{
    __ atomic_addw(noreg, $incr$$constant, as_Register($mem$$base));
  %}
  ins_pipe(pipe_serial);
%}
8531 
// Manifest a CmpL result in an integer register.
// (src1 < src2) ? -1 : ((src1 > src2) ? 1 : 0)
instruct cmpL3_reg_reg(iRegINoSp dst, iRegL src1, iRegL src2, rFlagsReg flags)
%{
  match(Set dst (CmpL3 src1 src2));
  effect(KILL flags);

  ins_cost(INSN_COST * 6);
  format %{
      "cmp $src1, $src2"
      "csetw $dst, ne"
      "cnegw $dst, lt"
  %}
  ins_encode %{
    __ cmp($src1$$Register, $src2$$Register);
    // dst = (src1 != src2) ? 1 : 0, then conditionally negate when
    // src1 < src2, yielding the required -1/0/+1.
    __ csetw($dst$$Register, Assembler::NE);
    __ cnegw($dst$$Register, $dst$$Register, Assembler::LT);
  %}

  ins_pipe(pipe_class_default);
%}
8554 
// As cmpL3_reg_reg but with an add/sub-range immediate for src2.
instruct cmpL3_reg_imm(iRegINoSp dst, iRegL src1, immLAddSub src2, rFlagsReg flags)
%{
  match(Set dst (CmpL3 src1 src2));
  effect(KILL flags);

  ins_cost(INSN_COST * 6);
  format %{
      "cmp $src1, $src2"
      "csetw $dst, ne"
      "cnegw $dst, lt"
  %}
  ins_encode %{
    int32_t con = (int32_t)$src2$$constant;
    // subs cannot encode a negative immediate, so for con < 0 compare
    // by adding the magnitude instead (immLAddSub restricts the range,
    // so -con cannot overflow).
    if (con < 0) {
      __ adds(zr, $src1$$Register, -con);
    } else {
      __ subs(zr, $src1$$Register, con);
    }
    __ csetw($dst$$Register, Assembler::NE);
    __ cnegw($dst$$Register, $dst$$Register, Assembler::LT);
  %}

  ins_pipe(pipe_class_default);
%}
8579 
8580 // ============================================================================
8581 // Conditional Move Instructions
8582 
8583 // n.b. we have identical rules for both a signed compare op (cmpOp)
8584 // and an unsigned compare op (cmpOpU). it would be nice if we could
8585 // define an op class which merged both inputs and use it to type the
// argument to a single rule. unfortunately this fails because the
8587 // opclass does not live up to the COND_INTER interface of its
8588 // component operands. When the generic code tries to negate the
// operand it ends up running the generic MachOper::negate method
8590 // which throws a ShouldNotHappen. So, we have to provide two flavours
8591 // of each rule, one for a cmpOp and a second for a cmpOpU (sigh).
8592 
// CMoveI via cselw: $dst = $cmp ? $src2 : $src1 (csel selects its
// first register operand when the condition holds).
instruct cmovI_reg_reg(cmpOp cmp, rFlagsReg cr, iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2) %{
  match(Set dst (CMoveI (Binary cmp cr) (Binary src1 src2)));

  ins_cost(INSN_COST * 2);
  format %{ "cselw $dst, $src2, $src1 $cmp\t# signed, int"  %}

  ins_encode %{
    __ cselw(as_Register($dst$$reg),
             as_Register($src2$$reg),
             as_Register($src1$$reg),
             (Assembler::Condition)$cmp$$cmpcode);
  %}

  ins_pipe(icond_reg_reg);
%}
8608 
// Unsigned-compare flavour of cmovI_reg_reg (see section comment above
// for why signed and unsigned need separate rules).
instruct cmovUI_reg_reg(cmpOpU cmp, rFlagsRegU cr, iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2) %{
  match(Set dst (CMoveI (Binary cmp cr) (Binary src1 src2)));

  ins_cost(INSN_COST * 2);
  format %{ "cselw $dst, $src2, $src1 $cmp\t# unsigned, int"  %}

  ins_encode %{
    __ cselw(as_Register($dst$$reg),
             as_Register($src2$$reg),
             as_Register($src1$$reg),
             (Assembler::Condition)$cmp$$cmpcode);
  %}

  ins_pipe(icond_reg_reg);
%}
8624 
8625 // special cases where one arg is zero
8626 
8627 // n.b. this is selected in preference to the rule above because it
8628 // avoids loading constant 0 into a source register
8629 
8630 // TODO
8631 // we ought only to be able to cull one of these variants as the ideal
8632 // transforms ought always to order the zero consistently (to left/right?)
8633 
// CMoveI with zero as the false-side value: $dst = $cmp ? $src : 0,
// using zr instead of materializing the constant.
instruct cmovI_zero_reg(cmpOp cmp, rFlagsReg cr, iRegINoSp dst, immI0 zero, iRegIorL2I src) %{
  match(Set dst (CMoveI (Binary cmp cr) (Binary zero src)));

  ins_cost(INSN_COST * 2);
  format %{ "cselw $dst, $src, zr $cmp\t# signed, int"  %}

  ins_encode %{
    __ cselw(as_Register($dst$$reg),
             as_Register($src$$reg),
             zr,
             (Assembler::Condition)$cmp$$cmpcode);
  %}

  ins_pipe(icond_reg);
%}
8649 
// Unsigned-compare flavour of cmovI_zero_reg.
instruct cmovUI_zero_reg(cmpOpU cmp, rFlagsRegU cr, iRegINoSp dst, immI0 zero, iRegIorL2I src) %{
  match(Set dst (CMoveI (Binary cmp cr) (Binary zero src)));

  ins_cost(INSN_COST * 2);
  format %{ "cselw $dst, $src, zr $cmp\t# unsigned, int"  %}

  ins_encode %{
    __ cselw(as_Register($dst$$reg),
             as_Register($src$$reg),
             zr,
             (Assembler::Condition)$cmp$$cmpcode);
  %}

  ins_pipe(icond_reg);
%}
8665 
// CMoveI with zero as the true-side value: $dst = $cmp ? 0 : $src.
instruct cmovI_reg_zero(cmpOp cmp, rFlagsReg cr, iRegINoSp dst, iRegIorL2I src, immI0 zero) %{
  match(Set dst (CMoveI (Binary cmp cr) (Binary src zero)));

  ins_cost(INSN_COST * 2);
  format %{ "cselw $dst, zr, $src $cmp\t# signed, int"  %}

  ins_encode %{
    __ cselw(as_Register($dst$$reg),
             zr,
             as_Register($src$$reg),
             (Assembler::Condition)$cmp$$cmpcode);
  %}

  ins_pipe(icond_reg);
%}
8681 
// Unsigned-compare flavour of cmovI_reg_zero.
instruct cmovUI_reg_zero(cmpOpU cmp, rFlagsRegU cr, iRegINoSp dst, iRegIorL2I src, immI0 zero) %{
  match(Set dst (CMoveI (Binary cmp cr) (Binary src zero)));

  ins_cost(INSN_COST * 2);
  format %{ "cselw $dst, zr, $src $cmp\t# unsigned, int"  %}

  ins_encode %{
    __ cselw(as_Register($dst$$reg),
             zr,
             as_Register($src$$reg),
             (Assembler::Condition)$cmp$$cmpcode);
  %}

  ins_pipe(icond_reg);
%}
8697 
8698 // special case for creating a boolean 0 or 1
8699 
8700 // n.b. this is selected in preference to the rule above because it
8701 // avoids loading constants 0 and 1 into a source register
8702 
// Boolean materialization: csincw $dst, zr, zr gives ($cmp ? 0 : 1),
// i.e. cset with the negated condition — no constants loaded.
instruct cmovI_reg_zero_one(cmpOp cmp, rFlagsReg cr, iRegINoSp dst, immI0 zero, immI_1 one) %{
  match(Set dst (CMoveI (Binary cmp cr) (Binary one zero)));

  ins_cost(INSN_COST * 2);
  format %{ "csincw $dst, zr, zr $cmp\t# signed, int"  %}

  ins_encode %{
    // equivalently
    // cset(as_Register($dst$$reg),
    //      negate_condition((Assembler::Condition)$cmp$$cmpcode));
    __ csincw(as_Register($dst$$reg),
             zr,
             zr,
             (Assembler::Condition)$cmp$$cmpcode);
  %}

  ins_pipe(icond_none);
%}
8721 
// Unsigned-compare flavour of cmovI_reg_zero_one.
instruct cmovUI_reg_zero_one(cmpOpU cmp, rFlagsRegU cr, iRegINoSp dst, immI0 zero, immI_1 one) %{
  match(Set dst (CMoveI (Binary cmp cr) (Binary one zero)));

  ins_cost(INSN_COST * 2);
  format %{ "csincw $dst, zr, zr $cmp\t# unsigned, int"  %}

  ins_encode %{
    // equivalently
    // cset(as_Register($dst$$reg),
    //      negate_condition((Assembler::Condition)$cmp$$cmpcode));
    __ csincw(as_Register($dst$$reg),
             zr,
             zr,
             (Assembler::Condition)$cmp$$cmpcode);
  %}

  ins_pipe(icond_none);
%}
8740 
// CMoveL via csel: $dst = $cmp ? $src2 : $src1.
instruct cmovL_reg_reg(cmpOp cmp, rFlagsReg cr, iRegLNoSp dst, iRegL src1, iRegL src2) %{
  match(Set dst (CMoveL (Binary cmp cr) (Binary src1 src2)));

  ins_cost(INSN_COST * 2);
  format %{ "csel $dst, $src2, $src1 $cmp\t# signed, long"  %}

  ins_encode %{
    __ csel(as_Register($dst$$reg),
            as_Register($src2$$reg),
            as_Register($src1$$reg),
            (Assembler::Condition)$cmp$$cmpcode);
  %}

  ins_pipe(icond_reg_reg);
%}
8756 
// Unsigned-compare flavour of cmovL_reg_reg.
instruct cmovUL_reg_reg(cmpOpU cmp, rFlagsRegU cr, iRegLNoSp dst, iRegL src1, iRegL src2) %{
  match(Set dst (CMoveL (Binary cmp cr) (Binary src1 src2)));

  ins_cost(INSN_COST * 2);
  format %{ "csel $dst, $src2, $src1 $cmp\t# unsigned, long"  %}

  ins_encode %{
    __ csel(as_Register($dst$$reg),
            as_Register($src2$$reg),
            as_Register($src1$$reg),
            (Assembler::Condition)$cmp$$cmpcode);
  %}

  ins_pipe(icond_reg_reg);
%}
8772 
8773 // special cases where one arg is zero
8774 
// CMoveL with zero as the true-side value: $dst = $cmp ? 0 : $src.
instruct cmovL_reg_zero(cmpOp cmp, rFlagsReg cr, iRegLNoSp dst, iRegL src, immL0 zero) %{
  match(Set dst (CMoveL (Binary cmp cr) (Binary src zero)));

  ins_cost(INSN_COST * 2);
  format %{ "csel $dst, zr, $src $cmp\t# signed, long"  %}

  ins_encode %{
    __ csel(as_Register($dst$$reg),
            zr,
            as_Register($src$$reg),
            (Assembler::Condition)$cmp$$cmpcode);
  %}

  ins_pipe(icond_reg);
%}
8790 
// Unsigned-compare flavour of cmovL_reg_zero.
instruct cmovUL_reg_zero(cmpOpU cmp, rFlagsRegU cr, iRegLNoSp dst, iRegL src, immL0 zero) %{
  match(Set dst (CMoveL (Binary cmp cr) (Binary src zero)));

  ins_cost(INSN_COST * 2);
  format %{ "csel $dst, zr, $src $cmp\t# unsigned, long"  %}

  ins_encode %{
    __ csel(as_Register($dst$$reg),
            zr,
            as_Register($src$$reg),
            (Assembler::Condition)$cmp$$cmpcode);
  %}

  ins_pipe(icond_reg);
%}
8806 
// CMoveL with zero as the false-side value: $dst = $cmp ? $src : 0.
instruct cmovL_zero_reg(cmpOp cmp, rFlagsReg cr, iRegLNoSp dst, immL0 zero, iRegL src) %{
  match(Set dst (CMoveL (Binary cmp cr) (Binary zero src)));

  ins_cost(INSN_COST * 2);
  format %{ "csel $dst, $src, zr $cmp\t# signed, long"  %}

  ins_encode %{
    __ csel(as_Register($dst$$reg),
            as_Register($src$$reg),
            zr,
            (Assembler::Condition)$cmp$$cmpcode);
  %}

  ins_pipe(icond_reg);
%}
8822 
// Unsigned-compare flavour of cmovL_zero_reg.
instruct cmovUL_zero_reg(cmpOpU cmp, rFlagsRegU cr, iRegLNoSp dst, immL0 zero, iRegL src) %{
  match(Set dst (CMoveL (Binary cmp cr) (Binary zero src)));

  ins_cost(INSN_COST * 2);
  format %{ "csel $dst, $src, zr $cmp\t# unsigned, long"  %}

  ins_encode %{
    __ csel(as_Register($dst$$reg),
            as_Register($src$$reg),
            zr,
            (Assembler::Condition)$cmp$$cmpcode);
  %}

  ins_pipe(icond_reg);
%}
8838 
// CMoveP via csel: $dst = $cmp ? $src2 : $src1.
instruct cmovP_reg_reg(cmpOp cmp, rFlagsReg cr, iRegPNoSp dst, iRegP src1, iRegP src2) %{
  match(Set dst (CMoveP (Binary cmp cr) (Binary src1 src2)));

  ins_cost(INSN_COST * 2);
  format %{ "csel $dst, $src2, $src1 $cmp\t# signed, ptr"  %}

  ins_encode %{
    __ csel(as_Register($dst$$reg),
            as_Register($src2$$reg),
            as_Register($src1$$reg),
            (Assembler::Condition)$cmp$$cmpcode);
  %}

  ins_pipe(icond_reg_reg);
%}
8854 
// Unsigned-compare flavour of cmovP_reg_reg.
instruct cmovUP_reg_reg(cmpOpU cmp, rFlagsRegU cr, iRegPNoSp dst, iRegP src1, iRegP src2) %{
  match(Set dst (CMoveP (Binary cmp cr) (Binary src1 src2)));

  ins_cost(INSN_COST * 2);
  format %{ "csel $dst, $src2, $src1 $cmp\t# unsigned, ptr"  %}

  ins_encode %{
    __ csel(as_Register($dst$$reg),
            as_Register($src2$$reg),
            as_Register($src1$$reg),
            (Assembler::Condition)$cmp$$cmpcode);
  %}

  ins_pipe(icond_reg_reg);
%}
8870 
8871 // special cases where one arg is zero
8872 
8873 instruct cmovP_reg_zero(cmpOp cmp, rFlagsReg cr, iRegPNoSp dst, iRegP src, immP0 zero) %{
8874   match(Set dst (CMoveP (Binary cmp cr) (Binary src zero)));
8875 
8876   ins_cost(INSN_COST * 2);
8877   format %{ "csel $dst, zr, $src $cmp\t# signed, ptr"  %}
8878 
8879   ins_encode %{
8880     __ csel(as_Register($dst$$reg),
8881             zr,
8882             as_Register($src$$reg),
8883             (Assembler::Condition)$cmp$$cmpcode);
8884   %}
8885 
8886   ins_pipe(icond_reg);
8887 %}
8888 
8889 instruct cmovUP_reg_zero(cmpOpU cmp, rFlagsRegU cr, iRegPNoSp dst, iRegP src, immP0 zero) %{
8890   match(Set dst (CMoveP (Binary cmp cr) (Binary src zero)));
8891 
8892   ins_cost(INSN_COST * 2);
8893   format %{ "csel $dst, zr, $src $cmp\t# unsigned, ptr"  %}
8894 
8895   ins_encode %{
8896     __ csel(as_Register($dst$$reg),
8897             zr,
8898             as_Register($src$$reg),
8899             (Assembler::Condition)$cmp$$cmpcode);
8900   %}
8901 
8902   ins_pipe(icond_reg);
8903 %}
8904 
8905 instruct cmovP_zero_reg(cmpOp cmp, rFlagsReg cr, iRegPNoSp dst, immP0 zero, iRegP src) %{
8906   match(Set dst (CMoveP (Binary cmp cr) (Binary zero src)));
8907 
8908   ins_cost(INSN_COST * 2);
8909   format %{ "csel $dst, $src, zr $cmp\t# signed, ptr"  %}
8910 
8911   ins_encode %{
8912     __ csel(as_Register($dst$$reg),
8913             as_Register($src$$reg),
8914             zr,
8915             (Assembler::Condition)$cmp$$cmpcode);
8916   %}
8917 
8918   ins_pipe(icond_reg);
8919 %}
8920 
8921 instruct cmovUP_zero_reg(cmpOpU cmp, rFlagsRegU cr, iRegPNoSp dst, immP0 zero, iRegP src) %{
8922   match(Set dst (CMoveP (Binary cmp cr) (Binary zero src)));
8923 
8924   ins_cost(INSN_COST * 2);
8925   format %{ "csel $dst, $src, zr $cmp\t# unsigned, ptr"  %}
8926 
8927   ins_encode %{
8928     __ csel(as_Register($dst$$reg),
8929             as_Register($src$$reg),
8930             zr,
8931             (Assembler::Condition)$cmp$$cmpcode);
8932   %}
8933 
8934   ins_pipe(icond_reg);
8935 %}
8936 
// Conditional move, compressed (narrow) oop, both operands in registers,
// signed compare.  Narrow oops are 32-bit, hence the w-form cselw.
instruct cmovN_reg_reg(cmpOp cmp, rFlagsReg cr, iRegNNoSp dst, iRegN src1, iRegN src2) %{
  match(Set dst (CMoveN (Binary cmp cr) (Binary src1 src2)));

  ins_cost(INSN_COST * 2);
  format %{ "cselw $dst, $src2, $src1 $cmp\t# signed, compressed ptr"  %}

  ins_encode %{
    __ cselw(as_Register($dst$$reg),
             as_Register($src2$$reg),
             as_Register($src1$$reg),
             (Assembler::Condition)$cmp$$cmpcode);
  %}

  ins_pipe(icond_reg_reg);
%}
8952 
// Conditional move, compressed (narrow) oop, both operands in registers,
// unsigned compare (cmpOpU/rFlagsRegU).  Narrow oops are 32-bit, hence cselw.
// Fix: the format comment previously said "signed" although this is the
// unsigned variant.
instruct cmovUN_reg_reg(cmpOpU cmp, rFlagsRegU cr, iRegNNoSp dst, iRegN src1, iRegN src2) %{
  match(Set dst (CMoveN (Binary cmp cr) (Binary src1 src2)));

  ins_cost(INSN_COST * 2);
  format %{ "cselw $dst, $src2, $src1 $cmp\t# unsigned, compressed ptr"  %}

  ins_encode %{
    __ cselw(as_Register($dst$$reg),
             as_Register($src2$$reg),
             as_Register($src1$$reg),
             (Assembler::Condition)$cmp$$cmpcode);
  %}

  ins_pipe(icond_reg_reg);
%}
8968 
8969 // special cases where one arg is zero
8970 
// Conditional move, compressed oop, true-operand is the constant zero:
// dst = cmp ? 0 : src.
instruct cmovN_reg_zero(cmpOp cmp, rFlagsReg cr, iRegNNoSp dst, iRegN src, immN0 zero) %{
  match(Set dst (CMoveN (Binary cmp cr) (Binary src zero)));

  ins_cost(INSN_COST * 2);
  format %{ "cselw $dst, zr, $src $cmp\t# signed, compressed ptr"  %}

  ins_encode %{
    __ cselw(as_Register($dst$$reg),
             zr,
             as_Register($src$$reg),
             (Assembler::Condition)$cmp$$cmpcode);
  %}

  ins_pipe(icond_reg);
%}

// As cmovN_reg_zero but for an unsigned comparison.
instruct cmovUN_reg_zero(cmpOpU cmp, rFlagsRegU cr, iRegNNoSp dst, iRegN src, immN0 zero) %{
  match(Set dst (CMoveN (Binary cmp cr) (Binary src zero)));

  ins_cost(INSN_COST * 2);
  format %{ "cselw $dst, zr, $src $cmp\t# unsigned, compressed ptr"  %}

  ins_encode %{
    __ cselw(as_Register($dst$$reg),
             zr,
             as_Register($src$$reg),
             (Assembler::Condition)$cmp$$cmpcode);
  %}

  ins_pipe(icond_reg);
%}

// Conditional move, compressed oop, false-operand is the constant zero:
// dst = cmp ? src : 0.
instruct cmovN_zero_reg(cmpOp cmp, rFlagsReg cr, iRegNNoSp dst, immN0 zero, iRegN src) %{
  match(Set dst (CMoveN (Binary cmp cr) (Binary zero src)));

  ins_cost(INSN_COST * 2);
  format %{ "cselw $dst, $src, zr $cmp\t# signed, compressed ptr"  %}

  ins_encode %{
    __ cselw(as_Register($dst$$reg),
             as_Register($src$$reg),
             zr,
             (Assembler::Condition)$cmp$$cmpcode);
  %}

  ins_pipe(icond_reg);
%}

// As cmovN_zero_reg but for an unsigned comparison.
instruct cmovUN_zero_reg(cmpOpU cmp, rFlagsRegU cr, iRegNNoSp dst, immN0 zero, iRegN src) %{
  match(Set dst (CMoveN (Binary cmp cr) (Binary zero src)));

  ins_cost(INSN_COST * 2);
  format %{ "cselw $dst, $src, zr $cmp\t# unsigned, compressed ptr"  %}

  ins_encode %{
    __ cselw(as_Register($dst$$reg),
             as_Register($src$$reg),
             zr,
             (Assembler::Condition)$cmp$$cmpcode);
  %}

  ins_pipe(icond_reg);
%}
9034 
// Conditional move, float, both operands in FP registers, signed compare.
// NOTE(review): the format lists $src1, $src2 but the encoding passes src2
// as the first (condition-true) operand of fcsels — the format text does
// not reflect the emitted operand order.
instruct cmovF_reg(cmpOp cmp, rFlagsReg cr, vRegF dst, vRegF src1,  vRegF src2)
%{
  match(Set dst (CMoveF (Binary cmp cr) (Binary src1 src2)));

  ins_cost(INSN_COST * 3);

  format %{ "fcsels $dst, $src1, $src2, $cmp\t# signed cmove float\n\t" %}
  ins_encode %{
    Assembler::Condition cond = (Assembler::Condition)$cmp$$cmpcode;
    __ fcsels(as_FloatRegister($dst$$reg),
              as_FloatRegister($src2$$reg),
              as_FloatRegister($src1$$reg),
              cond);
  %}

  ins_pipe(fp_cond_reg_reg_s);
%}

// As cmovF_reg but for an unsigned comparison (cmpOpU/rFlagsRegU).
instruct cmovUF_reg(cmpOpU cmp, rFlagsRegU cr, vRegF dst, vRegF src1,  vRegF src2)
%{
  match(Set dst (CMoveF (Binary cmp cr) (Binary src1 src2)));

  ins_cost(INSN_COST * 3);

  format %{ "fcsels $dst, $src1, $src2, $cmp\t# unsigned cmove float\n\t" %}
  ins_encode %{
    Assembler::Condition cond = (Assembler::Condition)$cmp$$cmpcode;
    __ fcsels(as_FloatRegister($dst$$reg),
              as_FloatRegister($src2$$reg),
              as_FloatRegister($src1$$reg),
              cond);
  %}

  ins_pipe(fp_cond_reg_reg_s);
%}
9070 
// Conditional move, double, both operands in FP registers, signed compare.
// Fix: the format comment previously said "cmove float" although this is
// the double variant (fcseld on vRegD operands).
instruct cmovD_reg(cmpOp cmp, rFlagsReg cr, vRegD dst, vRegD src1,  vRegD src2)
%{
  match(Set dst (CMoveD (Binary cmp cr) (Binary src1 src2)));

  ins_cost(INSN_COST * 3);

  format %{ "fcseld $dst, $src1, $src2, $cmp\t# signed cmove double\n\t" %}
  ins_encode %{
    Assembler::Condition cond = (Assembler::Condition)$cmp$$cmpcode;
    // src2 is the condition-true operand of fcseld.
    __ fcseld(as_FloatRegister($dst$$reg),
              as_FloatRegister($src2$$reg),
              as_FloatRegister($src1$$reg),
              cond);
  %}

  ins_pipe(fp_cond_reg_reg_d);
%}
9088 
// Conditional move, double, both operands in FP registers, unsigned compare.
// Fix: the format comment previously said "cmove float" although this is
// the double variant (fcseld on vRegD operands).
instruct cmovUD_reg(cmpOpU cmp, rFlagsRegU cr, vRegD dst, vRegD src1,  vRegD src2)
%{
  match(Set dst (CMoveD (Binary cmp cr) (Binary src1 src2)));

  ins_cost(INSN_COST * 3);

  format %{ "fcseld $dst, $src1, $src2, $cmp\t# unsigned cmove double\n\t" %}
  ins_encode %{
    Assembler::Condition cond = (Assembler::Condition)$cmp$$cmpcode;
    // src2 is the condition-true operand of fcseld.
    __ fcseld(as_FloatRegister($dst$$reg),
              as_FloatRegister($src2$$reg),
              as_FloatRegister($src1$$reg),
              cond);
  %}

  ins_pipe(fp_cond_reg_reg_d);
%}
9106 
9107 // ============================================================================
9108 // Arithmetic Instructions
9109 //
9110 
9111 // Integer Addition
9112 
9113 // TODO
9114 // these currently employ operations which do not set CR and hence are
9115 // not flagged as killing CR but we would like to isolate the cases
9116 // where we want to set flags from those where we don't. need to work
9117 // out how to do that.
9118 
// Integer add, register + register: dst = src1 + src2 (32-bit addw).
instruct addI_reg_reg(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2) %{
  match(Set dst (AddI src1 src2));

  ins_cost(INSN_COST);
  format %{ "addw  $dst, $src1, $src2" %}

  ins_encode %{
    __ addw(as_Register($dst$$reg),
            as_Register($src1$$reg),
            as_Register($src2$$reg));
  %}

  ins_pipe(ialu_reg_reg);
%}

// Integer add, register + add/sub-encodable immediate.
instruct addI_reg_imm(iRegINoSp dst, iRegIorL2I src1, immIAddSub src2) %{
  match(Set dst (AddI src1 src2));

  ins_cost(INSN_COST);
  format %{ "addw $dst, $src1, $src2" %}

  // use opcode to indicate that this is an add not a sub
  opcode(0x0);

  ins_encode(aarch64_enc_addsubw_imm(dst, src1, src2));

  ins_pipe(ialu_reg_imm);
%}

// Integer add of a narrowed (L2I) long register and an immediate; the
// 32-bit addw implicitly performs the narrowing.
instruct addI_reg_imm_i2l(iRegINoSp dst, iRegL src1, immIAddSub src2) %{
  match(Set dst (AddI (ConvL2I src1) src2));

  ins_cost(INSN_COST);
  format %{ "addw $dst, $src1, $src2" %}

  // use opcode to indicate that this is an add not a sub
  opcode(0x0);

  ins_encode(aarch64_enc_addsubw_imm(dst, src1, src2));

  ins_pipe(ialu_reg_imm);
%}
9161 
9162 // Pointer Addition
// Pointer add, register + long offset register.
instruct addP_reg_reg(iRegPNoSp dst, iRegP src1, iRegL src2) %{
  match(Set dst (AddP src1 src2));

  ins_cost(INSN_COST);
  format %{ "add $dst, $src1, $src2\t# ptr" %}

  ins_encode %{
    __ add(as_Register($dst$$reg),
           as_Register($src1$$reg),
           as_Register($src2$$reg));
  %}

  ins_pipe(ialu_reg_reg);
%}

// Pointer add with a sign-extended 32-bit offset; folds the ConvI2L into
// the add's sxtw extend operand.
instruct addP_reg_reg_ext(iRegPNoSp dst, iRegP src1, iRegIorL2I src2) %{
  match(Set dst (AddP src1 (ConvI2L src2)));

  ins_cost(1.9 * INSN_COST);
  format %{ "add $dst, $src1, $src2, sxtw\t# ptr" %}

  ins_encode %{
    __ add(as_Register($dst$$reg),
           as_Register($src1$$reg),
           as_Register($src2$$reg), ext::sxtw);
  %}

  ins_pipe(ialu_reg_reg);
%}

// Pointer add with a scaled long index; folds the shift into an lea-style
// base + (index << scale) address computation.
instruct addP_reg_reg_lsl(iRegPNoSp dst, iRegP src1, iRegL src2, immIScale scale) %{
  match(Set dst (AddP src1 (LShiftL src2 scale)));

  ins_cost(1.9 * INSN_COST);
  format %{ "add $dst, $src1, $src2, LShiftL $scale\t# ptr" %}

  ins_encode %{
    __ lea(as_Register($dst$$reg),
           Address(as_Register($src1$$reg), as_Register($src2$$reg),
                   Address::lsl($scale$$constant)));
  %}

  ins_pipe(ialu_reg_reg_shift);
%}

// Pointer add with a sign-extended and scaled 32-bit index; folds both the
// ConvI2L and the shift into a base + extend(index, sxtw) << scale address.
instruct addP_reg_reg_ext_shift(iRegPNoSp dst, iRegP src1, iRegIorL2I src2, immIScale scale) %{
  match(Set dst (AddP src1 (LShiftL (ConvI2L src2) scale)));

  ins_cost(1.9 * INSN_COST);
  format %{ "add $dst, $src1, $src2, I2L $scale\t# ptr" %}

  ins_encode %{
    __ lea(as_Register($dst$$reg),
           Address(as_Register($src1$$reg), as_Register($src2$$reg),
                   Address::sxtw($scale$$constant)));
  %}

  ins_pipe(ialu_reg_reg_shift);
%}

// Left shift of a sign-extended int, implemented as a single sbfiz.
// NOTE(review): the field width passed to sbfiz is clamped with MIN(32, ...)
// although the format string shows only "-$scale & 63".
instruct lshift_ext(iRegLNoSp dst, iRegIorL2I src, immI scale, rFlagsReg cr) %{
  match(Set dst (LShiftL (ConvI2L src) scale));

  ins_cost(INSN_COST);
  format %{ "sbfiz $dst, $src, $scale & 63, -$scale & 63\t" %}

  ins_encode %{
    __ sbfiz(as_Register($dst$$reg),
          as_Register($src$$reg),
          $scale$$constant & 63, MIN(32, (-$scale$$constant) & 63));
  %}

  ins_pipe(ialu_reg_shift);
%}

// Pointer Immediate Addition
// n.b. this needs to be more expensive than using an indirect memory
// operand
instruct addP_reg_imm(iRegPNoSp dst, iRegP src1, immLAddSub src2) %{
  match(Set dst (AddP src1 src2));

  ins_cost(INSN_COST);
  format %{ "add $dst, $src1, $src2\t# ptr" %}

  // use opcode to indicate that this is an add not a sub
  opcode(0x0);

  ins_encode( aarch64_enc_addsub_imm(dst, src1, src2) );

  ins_pipe(ialu_reg_imm);
%}
9254 
9255 // Long Addition
// Long add, register + register: dst = src1 + src2 (64-bit add).
instruct addL_reg_reg(iRegLNoSp dst, iRegL src1, iRegL src2) %{

  match(Set dst (AddL src1 src2));

  ins_cost(INSN_COST);
  format %{ "add  $dst, $src1, $src2" %}

  ins_encode %{
    __ add(as_Register($dst$$reg),
           as_Register($src1$$reg),
           as_Register($src2$$reg));
  %}

  ins_pipe(ialu_reg_reg);
%}
9271 
// Long Immediate Addition. No constant pool entries required.
// Long add, register + add/sub-encodable immediate.
instruct addL_reg_imm(iRegLNoSp dst, iRegL src1, immLAddSub src2) %{
  match(Set dst (AddL src1 src2));

  ins_cost(INSN_COST);
  format %{ "add $dst, $src1, $src2" %}

  // use opcode to indicate that this is an add not a sub
  opcode(0x0);

  ins_encode( aarch64_enc_addsub_imm(dst, src1, src2) );

  ins_pipe(ialu_reg_imm);
%}
9286 
9287 // Integer Subtraction
// Integer subtract, register - register (32-bit subw).
instruct subI_reg_reg(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2) %{
  match(Set dst (SubI src1 src2));

  ins_cost(INSN_COST);
  format %{ "subw  $dst, $src1, $src2" %}

  ins_encode %{
    __ subw(as_Register($dst$$reg),
            as_Register($src1$$reg),
            as_Register($src2$$reg));
  %}

  ins_pipe(ialu_reg_reg);
%}

// Immediate Subtraction
// Integer subtract, register - add/sub-encodable immediate.
instruct subI_reg_imm(iRegINoSp dst, iRegIorL2I src1, immIAddSub src2) %{
  match(Set dst (SubI src1 src2));

  ins_cost(INSN_COST);
  format %{ "subw $dst, $src1, $src2" %}

  // use opcode to indicate that this is a sub not an add
  opcode(0x1);

  ins_encode(aarch64_enc_addsubw_imm(dst, src1, src2));

  ins_pipe(ialu_reg_imm);
%}
9317 
9318 // Long Subtraction
// Long subtract, register - register (64-bit sub).
instruct subL_reg_reg(iRegLNoSp dst, iRegL src1, iRegL src2) %{

  match(Set dst (SubL src1 src2));

  ins_cost(INSN_COST);
  format %{ "sub  $dst, $src1, $src2" %}

  ins_encode %{
    __ sub(as_Register($dst$$reg),
           as_Register($src1$$reg),
           as_Register($src2$$reg));
  %}

  ins_pipe(ialu_reg_reg);
%}
9334 
// Long Immediate Subtraction. No constant pool entries required.
// Long subtract, register - add/sub-encodable immediate.
// Fix: the format string previously read "sub$dst" with no separator
// between mnemonic and operand; restored the two-space spacing used by
// subL_reg_reg.
instruct subL_reg_imm(iRegLNoSp dst, iRegL src1, immLAddSub src2) %{
  match(Set dst (SubL src1 src2));

  ins_cost(INSN_COST);
  format %{ "sub  $dst, $src1, $src2" %}

  // use opcode to indicate that this is a sub not an add
  opcode(0x1);

  ins_encode( aarch64_enc_addsub_imm(dst, src1, src2) );

  ins_pipe(ialu_reg_imm);
%}
9349 
9350 // Integer Negation (special case for sub)
9351 
// Integer negation (matches SubI of zero): dst = -src via negw.
instruct negI_reg(iRegINoSp dst, iRegIorL2I src, immI0 zero, rFlagsReg cr) %{
  match(Set dst (SubI zero src));

  ins_cost(INSN_COST);
  format %{ "negw $dst, $src\t# int" %}

  ins_encode %{
    __ negw(as_Register($dst$$reg),
            as_Register($src$$reg));
  %}

  ins_pipe(ialu_reg);
%}

// Long Negation
// Long negation (matches SubL of zero): dst = -src via neg.
instruct negL_reg(iRegLNoSp dst, iRegL src, immL0 zero, rFlagsReg cr) %{
  match(Set dst (SubL zero src));

  ins_cost(INSN_COST);
  format %{ "neg $dst, $src\t# long" %}

  ins_encode %{
    __ neg(as_Register($dst$$reg),
           as_Register($src$$reg));
  %}

  ins_pipe(ialu_reg);
%}
9381 
9382 // Integer Multiply
9383 
// Integer multiply: dst = src1 * src2 (32-bit mulw).
instruct mulI(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2) %{
  match(Set dst (MulI src1 src2));

  ins_cost(INSN_COST * 3);
  format %{ "mulw  $dst, $src1, $src2" %}

  ins_encode %{
    __ mulw(as_Register($dst$$reg),
            as_Register($src1$$reg),
            as_Register($src2$$reg));
  %}

  ins_pipe(imul_reg_reg);
%}

// Widening multiply: long product of two sign-extended ints, matched to a
// single smull instead of two extends plus a 64-bit mul.
instruct smulI(iRegLNoSp dst, iRegIorL2I src1, iRegIorL2I src2) %{
  match(Set dst (MulL (ConvI2L src1) (ConvI2L src2)));

  ins_cost(INSN_COST * 3);
  format %{ "smull  $dst, $src1, $src2" %}

  ins_encode %{
    __ smull(as_Register($dst$$reg),
             as_Register($src1$$reg),
             as_Register($src2$$reg));
  %}

  ins_pipe(imul_reg_reg);
%}
9413 
9414 // Long Multiply
9415 
// Long multiply: dst = src1 * src2 (low 64 bits).
instruct mulL(iRegLNoSp dst, iRegL src1, iRegL src2) %{
  match(Set dst (MulL src1 src2));

  ins_cost(INSN_COST * 5);
  format %{ "mul  $dst, $src1, $src2" %}

  ins_encode %{
    __ mul(as_Register($dst$$reg),
           as_Register($src1$$reg),
           as_Register($src2$$reg));
  %}

  ins_pipe(lmul_reg_reg);
%}

// High 64 bits of the signed 128-bit product (MulHiL) via smulh.
instruct mulHiL_rReg(iRegLNoSp dst, iRegL src1, iRegL src2, rFlagsReg cr)
%{
  match(Set dst (MulHiL src1 src2));

  ins_cost(INSN_COST * 7);
  format %{ "smulh   $dst, $src1, $src2, \t# mulhi" %}

  ins_encode %{
    __ smulh(as_Register($dst$$reg),
             as_Register($src1$$reg),
             as_Register($src2$$reg));
  %}

  ins_pipe(lmul_reg_reg);
%}
9446 
9447 // Combined Integer Multiply & Add/Sub
9448 
// Fused integer multiply-add: dst = src3 + src1 * src2, one maddw.
// Fix: the format previously showed "madd" although the encoding emits the
// 32-bit maddw (consistent with mulI showing "mulw").
instruct maddI(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2, iRegIorL2I src3) %{
  match(Set dst (AddI src3 (MulI src1 src2)));

  ins_cost(INSN_COST * 3);
  format %{ "maddw  $dst, $src1, $src2, $src3" %}

  ins_encode %{
    __ maddw(as_Register($dst$$reg),
             as_Register($src1$$reg),
             as_Register($src2$$reg),
             as_Register($src3$$reg));
  %}

  ins_pipe(imac_reg_reg);
%}
9464 
// Fused integer multiply-subtract: dst = src3 - src1 * src2, one msubw.
// Fix: the format previously showed "msub" although the encoding emits the
// 32-bit msubw (consistent with mulI showing "mulw").
instruct msubI(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2, iRegIorL2I src3) %{
  match(Set dst (SubI src3 (MulI src1 src2)));

  ins_cost(INSN_COST * 3);
  format %{ "msubw  $dst, $src1, $src2, $src3" %}

  ins_encode %{
    __ msubw(as_Register($dst$$reg),
             as_Register($src1$$reg),
             as_Register($src2$$reg),
             as_Register($src3$$reg));
  %}

  ins_pipe(imac_reg_reg);
%}
9480 
9481 // Combined Long Multiply & Add/Sub
9482 
// Fused long multiply-add: dst = src3 + src1 * src2, one madd.
instruct maddL(iRegLNoSp dst, iRegL src1, iRegL src2, iRegL src3) %{
  match(Set dst (AddL src3 (MulL src1 src2)));

  ins_cost(INSN_COST * 5);
  format %{ "madd  $dst, $src1, $src2, $src3" %}

  ins_encode %{
    __ madd(as_Register($dst$$reg),
            as_Register($src1$$reg),
            as_Register($src2$$reg),
            as_Register($src3$$reg));
  %}

  ins_pipe(lmac_reg_reg);
%}

// Fused long multiply-subtract: dst = src3 - src1 * src2, one msub.
instruct msubL(iRegLNoSp dst, iRegL src1, iRegL src2, iRegL src3) %{
  match(Set dst (SubL src3 (MulL src1 src2)));

  ins_cost(INSN_COST * 5);
  format %{ "msub  $dst, $src1, $src2, $src3" %}

  ins_encode %{
    __ msub(as_Register($dst$$reg),
            as_Register($src1$$reg),
            as_Register($src2$$reg),
            as_Register($src3$$reg));
  %}

  ins_pipe(lmac_reg_reg);
%}
9514 
9515 // Integer Divide
9516 
// Integer divide: dst = src1 / src2 (sdivw via aarch64_enc_divw).
instruct divI(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2) %{
  match(Set dst (DivI src1 src2));

  ins_cost(INSN_COST * 19);
  format %{ "sdivw  $dst, $src1, $src2" %}

  ins_encode(aarch64_enc_divw(dst, src1, src2));
  ins_pipe(idiv_reg_reg);
%}

// (src1 >> 31) >>> 31 extracts the sign bit of src1; matched to a single
// logical shift right by 31.
instruct signExtract(iRegINoSp dst, iRegIorL2I src1, immI_31 div1, immI_31 div2) %{
  match(Set dst (URShiftI (RShiftI src1 div1) div2));
  ins_cost(INSN_COST);
  format %{ "lsrw $dst, $src1, $div1" %}
  ins_encode %{
    __ lsrw(as_Register($dst$$reg), as_Register($src1$$reg), 31);
  %}
  ins_pipe(ialu_reg_shift);
%}

// src + ((src >> 31) >>> 31): adds the sign bit of src, folded into a
// single addw with an LSR #31 shifted operand.
instruct div2Round(iRegINoSp dst, iRegIorL2I src, immI_31 div1, immI_31 div2) %{
  match(Set dst (AddI src (URShiftI (RShiftI src div1) div2)));
  ins_cost(INSN_COST);
  format %{ "addw $dst, $src, LSR $div1" %}

  ins_encode %{
    __ addw(as_Register($dst$$reg),
              as_Register($src$$reg),
              as_Register($src$$reg),
              Assembler::LSR, 31);
  %}
  ins_pipe(ialu_reg);
%}
9550 
9551 // Long Divide
9552 
// Long divide: dst = src1 / src2 (sdiv via aarch64_enc_div).
instruct divL(iRegLNoSp dst, iRegL src1, iRegL src2) %{
  match(Set dst (DivL src1 src2));

  ins_cost(INSN_COST * 35);
  format %{ "sdiv   $dst, $src1, $src2" %}

  ins_encode(aarch64_enc_div(dst, src1, src2));
  ins_pipe(ldiv_reg_reg);
%}

// (src1 >> 63) >>> 63 extracts the sign bit of src1; matched to a single
// logical shift right by 63.
instruct signExtractL(iRegLNoSp dst, iRegL src1, immI_63 div1, immI_63 div2) %{
  match(Set dst (URShiftL (RShiftL src1 div1) div2));
  ins_cost(INSN_COST);
  format %{ "lsr $dst, $src1, $div1" %}
  ins_encode %{
    __ lsr(as_Register($dst$$reg), as_Register($src1$$reg), 63);
  %}
  ins_pipe(ialu_reg_shift);
%}

// src + ((src >> 63) >>> 63): adds the sign bit of src, folded into a
// single add with an LSR #63 shifted operand.
instruct div2RoundL(iRegLNoSp dst, iRegL src, immI_63 div1, immI_63 div2) %{
  match(Set dst (AddL src (URShiftL (RShiftL src div1) div2)));
  ins_cost(INSN_COST);
  format %{ "add $dst, $src, $div1" %}

  ins_encode %{
    __ add(as_Register($dst$$reg),
              as_Register($src$$reg),
              as_Register($src$$reg),
              Assembler::LSR, 63);
  %}
  ins_pipe(ialu_reg);
%}
9586 
9587 // Integer Remainder
9588 
// Integer remainder: dst = src1 % src2, computed as divide + multiply-
// subtract (see aarch64_enc_modw).
// Fix: the second format line previously read "msubw($dst, ..." — a stray
// "(" producing broken pseudo-assembly in disassembly listings.
instruct modI(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2) %{
  match(Set dst (ModI src1 src2));

  ins_cost(INSN_COST * 22);
  format %{ "sdivw  rscratch1, $src1, $src2\n\t"
            "msubw  $dst, rscratch1, $src2, $src1" %}

  ins_encode(aarch64_enc_modw(dst, src1, src2));
  ins_pipe(idiv_reg_reg);
%}
9599 
9600 // Long Remainder
9601 
// Long remainder: dst = src1 % src2, computed as divide + multiply-
// subtract (see aarch64_enc_mod).
// Fix: the second format line previously read "msub($dst, ..." — a stray
// "(" — and the first line ended in "\n" without the "\t" continuation
// used by modI.
instruct modL(iRegLNoSp dst, iRegL src1, iRegL src2) %{
  match(Set dst (ModL src1 src2));

  ins_cost(INSN_COST * 38);
  format %{ "sdiv   rscratch1, $src1, $src2\n\t"
            "msub   $dst, rscratch1, $src2, $src1" %}

  ins_encode(aarch64_enc_mod(dst, src1, src2));
  ins_pipe(ldiv_reg_reg);
%}
9612 
9613 // Integer Shifts
9614 
9615 // Shift Left Register
// Integer shift left by a register amount (lslvw).
instruct lShiftI_reg_reg(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2) %{
  match(Set dst (LShiftI src1 src2));

  ins_cost(INSN_COST * 2);
  format %{ "lslvw  $dst, $src1, $src2" %}

  ins_encode %{
    __ lslvw(as_Register($dst$$reg),
             as_Register($src1$$reg),
             as_Register($src2$$reg));
  %}

  ins_pipe(ialu_reg_reg_vshift);
%}

// Shift Left Immediate
// Integer shift left by an immediate; count masked to 0..31 as Java requires.
instruct lShiftI_reg_imm(iRegINoSp dst, iRegIorL2I src1, immI src2) %{
  match(Set dst (LShiftI src1 src2));

  ins_cost(INSN_COST);
  format %{ "lslw $dst, $src1, ($src2 & 0x1f)" %}

  ins_encode %{
    __ lslw(as_Register($dst$$reg),
            as_Register($src1$$reg),
            $src2$$constant & 0x1f);
  %}

  ins_pipe(ialu_reg_shift);
%}

// Shift Right Logical Register
// Integer unsigned shift right by a register amount (lsrvw).
instruct urShiftI_reg_reg(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2) %{
  match(Set dst (URShiftI src1 src2));

  ins_cost(INSN_COST * 2);
  format %{ "lsrvw  $dst, $src1, $src2" %}

  ins_encode %{
    __ lsrvw(as_Register($dst$$reg),
             as_Register($src1$$reg),
             as_Register($src2$$reg));
  %}

  ins_pipe(ialu_reg_reg_vshift);
%}

// Shift Right Logical Immediate
// Integer unsigned shift right by an immediate; count masked to 0..31.
instruct urShiftI_reg_imm(iRegINoSp dst, iRegIorL2I src1, immI src2) %{
  match(Set dst (URShiftI src1 src2));

  ins_cost(INSN_COST);
  format %{ "lsrw $dst, $src1, ($src2 & 0x1f)" %}

  ins_encode %{
    __ lsrw(as_Register($dst$$reg),
            as_Register($src1$$reg),
            $src2$$constant & 0x1f);
  %}

  ins_pipe(ialu_reg_shift);
%}

// Shift Right Arithmetic Register
// Integer signed shift right by a register amount (asrvw).
instruct rShiftI_reg_reg(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2) %{
  match(Set dst (RShiftI src1 src2));

  ins_cost(INSN_COST * 2);
  format %{ "asrvw  $dst, $src1, $src2" %}

  ins_encode %{
    __ asrvw(as_Register($dst$$reg),
             as_Register($src1$$reg),
             as_Register($src2$$reg));
  %}

  ins_pipe(ialu_reg_reg_vshift);
%}

// Shift Right Arithmetic Immediate
// Integer signed shift right by an immediate; count masked to 0..31.
instruct rShiftI_reg_imm(iRegINoSp dst, iRegIorL2I src1, immI src2) %{
  match(Set dst (RShiftI src1 src2));

  ins_cost(INSN_COST);
  format %{ "asrw $dst, $src1, ($src2 & 0x1f)" %}

  ins_encode %{
    __ asrw(as_Register($dst$$reg),
            as_Register($src1$$reg),
            $src2$$constant & 0x1f);
  %}

  ins_pipe(ialu_reg_shift);
%}
9710 
9711 // Combined Int Mask and Right Shift (using UBFM)
9712 // TODO
9713 
9714 // Long Shifts
9715 
9716 // Shift Left Register
// Long shift left by a register amount (lslv).
instruct lShiftL_reg_reg(iRegLNoSp dst, iRegL src1, iRegIorL2I src2) %{
  match(Set dst (LShiftL src1 src2));

  ins_cost(INSN_COST * 2);
  format %{ "lslv  $dst, $src1, $src2" %}

  ins_encode %{
    __ lslv(as_Register($dst$$reg),
            as_Register($src1$$reg),
            as_Register($src2$$reg));
  %}

  ins_pipe(ialu_reg_reg_vshift);
%}

// Shift Left Immediate
// Long shift left by an immediate; count masked to 0..63 as Java requires.
instruct lShiftL_reg_imm(iRegLNoSp dst, iRegL src1, immI src2) %{
  match(Set dst (LShiftL src1 src2));

  ins_cost(INSN_COST);
  format %{ "lsl $dst, $src1, ($src2 & 0x3f)" %}

  ins_encode %{
    __ lsl(as_Register($dst$$reg),
            as_Register($src1$$reg),
            $src2$$constant & 0x3f);
  %}

  ins_pipe(ialu_reg_shift);
%}

// Shift Right Logical Register
// Long unsigned shift right by a register amount (lsrv).
instruct urShiftL_reg_reg(iRegLNoSp dst, iRegL src1, iRegIorL2I src2) %{
  match(Set dst (URShiftL src1 src2));

  ins_cost(INSN_COST * 2);
  format %{ "lsrv  $dst, $src1, $src2" %}

  ins_encode %{
    __ lsrv(as_Register($dst$$reg),
            as_Register($src1$$reg),
            as_Register($src2$$reg));
  %}

  ins_pipe(ialu_reg_reg_vshift);
%}

// Shift Right Logical Immediate
// Long unsigned shift right by an immediate; count masked to 0..63.
instruct urShiftL_reg_imm(iRegLNoSp dst, iRegL src1, immI src2) %{
  match(Set dst (URShiftL src1 src2));

  ins_cost(INSN_COST);
  format %{ "lsr $dst, $src1, ($src2 & 0x3f)" %}

  ins_encode %{
    __ lsr(as_Register($dst$$reg),
           as_Register($src1$$reg),
           $src2$$constant & 0x3f);
  %}

  ins_pipe(ialu_reg_shift);
%}

// A special-case pattern for card table stores.
// Unsigned shift right of a pointer reinterpreted as a long (CastP2X).
instruct urShiftP_reg_imm(iRegLNoSp dst, iRegP src1, immI src2) %{
  match(Set dst (URShiftL (CastP2X src1) src2));

  ins_cost(INSN_COST);
  format %{ "lsr $dst, p2x($src1), ($src2 & 0x3f)" %}

  ins_encode %{
    __ lsr(as_Register($dst$$reg),
           as_Register($src1$$reg),
           $src2$$constant & 0x3f);
  %}

  ins_pipe(ialu_reg_shift);
%}

// Shift Right Arithmetic Register
// Long signed shift right by a register amount (asrv).
instruct rShiftL_reg_reg(iRegLNoSp dst, iRegL src1, iRegIorL2I src2) %{
  match(Set dst (RShiftL src1 src2));

  ins_cost(INSN_COST * 2);
  format %{ "asrv  $dst, $src1, $src2" %}

  ins_encode %{
    __ asrv(as_Register($dst$$reg),
            as_Register($src1$$reg),
            as_Register($src2$$reg));
  %}

  ins_pipe(ialu_reg_reg_vshift);
%}

// Shift Right Arithmetic Immediate
// Long signed shift right by an immediate; count masked to 0..63.
instruct rShiftL_reg_imm(iRegLNoSp dst, iRegL src1, immI src2) %{
  match(Set dst (RShiftL src1 src2));

  ins_cost(INSN_COST);
  format %{ "asr $dst, $src1, ($src2 & 0x3f)" %}

  ins_encode %{
    __ asr(as_Register($dst$$reg),
           as_Register($src1$$reg),
           $src2$$constant & 0x3f);
  %}

  ins_pipe(ialu_reg_shift);
%}
9827 
9828 // BEGIN This section of the file is automatically generated. Do not edit --------------
9829 
9830 instruct regL_not_reg(iRegLNoSp dst,
9831                          iRegL src1, immL_M1 m1,
9832                          rFlagsReg cr) %{
9833   match(Set dst (XorL src1 m1));
9834   ins_cost(INSN_COST);
9835   format %{ "eon  $dst, $src1, zr" %}
9836 
9837   ins_encode %{
9838     __ eon(as_Register($dst$$reg),
9839               as_Register($src1$$reg),
9840               zr,
9841               Assembler::LSL, 0);
9842   %}
9843 
9844   ins_pipe(ialu_reg);
9845 %}
9846 instruct regI_not_reg(iRegINoSp dst,
9847                          iRegIorL2I src1, immI_M1 m1,
9848                          rFlagsReg cr) %{
9849   match(Set dst (XorI src1 m1));
9850   ins_cost(INSN_COST);
9851   format %{ "eonw  $dst, $src1, zr" %}
9852 
9853   ins_encode %{
9854     __ eonw(as_Register($dst$$reg),
9855               as_Register($src1$$reg),
9856               zr,
9857               Assembler::LSL, 0);
9858   %}
9859 
9860   ins_pipe(ialu_reg);
9861 %}
9862 
// src1 & ~src2, int: matches AndI with an XorI-by-minus-one operand and
// folds the NOT into a single BICW.
instruct AndI_reg_not_reg(iRegINoSp dst,
                         iRegIorL2I src1, iRegIorL2I src2, immI_M1 m1,
                         rFlagsReg cr) %{
  match(Set dst (AndI src1 (XorI src2 m1)));
  ins_cost(INSN_COST);
  format %{ "bicw  $dst, $src1, $src2" %}

  ins_encode %{
    __ bicw(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSL, 0);
  %}

  ins_pipe(ialu_reg_reg);
%}

// src1 & ~src2, long: folds the NOT into a single BIC.
instruct AndL_reg_not_reg(iRegLNoSp dst,
                         iRegL src1, iRegL src2, immL_M1 m1,
                         rFlagsReg cr) %{
  match(Set dst (AndL src1 (XorL src2 m1)));
  ins_cost(INSN_COST);
  format %{ "bic  $dst, $src1, $src2" %}

  ins_encode %{
    __ bic(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSL, 0);
  %}

  ins_pipe(ialu_reg_reg);
%}
9896 
// src1 | ~src2, int: folds the NOT into a single ORNW.
instruct OrI_reg_not_reg(iRegINoSp dst,
                         iRegIorL2I src1, iRegIorL2I src2, immI_M1 m1,
                         rFlagsReg cr) %{
  match(Set dst (OrI src1 (XorI src2 m1)));
  ins_cost(INSN_COST);
  format %{ "ornw  $dst, $src1, $src2" %}

  ins_encode %{
    __ ornw(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSL, 0);
  %}

  ins_pipe(ialu_reg_reg);
%}

// src1 | ~src2, long: folds the NOT into a single ORN.
instruct OrL_reg_not_reg(iRegLNoSp dst,
                         iRegL src1, iRegL src2, immL_M1 m1,
                         rFlagsReg cr) %{
  match(Set dst (OrL src1 (XorL src2 m1)));
  ins_cost(INSN_COST);
  format %{ "orn  $dst, $src1, $src2" %}

  ins_encode %{
    __ orn(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSL, 0);
  %}

  ins_pipe(ialu_reg_reg);
%}
9930 
9931 instruct XorI_reg_not_reg(iRegINoSp dst,
9932                          iRegIorL2I src1, iRegIorL2I src2, immI_M1 m1,
9933                          rFlagsReg cr) %{
9934   match(Set dst (XorI m1 (XorI src2 src1)));
9935   ins_cost(INSN_COST);
9936   format %{ "eonw  $dst, $src1, $src2" %}
9937 
9938   ins_encode %{
9939     __ eonw(as_Register($dst$$reg),
9940               as_Register($src1$$reg),
9941               as_Register($src2$$reg),
9942               Assembler::LSL, 0);
9943   %}
9944 
9945   ins_pipe(ialu_reg_reg);
9946 %}
9947 
9948 instruct XorL_reg_not_reg(iRegLNoSp dst,
9949                          iRegL src1, iRegL src2, immL_M1 m1,
9950                          rFlagsReg cr) %{
9951   match(Set dst (XorL m1 (XorL src2 src1)));
9952   ins_cost(INSN_COST);
9953   format %{ "eon  $dst, $src1, $src2" %}
9954 
9955   ins_encode %{
9956     __ eon(as_Register($dst$$reg),
9957               as_Register($src1$$reg),
9958               as_Register($src2$$reg),
9959               Assembler::LSL, 0);
9960   %}
9961 
9962   ins_pipe(ialu_reg_reg);
9963 %}
9964 
// ============================================================================
// AND with a shifted-then-inverted operand:
//   dst = src1 & ~(src2 SHIFT src3)
// matched from the ideal shape (AndX src1 (XorX (ShiftX src2 src3) -1)),
// folded into one bic/bicw with a shifted register operand.
// Shift-amount immediates are masked to the operand width (0x1f for 32-bit,
// 0x3f for 64-bit), mirroring Java shift semantics.
// ============================================================================

// dst = src1 & ~(src2 >>> src3)  (32-bit, logical shift right)
instruct AndI_reg_URShift_not_reg(iRegINoSp dst,
                         iRegIorL2I src1, iRegIorL2I src2,
                         immI src3, immI_M1 src4, rFlagsReg cr) %{
  match(Set dst (AndI src1 (XorI(URShiftI src2 src3) src4)));
  ins_cost(1.9 * INSN_COST);
  format %{ "bicw  $dst, $src1, $src2, LSR $src3" %}

  ins_encode %{
    __ bicw(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSR,
              $src3$$constant & 0x1f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}

// dst = src1 & ~(src2 >>> src3)  (64-bit, logical shift right)
instruct AndL_reg_URShift_not_reg(iRegLNoSp dst,
                         iRegL src1, iRegL src2,
                         immI src3, immL_M1 src4, rFlagsReg cr) %{
  match(Set dst (AndL src1 (XorL(URShiftL src2 src3) src4)));
  ins_cost(1.9 * INSN_COST);
  format %{ "bic  $dst, $src1, $src2, LSR $src3" %}

  ins_encode %{
    __ bic(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSR,
              $src3$$constant & 0x3f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}

// dst = src1 & ~(src2 >> src3)  (32-bit, arithmetic shift right)
instruct AndI_reg_RShift_not_reg(iRegINoSp dst,
                         iRegIorL2I src1, iRegIorL2I src2,
                         immI src3, immI_M1 src4, rFlagsReg cr) %{
  match(Set dst (AndI src1 (XorI(RShiftI src2 src3) src4)));
  ins_cost(1.9 * INSN_COST);
  format %{ "bicw  $dst, $src1, $src2, ASR $src3" %}

  ins_encode %{
    __ bicw(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::ASR,
              $src3$$constant & 0x1f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}

// dst = src1 & ~(src2 >> src3)  (64-bit, arithmetic shift right)
instruct AndL_reg_RShift_not_reg(iRegLNoSp dst,
                         iRegL src1, iRegL src2,
                         immI src3, immL_M1 src4, rFlagsReg cr) %{
  match(Set dst (AndL src1 (XorL(RShiftL src2 src3) src4)));
  ins_cost(1.9 * INSN_COST);
  format %{ "bic  $dst, $src1, $src2, ASR $src3" %}

  ins_encode %{
    __ bic(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::ASR,
              $src3$$constant & 0x3f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}

// dst = src1 & ~(src2 << src3)  (32-bit, shift left)
instruct AndI_reg_LShift_not_reg(iRegINoSp dst,
                         iRegIorL2I src1, iRegIorL2I src2,
                         immI src3, immI_M1 src4, rFlagsReg cr) %{
  match(Set dst (AndI src1 (XorI(LShiftI src2 src3) src4)));
  ins_cost(1.9 * INSN_COST);
  format %{ "bicw  $dst, $src1, $src2, LSL $src3" %}

  ins_encode %{
    __ bicw(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSL,
              $src3$$constant & 0x1f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}

// dst = src1 & ~(src2 << src3)  (64-bit, shift left)
instruct AndL_reg_LShift_not_reg(iRegLNoSp dst,
                         iRegL src1, iRegL src2,
                         immI src3, immL_M1 src4, rFlagsReg cr) %{
  match(Set dst (AndL src1 (XorL(LShiftL src2 src3) src4)));
  ins_cost(1.9 * INSN_COST);
  format %{ "bic  $dst, $src1, $src2, LSL $src3" %}

  ins_encode %{
    __ bic(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSL,
              $src3$$constant & 0x3f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
10072 
// ============================================================================
// XOR with a shifted-then-inverted operand:
//   dst = src1 ^ ~(src2 SHIFT src3)
// As with the eon/eonw rules above, the -1 (src4) is the OUTER xor operand:
// the matched shape is (XorX -1 (XorX (ShiftX src2 src3) src1)), which is
// algebraically src1 ^ ~(src2 SHIFT src3), emitted as one eon/eonw with a
// shifted register operand.
// ============================================================================

// dst = src1 ^ ~(src2 >>> src3)  (32-bit, logical shift right)
instruct XorI_reg_URShift_not_reg(iRegINoSp dst,
                         iRegIorL2I src1, iRegIorL2I src2,
                         immI src3, immI_M1 src4, rFlagsReg cr) %{
  match(Set dst (XorI src4 (XorI(URShiftI src2 src3) src1)));
  ins_cost(1.9 * INSN_COST);
  format %{ "eonw  $dst, $src1, $src2, LSR $src3" %}

  ins_encode %{
    __ eonw(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSR,
              $src3$$constant & 0x1f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}

// dst = src1 ^ ~(src2 >>> src3)  (64-bit, logical shift right)
instruct XorL_reg_URShift_not_reg(iRegLNoSp dst,
                         iRegL src1, iRegL src2,
                         immI src3, immL_M1 src4, rFlagsReg cr) %{
  match(Set dst (XorL src4 (XorL(URShiftL src2 src3) src1)));
  ins_cost(1.9 * INSN_COST);
  format %{ "eon  $dst, $src1, $src2, LSR $src3" %}

  ins_encode %{
    __ eon(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSR,
              $src3$$constant & 0x3f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}

// dst = src1 ^ ~(src2 >> src3)  (32-bit, arithmetic shift right)
instruct XorI_reg_RShift_not_reg(iRegINoSp dst,
                         iRegIorL2I src1, iRegIorL2I src2,
                         immI src3, immI_M1 src4, rFlagsReg cr) %{
  match(Set dst (XorI src4 (XorI(RShiftI src2 src3) src1)));
  ins_cost(1.9 * INSN_COST);
  format %{ "eonw  $dst, $src1, $src2, ASR $src3" %}

  ins_encode %{
    __ eonw(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::ASR,
              $src3$$constant & 0x1f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}

// dst = src1 ^ ~(src2 >> src3)  (64-bit, arithmetic shift right)
instruct XorL_reg_RShift_not_reg(iRegLNoSp dst,
                         iRegL src1, iRegL src2,
                         immI src3, immL_M1 src4, rFlagsReg cr) %{
  match(Set dst (XorL src4 (XorL(RShiftL src2 src3) src1)));
  ins_cost(1.9 * INSN_COST);
  format %{ "eon  $dst, $src1, $src2, ASR $src3" %}

  ins_encode %{
    __ eon(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::ASR,
              $src3$$constant & 0x3f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}

// dst = src1 ^ ~(src2 << src3)  (32-bit, shift left)
instruct XorI_reg_LShift_not_reg(iRegINoSp dst,
                         iRegIorL2I src1, iRegIorL2I src2,
                         immI src3, immI_M1 src4, rFlagsReg cr) %{
  match(Set dst (XorI src4 (XorI(LShiftI src2 src3) src1)));
  ins_cost(1.9 * INSN_COST);
  format %{ "eonw  $dst, $src1, $src2, LSL $src3" %}

  ins_encode %{
    __ eonw(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSL,
              $src3$$constant & 0x1f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}

// dst = src1 ^ ~(src2 << src3)  (64-bit, shift left)
instruct XorL_reg_LShift_not_reg(iRegLNoSp dst,
                         iRegL src1, iRegL src2,
                         immI src3, immL_M1 src4, rFlagsReg cr) %{
  match(Set dst (XorL src4 (XorL(LShiftL src2 src3) src1)));
  ins_cost(1.9 * INSN_COST);
  format %{ "eon  $dst, $src1, $src2, LSL $src3" %}

  ins_encode %{
    __ eon(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSL,
              $src3$$constant & 0x3f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
10180 
// ============================================================================
// OR with a shifted-then-inverted operand:
//   dst = src1 | ~(src2 SHIFT src3)
// matched from (OrX src1 (XorX (ShiftX src2 src3) -1)) and folded into one
// orn/ornw with a shifted register operand.
// ============================================================================

// dst = src1 | ~(src2 >>> src3)  (32-bit, logical shift right)
instruct OrI_reg_URShift_not_reg(iRegINoSp dst,
                         iRegIorL2I src1, iRegIorL2I src2,
                         immI src3, immI_M1 src4, rFlagsReg cr) %{
  match(Set dst (OrI src1 (XorI(URShiftI src2 src3) src4)));
  ins_cost(1.9 * INSN_COST);
  format %{ "ornw  $dst, $src1, $src2, LSR $src3" %}

  ins_encode %{
    __ ornw(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSR,
              $src3$$constant & 0x1f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}

// dst = src1 | ~(src2 >>> src3)  (64-bit, logical shift right)
instruct OrL_reg_URShift_not_reg(iRegLNoSp dst,
                         iRegL src1, iRegL src2,
                         immI src3, immL_M1 src4, rFlagsReg cr) %{
  match(Set dst (OrL src1 (XorL(URShiftL src2 src3) src4)));
  ins_cost(1.9 * INSN_COST);
  format %{ "orn  $dst, $src1, $src2, LSR $src3" %}

  ins_encode %{
    __ orn(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSR,
              $src3$$constant & 0x3f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}

// dst = src1 | ~(src2 >> src3)  (32-bit, arithmetic shift right)
instruct OrI_reg_RShift_not_reg(iRegINoSp dst,
                         iRegIorL2I src1, iRegIorL2I src2,
                         immI src3, immI_M1 src4, rFlagsReg cr) %{
  match(Set dst (OrI src1 (XorI(RShiftI src2 src3) src4)));
  ins_cost(1.9 * INSN_COST);
  format %{ "ornw  $dst, $src1, $src2, ASR $src3" %}

  ins_encode %{
    __ ornw(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::ASR,
              $src3$$constant & 0x1f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}

// dst = src1 | ~(src2 >> src3)  (64-bit, arithmetic shift right)
instruct OrL_reg_RShift_not_reg(iRegLNoSp dst,
                         iRegL src1, iRegL src2,
                         immI src3, immL_M1 src4, rFlagsReg cr) %{
  match(Set dst (OrL src1 (XorL(RShiftL src2 src3) src4)));
  ins_cost(1.9 * INSN_COST);
  format %{ "orn  $dst, $src1, $src2, ASR $src3" %}

  ins_encode %{
    __ orn(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::ASR,
              $src3$$constant & 0x3f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}

// dst = src1 | ~(src2 << src3)  (32-bit, shift left)
instruct OrI_reg_LShift_not_reg(iRegINoSp dst,
                         iRegIorL2I src1, iRegIorL2I src2,
                         immI src3, immI_M1 src4, rFlagsReg cr) %{
  match(Set dst (OrI src1 (XorI(LShiftI src2 src3) src4)));
  ins_cost(1.9 * INSN_COST);
  format %{ "ornw  $dst, $src1, $src2, LSL $src3" %}

  ins_encode %{
    __ ornw(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSL,
              $src3$$constant & 0x1f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}

// dst = src1 | ~(src2 << src3)  (64-bit, shift left)
instruct OrL_reg_LShift_not_reg(iRegLNoSp dst,
                         iRegL src1, iRegL src2,
                         immI src3, immL_M1 src4, rFlagsReg cr) %{
  match(Set dst (OrL src1 (XorL(LShiftL src2 src3) src4)));
  ins_cost(1.9 * INSN_COST);
  format %{ "orn  $dst, $src1, $src2, LSL $src3" %}

  ins_encode %{
    __ orn(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSL,
              $src3$$constant & 0x3f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
10288 
// ============================================================================
// AND with a shifted register operand:
//   dst = src1 & (src2 SHIFT src3)
// The shift of src2 is folded into the and/andw itself (AArch64
// shifted-register operand form), saving a separate shift instruction.
// Note the 64-bit assembler mnemonic is 'andr' because 'and' clashes with
// the C++ alternative-operator keyword.
// ============================================================================

// dst = src1 & (src2 >>> src3)  (32-bit, logical shift right)
instruct AndI_reg_URShift_reg(iRegINoSp dst,
                         iRegIorL2I src1, iRegIorL2I src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (AndI src1 (URShiftI src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "andw  $dst, $src1, $src2, LSR $src3" %}

  ins_encode %{
    __ andw(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSR,
              $src3$$constant & 0x1f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}

// dst = src1 & (src2 >>> src3)  (64-bit, logical shift right)
instruct AndL_reg_URShift_reg(iRegLNoSp dst,
                         iRegL src1, iRegL src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (AndL src1 (URShiftL src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "andr  $dst, $src1, $src2, LSR $src3" %}

  ins_encode %{
    __ andr(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSR,
              $src3$$constant & 0x3f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}

// dst = src1 & (src2 >> src3)  (32-bit, arithmetic shift right)
instruct AndI_reg_RShift_reg(iRegINoSp dst,
                         iRegIorL2I src1, iRegIorL2I src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (AndI src1 (RShiftI src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "andw  $dst, $src1, $src2, ASR $src3" %}

  ins_encode %{
    __ andw(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::ASR,
              $src3$$constant & 0x1f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}

// dst = src1 & (src2 >> src3)  (64-bit, arithmetic shift right)
instruct AndL_reg_RShift_reg(iRegLNoSp dst,
                         iRegL src1, iRegL src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (AndL src1 (RShiftL src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "andr  $dst, $src1, $src2, ASR $src3" %}

  ins_encode %{
    __ andr(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::ASR,
              $src3$$constant & 0x3f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}

// dst = src1 & (src2 << src3)  (32-bit, shift left)
instruct AndI_reg_LShift_reg(iRegINoSp dst,
                         iRegIorL2I src1, iRegIorL2I src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (AndI src1 (LShiftI src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "andw  $dst, $src1, $src2, LSL $src3" %}

  ins_encode %{
    __ andw(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSL,
              $src3$$constant & 0x1f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}

// dst = src1 & (src2 << src3)  (64-bit, shift left)
instruct AndL_reg_LShift_reg(iRegLNoSp dst,
                         iRegL src1, iRegL src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (AndL src1 (LShiftL src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "andr  $dst, $src1, $src2, LSL $src3" %}

  ins_encode %{
    __ andr(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSL,
              $src3$$constant & 0x3f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
10402 
// ============================================================================
// XOR with a shifted register operand:
//   dst = src1 ^ (src2 SHIFT src3)
// folded into one eor/eorw using the shifted-register operand form.
// ============================================================================

// dst = src1 ^ (src2 >>> src3)  (32-bit, logical shift right)
instruct XorI_reg_URShift_reg(iRegINoSp dst,
                         iRegIorL2I src1, iRegIorL2I src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (XorI src1 (URShiftI src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "eorw  $dst, $src1, $src2, LSR $src3" %}

  ins_encode %{
    __ eorw(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSR,
              $src3$$constant & 0x1f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}

// dst = src1 ^ (src2 >>> src3)  (64-bit, logical shift right)
instruct XorL_reg_URShift_reg(iRegLNoSp dst,
                         iRegL src1, iRegL src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (XorL src1 (URShiftL src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "eor  $dst, $src1, $src2, LSR $src3" %}

  ins_encode %{
    __ eor(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSR,
              $src3$$constant & 0x3f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}

// dst = src1 ^ (src2 >> src3)  (32-bit, arithmetic shift right)
instruct XorI_reg_RShift_reg(iRegINoSp dst,
                         iRegIorL2I src1, iRegIorL2I src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (XorI src1 (RShiftI src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "eorw  $dst, $src1, $src2, ASR $src3" %}

  ins_encode %{
    __ eorw(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::ASR,
              $src3$$constant & 0x1f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}

// dst = src1 ^ (src2 >> src3)  (64-bit, arithmetic shift right)
instruct XorL_reg_RShift_reg(iRegLNoSp dst,
                         iRegL src1, iRegL src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (XorL src1 (RShiftL src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "eor  $dst, $src1, $src2, ASR $src3" %}

  ins_encode %{
    __ eor(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::ASR,
              $src3$$constant & 0x3f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}

// dst = src1 ^ (src2 << src3)  (32-bit, shift left)
instruct XorI_reg_LShift_reg(iRegINoSp dst,
                         iRegIorL2I src1, iRegIorL2I src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (XorI src1 (LShiftI src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "eorw  $dst, $src1, $src2, LSL $src3" %}

  ins_encode %{
    __ eorw(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSL,
              $src3$$constant & 0x1f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}

// dst = src1 ^ (src2 << src3)  (64-bit, shift left)
instruct XorL_reg_LShift_reg(iRegLNoSp dst,
                         iRegL src1, iRegL src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (XorL src1 (LShiftL src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "eor  $dst, $src1, $src2, LSL $src3" %}

  ins_encode %{
    __ eor(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSL,
              $src3$$constant & 0x3f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
10516 
// ============================================================================
// OR with a shifted register operand:
//   dst = src1 | (src2 SHIFT src3)
// folded into one orr/orrw using the shifted-register operand form.
// ============================================================================

// dst = src1 | (src2 >>> src3)  (32-bit, logical shift right)
instruct OrI_reg_URShift_reg(iRegINoSp dst,
                         iRegIorL2I src1, iRegIorL2I src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (OrI src1 (URShiftI src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "orrw  $dst, $src1, $src2, LSR $src3" %}

  ins_encode %{
    __ orrw(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSR,
              $src3$$constant & 0x1f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}

// dst = src1 | (src2 >>> src3)  (64-bit, logical shift right)
instruct OrL_reg_URShift_reg(iRegLNoSp dst,
                         iRegL src1, iRegL src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (OrL src1 (URShiftL src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "orr  $dst, $src1, $src2, LSR $src3" %}

  ins_encode %{
    __ orr(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSR,
              $src3$$constant & 0x3f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}

// dst = src1 | (src2 >> src3)  (32-bit, arithmetic shift right)
instruct OrI_reg_RShift_reg(iRegINoSp dst,
                         iRegIorL2I src1, iRegIorL2I src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (OrI src1 (RShiftI src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "orrw  $dst, $src1, $src2, ASR $src3" %}

  ins_encode %{
    __ orrw(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::ASR,
              $src3$$constant & 0x1f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}

// dst = src1 | (src2 >> src3)  (64-bit, arithmetic shift right)
instruct OrL_reg_RShift_reg(iRegLNoSp dst,
                         iRegL src1, iRegL src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (OrL src1 (RShiftL src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "orr  $dst, $src1, $src2, ASR $src3" %}

  ins_encode %{
    __ orr(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::ASR,
              $src3$$constant & 0x3f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}

// dst = src1 | (src2 << src3)  (32-bit, shift left)
instruct OrI_reg_LShift_reg(iRegINoSp dst,
                         iRegIorL2I src1, iRegIorL2I src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (OrI src1 (LShiftI src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "orrw  $dst, $src1, $src2, LSL $src3" %}

  ins_encode %{
    __ orrw(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSL,
              $src3$$constant & 0x1f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}

// dst = src1 | (src2 << src3)  (64-bit, shift left)
instruct OrL_reg_LShift_reg(iRegLNoSp dst,
                         iRegL src1, iRegL src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (OrL src1 (LShiftL src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "orr  $dst, $src1, $src2, LSL $src3" %}

  ins_encode %{
    __ orr(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSL,
              $src3$$constant & 0x3f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
10630 
// ============================================================================
// ADD with a shifted register operand:
//   dst = src1 + (src2 SHIFT src3)
// folded into one add/addw using the shifted-register operand form.
// ============================================================================

// dst = src1 + (src2 >>> src3)  (32-bit, logical shift right)
instruct AddI_reg_URShift_reg(iRegINoSp dst,
                         iRegIorL2I src1, iRegIorL2I src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (AddI src1 (URShiftI src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "addw  $dst, $src1, $src2, LSR $src3" %}

  ins_encode %{
    __ addw(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSR,
              $src3$$constant & 0x1f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}

// dst = src1 + (src2 >>> src3)  (64-bit, logical shift right)
instruct AddL_reg_URShift_reg(iRegLNoSp dst,
                         iRegL src1, iRegL src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (AddL src1 (URShiftL src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "add  $dst, $src1, $src2, LSR $src3" %}

  ins_encode %{
    __ add(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSR,
              $src3$$constant & 0x3f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}

// dst = src1 + (src2 >> src3)  (32-bit, arithmetic shift right)
instruct AddI_reg_RShift_reg(iRegINoSp dst,
                         iRegIorL2I src1, iRegIorL2I src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (AddI src1 (RShiftI src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "addw  $dst, $src1, $src2, ASR $src3" %}

  ins_encode %{
    __ addw(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::ASR,
              $src3$$constant & 0x1f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}

// dst = src1 + (src2 >> src3)  (64-bit, arithmetic shift right)
instruct AddL_reg_RShift_reg(iRegLNoSp dst,
                         iRegL src1, iRegL src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (AddL src1 (RShiftL src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "add  $dst, $src1, $src2, ASR $src3" %}

  ins_encode %{
    __ add(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::ASR,
              $src3$$constant & 0x3f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}

// dst = src1 + (src2 << src3)  (32-bit, shift left)
instruct AddI_reg_LShift_reg(iRegINoSp dst,
                         iRegIorL2I src1, iRegIorL2I src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (AddI src1 (LShiftI src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "addw  $dst, $src1, $src2, LSL $src3" %}

  ins_encode %{
    __ addw(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSL,
              $src3$$constant & 0x1f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}

// dst = src1 + (src2 << src3)  (64-bit, shift left)
instruct AddL_reg_LShift_reg(iRegLNoSp dst,
                         iRegL src1, iRegL src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (AddL src1 (LShiftL src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "add  $dst, $src1, $src2, LSL $src3" %}

  ins_encode %{
    __ add(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSL,
              $src3$$constant & 0x3f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
10744 
// ============================================================================
// SUB with a shifted register operand:
//   dst = src1 - (src2 SHIFT src3)
// folded into one sub/subw using the shifted-register operand form.
// Only the second (subtrahend) operand may carry the shift, matching the
// AArch64 encoding.
// ============================================================================

// dst = src1 - (src2 >>> src3)  (32-bit, logical shift right)
instruct SubI_reg_URShift_reg(iRegINoSp dst,
                         iRegIorL2I src1, iRegIorL2I src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (SubI src1 (URShiftI src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "subw  $dst, $src1, $src2, LSR $src3" %}

  ins_encode %{
    __ subw(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSR,
              $src3$$constant & 0x1f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}

// dst = src1 - (src2 >>> src3)  (64-bit, logical shift right)
instruct SubL_reg_URShift_reg(iRegLNoSp dst,
                         iRegL src1, iRegL src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (SubL src1 (URShiftL src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "sub  $dst, $src1, $src2, LSR $src3" %}

  ins_encode %{
    __ sub(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSR,
              $src3$$constant & 0x3f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}

// dst = src1 - (src2 >> src3)  (32-bit, arithmetic shift right)
instruct SubI_reg_RShift_reg(iRegINoSp dst,
                         iRegIorL2I src1, iRegIorL2I src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (SubI src1 (RShiftI src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "subw  $dst, $src1, $src2, ASR $src3" %}

  ins_encode %{
    __ subw(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::ASR,
              $src3$$constant & 0x1f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}

// dst = src1 - (src2 >> src3)  (64-bit, arithmetic shift right)
instruct SubL_reg_RShift_reg(iRegLNoSp dst,
                         iRegL src1, iRegL src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (SubL src1 (RShiftL src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "sub  $dst, $src1, $src2, ASR $src3" %}

  ins_encode %{
    __ sub(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::ASR,
              $src3$$constant & 0x3f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}

// dst = src1 - (src2 << src3)  (32-bit, shift left)
instruct SubI_reg_LShift_reg(iRegINoSp dst,
                         iRegIorL2I src1, iRegIorL2I src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (SubI src1 (LShiftI src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "subw  $dst, $src1, $src2, LSL $src3" %}

  ins_encode %{
    __ subw(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSL,
              $src3$$constant & 0x1f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}

// dst = src1 - (src2 << src3)  (64-bit, shift left)
instruct SubL_reg_LShift_reg(iRegLNoSp dst,
                         iRegL src1, iRegL src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (SubL src1 (LShiftL src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "sub  $dst, $src1, $src2, LSL $src3" %}

  ins_encode %{
    __ sub(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSL,
              $src3$$constant & 0x3f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
10858 
10859 
10860 
// Shift Left followed by Shift Right.
// This idiom is used by the compiler for the i2b bytecode etc.
// (x << lshift) >> rshift is a signed bitfield move: SBFM with
// rotate r = (rshift - lshift) & 63 and sign-bit position s = 63 - lshift.
instruct sbfmL(iRegLNoSp dst, iRegL src, immI lshift_count, immI rshift_count)
%{
  match(Set dst (RShiftL (LShiftL src lshift_count) rshift_count));
  // Make sure we are not going to exceed what sbfm can do.
  predicate((unsigned int)n->in(2)->get_int() <= 63
            && (unsigned int)n->in(1)->in(2)->get_int() <= 63);

  ins_cost(INSN_COST * 2);
  format %{ "sbfm  $dst, $src, $rshift_count - $lshift_count, #63 - $lshift_count" %}
  ins_encode %{
    int lshift = $lshift_count$$constant, rshift = $rshift_count$$constant;
    int s = 63 - lshift;              // index of the field's sign bit
    int r = (rshift - lshift) & 63;   // right-rotate amount
    __ sbfm(as_Register($dst$$reg),
            as_Register($src$$reg),
            r, s);
  %}

  ins_pipe(ialu_reg_shift);
%}

// Shift Left followed by Shift Right.
// This idiom is used by the compiler for the i2b bytecode etc.
// 32-bit variant of the rule above (SBFMW, shift counts in 0..31).
instruct sbfmwI(iRegINoSp dst, iRegIorL2I src, immI lshift_count, immI rshift_count)
%{
  match(Set dst (RShiftI (LShiftI src lshift_count) rshift_count));
  // Make sure we are not going to exceed what sbfmw can do.
  predicate((unsigned int)n->in(2)->get_int() <= 31
            && (unsigned int)n->in(1)->in(2)->get_int() <= 31);

  ins_cost(INSN_COST * 2);
  format %{ "sbfmw  $dst, $src, $rshift_count - $lshift_count, #31 - $lshift_count" %}
  ins_encode %{
    int lshift = $lshift_count$$constant, rshift = $rshift_count$$constant;
    int s = 31 - lshift;              // index of the field's sign bit
    int r = (rshift - lshift) & 31;   // right-rotate amount
    __ sbfmw(as_Register($dst$$reg),
            as_Register($src$$reg),
            r, s);
  %}

  ins_pipe(ialu_reg_shift);
%}

// Shift Left followed by Shift Right.
// This idiom is used by the compiler for the i2b bytecode etc.
// Unsigned counterpart: (x << lshift) >>> rshift maps to UBFM with the
// same (r, s) computation as sbfmL but zero-extending the field.
instruct ubfmL(iRegLNoSp dst, iRegL src, immI lshift_count, immI rshift_count)
%{
  match(Set dst (URShiftL (LShiftL src lshift_count) rshift_count));
  // Make sure we are not going to exceed what ubfm can do.
  predicate((unsigned int)n->in(2)->get_int() <= 63
            && (unsigned int)n->in(1)->in(2)->get_int() <= 63);

  ins_cost(INSN_COST * 2);
  format %{ "ubfm  $dst, $src, $rshift_count - $lshift_count, #63 - $lshift_count" %}
  ins_encode %{
    int lshift = $lshift_count$$constant, rshift = $rshift_count$$constant;
    int s = 63 - lshift;              // index of the field's top bit
    int r = (rshift - lshift) & 63;   // right-rotate amount
    __ ubfm(as_Register($dst$$reg),
            as_Register($src$$reg),
            r, s);
  %}

  ins_pipe(ialu_reg_shift);
%}

// Shift Left followed by Shift Right.
// This idiom is used by the compiler for the i2b bytecode etc.
// 32-bit variant of the rule above (UBFMW, shift counts in 0..31).
instruct ubfmwI(iRegINoSp dst, iRegIorL2I src, immI lshift_count, immI rshift_count)
%{
  match(Set dst (URShiftI (LShiftI src lshift_count) rshift_count));
  // Make sure we are not going to exceed what ubfmw can do.
  predicate((unsigned int)n->in(2)->get_int() <= 31
            && (unsigned int)n->in(1)->in(2)->get_int() <= 31);

  ins_cost(INSN_COST * 2);
  format %{ "ubfmw  $dst, $src, $rshift_count - $lshift_count, #31 - $lshift_count" %}
  ins_encode %{
    int lshift = $lshift_count$$constant, rshift = $rshift_count$$constant;
    int s = 31 - lshift;              // index of the field's top bit
    int r = (rshift - lshift) & 31;   // right-rotate amount
    __ ubfmw(as_Register($dst$$reg),
            as_Register($src$$reg),
            r, s);
  %}

  ins_pipe(ialu_reg_shift);
%}
// Bitfield extract with shift & mask

// (src >>> rshift) & mask, where mask is 2^width - 1 (guaranteed by
// immI_bitmask), is an unsigned bitfield extract of 'width' bits
// starting at bit 'rshift'.
instruct ubfxwI(iRegINoSp dst, iRegIorL2I src, immI rshift, immI_bitmask mask)
%{
  match(Set dst (AndI (URShiftI src rshift) mask));

  ins_cost(INSN_COST);
  format %{ "ubfxw $dst, $src, $rshift, $mask" %}
  ins_encode %{
    int rshift = $rshift$$constant;
    long mask = $mask$$constant;
    int width = exact_log2(mask+1);   // mask is 2^width - 1
    __ ubfxw(as_Register($dst$$reg),
            as_Register($src$$reg), rshift, width);
  %}
  ins_pipe(ialu_reg_shift);
%}
// 64-bit variant: (src >>> rshift) & mask with a long bitmask.
instruct ubfxL(iRegLNoSp dst, iRegL src, immI rshift, immL_bitmask mask)
%{
  match(Set dst (AndL (URShiftL src rshift) mask));

  ins_cost(INSN_COST);
  format %{ "ubfx $dst, $src, $rshift, $mask" %}
  ins_encode %{
    int rshift = $rshift$$constant;
    long mask = $mask$$constant;
    int width = exact_log2(mask+1);   // mask is 2^width - 1
    __ ubfx(as_Register($dst$$reg),
            as_Register($src$$reg), rshift, width);
  %}
  ins_pipe(ialu_reg_shift);
%}

// We can use ubfx when extending an And with a mask when we know mask
// is positive.  We know that because immI_bitmask guarantees it.
// The 64-bit ubfx zero-extends the extracted field, so the ConvI2L
// comes for free.
instruct ubfxIConvI2L(iRegLNoSp dst, iRegIorL2I src, immI rshift, immI_bitmask mask)
%{
  match(Set dst (ConvI2L (AndI (URShiftI src rshift) mask)));

  ins_cost(INSN_COST * 2);
  format %{ "ubfx $dst, $src, $rshift, $mask" %}
  ins_encode %{
    int rshift = $rshift$$constant;
    long mask = $mask$$constant;
    int width = exact_log2(mask+1);   // mask is 2^width - 1
    __ ubfx(as_Register($dst$$reg),
            as_Register($src$$reg), rshift, width);
  %}
  ins_pipe(ialu_reg_shift);
%}
11002 
// We can use ubfiz when masking by a positive number and then left shifting the result.
// We know that the mask is positive because immI_bitmask guarantees it.
// (src & mask) << lshift inserts a 'width'-bit field at bit 'lshift';
// the predicate checks that the field still fits in 32 bits.
instruct ubfizwI(iRegINoSp dst, iRegIorL2I src, immI lshift, immI_bitmask mask)
%{
  match(Set dst (LShiftI (AndI src mask) lshift));
  predicate((unsigned int)n->in(2)->get_int() <= 31 &&
    (exact_log2(n->in(1)->in(2)->get_int()+1) + (unsigned int)n->in(2)->get_int()) <= (31+1));

  ins_cost(INSN_COST);
  format %{ "ubfizw $dst, $src, $lshift, $mask" %}
  ins_encode %{
    int lshift = $lshift$$constant;
    long mask = $mask$$constant;
    int width = exact_log2(mask+1);   // mask is 2^width - 1
    __ ubfizw(as_Register($dst$$reg),
          as_Register($src$$reg), lshift, width);
  %}
  ins_pipe(ialu_reg_shift);
%}
// We can use ubfiz when masking by a positive number and then left shifting the result.
// We know that the mask is positive because immL_bitmask guarantees it.
// 64-bit variant; the inserted field must fit in 64 bits.
instruct ubfizL(iRegLNoSp dst, iRegL src, immI lshift, immL_bitmask mask)
%{
  match(Set dst (LShiftL (AndL src mask) lshift));
  predicate((unsigned int)n->in(2)->get_int() <= 63 &&
    (exact_log2_long(n->in(1)->in(2)->get_long()+1) + (unsigned int)n->in(2)->get_int()) <= (63+1));

  ins_cost(INSN_COST);
  format %{ "ubfiz $dst, $src, $lshift, $mask" %}
  ins_encode %{
    int lshift = $lshift$$constant;
    long mask = $mask$$constant;
    int width = exact_log2(mask+1);   // mask is 2^width - 1
    __ ubfiz(as_Register($dst$$reg),
          as_Register($src$$reg), lshift, width);
  %}
  ins_pipe(ialu_reg_shift);
%}

// If there is a ConvI2L between an AndI and a LShiftL, we can also match ubfiz:
// the masked field is non-negative, so the widening conversion is a no-op.
instruct ubfizIConvI2L(iRegLNoSp dst, iRegIorL2I src, immI lshift, immI_bitmask mask)
%{
  match(Set dst (LShiftL (ConvI2L(AndI src mask)) lshift));
  predicate((unsigned int)n->in(2)->get_int() <= 31 &&
    (exact_log2((unsigned int)n->in(1)->in(1)->in(2)->get_int()+1) + (unsigned int)n->in(2)->get_int()) <= 32);

  ins_cost(INSN_COST);
  format %{ "ubfiz $dst, $src, $lshift, $mask" %}
  ins_encode %{
    int lshift = $lshift$$constant;
    long mask = $mask$$constant;
    int width = exact_log2(mask+1);   // mask is 2^width - 1
    __ ubfiz(as_Register($dst$$reg),
             as_Register($src$$reg), lshift, width);
  %}
  ins_pipe(ialu_reg_shift);
%}
11060 
// Rotations
//
// (src1 << lshift) | (src2 >>> rshift) with lshift + rshift == 64 (resp. 32)
// is exactly a bitfield extract from the src1:src2 register pair, i.e. EXTR.
// The predicate below enforces that the two shift counts are complementary.

instruct extrOrL(iRegLNoSp dst, iRegL src1, iRegL src2, immI lshift, immI rshift, rFlagsReg cr)
%{
  match(Set dst (OrL (LShiftL src1 lshift) (URShiftL src2 rshift)));
  // lshift + rshift must be a multiple of 64
  predicate(0 == ((n->in(1)->in(2)->get_int() + n->in(2)->in(2)->get_int()) & 63));

  ins_cost(INSN_COST);
  format %{ "extr $dst, $src1, $src2, #$rshift" %}

  ins_encode %{
    __ extr(as_Register($dst$$reg), as_Register($src1$$reg), as_Register($src2$$reg),
            $rshift$$constant & 63);
  %}
  ins_pipe(ialu_reg_reg_extr);
%}

// 32-bit variant of extrOrL (EXTRW).
instruct extrOrI(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2, immI lshift, immI rshift, rFlagsReg cr)
%{
  match(Set dst (OrI (LShiftI src1 lshift) (URShiftI src2 rshift)));
  // lshift + rshift must be a multiple of 32
  predicate(0 == ((n->in(1)->in(2)->get_int() + n->in(2)->in(2)->get_int()) & 31));

  ins_cost(INSN_COST);
  format %{ "extr $dst, $src1, $src2, #$rshift" %}

  ins_encode %{
    __ extrw(as_Register($dst$$reg), as_Register($src1$$reg), as_Register($src2$$reg),
            $rshift$$constant & 31);
  %}
  ins_pipe(ialu_reg_reg_extr);
%}

// Same as extrOrL, but with Add instead of Or: the shifted operands have
// no overlapping bits (complementary shifts), so Add and Or are equivalent.
instruct extrAddL(iRegLNoSp dst, iRegL src1, iRegL src2, immI lshift, immI rshift, rFlagsReg cr)
%{
  match(Set dst (AddL (LShiftL src1 lshift) (URShiftL src2 rshift)));
  predicate(0 == ((n->in(1)->in(2)->get_int() + n->in(2)->in(2)->get_int()) & 63));

  ins_cost(INSN_COST);
  format %{ "extr $dst, $src1, $src2, #$rshift" %}

  ins_encode %{
    __ extr(as_Register($dst$$reg), as_Register($src1$$reg), as_Register($src2$$reg),
            $rshift$$constant & 63);
  %}
  ins_pipe(ialu_reg_reg_extr);
%}

// 32-bit variant of extrAddL (EXTRW).
instruct extrAddI(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2, immI lshift, immI rshift, rFlagsReg cr)
%{
  match(Set dst (AddI (LShiftI src1 lshift) (URShiftI src2 rshift)));
  predicate(0 == ((n->in(1)->in(2)->get_int() + n->in(2)->in(2)->get_int()) & 31));

  ins_cost(INSN_COST);
  format %{ "extr $dst, $src1, $src2, #$rshift" %}

  ins_encode %{
    __ extrw(as_Register($dst$$reg), as_Register($src1$$reg), as_Register($src2$$reg),
            $rshift$$constant & 31);
  %}
  ins_pipe(ialu_reg_reg_extr);
%}
11122 
11123 
// rol expander
//
// AArch64 has no rotate-left-by-register instruction, so rol(x, s) is
// implemented as ror(x, -s): negate the shift into rscratch1 (clobbered),
// then RORV.  Used only as an expand target for the match rules below.

instruct rolL_rReg(iRegLNoSp dst, iRegL src, iRegI shift, rFlagsReg cr)
%{
  effect(DEF dst, USE src, USE shift);

  format %{ "rol    $dst, $src, $shift" %}
  ins_cost(INSN_COST * 3);
  ins_encode %{
    __ subw(rscratch1, zr, as_Register($shift$$reg));  // rscratch1 = -shift
    __ rorv(as_Register($dst$$reg), as_Register($src$$reg),
            rscratch1);
    %}
  ins_pipe(ialu_reg_reg_vshift);
%}

// rol expander
// 32-bit variant (RORVW).

instruct rolI_rReg(iRegINoSp dst, iRegI src, iRegI shift, rFlagsReg cr)
%{
  effect(DEF dst, USE src, USE shift);

  format %{ "rol    $dst, $src, $shift" %}
  ins_cost(INSN_COST * 3);
  ins_encode %{
    __ subw(rscratch1, zr, as_Register($shift$$reg));  // rscratch1 = -shift
    __ rorvw(as_Register($dst$$reg), as_Register($src$$reg),
            rscratch1);
    %}
  ins_pipe(ialu_reg_reg_vshift);
%}

// (x << s) | (x >>> (64 - s)) is rotate-left; 64 - s and 0 - s are equal
// mod 64, so both constant forms below reduce to the same expander.
instruct rolL_rReg_Var_C_64(iRegLNoSp dst, iRegL src, iRegI shift, immI_64 c_64, rFlagsReg cr)
%{
  match(Set dst (OrL (LShiftL src shift) (URShiftL src (SubI c_64 shift))));

  expand %{
    rolL_rReg(dst, src, shift, cr);
  %}
%}

// Same pattern with the Java-level "0 - s" form of the complement shift.
instruct rolL_rReg_Var_C0(iRegLNoSp dst, iRegL src, iRegI shift, immI0 c0, rFlagsReg cr)
%{
  match(Set dst (OrL (LShiftL src shift) (URShiftL src (SubI c0 shift))));

  expand %{
    rolL_rReg(dst, src, shift, cr);
  %}
%}

// 32-bit rotate-left: (x << s) | (x >>> (32 - s)).
instruct rolI_rReg_Var_C_32(iRegINoSp dst, iRegI src, iRegI shift, immI_32 c_32, rFlagsReg cr)
%{
  match(Set dst (OrI (LShiftI src shift) (URShiftI src (SubI c_32 shift))));

  expand %{
    rolI_rReg(dst, src, shift, cr);
  %}
%}

// Same pattern with the "0 - s" form of the complement shift.
instruct rolI_rReg_Var_C0(iRegINoSp dst, iRegI src, iRegI shift, immI0 c0, rFlagsReg cr)
%{
  match(Set dst (OrI (LShiftI src shift) (URShiftI src (SubI c0 shift))));

  expand %{
    rolI_rReg(dst, src, shift, cr);
  %}
%}
11191 
// ror expander
//
// Rotate-right by register maps directly onto RORV — no scratch register
// needed, hence the lower cost compared with the rol expander.

instruct rorL_rReg(iRegLNoSp dst, iRegL src, iRegI shift, rFlagsReg cr)
%{
  effect(DEF dst, USE src, USE shift);

  format %{ "ror    $dst, $src, $shift" %}
  ins_cost(INSN_COST);
  ins_encode %{
    __ rorv(as_Register($dst$$reg), as_Register($src$$reg),
            as_Register($shift$$reg));
    %}
  ins_pipe(ialu_reg_reg_vshift);
%}

// ror expander
// 32-bit variant (RORVW).

instruct rorI_rReg(iRegINoSp dst, iRegI src, iRegI shift, rFlagsReg cr)
%{
  effect(DEF dst, USE src, USE shift);

  format %{ "ror    $dst, $src, $shift" %}
  ins_cost(INSN_COST);
  ins_encode %{
    __ rorvw(as_Register($dst$$reg), as_Register($src$$reg),
            as_Register($shift$$reg));
    %}
  ins_pipe(ialu_reg_reg_vshift);
%}

// (x >>> s) | (x << (64 - s)) is rotate-right; 64 - s and 0 - s are equal
// mod 64, so both constant forms below reduce to the same expander.
instruct rorL_rReg_Var_C_64(iRegLNoSp dst, iRegL src, iRegI shift, immI_64 c_64, rFlagsReg cr)
%{
  match(Set dst (OrL (URShiftL src shift) (LShiftL src (SubI c_64 shift))));

  expand %{
    rorL_rReg(dst, src, shift, cr);
  %}
%}

// Same pattern with the "0 - s" form of the complement shift.
instruct rorL_rReg_Var_C0(iRegLNoSp dst, iRegL src, iRegI shift, immI0 c0, rFlagsReg cr)
%{
  match(Set dst (OrL (URShiftL src shift) (LShiftL src (SubI c0 shift))));

  expand %{
    rorL_rReg(dst, src, shift, cr);
  %}
%}

// 32-bit rotate-right: (x >>> s) | (x << (32 - s)).
instruct rorI_rReg_Var_C_32(iRegINoSp dst, iRegI src, iRegI shift, immI_32 c_32, rFlagsReg cr)
%{
  match(Set dst (OrI (URShiftI src shift) (LShiftI src (SubI c_32 shift))));

  expand %{
    rorI_rReg(dst, src, shift, cr);
  %}
%}

// Same pattern with the "0 - s" form of the complement shift.
instruct rorI_rReg_Var_C0(iRegINoSp dst, iRegI src, iRegI shift, immI0 c0, rFlagsReg cr)
%{
  match(Set dst (OrI (URShiftI src shift) (LShiftI src (SubI c0 shift))));

  expand %{
    rorI_rReg(dst, src, shift, cr);
  %}
%}
11257 
// Add/subtract (extended)
//
// The ConvI2L of the second operand is folded into the extended-register
// form of ADD/SUB (sxtw), so the widening costs nothing.

instruct AddExtI(iRegLNoSp dst, iRegL src1, iRegIorL2I src2, rFlagsReg cr)
%{
  match(Set dst (AddL src1 (ConvI2L src2)));
  ins_cost(INSN_COST);
  format %{ "add  $dst, $src1, $src2, sxtw" %}

   ins_encode %{
     __ add(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::sxtw);
   %}
  ins_pipe(ialu_reg_reg);
%};

// Long subtract of a sign-extended int: dst = src1 - (long)src2.
instruct SubExtI(iRegLNoSp dst, iRegL src1, iRegIorL2I src2, rFlagsReg cr)
%{
  match(Set dst (SubL src1 (ConvI2L src2)));
  ins_cost(INSN_COST);
  format %{ "sub  $dst, $src1, $src2, sxtw" %}

   ins_encode %{
     __ sub(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::sxtw);
   %}
  ins_pipe(ialu_reg_reg);
%};
11285 
11286 
// (src2 << 16) >> 16 is sign-extend-halfword, so the whole right-hand
// side folds into ADD's extended-register form (sxth).
instruct AddExtI_sxth(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2, immI_16 lshift, immI_16 rshift, rFlagsReg cr)
%{
  match(Set dst (AddI src1 (RShiftI (LShiftI src2 lshift) rshift)));
  ins_cost(INSN_COST);
  format %{ "add  $dst, $src1, $src2, sxth" %}

   ins_encode %{
     __ add(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::sxth);
   %}
  ins_pipe(ialu_reg_reg);
%}

// (src2 << 24) >> 24 is sign-extend-byte.
instruct AddExtI_sxtb(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2, immI_24 lshift, immI_24 rshift, rFlagsReg cr)
%{
  match(Set dst (AddI src1 (RShiftI (LShiftI src2 lshift) rshift)));
  ins_cost(INSN_COST);
  format %{ "add  $dst, $src1, $src2, sxtb" %}

   ins_encode %{
     __ add(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::sxtb);
   %}
  ins_pipe(ialu_reg_reg);
%}

// (src2 << 24) >>> 24 (unsigned shift) is zero-extend-byte.
instruct AddExtI_uxtb(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2, immI_24 lshift, immI_24 rshift, rFlagsReg cr)
%{
  match(Set dst (AddI src1 (URShiftI (LShiftI src2 lshift) rshift)));
  ins_cost(INSN_COST);
  format %{ "add  $dst, $src1, $src2, uxtb" %}

   ins_encode %{
     __ add(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::uxtb);
   %}
  ins_pipe(ialu_reg_reg);
%}

// Long form: (src2 << 48) >> 48 is sign-extend-halfword.
instruct AddExtL_sxth(iRegLNoSp dst, iRegL src1, iRegL src2, immI_48 lshift, immI_48 rshift, rFlagsReg cr)
%{
  match(Set dst (AddL src1 (RShiftL (LShiftL src2 lshift) rshift)));
  ins_cost(INSN_COST);
  format %{ "add  $dst, $src1, $src2, sxth" %}

   ins_encode %{
     __ add(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::sxth);
   %}
  ins_pipe(ialu_reg_reg);
%}

// Long form: (src2 << 32) >> 32 is sign-extend-word.
instruct AddExtL_sxtw(iRegLNoSp dst, iRegL src1, iRegL src2, immI_32 lshift, immI_32 rshift, rFlagsReg cr)
%{
  match(Set dst (AddL src1 (RShiftL (LShiftL src2 lshift) rshift)));
  ins_cost(INSN_COST);
  format %{ "add  $dst, $src1, $src2, sxtw" %}

   ins_encode %{
     __ add(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::sxtw);
   %}
  ins_pipe(ialu_reg_reg);
%}

// Long form: (src2 << 56) >> 56 is sign-extend-byte.
instruct AddExtL_sxtb(iRegLNoSp dst, iRegL src1, iRegL src2, immI_56 lshift, immI_56 rshift, rFlagsReg cr)
%{
  match(Set dst (AddL src1 (RShiftL (LShiftL src2 lshift) rshift)));
  ins_cost(INSN_COST);
  format %{ "add  $dst, $src1, $src2, sxtb" %}

   ins_encode %{
     __ add(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::sxtb);
   %}
  ins_pipe(ialu_reg_reg);
%}

// Long form: (src2 << 56) >>> 56 (unsigned shift) is zero-extend-byte.
instruct AddExtL_uxtb(iRegLNoSp dst, iRegL src1, iRegL src2, immI_56 lshift, immI_56 rshift, rFlagsReg cr)
%{
  match(Set dst (AddL src1 (URShiftL (LShiftL src2 lshift) rshift)));
  ins_cost(INSN_COST);
  format %{ "add  $dst, $src1, $src2, uxtb" %}

   ins_encode %{
     __ add(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::uxtb);
   %}
  ins_pipe(ialu_reg_reg);
%}
11377 
11378 
// src2 & 0xFF is zero-extend-byte, so the And folds into the
// extended-register form of ADDW (uxtb).
instruct AddExtI_uxtb_and(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2, immI_255 mask, rFlagsReg cr)
%{
  match(Set dst (AddI src1 (AndI src2 mask)));
  ins_cost(INSN_COST);
  format %{ "addw  $dst, $src1, $src2, uxtb" %}

   ins_encode %{
     __ addw(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::uxtb);
   %}
  ins_pipe(ialu_reg_reg);
%}

// src2 & 0xFFFF is zero-extend-halfword (uxth).
instruct AddExtI_uxth_and(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2, immI_65535 mask, rFlagsReg cr)
%{
  match(Set dst (AddI src1 (AndI src2 mask)));
  ins_cost(INSN_COST);
  format %{ "addw  $dst, $src1, $src2, uxth" %}

   ins_encode %{
     __ addw(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::uxth);
   %}
  ins_pipe(ialu_reg_reg);
%}

// Long variant: src2 & 0xFFL folds into ADD ... uxtb.
instruct AddExtL_uxtb_and(iRegLNoSp dst, iRegL src1, iRegL src2, immL_255 mask, rFlagsReg cr)
%{
  match(Set dst (AddL src1 (AndL src2 mask)));
  ins_cost(INSN_COST);
  format %{ "add  $dst, $src1, $src2, uxtb" %}

   ins_encode %{
     __ add(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::uxtb);
   %}
  ins_pipe(ialu_reg_reg);
%}

// Long variant: src2 & 0xFFFFL folds into ADD ... uxth.
instruct AddExtL_uxth_and(iRegLNoSp dst, iRegL src1, iRegL src2, immL_65535 mask, rFlagsReg cr)
%{
  match(Set dst (AddL src1 (AndL src2 mask)));
  ins_cost(INSN_COST);
  format %{ "add  $dst, $src1, $src2, uxth" %}

   ins_encode %{
     __ add(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::uxth);
   %}
  ins_pipe(ialu_reg_reg);
%}

// Long variant: src2 & 0xFFFFFFFFL folds into ADD ... uxtw.
instruct AddExtL_uxtw_and(iRegLNoSp dst, iRegL src1, iRegL src2, immL_4294967295 mask, rFlagsReg cr)
%{
  match(Set dst (AddL src1 (AndL src2 mask)));
  ins_cost(INSN_COST);
  format %{ "add  $dst, $src1, $src2, uxtw" %}

   ins_encode %{
     __ add(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::uxtw);
   %}
  ins_pipe(ialu_reg_reg);
%}
11443 
// Subtract counterparts of the AddExt*_and rules above: the And-with-mask
// zero-extension folds into the extended-register form of SUB/SUBW.
instruct SubExtI_uxtb_and(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2, immI_255 mask, rFlagsReg cr)
%{
  match(Set dst (SubI src1 (AndI src2 mask)));
  ins_cost(INSN_COST);
  format %{ "subw  $dst, $src1, $src2, uxtb" %}

   ins_encode %{
     __ subw(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::uxtb);
   %}
  ins_pipe(ialu_reg_reg);
%}

// src2 & 0xFFFF folds into SUBW ... uxth.
instruct SubExtI_uxth_and(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2, immI_65535 mask, rFlagsReg cr)
%{
  match(Set dst (SubI src1 (AndI src2 mask)));
  ins_cost(INSN_COST);
  format %{ "subw  $dst, $src1, $src2, uxth" %}

   ins_encode %{
     __ subw(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::uxth);
   %}
  ins_pipe(ialu_reg_reg);
%}

// Long variant: src2 & 0xFFL folds into SUB ... uxtb.
instruct SubExtL_uxtb_and(iRegLNoSp dst, iRegL src1, iRegL src2, immL_255 mask, rFlagsReg cr)
%{
  match(Set dst (SubL src1 (AndL src2 mask)));
  ins_cost(INSN_COST);
  format %{ "sub  $dst, $src1, $src2, uxtb" %}

   ins_encode %{
     __ sub(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::uxtb);
   %}
  ins_pipe(ialu_reg_reg);
%}

// Long variant: src2 & 0xFFFFL folds into SUB ... uxth.
instruct SubExtL_uxth_and(iRegLNoSp dst, iRegL src1, iRegL src2, immL_65535 mask, rFlagsReg cr)
%{
  match(Set dst (SubL src1 (AndL src2 mask)));
  ins_cost(INSN_COST);
  format %{ "sub  $dst, $src1, $src2, uxth" %}

   ins_encode %{
     __ sub(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::uxth);
   %}
  ins_pipe(ialu_reg_reg);
%}

// Long variant: src2 & 0xFFFFFFFFL folds into SUB ... uxtw.
instruct SubExtL_uxtw_and(iRegLNoSp dst, iRegL src1, iRegL src2, immL_4294967295 mask, rFlagsReg cr)
%{
  match(Set dst (SubL src1 (AndL src2 mask)));
  ins_cost(INSN_COST);
  format %{ "sub  $dst, $src1, $src2, uxtw" %}

   ins_encode %{
     __ sub(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::uxtw);
   %}
  ins_pipe(ialu_reg_reg);
%}
11508 
11509 
// Sign-extend (via the shift-pair idiom) followed by a left shift folds
// into the extended-register form of ADD/SUB with a shift amount:
// dst = src1 +/- (extend(src2) << lshift2).  immIExt restricts lshift2
// to the range the extended-register encoding accepts.
instruct AddExtL_sxtb_shift(iRegLNoSp dst, iRegL src1, iRegL src2, immIExt lshift2, immI_56 lshift1, immI_56 rshift1, rFlagsReg cr)
%{
  match(Set dst (AddL src1 (LShiftL (RShiftL (LShiftL src2 lshift1) rshift1) lshift2)));
  ins_cost(1.9 * INSN_COST);
  format %{ "add  $dst, $src1, $src2, sxtb #lshift2" %}

   ins_encode %{
     __ add(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::sxtb, ($lshift2$$constant));
   %}
  ins_pipe(ialu_reg_reg_shift);
%}

// (src2 << 48) >> 48 is sign-extend-halfword, then shift by lshift2.
instruct AddExtL_sxth_shift(iRegLNoSp dst, iRegL src1, iRegL src2, immIExt lshift2, immI_48 lshift1, immI_48 rshift1, rFlagsReg cr)
%{
  match(Set dst (AddL src1 (LShiftL (RShiftL (LShiftL src2 lshift1) rshift1) lshift2)));
  ins_cost(1.9 * INSN_COST);
  format %{ "add  $dst, $src1, $src2, sxth #lshift2" %}

   ins_encode %{
     __ add(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::sxth, ($lshift2$$constant));
   %}
  ins_pipe(ialu_reg_reg_shift);
%}

// (src2 << 32) >> 32 is sign-extend-word, then shift by lshift2.
instruct AddExtL_sxtw_shift(iRegLNoSp dst, iRegL src1, iRegL src2, immIExt lshift2, immI_32 lshift1, immI_32 rshift1, rFlagsReg cr)
%{
  match(Set dst (AddL src1 (LShiftL (RShiftL (LShiftL src2 lshift1) rshift1) lshift2)));
  ins_cost(1.9 * INSN_COST);
  format %{ "add  $dst, $src1, $src2, sxtw #lshift2" %}

   ins_encode %{
     __ add(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::sxtw, ($lshift2$$constant));
   %}
  ins_pipe(ialu_reg_reg_shift);
%}

// Subtract counterpart: dst = src1 - (sxtb(src2) << lshift2).
instruct SubExtL_sxtb_shift(iRegLNoSp dst, iRegL src1, iRegL src2, immIExt lshift2, immI_56 lshift1, immI_56 rshift1, rFlagsReg cr)
%{
  match(Set dst (SubL src1 (LShiftL (RShiftL (LShiftL src2 lshift1) rshift1) lshift2)));
  ins_cost(1.9 * INSN_COST);
  format %{ "sub  $dst, $src1, $src2, sxtb #lshift2" %}

   ins_encode %{
     __ sub(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::sxtb, ($lshift2$$constant));
   %}
  ins_pipe(ialu_reg_reg_shift);
%}

// Subtract counterpart: dst = src1 - (sxth(src2) << lshift2).
instruct SubExtL_sxth_shift(iRegLNoSp dst, iRegL src1, iRegL src2, immIExt lshift2, immI_48 lshift1, immI_48 rshift1, rFlagsReg cr)
%{
  match(Set dst (SubL src1 (LShiftL (RShiftL (LShiftL src2 lshift1) rshift1) lshift2)));
  ins_cost(1.9 * INSN_COST);
  format %{ "sub  $dst, $src1, $src2, sxth #lshift2" %}

   ins_encode %{
     __ sub(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::sxth, ($lshift2$$constant));
   %}
  ins_pipe(ialu_reg_reg_shift);
%}

// Subtract counterpart: dst = src1 - (sxtw(src2) << lshift2).
instruct SubExtL_sxtw_shift(iRegLNoSp dst, iRegL src1, iRegL src2, immIExt lshift2, immI_32 lshift1, immI_32 rshift1, rFlagsReg cr)
%{
  match(Set dst (SubL src1 (LShiftL (RShiftL (LShiftL src2 lshift1) rshift1) lshift2)));
  ins_cost(1.9 * INSN_COST);
  format %{ "sub  $dst, $src1, $src2, sxtw #lshift2" %}

   ins_encode %{
     __ sub(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::sxtw, ($lshift2$$constant));
   %}
  ins_pipe(ialu_reg_reg_shift);
%}
11587 
// 32-bit counterparts of the sign-extend-and-shift rules: the byte/half
// sign-extension plus left shift folds into ADDW/SUBW's extended form.
instruct AddExtI_sxtb_shift(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2, immIExt lshift2, immI_24 lshift1, immI_24 rshift1, rFlagsReg cr)
%{
  match(Set dst (AddI src1 (LShiftI (RShiftI (LShiftI src2 lshift1) rshift1) lshift2)));
  ins_cost(1.9 * INSN_COST);
  format %{ "addw  $dst, $src1, $src2, sxtb #lshift2" %}

   ins_encode %{
     __ addw(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::sxtb, ($lshift2$$constant));
   %}
  ins_pipe(ialu_reg_reg_shift);
%}

// dst = src1 + (sxth(src2) << lshift2), 32-bit.
instruct AddExtI_sxth_shift(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2, immIExt lshift2, immI_16 lshift1, immI_16 rshift1, rFlagsReg cr)
%{
  match(Set dst (AddI src1 (LShiftI (RShiftI (LShiftI src2 lshift1) rshift1) lshift2)));
  ins_cost(1.9 * INSN_COST);
  format %{ "addw  $dst, $src1, $src2, sxth #lshift2" %}

   ins_encode %{
     __ addw(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::sxth, ($lshift2$$constant));
   %}
  ins_pipe(ialu_reg_reg_shift);
%}

// dst = src1 - (sxtb(src2) << lshift2), 32-bit.
instruct SubExtI_sxtb_shift(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2, immIExt lshift2, immI_24 lshift1, immI_24 rshift1, rFlagsReg cr)
%{
  match(Set dst (SubI src1 (LShiftI (RShiftI (LShiftI src2 lshift1) rshift1) lshift2)));
  ins_cost(1.9 * INSN_COST);
  format %{ "subw  $dst, $src1, $src2, sxtb #lshift2" %}

   ins_encode %{
     __ subw(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::sxtb, ($lshift2$$constant));
   %}
  ins_pipe(ialu_reg_reg_shift);
%}

// dst = src1 - (sxth(src2) << lshift2), 32-bit.
instruct SubExtI_sxth_shift(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2, immIExt lshift2, immI_16 lshift1, immI_16 rshift1, rFlagsReg cr)
%{
  match(Set dst (SubI src1 (LShiftI (RShiftI (LShiftI src2 lshift1) rshift1) lshift2)));
  ins_cost(1.9 * INSN_COST);
  format %{ "subw  $dst, $src1, $src2, sxth #lshift2" %}

   ins_encode %{
     __ subw(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::sxth, ($lshift2$$constant));
   %}
  ins_pipe(ialu_reg_reg_shift);
%}
11639 
11640 
// Long add of a sign-extended, shifted int: dst = src1 + ((long)src2 << lshift).
// Both the ConvI2L and the shift fold into ADD ... sxtw #lshift.
instruct AddExtI_shift(iRegLNoSp dst, iRegL src1, iRegIorL2I src2, immIExt lshift, rFlagsReg cr)
%{
  match(Set dst (AddL src1 (LShiftL (ConvI2L src2) lshift)));
  ins_cost(1.9 * INSN_COST);
  format %{ "add  $dst, $src1, $src2, sxtw #lshift" %}

   ins_encode %{
     __ add(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::sxtw, ($lshift$$constant));
   %}
  ins_pipe(ialu_reg_reg_shift);
%};

// Subtract counterpart: dst = src1 - ((long)src2 << lshift).
instruct SubExtI_shift(iRegLNoSp dst, iRegL src1, iRegIorL2I src2, immIExt lshift, rFlagsReg cr)
%{
  match(Set dst (SubL src1 (LShiftL (ConvI2L src2) lshift)));
  ins_cost(1.9 * INSN_COST);
  format %{ "sub  $dst, $src1, $src2, sxtw #lshift" %}

   ins_encode %{
     __ sub(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::sxtw, ($lshift$$constant));
   %}
  ins_pipe(ialu_reg_reg_shift);
%};
11666 
11667 
// Zero-extend (via And-with-mask) followed by a left shift folds into the
// extended-register form of ADD/SUB: dst = src1 +/- (uxt*(src2) << lshift).
instruct AddExtL_uxtb_and_shift(iRegLNoSp dst, iRegL src1, iRegL src2, immL_255 mask, immIExt lshift, rFlagsReg cr)
%{
  match(Set dst (AddL src1 (LShiftL (AndL src2 mask) lshift)));
  ins_cost(1.9 * INSN_COST);
  format %{ "add  $dst, $src1, $src2, uxtb #lshift" %}

   ins_encode %{
     __ add(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::uxtb, ($lshift$$constant));
   %}
  ins_pipe(ialu_reg_reg_shift);
%}

// dst = src1 + ((src2 & 0xFFFFL) << lshift).
instruct AddExtL_uxth_and_shift(iRegLNoSp dst, iRegL src1, iRegL src2, immL_65535 mask, immIExt lshift, rFlagsReg cr)
%{
  match(Set dst (AddL src1 (LShiftL (AndL src2 mask) lshift)));
  ins_cost(1.9 * INSN_COST);
  format %{ "add  $dst, $src1, $src2, uxth #lshift" %}

   ins_encode %{
     __ add(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::uxth, ($lshift$$constant));
   %}
  ins_pipe(ialu_reg_reg_shift);
%}

// dst = src1 + ((src2 & 0xFFFFFFFFL) << lshift).
instruct AddExtL_uxtw_and_shift(iRegLNoSp dst, iRegL src1, iRegL src2, immL_4294967295 mask, immIExt lshift, rFlagsReg cr)
%{
  match(Set dst (AddL src1 (LShiftL (AndL src2 mask) lshift)));
  ins_cost(1.9 * INSN_COST);
  format %{ "add  $dst, $src1, $src2, uxtw #lshift" %}

   ins_encode %{
     __ add(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::uxtw, ($lshift$$constant));
   %}
  ins_pipe(ialu_reg_reg_shift);
%}

// dst = src1 - ((src2 & 0xFFL) << lshift).
instruct SubExtL_uxtb_and_shift(iRegLNoSp dst, iRegL src1, iRegL src2, immL_255 mask, immIExt lshift, rFlagsReg cr)
%{
  match(Set dst (SubL src1 (LShiftL (AndL src2 mask) lshift)));
  ins_cost(1.9 * INSN_COST);
  format %{ "sub  $dst, $src1, $src2, uxtb #lshift" %}

   ins_encode %{
     __ sub(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::uxtb, ($lshift$$constant));
   %}
  ins_pipe(ialu_reg_reg_shift);
%}

// dst = src1 - ((src2 & 0xFFFFL) << lshift).
instruct SubExtL_uxth_and_shift(iRegLNoSp dst, iRegL src1, iRegL src2, immL_65535 mask, immIExt lshift, rFlagsReg cr)
%{
  match(Set dst (SubL src1 (LShiftL (AndL src2 mask) lshift)));
  ins_cost(1.9 * INSN_COST);
  format %{ "sub  $dst, $src1, $src2, uxth #lshift" %}

   ins_encode %{
     __ sub(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::uxth, ($lshift$$constant));
   %}
  ins_pipe(ialu_reg_reg_shift);
%}
11732 
11733 instruct SubExtL_uxtw_and_shift(iRegLNoSp dst, iRegL src1, iRegL src2, immL_4294967295 mask, immIExt lshift, rFlagsReg cr)
11734 %{
11735   match(Set dst (SubL src1 (LShiftL (AndL src2 mask) lshift)));
11736   ins_cost(1.9 * INSN_COST);
11737   format %{ "sub  $dst, $src1, $src2, uxtw #lshift" %}
11738 
11739    ins_encode %{
11740      __ sub(as_Register($dst$$reg), as_Register($src1$$reg),
11741             as_Register($src2$$reg), ext::uxtw, ($lshift$$constant));
11742    %}
11743   ins_pipe(ialu_reg_reg_shift);
11744 %}
11745 
// 32-bit variants of the rules above: (src2 & 0xFF / 0xFFFF) << k feeding
// an AddI/SubI is folded into the uxtb/uxth extended-register form of
// addw/subw.
instruct AddExtI_uxtb_and_shift(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2, immI_255 mask, immIExt lshift, rFlagsReg cr)
%{
  match(Set dst (AddI src1 (LShiftI (AndI src2 mask) lshift)));
  ins_cost(1.9 * INSN_COST);
  format %{ "addw  $dst, $src1, $src2, uxtb #lshift" %}

   ins_encode %{
     __ addw(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::uxtb, ($lshift$$constant));
   %}
  ins_pipe(ialu_reg_reg_shift);
%}

instruct AddExtI_uxth_and_shift(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2, immI_65535 mask, immIExt lshift, rFlagsReg cr)
%{
  match(Set dst (AddI src1 (LShiftI (AndI src2 mask) lshift)));
  ins_cost(1.9 * INSN_COST);
  format %{ "addw  $dst, $src1, $src2, uxth #lshift" %}

   ins_encode %{
     __ addw(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::uxth, ($lshift$$constant));
   %}
  ins_pipe(ialu_reg_reg_shift);
%}

instruct SubExtI_uxtb_and_shift(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2, immI_255 mask, immIExt lshift, rFlagsReg cr)
%{
  match(Set dst (SubI src1 (LShiftI (AndI src2 mask) lshift)));
  ins_cost(1.9 * INSN_COST);
  format %{ "subw  $dst, $src1, $src2, uxtb #lshift" %}

   ins_encode %{
     __ subw(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::uxtb, ($lshift$$constant));
   %}
  ins_pipe(ialu_reg_reg_shift);
%}

instruct SubExtI_uxth_and_shift(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2, immI_65535 mask, immIExt lshift, rFlagsReg cr)
%{
  match(Set dst (SubI src1 (LShiftI (AndI src2 mask) lshift)));
  ins_cost(1.9 * INSN_COST);
  format %{ "subw  $dst, $src1, $src2, uxth #lshift" %}

   ins_encode %{
     __ subw(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::uxth, ($lshift$$constant));
   %}
  ins_pipe(ialu_reg_reg_shift);
%}
11797 // END This section of the file is automatically generated. Do not edit --------------
11798 
11799 // ============================================================================
11800 // Floating Point Arithmetic Instructions
11801 
11802 instruct addF_reg_reg(vRegF dst, vRegF src1, vRegF src2) %{
11803   match(Set dst (AddF src1 src2));
11804 
11805   ins_cost(INSN_COST * 5);
11806   format %{ "fadds   $dst, $src1, $src2" %}
11807 
11808   ins_encode %{
11809     __ fadds(as_FloatRegister($dst$$reg),
11810              as_FloatRegister($src1$$reg),
11811              as_FloatRegister($src2$$reg));
11812   %}
11813 
11814   ins_pipe(fp_dop_reg_reg_s);
11815 %}
11816 
11817 instruct addD_reg_reg(vRegD dst, vRegD src1, vRegD src2) %{
11818   match(Set dst (AddD src1 src2));
11819 
11820   ins_cost(INSN_COST * 5);
11821   format %{ "faddd   $dst, $src1, $src2" %}
11822 
11823   ins_encode %{
11824     __ faddd(as_FloatRegister($dst$$reg),
11825              as_FloatRegister($src1$$reg),
11826              as_FloatRegister($src2$$reg));
11827   %}
11828 
11829   ins_pipe(fp_dop_reg_reg_d);
11830 %}
11831 
11832 instruct subF_reg_reg(vRegF dst, vRegF src1, vRegF src2) %{
11833   match(Set dst (SubF src1 src2));
11834 
11835   ins_cost(INSN_COST * 5);
11836   format %{ "fsubs   $dst, $src1, $src2" %}
11837 
11838   ins_encode %{
11839     __ fsubs(as_FloatRegister($dst$$reg),
11840              as_FloatRegister($src1$$reg),
11841              as_FloatRegister($src2$$reg));
11842   %}
11843 
11844   ins_pipe(fp_dop_reg_reg_s);
11845 %}
11846 
11847 instruct subD_reg_reg(vRegD dst, vRegD src1, vRegD src2) %{
11848   match(Set dst (SubD src1 src2));
11849 
11850   ins_cost(INSN_COST * 5);
11851   format %{ "fsubd   $dst, $src1, $src2" %}
11852 
11853   ins_encode %{
11854     __ fsubd(as_FloatRegister($dst$$reg),
11855              as_FloatRegister($src1$$reg),
11856              as_FloatRegister($src2$$reg));
11857   %}
11858 
11859   ins_pipe(fp_dop_reg_reg_d);
11860 %}
11861 
11862 instruct mulF_reg_reg(vRegF dst, vRegF src1, vRegF src2) %{
11863   match(Set dst (MulF src1 src2));
11864 
11865   ins_cost(INSN_COST * 6);
11866   format %{ "fmuls   $dst, $src1, $src2" %}
11867 
11868   ins_encode %{
11869     __ fmuls(as_FloatRegister($dst$$reg),
11870              as_FloatRegister($src1$$reg),
11871              as_FloatRegister($src2$$reg));
11872   %}
11873 
11874   ins_pipe(fp_dop_reg_reg_s);
11875 %}
11876 
11877 instruct mulD_reg_reg(vRegD dst, vRegD src1, vRegD src2) %{
11878   match(Set dst (MulD src1 src2));
11879 
11880   ins_cost(INSN_COST * 6);
11881   format %{ "fmuld   $dst, $src1, $src2" %}
11882 
11883   ins_encode %{
11884     __ fmuld(as_FloatRegister($dst$$reg),
11885              as_FloatRegister($src1$$reg),
11886              as_FloatRegister($src2$$reg));
11887   %}
11888 
11889   ins_pipe(fp_dop_reg_reg_d);
11890 %}
11891 
// Fused multiply-add family, all guarded by UseFMA.  The Ideal Fma node
// computes addend + a*b; the NegF/NegD wrappers in the match rules below
// select the fmsub/fnmadd variants.  Each *F/*D pair lists two match rules
// where negation may appear on either multiplicand.

// src1 * src2 + src3
instruct maddF_reg_reg(vRegF dst, vRegF src1, vRegF src2, vRegF src3) %{
  predicate(UseFMA);
  match(Set dst (FmaF src3 (Binary src1 src2)));

  format %{ "fmadds   $dst, $src1, $src2, $src3" %}

  ins_encode %{
    __ fmadds(as_FloatRegister($dst$$reg),
             as_FloatRegister($src1$$reg),
             as_FloatRegister($src2$$reg),
             as_FloatRegister($src3$$reg));
  %}

  ins_pipe(pipe_class_default);
%}

// src1 * src2 + src3
instruct maddD_reg_reg(vRegD dst, vRegD src1, vRegD src2, vRegD src3) %{
  predicate(UseFMA);
  match(Set dst (FmaD src3 (Binary src1 src2)));

  format %{ "fmaddd   $dst, $src1, $src2, $src3" %}

  ins_encode %{
    __ fmaddd(as_FloatRegister($dst$$reg),
             as_FloatRegister($src1$$reg),
             as_FloatRegister($src2$$reg),
             as_FloatRegister($src3$$reg));
  %}

  ins_pipe(pipe_class_default);
%}

// -src1 * src2 + src3
instruct msubF_reg_reg(vRegF dst, vRegF src1, vRegF src2, vRegF src3) %{
  predicate(UseFMA);
  match(Set dst (FmaF src3 (Binary (NegF src1) src2)));
  match(Set dst (FmaF src3 (Binary src1 (NegF src2))));

  format %{ "fmsubs   $dst, $src1, $src2, $src3" %}

  ins_encode %{
    __ fmsubs(as_FloatRegister($dst$$reg),
              as_FloatRegister($src1$$reg),
              as_FloatRegister($src2$$reg),
              as_FloatRegister($src3$$reg));
  %}

  ins_pipe(pipe_class_default);
%}

// -src1 * src2 + src3
instruct msubD_reg_reg(vRegD dst, vRegD src1, vRegD src2, vRegD src3) %{
  predicate(UseFMA);
  match(Set dst (FmaD src3 (Binary (NegD src1) src2)));
  match(Set dst (FmaD src3 (Binary src1 (NegD src2))));

  format %{ "fmsubd   $dst, $src1, $src2, $src3" %}

  ins_encode %{
    __ fmsubd(as_FloatRegister($dst$$reg),
              as_FloatRegister($src1$$reg),
              as_FloatRegister($src2$$reg),
              as_FloatRegister($src3$$reg));
  %}

  ins_pipe(pipe_class_default);
%}

// -src1 * src2 - src3
instruct mnaddF_reg_reg(vRegF dst, vRegF src1, vRegF src2, vRegF src3) %{
  predicate(UseFMA);
  match(Set dst (FmaF (NegF src3) (Binary (NegF src1) src2)));
  match(Set dst (FmaF (NegF src3) (Binary src1 (NegF src2))));

  format %{ "fnmadds  $dst, $src1, $src2, $src3" %}

  ins_encode %{
    __ fnmadds(as_FloatRegister($dst$$reg),
               as_FloatRegister($src1$$reg),
               as_FloatRegister($src2$$reg),
               as_FloatRegister($src3$$reg));
  %}

  ins_pipe(pipe_class_default);
%}

// -src1 * src2 - src3
instruct mnaddD_reg_reg(vRegD dst, vRegD src1, vRegD src2, vRegD src3) %{
  predicate(UseFMA);
  match(Set dst (FmaD (NegD src3) (Binary (NegD src1) src2)));
  match(Set dst (FmaD (NegD src3) (Binary src1 (NegD src2))));

  format %{ "fnmaddd   $dst, $src1, $src2, $src3" %}

  ins_encode %{
    __ fnmaddd(as_FloatRegister($dst$$reg),
               as_FloatRegister($src1$$reg),
               as_FloatRegister($src2$$reg),
               as_FloatRegister($src3$$reg));
  %}

  ins_pipe(pipe_class_default);
%}
11997 
// fnmsubs/fnmsub compute src1 * src2 - src3 (Fma with a negated addend).
// NOTE(review): the 'zero' operand in both rules below is not referenced
// by the match rule or the encoding — presumably left over from an earlier
// match pattern; confirm before removing.

// src1 * src2 - src3
instruct mnsubF_reg_reg(vRegF dst, vRegF src1, vRegF src2, vRegF src3, immF0 zero) %{
  predicate(UseFMA);
  match(Set dst (FmaF (NegF src3) (Binary src1 src2)));

  format %{ "fnmsubs  $dst, $src1, $src2, $src3" %}

  ins_encode %{
    __ fnmsubs(as_FloatRegister($dst$$reg),
               as_FloatRegister($src1$$reg),
               as_FloatRegister($src2$$reg),
               as_FloatRegister($src3$$reg));
  %}

  ins_pipe(pipe_class_default);
%}

// src1 * src2 - src3
instruct mnsubD_reg_reg(vRegD dst, vRegD src1, vRegD src2, vRegD src3, immD0 zero) %{
  predicate(UseFMA);
  match(Set dst (FmaD (NegD src3) (Binary src1 src2)));

  format %{ "fnmsubd   $dst, $src1, $src2, $src3" %}

  ins_encode %{
  // n.b. insn name should be fnmsubd
    __ fnmsub(as_FloatRegister($dst$$reg),
              as_FloatRegister($src1$$reg),
              as_FloatRegister($src2$$reg),
              as_FloatRegister($src3$$reg));
  %}

  ins_pipe(pipe_class_default);
%}
12032 
12033 
// Scalar FP divide; the higher costs reflect divide latency
// (18 x INSN_COST single precision, 32 x INSN_COST double).
instruct divF_reg_reg(vRegF dst, vRegF src1, vRegF src2) %{
  match(Set dst (DivF src1  src2));

  ins_cost(INSN_COST * 18);
  format %{ "fdivs   $dst, $src1, $src2" %}

  ins_encode %{
    __ fdivs(as_FloatRegister($dst$$reg),
             as_FloatRegister($src1$$reg),
             as_FloatRegister($src2$$reg));
  %}

  ins_pipe(fp_div_s);
%}

instruct divD_reg_reg(vRegD dst, vRegD src1, vRegD src2) %{
  match(Set dst (DivD src1  src2));

  ins_cost(INSN_COST * 32);
  format %{ "fdivd   $dst, $src1, $src2" %}

  ins_encode %{
    __ fdivd(as_FloatRegister($dst$$reg),
             as_FloatRegister($src1$$reg),
             as_FloatRegister($src2$$reg));
  %}

  ins_pipe(fp_div_d);
%}
12063 
// Single-precision FP negate.
instruct negF_reg_reg(vRegF dst, vRegF src) %{
  match(Set dst (NegF src));

  ins_cost(INSN_COST * 3);
  // Fixed: the format string previously printed "fneg", but the encoding
  // emits fnegs (single precision); now consistent with negD's "fnegd".
  format %{ "fnegs   $dst, $src" %}

  ins_encode %{
    __ fnegs(as_FloatRegister($dst$$reg),
             as_FloatRegister($src$$reg));
  %}

  ins_pipe(fp_uop_s);
%}
12077 
// Double-precision FP negate and single/double FP absolute value.
instruct negD_reg_reg(vRegD dst, vRegD src) %{
  match(Set dst (NegD src));

  ins_cost(INSN_COST * 3);
  format %{ "fnegd   $dst, $src" %}

  ins_encode %{
    __ fnegd(as_FloatRegister($dst$$reg),
             as_FloatRegister($src$$reg));
  %}

  ins_pipe(fp_uop_d);
%}

instruct absF_reg(vRegF dst, vRegF src) %{
  match(Set dst (AbsF src));

  ins_cost(INSN_COST * 3);
  format %{ "fabss   $dst, $src" %}
  ins_encode %{
    __ fabss(as_FloatRegister($dst$$reg),
             as_FloatRegister($src$$reg));
  %}

  ins_pipe(fp_uop_s);
%}

instruct absD_reg(vRegD dst, vRegD src) %{
  match(Set dst (AbsD src));

  ins_cost(INSN_COST * 3);
  format %{ "fabsd   $dst, $src" %}
  ins_encode %{
    __ fabsd(as_FloatRegister($dst$$reg),
             as_FloatRegister($src$$reg));
  %}

  ins_pipe(fp_uop_d);
%}
12117 
// Double-precision square root.
instruct sqrtD_reg(vRegD dst, vRegD src) %{
  match(Set dst (SqrtD src));

  ins_cost(INSN_COST * 50);
  format %{ "fsqrtd  $dst, $src" %}
  ins_encode %{
    __ fsqrtd(as_FloatRegister($dst$$reg),
             as_FloatRegister($src$$reg));
  %}

  // Fixed: this double-precision op was scheduled on fp_div_s (the
  // single-precision divide pipe); it now uses fp_div_d, matching the
  // sqrtF/sqrtD pairing of every other FP rule in this file.
  ins_pipe(fp_div_d);
%}
12130 
// Single-precision square root.  Matches the Ideal graph's
// ConvD2F(SqrtD(ConvF2D x)) idiom (there is no SqrtF node here) and
// collapses it into a single fsqrts.
instruct sqrtF_reg(vRegF dst, vRegF src) %{
  match(Set dst (ConvD2F (SqrtD (ConvF2D src))));

  ins_cost(INSN_COST * 50);
  format %{ "fsqrts  $dst, $src" %}
  ins_encode %{
    __ fsqrts(as_FloatRegister($dst$$reg),
             as_FloatRegister($src$$reg));
  %}

  // Fixed: this single-precision op was scheduled on fp_div_d (the
  // double-precision divide pipe); it now uses fp_div_s.
  ins_pipe(fp_div_s);
%}
12143 
12144 // ============================================================================
12145 // Logical Instructions
12146 
12147 // Integer Logical Instructions
12148 
12149 // And Instructions
12150 
12151 
12152 instruct andI_reg_reg(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2, rFlagsReg cr) %{
12153   match(Set dst (AndI src1 src2));
12154 
12155   format %{ "andw  $dst, $src1, $src2\t# int" %}
12156 
12157   ins_cost(INSN_COST);
12158   ins_encode %{
12159     __ andw(as_Register($dst$$reg),
12160             as_Register($src1$$reg),
12161             as_Register($src2$$reg));
12162   %}
12163 
12164   ins_pipe(ialu_reg_reg);
12165 %}
12166 
// 32-bit bitwise AND of a register with a logical immediate.
instruct andI_reg_imm(iRegINoSp dst, iRegIorL2I src1, immILog src2, rFlagsReg cr) %{
  match(Set dst (AndI src1 src2));

  // Fixed: the format string previously printed "andsw" (the flag-setting
  // form), but the encoding emits the plain andw.
  format %{ "andw  $dst, $src1, $src2\t# int" %}

  ins_cost(INSN_COST);
  ins_encode %{
    __ andw(as_Register($dst$$reg),
            as_Register($src1$$reg),
            (unsigned long)($src2$$constant));
  %}

  ins_pipe(ialu_reg_imm);
%}
12181 
// Or Instructions

// 32-bit bitwise OR / XOR, register-register and register-immediate forms.
// Immediate variants require a valid AArch64 logical immediate (immILog).
instruct orI_reg_reg(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2) %{
  match(Set dst (OrI src1 src2));

  format %{ "orrw  $dst, $src1, $src2\t# int" %}

  ins_cost(INSN_COST);
  ins_encode %{
    __ orrw(as_Register($dst$$reg),
            as_Register($src1$$reg),
            as_Register($src2$$reg));
  %}

  ins_pipe(ialu_reg_reg);
%}

instruct orI_reg_imm(iRegINoSp dst, iRegIorL2I src1, immILog src2) %{
  match(Set dst (OrI src1 src2));

  format %{ "orrw  $dst, $src1, $src2\t# int" %}

  ins_cost(INSN_COST);
  ins_encode %{
    __ orrw(as_Register($dst$$reg),
            as_Register($src1$$reg),
            (unsigned long)($src2$$constant));
  %}

  ins_pipe(ialu_reg_imm);
%}

// Xor Instructions

instruct xorI_reg_reg(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2) %{
  match(Set dst (XorI src1 src2));

  format %{ "eorw  $dst, $src1, $src2\t# int" %}

  ins_cost(INSN_COST);
  ins_encode %{
    __ eorw(as_Register($dst$$reg),
            as_Register($src1$$reg),
            as_Register($src2$$reg));
  %}

  ins_pipe(ialu_reg_reg);
%}

instruct xorI_reg_imm(iRegINoSp dst, iRegIorL2I src1, immILog src2) %{
  match(Set dst (XorI src1 src2));

  format %{ "eorw  $dst, $src1, $src2\t# int" %}

  ins_cost(INSN_COST);
  ins_encode %{
    __ eorw(as_Register($dst$$reg),
            as_Register($src1$$reg),
            (unsigned long)($src2$$constant));
  %}

  ins_pipe(ialu_reg_imm);
%}
12245 
// Long Logical Instructions
// TODO

// 64-bit bitwise AND / OR / XOR, register-register and register-immediate
// forms.  Immediate variants require a valid AArch64 logical immediate.
// Fixed: the format-string tags previously said "# int" for all six rules;
// they now read "# long" to match the 64-bit operation width.
instruct andL_reg_reg(iRegLNoSp dst, iRegL src1, iRegL src2, rFlagsReg cr) %{
  match(Set dst (AndL src1 src2));

  format %{ "and  $dst, $src1, $src2\t# long" %}

  ins_cost(INSN_COST);
  ins_encode %{
    __ andr(as_Register($dst$$reg),
            as_Register($src1$$reg),
            as_Register($src2$$reg));
  %}

  ins_pipe(ialu_reg_reg);
%}

instruct andL_reg_imm(iRegLNoSp dst, iRegL src1, immLLog src2, rFlagsReg cr) %{
  match(Set dst (AndL src1 src2));

  format %{ "and  $dst, $src1, $src2\t# long" %}

  ins_cost(INSN_COST);
  ins_encode %{
    __ andr(as_Register($dst$$reg),
            as_Register($src1$$reg),
            (unsigned long)($src2$$constant));
  %}

  ins_pipe(ialu_reg_imm);
%}

// Or Instructions

instruct orL_reg_reg(iRegLNoSp dst, iRegL src1, iRegL src2) %{
  match(Set dst (OrL src1 src2));

  format %{ "orr  $dst, $src1, $src2\t# long" %}

  ins_cost(INSN_COST);
  ins_encode %{
    __ orr(as_Register($dst$$reg),
           as_Register($src1$$reg),
           as_Register($src2$$reg));
  %}

  ins_pipe(ialu_reg_reg);
%}

instruct orL_reg_imm(iRegLNoSp dst, iRegL src1, immLLog src2) %{
  match(Set dst (OrL src1 src2));

  format %{ "orr  $dst, $src1, $src2\t# long" %}

  ins_cost(INSN_COST);
  ins_encode %{
    __ orr(as_Register($dst$$reg),
           as_Register($src1$$reg),
           (unsigned long)($src2$$constant));
  %}

  ins_pipe(ialu_reg_imm);
%}

// Xor Instructions

instruct xorL_reg_reg(iRegLNoSp dst, iRegL src1, iRegL src2) %{
  match(Set dst (XorL src1 src2));

  format %{ "eor  $dst, $src1, $src2\t# long" %}

  ins_cost(INSN_COST);
  ins_encode %{
    __ eor(as_Register($dst$$reg),
           as_Register($src1$$reg),
           as_Register($src2$$reg));
  %}

  ins_pipe(ialu_reg_reg);
%}

instruct xorL_reg_imm(iRegLNoSp dst, iRegL src1, immLLog src2) %{
  match(Set dst (XorL src1 src2));

  // (format/ins_cost order normalized to match the sibling rules above)
  format %{ "eor  $dst, $src1, $src2\t# long" %}

  ins_cost(INSN_COST);
  ins_encode %{
    __ eor(as_Register($dst$$reg),
           as_Register($src1$$reg),
           (unsigned long)($src2$$constant));
  %}

  ins_pipe(ialu_reg_imm);
%}
12342 
// int -> long sign extension: sbfm with immr=0, imms=31 is the sxtw alias.
instruct convI2L_reg_reg(iRegLNoSp dst, iRegIorL2I src)
%{
  match(Set dst (ConvI2L src));

  ins_cost(INSN_COST);
  format %{ "sxtw  $dst, $src\t# i2l" %}
  ins_encode %{
    __ sbfm($dst$$Register, $src$$Register, 0, 31);
  %}
  ins_pipe(ialu_reg_shift);
%}

// this pattern occurs in bigmath arithmetic
// unsigned int -> long: matches (ConvI2L src) & 0xFFFFFFFF and emits a
// single zero-extension (ubfm 0,31 is the uxtw alias).
instruct convUI2L_reg_reg(iRegLNoSp dst, iRegIorL2I src, immL_32bits mask)
%{
  match(Set dst (AndL (ConvI2L src) mask));

  ins_cost(INSN_COST);
  format %{ "ubfm  $dst, $src, 0, 31\t# ui2l" %}
  ins_encode %{
    __ ubfm($dst$$Register, $src$$Register, 0, 31);
  %}

  ins_pipe(ialu_reg_shift);
%}

// long -> int truncation: a 32-bit register move keeps the low word.
instruct convL2I_reg(iRegINoSp dst, iRegL src) %{
  match(Set dst (ConvL2I src));

  ins_cost(INSN_COST);
  format %{ "movw  $dst, $src \t// l2i" %}

  ins_encode %{
    __ movw(as_Register($dst$$reg), as_Register($src$$reg));
  %}

  ins_pipe(ialu_reg);
%}

// int -> boolean: dst = (src != 0) ? 1 : 0 via compare + conditional set;
// clobbers the flags (KILL cr).
instruct convI2B(iRegINoSp dst, iRegIorL2I src, rFlagsReg cr)
%{
  match(Set dst (Conv2B src));
  effect(KILL cr);

  format %{
    "cmpw $src, zr\n\t"
    "cset $dst, ne"
  %}

  ins_encode %{
    __ cmpw(as_Register($src$$reg), zr);
    __ cset(as_Register($dst$$reg), Assembler::NE);
  %}

  ins_pipe(ialu_reg);
%}
12399 
// pointer -> boolean: dst = (src != NULL) ? 1 : 0; clobbers the flags.
instruct convP2B(iRegINoSp dst, iRegP src, rFlagsReg cr)
%{
  match(Set dst (Conv2B src));
  effect(KILL cr);

  format %{
    "cmp  $src, zr\n\t"
    "cset $dst, ne"
  %}

  ins_encode %{
    __ cmp(as_Register($src$$reg), zr);
    __ cset(as_Register($dst$$reg), Assembler::NE);
  %}

  ins_pipe(ialu_reg);
%}

// double -> float precision conversion.
instruct convD2F_reg(vRegF dst, vRegD src) %{
  match(Set dst (ConvD2F src));

  ins_cost(INSN_COST * 5);
  format %{ "fcvtd  $dst, $src \t// d2f" %}

  ins_encode %{
    __ fcvtd(as_FloatRegister($dst$$reg), as_FloatRegister($src$$reg));
  %}

  ins_pipe(fp_d2f);
%}

// float -> double precision conversion.
instruct convF2D_reg(vRegD dst, vRegF src) %{
  match(Set dst (ConvF2D src));

  ins_cost(INSN_COST * 5);
  format %{ "fcvts  $dst, $src \t// f2d" %}

  ins_encode %{
    __ fcvts(as_FloatRegister($dst$$reg), as_FloatRegister($src$$reg));
  %}

  ins_pipe(fp_f2d);
%}

// float -> int / long: fcvtzs* converts to signed integer, rounding
// toward zero.
instruct convF2I_reg_reg(iRegINoSp dst, vRegF src) %{
  match(Set dst (ConvF2I src));

  ins_cost(INSN_COST * 5);
  format %{ "fcvtzsw  $dst, $src \t// f2i" %}

  ins_encode %{
    __ fcvtzsw(as_Register($dst$$reg), as_FloatRegister($src$$reg));
  %}

  ins_pipe(fp_f2i);
%}

instruct convF2L_reg_reg(iRegLNoSp dst, vRegF src) %{
  match(Set dst (ConvF2L src));

  ins_cost(INSN_COST * 5);
  format %{ "fcvtzs  $dst, $src \t// f2l" %}

  ins_encode %{
    __ fcvtzs(as_Register($dst$$reg), as_FloatRegister($src$$reg));
  %}

  ins_pipe(fp_f2l);
%}
12469 
// Signed integer -> FP conversions (scvtf*) and the remaining
// FP -> signed integer conversions (fcvtz*, round toward zero).
instruct convI2F_reg_reg(vRegF dst, iRegIorL2I src) %{
  match(Set dst (ConvI2F src));

  ins_cost(INSN_COST * 5);
  format %{ "scvtfws  $dst, $src \t// i2f" %}

  ins_encode %{
    __ scvtfws(as_FloatRegister($dst$$reg), as_Register($src$$reg));
  %}

  ins_pipe(fp_i2f);
%}

instruct convL2F_reg_reg(vRegF dst, iRegL src) %{
  match(Set dst (ConvL2F src));

  ins_cost(INSN_COST * 5);
  format %{ "scvtfs  $dst, $src \t// l2f" %}

  ins_encode %{
    __ scvtfs(as_FloatRegister($dst$$reg), as_Register($src$$reg));
  %}

  ins_pipe(fp_l2f);
%}

instruct convD2I_reg_reg(iRegINoSp dst, vRegD src) %{
  match(Set dst (ConvD2I src));

  ins_cost(INSN_COST * 5);
  format %{ "fcvtzdw  $dst, $src \t// d2i" %}

  ins_encode %{
    __ fcvtzdw(as_Register($dst$$reg), as_FloatRegister($src$$reg));
  %}

  ins_pipe(fp_d2i);
%}

instruct convD2L_reg_reg(iRegLNoSp dst, vRegD src) %{
  match(Set dst (ConvD2L src));

  ins_cost(INSN_COST * 5);
  format %{ "fcvtzd  $dst, $src \t// d2l" %}

  ins_encode %{
    __ fcvtzd(as_Register($dst$$reg), as_FloatRegister($src$$reg));
  %}

  ins_pipe(fp_d2l);
%}

instruct convI2D_reg_reg(vRegD dst, iRegIorL2I src) %{
  match(Set dst (ConvI2D src));

  ins_cost(INSN_COST * 5);
  format %{ "scvtfwd  $dst, $src \t// i2d" %}

  ins_encode %{
    __ scvtfwd(as_FloatRegister($dst$$reg), as_Register($src$$reg));
  %}

  ins_pipe(fp_i2d);
%}

instruct convL2D_reg_reg(vRegD dst, iRegL src) %{
  match(Set dst (ConvL2D src));

  ins_cost(INSN_COST * 5);
  format %{ "scvtfd  $dst, $src \t// l2d" %}

  ins_encode %{
    __ scvtfd(as_FloatRegister($dst$$reg), as_Register($src$$reg));
  %}

  ins_pipe(fp_l2d);
%}
12547 
// stack <-> reg and reg <-> reg shuffles with no conversion

// These move raw 32-/64-bit patterns between the integer and FP register
// files via a stack slot, reinterpreting the bits rather than converting
// the value.  Loads are costed at 4 * INSN_COST, stores at INSN_COST.

instruct MoveF2I_stack_reg(iRegINoSp dst, stackSlotF src) %{

  match(Set dst (MoveF2I src));

  effect(DEF dst, USE src);

  ins_cost(4 * INSN_COST);

  format %{ "ldrw $dst, $src\t# MoveF2I_stack_reg" %}

  ins_encode %{
    __ ldrw($dst$$Register, Address(sp, $src$$disp));
  %}

  ins_pipe(iload_reg_reg);

%}

instruct MoveI2F_stack_reg(vRegF dst, stackSlotI src) %{

  match(Set dst (MoveI2F src));

  effect(DEF dst, USE src);

  ins_cost(4 * INSN_COST);

  format %{ "ldrs $dst, $src\t# MoveI2F_stack_reg" %}

  ins_encode %{
    __ ldrs(as_FloatRegister($dst$$reg), Address(sp, $src$$disp));
  %}

  ins_pipe(pipe_class_memory);

%}

instruct MoveD2L_stack_reg(iRegLNoSp dst, stackSlotD src) %{

  match(Set dst (MoveD2L src));

  effect(DEF dst, USE src);

  ins_cost(4 * INSN_COST);

  format %{ "ldr $dst, $src\t# MoveD2L_stack_reg" %}

  ins_encode %{
    __ ldr($dst$$Register, Address(sp, $src$$disp));
  %}

  ins_pipe(iload_reg_reg);

%}

instruct MoveL2D_stack_reg(vRegD dst, stackSlotL src) %{

  match(Set dst (MoveL2D src));

  effect(DEF dst, USE src);

  ins_cost(4 * INSN_COST);

  format %{ "ldrd $dst, $src\t# MoveL2D_stack_reg" %}

  ins_encode %{
    __ ldrd(as_FloatRegister($dst$$reg), Address(sp, $src$$disp));
  %}

  ins_pipe(pipe_class_memory);

%}

instruct MoveF2I_reg_stack(stackSlotI dst, vRegF src) %{

  match(Set dst (MoveF2I src));

  effect(DEF dst, USE src);

  ins_cost(INSN_COST);

  format %{ "strs $src, $dst\t# MoveF2I_reg_stack" %}

  ins_encode %{
    __ strs(as_FloatRegister($src$$reg), Address(sp, $dst$$disp));
  %}

  ins_pipe(pipe_class_memory);

%}

instruct MoveI2F_reg_stack(stackSlotF dst, iRegI src) %{

  match(Set dst (MoveI2F src));

  effect(DEF dst, USE src);

  ins_cost(INSN_COST);

  format %{ "strw $src, $dst\t# MoveI2F_reg_stack" %}

  ins_encode %{
    __ strw($src$$Register, Address(sp, $dst$$disp));
  %}

  ins_pipe(istore_reg_reg);

%}
12657 
// Store a double register's raw bits to a long stack slot.
instruct MoveD2L_reg_stack(stackSlotL dst, vRegD src) %{

  match(Set dst (MoveD2L src));

  effect(DEF dst, USE src);

  ins_cost(INSN_COST);

  // Fixed: the format previously printed "strd $dst, $src" with the
  // operands reversed; the encoding stores $src to the $dst slot, and
  // every sibling *_reg_stack rule prints "$src, $dst".
  format %{ "strd $src, $dst\t# MoveD2L_reg_stack" %}

  ins_encode %{
    __ strd(as_FloatRegister($src$$reg), Address(sp, $dst$$disp));
  %}

  ins_pipe(pipe_class_memory);

%}
12675 
// Store a long register to a double stack slot, then the four direct
// register-file moves (fmovs/fmovd reinterpret the bits, no conversion).
instruct MoveL2D_reg_stack(stackSlotD dst, iRegL src) %{

  match(Set dst (MoveL2D src));

  effect(DEF dst, USE src);

  ins_cost(INSN_COST);

  format %{ "str $src, $dst\t# MoveL2D_reg_stack" %}

  ins_encode %{
    __ str($src$$Register, Address(sp, $dst$$disp));
  %}

  ins_pipe(istore_reg_reg);

%}

instruct MoveF2I_reg_reg(iRegINoSp dst, vRegF src) %{

  match(Set dst (MoveF2I src));

  effect(DEF dst, USE src);

  ins_cost(INSN_COST);

  format %{ "fmovs $dst, $src\t# MoveF2I_reg_reg" %}

  ins_encode %{
    __ fmovs($dst$$Register, as_FloatRegister($src$$reg));
  %}

  ins_pipe(fp_f2i);

%}

instruct MoveI2F_reg_reg(vRegF dst, iRegI src) %{

  match(Set dst (MoveI2F src));

  effect(DEF dst, USE src);

  ins_cost(INSN_COST);

  format %{ "fmovs $dst, $src\t# MoveI2F_reg_reg" %}

  ins_encode %{
    __ fmovs(as_FloatRegister($dst$$reg), $src$$Register);
  %}

  ins_pipe(fp_i2f);

%}

instruct MoveD2L_reg_reg(iRegLNoSp dst, vRegD src) %{

  match(Set dst (MoveD2L src));

  effect(DEF dst, USE src);

  ins_cost(INSN_COST);

  format %{ "fmovd $dst, $src\t# MoveD2L_reg_reg" %}

  ins_encode %{
    __ fmovd($dst$$Register, as_FloatRegister($src$$reg));
  %}

  ins_pipe(fp_d2l);

%}

instruct MoveL2D_reg_reg(vRegD dst, iRegL src) %{

  match(Set dst (MoveL2D src));

  effect(DEF dst, USE src);

  ins_cost(INSN_COST);

  format %{ "fmovd $dst, $src\t# MoveL2D_reg_reg" %}

  ins_encode %{
    __ fmovd(as_FloatRegister($dst$$reg), $src$$Register);
  %}

  ins_pipe(fp_l2d);

%}
12765 
12766 // ============================================================================
12767 // clearing of an array
12768 
12769 instruct clearArray_reg_reg(iRegL_R11 cnt, iRegP_R10 base, Universe dummy, rFlagsReg cr)
12770 %{
12771   match(Set dummy (ClearArray cnt base));
12772   effect(USE_KILL cnt, USE_KILL base);
12773 
12774   ins_cost(4 * INSN_COST);
12775   format %{ "ClearArray $cnt, $base" %}
12776 
12777   ins_encode %{
12778     __ zero_words($base$$Register, $cnt$$Register);
12779   %}
12780 
12781   ins_pipe(pipe_class_memory);
12782 %}
12783 
12784 instruct clearArray_imm_reg(immL cnt, iRegP_R10 base, Universe dummy, rFlagsReg cr)
12785 %{
12786   predicate((u_int64_t)n->in(2)->get_long()
12787             < (u_int64_t)(BlockZeroingLowLimit >> LogBytesPerWord));
12788   match(Set dummy (ClearArray cnt base));
12789   effect(USE_KILL base);
12790 
12791   ins_cost(4 * INSN_COST);
12792   format %{ "ClearArray $cnt, $base" %}
12793 
12794   ins_encode %{
12795     __ zero_words($base$$Register, (u_int64_t)$cnt$$constant);
12796   %}
12797 
12798   ins_pipe(pipe_class_memory);
12799 %}
12800 
12801 // ============================================================================
12802 // Overflow Math Instructions
12803 
// Int add overflow check: CMN (compare-negative) computes op1 + op2 and sets
// the V flag iff the 32-bit signed add overflows; the sum is discarded.
instruct overflowAddI_reg_reg(rFlagsReg cr, iRegIorL2I op1, iRegIorL2I op2)
%{
  match(Set cr (OverflowAddI op1 op2));

  format %{ "cmnw  $op1, $op2\t# overflow check int" %}
  ins_cost(INSN_COST);
  ins_encode %{
    __ cmnw($op1$$Register, $op2$$Register);
  %}

  ins_pipe(icmp_reg_reg);
%}

// As above with an add/sub-encodable immediate second operand.
instruct overflowAddI_reg_imm(rFlagsReg cr, iRegIorL2I op1, immIAddSub op2)
%{
  match(Set cr (OverflowAddI op1 op2));

  format %{ "cmnw  $op1, $op2\t# overflow check int" %}
  ins_cost(INSN_COST);
  ins_encode %{
    __ cmnw($op1$$Register, $op2$$constant);
  %}

  ins_pipe(icmp_reg_imm);
%}

// Long add overflow check: 64-bit CMN sets V iff op1 + op2 overflows.
instruct overflowAddL_reg_reg(rFlagsReg cr, iRegL op1, iRegL op2)
%{
  match(Set cr (OverflowAddL op1 op2));

  format %{ "cmn   $op1, $op2\t# overflow check long" %}
  ins_cost(INSN_COST);
  ins_encode %{
    __ cmn($op1$$Register, $op2$$Register);
  %}

  ins_pipe(icmp_reg_reg);
%}

// As above with an add/sub-encodable immediate second operand.
instruct overflowAddL_reg_imm(rFlagsReg cr, iRegL op1, immLAddSub op2)
%{
  match(Set cr (OverflowAddL op1 op2));

  format %{ "cmn   $op1, $op2\t# overflow check long" %}
  ins_cost(INSN_COST);
  ins_encode %{
    __ cmn($op1$$Register, $op2$$constant);
  %}

  ins_pipe(icmp_reg_imm);
%}
12855 
// Int subtract overflow check: CMP computes op1 - op2 and sets V iff the
// 32-bit signed subtract overflows; the difference is discarded.
instruct overflowSubI_reg_reg(rFlagsReg cr, iRegIorL2I op1, iRegIorL2I op2)
%{
  match(Set cr (OverflowSubI op1 op2));

  format %{ "cmpw  $op1, $op2\t# overflow check int" %}
  ins_cost(INSN_COST);
  ins_encode %{
    __ cmpw($op1$$Register, $op2$$Register);
  %}

  ins_pipe(icmp_reg_reg);
%}

// As above with an add/sub-encodable immediate second operand.
instruct overflowSubI_reg_imm(rFlagsReg cr, iRegIorL2I op1, immIAddSub op2)
%{
  match(Set cr (OverflowSubI op1 op2));

  format %{ "cmpw  $op1, $op2\t# overflow check int" %}
  ins_cost(INSN_COST);
  ins_encode %{
    __ cmpw($op1$$Register, $op2$$constant);
  %}

  ins_pipe(icmp_reg_imm);
%}

// Long subtract overflow check: 64-bit CMP sets V iff op1 - op2 overflows.
instruct overflowSubL_reg_reg(rFlagsReg cr, iRegL op1, iRegL op2)
%{
  match(Set cr (OverflowSubL op1 op2));

  format %{ "cmp   $op1, $op2\t# overflow check long" %}
  ins_cost(INSN_COST);
  ins_encode %{
    __ cmp($op1$$Register, $op2$$Register);
  %}

  ins_pipe(icmp_reg_reg);
%}

// As above with an add/sub-encodable immediate second operand.
instruct overflowSubL_reg_imm(rFlagsReg cr, iRegL op1, immLAddSub op2)
%{
  match(Set cr (OverflowSubL op1 op2));

  format %{ "cmp   $op1, $op2\t# overflow check long" %}
  ins_cost(INSN_COST);
  ins_encode %{
    __ cmp($op1$$Register, $op2$$constant);
  %}

  ins_pipe(icmp_reg_imm);
%}
12907 
// Int negation overflow check (0 - op1): CMP with zr sets V iff op1 is
// Integer.MIN_VALUE, the only int whose negation overflows.
instruct overflowNegI_reg(rFlagsReg cr, immI0 zero, iRegIorL2I op1)
%{
  match(Set cr (OverflowSubI zero op1));

  format %{ "cmpw  zr, $op1\t# overflow check int" %}
  ins_cost(INSN_COST);
  ins_encode %{
    __ cmpw(zr, $op1$$Register);
  %}

  ins_pipe(icmp_reg_imm);
%}

// Long negation overflow check (0 - op1): V set iff op1 is Long.MIN_VALUE.
// NOTE(review): the matched zero operand is immI0 (int 0) rather than immL0 —
// confirm this is the intended constant kind for OverflowSubL.
instruct overflowNegL_reg(rFlagsReg cr, immI0 zero, iRegL op1)
%{
  match(Set cr (OverflowSubL zero op1));

  format %{ "cmp   zr, $op1\t# overflow check long" %}
  ins_cost(INSN_COST);
  ins_encode %{
    __ cmp(zr, $op1$$Register);
  %}

  ins_pipe(icmp_reg_imm);
%}
12933 
// Int multiply overflow check.  SMULL forms the full 64-bit product of the
// 32-bit operands; the product overflows int iff it differs from the sign
// extension of its own low 32 bits (the SUBS ... sxtw compare).  The trailing
// movw/cselw/cmpw sequence then translates that NE/EQ result into the V flag
// (0x80000000 - 1 overflows, 0 - 1 does not) so ordinary overflow cmpOps work.
instruct overflowMulI_reg(rFlagsReg cr, iRegIorL2I op1, iRegIorL2I op2)
%{
  match(Set cr (OverflowMulI op1 op2));

  format %{ "smull rscratch1, $op1, $op2\t# overflow check int\n\t"
            "cmp   rscratch1, rscratch1, sxtw\n\t"
            "movw  rscratch1, #0x80000000\n\t"
            "cselw rscratch1, rscratch1, zr, NE\n\t"
            "cmpw  rscratch1, #1" %}
  ins_cost(5 * INSN_COST);
  ins_encode %{
    __ smull(rscratch1, $op1$$Register, $op2$$Register);
    __ subs(zr, rscratch1, rscratch1, ext::sxtw);      // NE => overflow
    __ movw(rscratch1, 0x80000000);                    // Develop 0 (EQ),
    __ cselw(rscratch1, rscratch1, zr, Assembler::NE); // or 0x80000000 (NE)
    __ cmpw(rscratch1, 1);                             // 0x80000000 - 1 => VS
  %}

  ins_pipe(pipe_slow);
%}

// Fused int multiply-overflow-and-branch: when the If consumes the overflow
// test directly we can skip materializing the V flag and branch on the
// NE/EQ outcome of the sign-extension compare instead.
instruct overflowMulI_reg_branch(cmpOp cmp, iRegIorL2I op1, iRegIorL2I op2, label labl, rFlagsReg cr)
%{
  match(If cmp (OverflowMulI op1 op2));
  predicate(n->in(1)->as_Bool()->_test._test == BoolTest::overflow
            || n->in(1)->as_Bool()->_test._test == BoolTest::no_overflow);
  effect(USE labl, KILL cr);

  format %{ "smull rscratch1, $op1, $op2\t# overflow check int\n\t"
            "cmp   rscratch1, rscratch1, sxtw\n\t"
            "b$cmp   $labl" %}
  ins_cost(3 * INSN_COST); // Branch is rare so treat as INSN_COST
  ins_encode %{
    Label* L = $labl$$label;
    Assembler::Condition cond = (Assembler::Condition)$cmp$$cmpcode;
    // VS (overflow) maps to NE of the compare above, VC to EQ.
    __ smull(rscratch1, $op1$$Register, $op2$$Register);
    __ subs(zr, rscratch1, rscratch1, ext::sxtw);      // NE => overflow
    __ br(cond == Assembler::VS ? Assembler::NE : Assembler::EQ, *L);
  %}

  ins_pipe(pipe_serial);
%}

// Long multiply overflow check.  MUL gives the low 64 bits, SMULH the high
// 64 bits of the 128-bit product; no overflow iff the high half equals the
// sign extension of the low half (compare against low >> 63 arithmetic).
// The tail sequence converts NE/EQ into the V flag as in overflowMulI_reg.
instruct overflowMulL_reg(rFlagsReg cr, iRegL op1, iRegL op2)
%{
  match(Set cr (OverflowMulL op1 op2));

  format %{ "mul   rscratch1, $op1, $op2\t#overflow check long\n\t"
            "smulh rscratch2, $op1, $op2\n\t"
            "cmp   rscratch2, rscratch1, ASR #63\n\t"
            "movw  rscratch1, #0x80000000\n\t"
            "cselw rscratch1, rscratch1, zr, NE\n\t"
            "cmpw  rscratch1, #1" %}
  ins_cost(6 * INSN_COST);
  ins_encode %{
    __ mul(rscratch1, $op1$$Register, $op2$$Register);   // Result bits 0..63
    __ smulh(rscratch2, $op1$$Register, $op2$$Register); // Result bits 64..127
    __ cmp(rscratch2, rscratch1, Assembler::ASR, 63);    // Top is pure sign ext
    __ movw(rscratch1, 0x80000000);                    // Develop 0 (EQ),
    __ cselw(rscratch1, rscratch1, zr, Assembler::NE); // or 0x80000000 (NE)
    __ cmpw(rscratch1, 1);                             // 0x80000000 - 1 => VS
  %}

  ins_pipe(pipe_slow);
%}

// Fused long multiply-overflow-and-branch; see overflowMulI_reg_branch for
// the VS->NE / VC->EQ condition mapping.
instruct overflowMulL_reg_branch(cmpOp cmp, iRegL op1, iRegL op2, label labl, rFlagsReg cr)
%{
  match(If cmp (OverflowMulL op1 op2));
  predicate(n->in(1)->as_Bool()->_test._test == BoolTest::overflow
            || n->in(1)->as_Bool()->_test._test == BoolTest::no_overflow);
  effect(USE labl, KILL cr);

  format %{ "mul   rscratch1, $op1, $op2\t#overflow check long\n\t"
            "smulh rscratch2, $op1, $op2\n\t"
            "cmp   rscratch2, rscratch1, ASR #63\n\t"
            "b$cmp $labl" %}
  ins_cost(4 * INSN_COST); // Branch is rare so treat as INSN_COST
  ins_encode %{
    Label* L = $labl$$label;
    Assembler::Condition cond = (Assembler::Condition)$cmp$$cmpcode;
    __ mul(rscratch1, $op1$$Register, $op2$$Register);   // Result bits 0..63
    __ smulh(rscratch2, $op1$$Register, $op2$$Register); // Result bits 64..127
    __ cmp(rscratch2, rscratch1, Assembler::ASR, 63);    // Top is pure sign ext
    __ br(cond == Assembler::VS ? Assembler::NE : Assembler::EQ, *L);
  %}

  ins_pipe(pipe_serial);
%}
13023 
13024 // ============================================================================
13025 // Compare Instructions
13026 
// Signed int compare, register-register (CMPW).
instruct compI_reg_reg(rFlagsReg cr, iRegI op1, iRegI op2)
%{
  match(Set cr (CmpI op1 op2));

  effect(DEF cr, USE op1, USE op2);

  ins_cost(INSN_COST);
  format %{ "cmpw  $op1, $op2" %}

  ins_encode(aarch64_enc_cmpw(op1, op2));

  ins_pipe(icmp_reg_reg);
%}

// Signed int compare against the constant zero.
instruct compI_reg_immI0(rFlagsReg cr, iRegI op1, immI0 zero)
%{
  match(Set cr (CmpI op1 zero));

  effect(DEF cr, USE op1);

  ins_cost(INSN_COST);
  format %{ "cmpw $op1, 0" %}

  ins_encode(aarch64_enc_cmpw_imm_addsub(op1, zero));

  ins_pipe(icmp_reg_imm);
%}

// Signed int compare against an immediate that fits the add/sub encoding
// (single instruction).
instruct compI_reg_immIAddSub(rFlagsReg cr, iRegI op1, immIAddSub op2)
%{
  match(Set cr (CmpI op1 op2));

  effect(DEF cr, USE op1);

  ins_cost(INSN_COST);
  format %{ "cmpw  $op1, $op2" %}

  ins_encode(aarch64_enc_cmpw_imm_addsub(op1, op2));

  ins_pipe(icmp_reg_imm);
%}

// Signed int compare against an arbitrary immediate; costs more because the
// constant may need to be materialized first (hence 2 * INSN_COST).
instruct compI_reg_immI(rFlagsReg cr, iRegI op1, immI op2)
%{
  match(Set cr (CmpI op1 op2));

  effect(DEF cr, USE op1);

  ins_cost(INSN_COST * 2);
  format %{ "cmpw  $op1, $op2" %}

  ins_encode(aarch64_enc_cmpw_imm(op1, op2));

  ins_pipe(icmp_reg_imm);
%}
13082 
// Unsigned compare Instructions; really, same as signed compare
// except it should only be used to feed an If or a CMovI which takes a
// cmpOpU.

// Unsigned int compare, register-register.  Same CMPW as the signed form;
// only the consuming cmpOpU interprets the flags differently.
instruct compU_reg_reg(rFlagsRegU cr, iRegI op1, iRegI op2)
%{
  match(Set cr (CmpU op1 op2));

  effect(DEF cr, USE op1, USE op2);

  ins_cost(INSN_COST);
  format %{ "cmpw  $op1, $op2\t# unsigned" %}

  ins_encode(aarch64_enc_cmpw(op1, op2));

  ins_pipe(icmp_reg_reg);
%}

// Unsigned int compare against the constant zero.
instruct compU_reg_immI0(rFlagsRegU cr, iRegI op1, immI0 zero)
%{
  match(Set cr (CmpU op1 zero));

  effect(DEF cr, USE op1);

  ins_cost(INSN_COST);
  format %{ "cmpw $op1, #0\t# unsigned" %}

  ins_encode(aarch64_enc_cmpw_imm_addsub(op1, zero));

  ins_pipe(icmp_reg_imm);
%}

// Unsigned int compare against an add/sub-encodable immediate.
instruct compU_reg_immIAddSub(rFlagsRegU cr, iRegI op1, immIAddSub op2)
%{
  match(Set cr (CmpU op1 op2));

  effect(DEF cr, USE op1);

  ins_cost(INSN_COST);
  format %{ "cmpw  $op1, $op2\t# unsigned" %}

  ins_encode(aarch64_enc_cmpw_imm_addsub(op1, op2));

  ins_pipe(icmp_reg_imm);
%}

// Unsigned int compare against an arbitrary immediate (constant may need
// materializing, hence 2 * INSN_COST).
instruct compU_reg_immI(rFlagsRegU cr, iRegI op1, immI op2)
%{
  match(Set cr (CmpU op1 op2));

  effect(DEF cr, USE op1);

  ins_cost(INSN_COST * 2);
  format %{ "cmpw  $op1, $op2\t# unsigned" %}

  ins_encode(aarch64_enc_cmpw_imm(op1, op2));

  ins_pipe(icmp_reg_imm);
%}
13142 
// Signed long compare, register-register (64-bit CMP).
instruct compL_reg_reg(rFlagsReg cr, iRegL op1, iRegL op2)
%{
  match(Set cr (CmpL op1 op2));

  effect(DEF cr, USE op1, USE op2);

  ins_cost(INSN_COST);
  format %{ "cmp  $op1, $op2" %}

  ins_encode(aarch64_enc_cmp(op1, op2));

  ins_pipe(icmp_reg_reg);
%}

// Signed long compare against the constant zero.
instruct compL_reg_immL0(rFlagsReg cr, iRegL op1, immL0 zero)
%{
  match(Set cr (CmpL op1 zero));

  effect(DEF cr, USE op1);

  ins_cost(INSN_COST);
  format %{ "tst  $op1" %}

  ins_encode(aarch64_enc_cmp_imm_addsub(op1, zero));

  ins_pipe(icmp_reg_imm);
%}

// Signed long compare against an add/sub-encodable immediate.
instruct compL_reg_immLAddSub(rFlagsReg cr, iRegL op1, immLAddSub op2)
%{
  match(Set cr (CmpL op1 op2));

  effect(DEF cr, USE op1);

  ins_cost(INSN_COST);
  format %{ "cmp  $op1, $op2" %}

  ins_encode(aarch64_enc_cmp_imm_addsub(op1, op2));

  ins_pipe(icmp_reg_imm);
%}

// Signed long compare against an arbitrary immediate (constant may need
// materializing, hence 2 * INSN_COST).
instruct compL_reg_immL(rFlagsReg cr, iRegL op1, immL op2)
%{
  match(Set cr (CmpL op1 op2));

  effect(DEF cr, USE op1);

  ins_cost(INSN_COST * 2);
  format %{ "cmp  $op1, $op2" %}

  ins_encode(aarch64_enc_cmp_imm(op1, op2));

  ins_pipe(icmp_reg_imm);
%}
13198 
// Unsigned long compare, register-register.  Same CMP as the signed form;
// the unsigned interpretation lives in the consuming cmpOpU.
instruct compUL_reg_reg(rFlagsRegU cr, iRegL op1, iRegL op2)
%{
  match(Set cr (CmpUL op1 op2));

  effect(DEF cr, USE op1, USE op2);

  ins_cost(INSN_COST);
  format %{ "cmp  $op1, $op2" %}

  ins_encode(aarch64_enc_cmp(op1, op2));

  ins_pipe(icmp_reg_reg);
%}

// Unsigned long compare against the constant zero.
instruct compUL_reg_immL0(rFlagsRegU cr, iRegL op1, immL0 zero)
%{
  match(Set cr (CmpUL op1 zero));

  effect(DEF cr, USE op1);

  ins_cost(INSN_COST);
  format %{ "tst  $op1" %}

  ins_encode(aarch64_enc_cmp_imm_addsub(op1, zero));

  ins_pipe(icmp_reg_imm);
%}

// Unsigned long compare against an add/sub-encodable immediate.
instruct compUL_reg_immLAddSub(rFlagsRegU cr, iRegL op1, immLAddSub op2)
%{
  match(Set cr (CmpUL op1 op2));

  effect(DEF cr, USE op1);

  ins_cost(INSN_COST);
  format %{ "cmp  $op1, $op2" %}

  ins_encode(aarch64_enc_cmp_imm_addsub(op1, op2));

  ins_pipe(icmp_reg_imm);
%}

// Unsigned long compare against an arbitrary immediate (constant may need
// materializing, hence 2 * INSN_COST).
instruct compUL_reg_immL(rFlagsRegU cr, iRegL op1, immL op2)
%{
  match(Set cr (CmpUL op1 op2));

  effect(DEF cr, USE op1);

  ins_cost(INSN_COST * 2);
  format %{ "cmp  $op1, $op2" %}

  ins_encode(aarch64_enc_cmp_imm(op1, op2));

  ins_pipe(icmp_reg_imm);
%}
13254 
// Pointer compare, register-register (pointer equality/ordering is unsigned).
instruct compP_reg_reg(rFlagsRegU cr, iRegP op1, iRegP op2)
%{
  match(Set cr (CmpP op1 op2));

  effect(DEF cr, USE op1, USE op2);

  ins_cost(INSN_COST);
  format %{ "cmp  $op1, $op2\t // ptr" %}

  ins_encode(aarch64_enc_cmpp(op1, op2));

  ins_pipe(icmp_reg_reg);
%}

// Compressed-oop compare, register-register.
instruct compN_reg_reg(rFlagsRegU cr, iRegN op1, iRegN op2)
%{
  match(Set cr (CmpN op1 op2));

  effect(DEF cr, USE op1, USE op2);

  ins_cost(INSN_COST);
  format %{ "cmp  $op1, $op2\t // compressed ptr" %}

  ins_encode(aarch64_enc_cmpn(op1, op2));

  ins_pipe(icmp_reg_reg);
%}

// Pointer null test: compare against the constant NULL.
instruct testP_reg(rFlagsRegU cr, iRegP op1, immP0 zero)
%{
  match(Set cr (CmpP op1 zero));

  effect(DEF cr, USE op1, USE zero);

  ins_cost(INSN_COST);
  format %{ "cmp  $op1, 0\t // ptr" %}

  ins_encode(aarch64_enc_testp(op1));

  ins_pipe(icmp_reg_imm);
%}

// Compressed-oop null test: compare against the compressed NULL constant.
instruct testN_reg(rFlagsRegU cr, iRegN op1, immN0 zero)
%{
  match(Set cr (CmpN op1 zero));

  effect(DEF cr, USE op1, USE zero);

  ins_cost(INSN_COST);
  format %{ "cmp  $op1, 0\t // compressed ptr" %}

  ins_encode(aarch64_enc_testn(op1));

  ins_pipe(icmp_reg_imm);
%}
13310 
// FP comparisons
//
// n.b. CmpF/CmpD set a normal flags reg which then gets compared
// using normal cmpOp. See declaration of rFlagsReg for details.

// Float compare, register-register (FCMP Sn, Sm).
instruct compF_reg_reg(rFlagsReg cr, vRegF src1, vRegF src2)
%{
  match(Set cr (CmpF src1 src2));

  ins_cost(3 * INSN_COST);
  format %{ "fcmps $src1, $src2" %}

  ins_encode %{
    __ fcmps(as_FloatRegister($src1$$reg), as_FloatRegister($src2$$reg));
  %}

  ins_pipe(pipe_class_compare);
%}
13329 
// Float compare against literal zero (FCMP Sn, #0.0).
instruct compF_reg_zero(rFlagsReg cr, vRegF src1, immF0 src2)
%{
  match(Set cr (CmpF src1 src2));

  ins_cost(3 * INSN_COST);
  format %{ "fcmps $src1, 0.0" %}

  ins_encode %{
    // Plain 0.0: the previous 0.0D used a non-standard C++ literal suffix
    // (a gcc-ism) that other toolchains such as clang and MSVC reject.
    __ fcmps(as_FloatRegister($src1$$reg), 0.0);
  %}

  ins_pipe(pipe_class_compare);
%}
// Double compare, register-register (FCMP Dn, Dm).

instruct compD_reg_reg(rFlagsReg cr, vRegD src1, vRegD src2)
%{
  match(Set cr (CmpD src1 src2));

  ins_cost(3 * INSN_COST);
  format %{ "fcmpd $src1, $src2" %}

  ins_encode %{
    __ fcmpd(as_FloatRegister($src1$$reg), as_FloatRegister($src2$$reg));
  %}

  ins_pipe(pipe_class_compare);
%}
13358 
// Double compare against literal zero (FCMP Dn, #0.0).
instruct compD_reg_zero(rFlagsReg cr, vRegD src1, immD0 src2)
%{
  match(Set cr (CmpD src1 src2));

  ins_cost(3 * INSN_COST);
  format %{ "fcmpd $src1, 0.0" %}

  ins_encode %{
    // Plain 0.0: the previous 0.0D used a non-standard C++ literal suffix
    // (a gcc-ism) that other toolchains such as clang and MSVC reject.
    __ fcmpd(as_FloatRegister($src1$$reg), 0.0);
  %}

  ins_pipe(pipe_class_compare);
%}
13372 
// Three-way float compare (CmpF3): dst = -1 if src1 < src2 or unordered,
// 0 if equal, +1 if greater -- branch-free via CSINV/CSNEG.
instruct compF3_reg_reg(iRegINoSp dst, vRegF src1, vRegF src2, rFlagsReg cr)
%{
  match(Set dst (CmpF3 src1 src2));
  effect(KILL cr);

  ins_cost(5 * INSN_COST);
  format %{ "fcmps $src1, $src2\n\t"
            "csinvw($dst, zr, zr, eq)\n\t"
            "csnegw($dst, $dst, $dst, lt)"
  %}

  ins_encode %{
    // The unused 'Label done' and its bind() were dead code and have been
    // removed; the sequence below is straight-line.
    FloatRegister s1 = as_FloatRegister($src1$$reg);
    FloatRegister s2 = as_FloatRegister($src2$$reg);
    Register d = as_Register($dst$$reg);
    __ fcmps(s1, s2);
    // installs 0 if EQ else -1
    __ csinvw(d, zr, zr, Assembler::EQ);
    // keeps -1 if less or unordered else installs 1
    __ csnegw(d, d, d, Assembler::LT);
  %}

  ins_pipe(pipe_class_default);

%}
13400 
// Three-way double compare (CmpD3): dst = -1 if src1 < src2 or unordered,
// 0 if equal, +1 if greater -- branch-free via CSINV/CSNEG.
instruct compD3_reg_reg(iRegINoSp dst, vRegD src1, vRegD src2, rFlagsReg cr)
%{
  match(Set dst (CmpD3 src1 src2));
  effect(KILL cr);

  ins_cost(5 * INSN_COST);
  format %{ "fcmpd $src1, $src2\n\t"
            "csinvw($dst, zr, zr, eq)\n\t"
            "csnegw($dst, $dst, $dst, lt)"
  %}

  ins_encode %{
    // The unused 'Label done' and its bind() were dead code and have been
    // removed; the sequence below is straight-line.
    FloatRegister s1 = as_FloatRegister($src1$$reg);
    FloatRegister s2 = as_FloatRegister($src2$$reg);
    Register d = as_Register($dst$$reg);
    __ fcmpd(s1, s2);
    // installs 0 if EQ else -1
    __ csinvw(d, zr, zr, Assembler::EQ);
    // keeps -1 if less or unordered else installs 1
    __ csnegw(d, d, d, Assembler::LT);
  %}
  ins_pipe(pipe_class_default);

%}
13427 
// Three-way float compare against literal zero: dst = -1 / 0 / +1 as in
// compF3_reg_reg, with the second operand folded into FCMP Sn, #0.0.
instruct compF3_reg_immF0(iRegINoSp dst, vRegF src1, immF0 zero, rFlagsReg cr)
%{
  match(Set dst (CmpF3 src1 zero));
  effect(KILL cr);

  ins_cost(5 * INSN_COST);
  format %{ "fcmps $src1, 0.0\n\t"
            "csinvw($dst, zr, zr, eq)\n\t"
            "csnegw($dst, $dst, $dst, lt)"
  %}

  ins_encode %{
    // Dead 'Label done'/bind() removed; 0.0D's non-standard gcc-only
    // literal suffix replaced by plain 0.0.
    FloatRegister s1 = as_FloatRegister($src1$$reg);
    Register d = as_Register($dst$$reg);
    __ fcmps(s1, 0.0);
    // installs 0 if EQ else -1
    __ csinvw(d, zr, zr, Assembler::EQ);
    // keeps -1 if less or unordered else installs 1
    __ csnegw(d, d, d, Assembler::LT);
  %}

  ins_pipe(pipe_class_default);

%}
13454 
// Three-way double compare against literal zero: dst = -1 / 0 / +1 as in
// compD3_reg_reg, with the second operand folded into FCMP Dn, #0.0.
instruct compD3_reg_immD0(iRegINoSp dst, vRegD src1, immD0 zero, rFlagsReg cr)
%{
  match(Set dst (CmpD3 src1 zero));
  effect(KILL cr);

  ins_cost(5 * INSN_COST);
  format %{ "fcmpd $src1, 0.0\n\t"
            "csinvw($dst, zr, zr, eq)\n\t"
            "csnegw($dst, $dst, $dst, lt)"
  %}

  ins_encode %{
    // Dead 'Label done'/bind() removed; 0.0D's non-standard gcc-only
    // literal suffix replaced by plain 0.0.
    FloatRegister s1 = as_FloatRegister($src1$$reg);
    Register d = as_Register($dst$$reg);
    __ fcmpd(s1, 0.0);
    // installs 0 if EQ else -1
    __ csinvw(d, zr, zr, Assembler::EQ);
    // keeps -1 if less or unordered else installs 1
    __ csnegw(d, d, d, Assembler::LT);
  %}
  ins_pipe(pipe_class_default);

%}
13480 
// CmpLTMask: dst = (p < q) ? -1 : 0.  CSETW produces 0/1 from the signed
// compare, then subtracting from zr turns 1 into the all-ones mask.
instruct cmpLTMask_reg_reg(iRegINoSp dst, iRegIorL2I p, iRegIorL2I q, rFlagsReg cr)
%{
  match(Set dst (CmpLTMask p q));
  effect(KILL cr);

  ins_cost(3 * INSN_COST);

  format %{ "cmpw $p, $q\t# cmpLTMask\n\t"
            "csetw $dst, lt\n\t"
            "subw $dst, zr, $dst"
  %}

  ins_encode %{
    __ cmpw(as_Register($p$$reg), as_Register($q$$reg));
    __ csetw(as_Register($dst$$reg), Assembler::LT);
    __ subw(as_Register($dst$$reg), zr, as_Register($dst$$reg));
  %}

  ins_pipe(ialu_reg_reg);
%}

// Special case against zero: (src < 0) ? -1 : 0 is just an arithmetic
// shift right by 31, replicating the sign bit -- one instruction, no flags
// needed for the result (cr still listed as KILLed for the matched rule).
instruct cmpLTMask_reg_zero(iRegINoSp dst, iRegIorL2I src, immI0 zero, rFlagsReg cr)
%{
  match(Set dst (CmpLTMask src zero));
  effect(KILL cr);

  ins_cost(INSN_COST);

  format %{ "asrw $dst, $src, #31\t# cmpLTMask0" %}

  ins_encode %{
    __ asrw(as_Register($dst$$reg), as_Register($src$$reg), 31);
  %}

  ins_pipe(ialu_reg_shift);
%}
13517 
13518 // ============================================================================
13519 // Max and Min
13520 
13521 instruct minI_rReg(iRegINoSp dst, iRegI src1, iRegI src2, rFlagsReg cr)
13522 %{
13523   match(Set dst (MinI src1 src2));
13524 
13525   effect(DEF dst, USE src1, USE src2, KILL cr);
13526   size(8);
13527 
13528   ins_cost(INSN_COST * 3);
13529   format %{
13530     "cmpw $src1 $src2\t signed int\n\t"
13531     "cselw $dst, $src1, $src2 lt\t"
13532   %}
13533 
13534   ins_encode %{
13535     __ cmpw(as_Register($src1$$reg),
13536             as_Register($src2$$reg));
13537     __ cselw(as_Register($dst$$reg),
13538              as_Register($src1$$reg),
13539              as_Register($src2$$reg),
13540              Assembler::LT);
13541   %}
13542 
13543   ins_pipe(ialu_reg_reg);
13544 %}
13545 // FROM HERE
13546 
13547 instruct maxI_rReg(iRegINoSp dst, iRegI src1, iRegI src2, rFlagsReg cr)
13548 %{
13549   match(Set dst (MaxI src1 src2));
13550 
13551   effect(DEF dst, USE src1, USE src2, KILL cr);
13552   size(8);
13553 
13554   ins_cost(INSN_COST * 3);
13555   format %{
13556     "cmpw $src1 $src2\t signed int\n\t"
13557     "cselw $dst, $src1, $src2 gt\t"
13558   %}
13559 
13560   ins_encode %{
13561     __ cmpw(as_Register($src1$$reg),
13562             as_Register($src2$$reg));
13563     __ cselw(as_Register($dst$$reg),
13564              as_Register($src1$$reg),
13565              as_Register($src2$$reg),
13566              Assembler::GT);
13567   %}
13568 
13569   ins_pipe(ialu_reg_reg);
13570 %}
13571 
13572 // ============================================================================
13573 // Branch Instructions
13574 
13575 // Direct Branch.
13576 instruct branch(label lbl)
13577 %{
13578   match(Goto);
13579 
13580   effect(USE lbl);
13581 
13582   ins_cost(BRANCH_COST);
13583   format %{ "b  $lbl" %}
13584 
13585   ins_encode(aarch64_enc_b(lbl));
13586 
13587   ins_pipe(pipe_branch);
13588 %}
13589 
13590 // Conditional Near Branch
13591 instruct branchCon(cmpOp cmp, rFlagsReg cr, label lbl)
13592 %{
13593   // Same match rule as `branchConFar'.
13594   match(If cmp cr);
13595 
13596   effect(USE lbl);
13597 
13598   ins_cost(BRANCH_COST);
13599   // If set to 1 this indicates that the current instruction is a
13600   // short variant of a long branch. This avoids using this
13601   // instruction in first-pass matching. It will then only be used in
13602   // the `Shorten_branches' pass.
13603   // ins_short_branch(1);
13604   format %{ "b$cmp  $lbl" %}
13605 
13606   ins_encode(aarch64_enc_br_con(cmp, lbl));
13607 
13608   ins_pipe(pipe_branch_cond);
13609 %}
13610 
13611 // Conditional Near Branch Unsigned
13612 instruct branchConU(cmpOpU cmp, rFlagsRegU cr, label lbl)
13613 %{
13614   // Same match rule as `branchConFar'.
13615   match(If cmp cr);
13616 
13617   effect(USE lbl);
13618 
13619   ins_cost(BRANCH_COST);
13620   // If set to 1 this indicates that the current instruction is a
13621   // short variant of a long branch. This avoids using this
13622   // instruction in first-pass matching. It will then only be used in
13623   // the `Shorten_branches' pass.
13624   // ins_short_branch(1);
13625   format %{ "b$cmp  $lbl\t# unsigned" %}
13626 
13627   ins_encode(aarch64_enc_br_conU(cmp, lbl));
13628 
13629   ins_pipe(pipe_branch_cond);
13630 %}
13631 
// Make use of CBZ and CBNZ.  These instructions, as well as being
// shorter than (cmp; branch), have the additional benefit of not
// killing the flags.

// int ==/!= 0 branch: fuses the compare and branch into CBZW/CBNZW.
instruct cmpI_imm0_branch(cmpOpEqNe cmp, iRegIorL2I op1, immI0 op2, label labl, rFlagsReg cr) %{
  match(If cmp (CmpI op1 op2));
  effect(USE labl);

  ins_cost(BRANCH_COST);
  format %{ "cbw$cmp   $op1, $labl" %}
  ins_encode %{
    Label* L = $labl$$label;
    Assembler::Condition cond = (Assembler::Condition)$cmp$$cmpcode;
    if (cond == Assembler::EQ)
      __ cbzw($op1$$Register, *L);
    else
      __ cbnzw($op1$$Register, *L);
  %}
  ins_pipe(pipe_cmp_branch);
%}

// long ==/!= 0 branch: fuses the compare and branch into CBZ/CBNZ.
instruct cmpL_imm0_branch(cmpOpEqNe cmp, iRegL op1, immL0 op2, label labl, rFlagsReg cr) %{
  match(If cmp (CmpL op1 op2));
  effect(USE labl);

  ins_cost(BRANCH_COST);
  format %{ "cb$cmp   $op1, $labl" %}
  ins_encode %{
    Label* L = $labl$$label;
    Assembler::Condition cond = (Assembler::Condition)$cmp$$cmpcode;
    if (cond == Assembler::EQ)
      __ cbz($op1$$Register, *L);
    else
      __ cbnz($op1$$Register, *L);
  %}
  ins_pipe(pipe_cmp_branch);
%}

// pointer ==/!= NULL branch via CBZ/CBNZ.
instruct cmpP_imm0_branch(cmpOpEqNe cmp, iRegP op1, immP0 op2, label labl, rFlagsReg cr) %{
  match(If cmp (CmpP op1 op2));
  effect(USE labl);

  ins_cost(BRANCH_COST);
  format %{ "cb$cmp   $op1, $labl" %}
  ins_encode %{
    Label* L = $labl$$label;
    Assembler::Condition cond = (Assembler::Condition)$cmp$$cmpcode;
    if (cond == Assembler::EQ)
      __ cbz($op1$$Register, *L);
    else
      __ cbnz($op1$$Register, *L);
  %}
  ins_pipe(pipe_cmp_branch);
%}

// compressed-oop ==/!= 0 branch via CBZW/CBNZW (narrow oops are 32-bit).
instruct cmpN_imm0_branch(cmpOpEqNe cmp, iRegN op1, immN0 op2, label labl, rFlagsReg cr) %{
  match(If cmp (CmpN op1 op2));
  effect(USE labl);

  ins_cost(BRANCH_COST);
  format %{ "cbw$cmp   $op1, $labl" %}
  ins_encode %{
    Label* L = $labl$$label;
    Assembler::Condition cond = (Assembler::Condition)$cmp$$cmpcode;
    if (cond == Assembler::EQ)
      __ cbzw($op1$$Register, *L);
    else
      __ cbnzw($op1$$Register, *L);
  %}
  ins_pipe(pipe_cmp_branch);
%}

// Null test of a DecodeN'd oop: the decoded pointer is NULL iff the narrow
// oop is zero, so test the narrow form directly and skip the decode.
instruct cmpP_narrowOop_imm0_branch(cmpOpEqNe cmp, iRegN oop, immP0 zero, label labl, rFlagsReg cr) %{
  match(If cmp (CmpP (DecodeN oop) zero));
  effect(USE labl);

  ins_cost(BRANCH_COST);
  format %{ "cb$cmp   $oop, $labl" %}
  ins_encode %{
    Label* L = $labl$$label;
    Assembler::Condition cond = (Assembler::Condition)$cmp$$cmpcode;
    if (cond == Assembler::EQ)
      __ cbzw($oop$$Register, *L);
    else
      __ cbnzw($oop$$Register, *L);
  %}
  ins_pipe(pipe_cmp_branch);
%}

// Unsigned int vs 0 branch.  Only eq/ne/lt/ge tests are accepted
// (cmpOpUEqNeLtGe); EQ and LS both reduce to "is zero" for an unsigned
// compare against 0, hence the combined test.
// NOTE(review): confirm the lt/ge->LS/HI cmpcode mapping in cmpOpUEqNeLtGe.
instruct cmpUI_imm0_branch(cmpOpUEqNeLtGe cmp, iRegIorL2I op1, immI0 op2, label labl, rFlagsRegU cr) %{
  match(If cmp (CmpU op1 op2));
  effect(USE labl);

  ins_cost(BRANCH_COST);
  format %{ "cbw$cmp   $op1, $labl" %}
  ins_encode %{
    Label* L = $labl$$label;
    Assembler::Condition cond = (Assembler::Condition)$cmp$$cmpcode;
    if (cond == Assembler::EQ || cond == Assembler::LS)
      __ cbzw($op1$$Register, *L);
    else
      __ cbnzw($op1$$Register, *L);
  %}
  ins_pipe(pipe_cmp_branch);
%}

// Unsigned long vs 0 branch; 64-bit twin of cmpUI_imm0_branch.
instruct cmpUL_imm0_branch(cmpOpUEqNeLtGe cmp, iRegL op1, immL0 op2, label labl, rFlagsRegU cr) %{
  match(If cmp (CmpUL op1 op2));
  effect(USE labl);

  ins_cost(BRANCH_COST);
  format %{ "cb$cmp   $op1, $labl" %}
  ins_encode %{
    Label* L = $labl$$label;
    Assembler::Condition cond = (Assembler::Condition)$cmp$$cmpcode;
    if (cond == Assembler::EQ || cond == Assembler::LS)
      __ cbz($op1$$Register, *L);
    else
      __ cbnz($op1$$Register, *L);
  %}
  ins_pipe(pipe_cmp_branch);
%}
13754 
// Test bit and Branch

// Patterns for short (< 32KiB) variants

// long < 0 / >= 0 branch: the sign test is just bit 63, so emit a
// test-bit-and-branch (tbr maps NE->TBNZ "bit set", EQ->TBZ "bit clear").
instruct cmpL_branch_sign(cmpOpLtGe cmp, iRegL op1, immL0 op2, label labl) %{
  match(If cmp (CmpL op1 op2));
  effect(USE labl);

  ins_cost(BRANCH_COST);
  format %{ "cb$cmp   $op1, $labl # long" %}
  ins_encode %{
    Label* L = $labl$$label;
    Assembler::Condition cond =
      ((Assembler::Condition)$cmp$$cmpcode == Assembler::LT) ? Assembler::NE : Assembler::EQ;
    __ tbr(cond, $op1$$Register, 63, *L);
  %}
  ins_pipe(pipe_cmp_branch);
  ins_short_branch(1);
%}

// int < 0 / >= 0 branch: sign test on bit 31 via TBZ/TBNZ.
instruct cmpI_branch_sign(cmpOpLtGe cmp, iRegIorL2I op1, immI0 op2, label labl) %{
  match(If cmp (CmpI op1 op2));
  effect(USE labl);

  ins_cost(BRANCH_COST);
  format %{ "cb$cmp   $op1, $labl # int" %}
  ins_encode %{
    Label* L = $labl$$label;
    Assembler::Condition cond =
      ((Assembler::Condition)$cmp$$cmpcode == Assembler::LT) ? Assembler::NE : Assembler::EQ;
    __ tbr(cond, $op1$$Register, 31, *L);
  %}
  ins_pipe(pipe_cmp_branch);
  ins_short_branch(1);
%}

// (long & single-bit-mask) ==/!= 0 branch: the predicate requires the AND
// mask to be a power of two, so the test collapses to TBZ/TBNZ on that bit.
instruct cmpL_branch_bit(cmpOpEqNe cmp, iRegL op1, immL op2, immL0 op3, label labl) %{
  match(If cmp (CmpL (AndL op1 op2) op3));
  predicate(is_power_of_2(n->in(2)->in(1)->in(2)->get_long()));
  effect(USE labl);

  ins_cost(BRANCH_COST);
  format %{ "tb$cmp   $op1, $op2, $labl" %}
  ins_encode %{
    Label* L = $labl$$label;
    Assembler::Condition cond = (Assembler::Condition)$cmp$$cmpcode;
    int bit = exact_log2($op2$$constant);
    __ tbr(cond, $op1$$Register, bit, *L);
  %}
  ins_pipe(pipe_cmp_branch);
  ins_short_branch(1);
%}

// (int & single-bit-mask) ==/!= 0 branch; 32-bit twin of cmpL_branch_bit.
instruct cmpI_branch_bit(cmpOpEqNe cmp, iRegIorL2I op1, immI op2, immI0 op3, label labl) %{
  match(If cmp (CmpI (AndI op1 op2) op3));
  predicate(is_power_of_2(n->in(2)->in(1)->in(2)->get_int()));
  effect(USE labl);

  ins_cost(BRANCH_COST);
  format %{ "tb$cmp   $op1, $op2, $labl" %}
  ins_encode %{
    Label* L = $labl$$label;
    Assembler::Condition cond = (Assembler::Condition)$cmp$$cmpcode;
    int bit = exact_log2($op2$$constant);
    __ tbr(cond, $op1$$Register, bit, *L);
  %}
  ins_pipe(pipe_cmp_branch);
  ins_short_branch(1);
%}
13823 
// And far variants
//
// Same sign-bit/single-bit tests as above, but emitted with the far
// argument (/*far*/true) so the assembler handles targets beyond the
// short test-and-branch range; these have no ins_short_branch attribute.

// Far branch on the sign bit (63) of a long compared with zero.
instruct far_cmpL_branch_sign(cmpOpLtGe cmp, iRegL op1, immL0 op2, label labl) %{
  match(If cmp (CmpL op1 op2));
  effect(USE labl);

  ins_cost(BRANCH_COST);
  format %{ "cb$cmp   $op1, $labl # long" %}
  ins_encode %{
    Label* L = $labl$$label;
    // LT -> branch when the sign bit is set (NE), GE -> when clear (EQ).
    Assembler::Condition cond =
      ((Assembler::Condition)$cmp$$cmpcode == Assembler::LT) ? Assembler::NE : Assembler::EQ;
    __ tbr(cond, $op1$$Register, 63, *L, /*far*/true);
  %}
  ins_pipe(pipe_cmp_branch);
%}

// Far branch on the sign bit (31) of an int compared with zero.
instruct far_cmpI_branch_sign(cmpOpLtGe cmp, iRegIorL2I op1, immI0 op2, label labl) %{
  match(If cmp (CmpI op1 op2));
  effect(USE labl);

  ins_cost(BRANCH_COST);
  format %{ "cb$cmp   $op1, $labl # int" %}
  ins_encode %{
    Label* L = $labl$$label;
    Assembler::Condition cond =
      ((Assembler::Condition)$cmp$$cmpcode == Assembler::LT) ? Assembler::NE : Assembler::EQ;
    __ tbr(cond, $op1$$Register, 31, *L, /*far*/true);
  %}
  ins_pipe(pipe_cmp_branch);
%}

// Far branch on a single bit of a long (op2 must be a power of two).
instruct far_cmpL_branch_bit(cmpOpEqNe cmp, iRegL op1, immL op2, immL0 op3, label labl) %{
  match(If cmp (CmpL (AndL op1 op2) op3));
  predicate(is_power_of_2(n->in(2)->in(1)->in(2)->get_long()));
  effect(USE labl);

  ins_cost(BRANCH_COST);
  format %{ "tb$cmp   $op1, $op2, $labl" %}
  ins_encode %{
    Label* L = $labl$$label;
    Assembler::Condition cond = (Assembler::Condition)$cmp$$cmpcode;
    // NOTE(review): exact_log2 on a 64-bit constant -- confirm it covers
    // the full long range here (newer code uses exact_log2_long).
    int bit = exact_log2($op2$$constant);
    __ tbr(cond, $op1$$Register, bit, *L, /*far*/true);
  %}
  ins_pipe(pipe_cmp_branch);
%}

// Far branch on a single bit of an int (op2 must be a power of two).
instruct far_cmpI_branch_bit(cmpOpEqNe cmp, iRegIorL2I op1, immI op2, immI0 op3, label labl) %{
  match(If cmp (CmpI (AndI op1 op2) op3));
  predicate(is_power_of_2(n->in(2)->in(1)->in(2)->get_int()));
  effect(USE labl);

  ins_cost(BRANCH_COST);
  format %{ "tb$cmp   $op1, $op2, $labl" %}
  ins_encode %{
    Label* L = $labl$$label;
    Assembler::Condition cond = (Assembler::Condition)$cmp$$cmpcode;
    int bit = exact_log2($op2$$constant);
    __ tbr(cond, $op1$$Register, bit, *L, /*far*/true);
  %}
  ins_pipe(pipe_cmp_branch);
%}
13886 
// Test bits
//
// Compare (op1 & op2) against zero, setting only the flags register.
// The cmp operand does not appear in any encoding below; presumably it
// is carried so the matcher can pair these with the consuming branch.

// tst of a long against an immediate mask.  The predicate requires the
// constant to be encodable as a 64-bit logical immediate.
instruct cmpL_and(cmpOp cmp, iRegL op1, immL op2, immL0 op3, rFlagsReg cr) %{
  match(Set cr (CmpL (AndL op1 op2) op3));
  predicate(Assembler::operand_valid_for_logical_immediate
            (/*is_32*/false, n->in(1)->in(2)->get_long()));

  ins_cost(INSN_COST);
  format %{ "tst $op1, $op2 # long" %}
  ins_encode %{
    __ tst($op1$$Register, $op2$$constant);
  %}
  ins_pipe(ialu_reg_reg);
%}

// tst of an int against an immediate mask (32-bit logical immediate).
instruct cmpI_and(cmpOp cmp, iRegIorL2I op1, immI op2, immI0 op3, rFlagsReg cr) %{
  match(Set cr (CmpI (AndI op1 op2) op3));
  predicate(Assembler::operand_valid_for_logical_immediate
            (/*is_32*/true, n->in(1)->in(2)->get_int()));

  ins_cost(INSN_COST);
  format %{ "tst $op1, $op2 # int" %}
  ins_encode %{
    __ tstw($op1$$Register, $op2$$constant);
  %}
  ins_pipe(ialu_reg_reg);
%}

// tst of a long against a register mask (no encodability restriction).
instruct cmpL_and_reg(cmpOp cmp, iRegL op1, iRegL op2, immL0 op3, rFlagsReg cr) %{
  match(Set cr (CmpL (AndL op1 op2) op3));

  ins_cost(INSN_COST);
  format %{ "tst $op1, $op2 # long" %}
  ins_encode %{
    __ tst($op1$$Register, $op2$$Register);
  %}
  ins_pipe(ialu_reg_reg);
%}

// tstw of an int against a register mask.
instruct cmpI_and_reg(cmpOp cmp, iRegIorL2I op1, iRegIorL2I op2, immI0 op3, rFlagsReg cr) %{
  match(Set cr (CmpI (AndI op1 op2) op3));

  ins_cost(INSN_COST);
  format %{ "tstw $op1, $op2 # int" %}
  ins_encode %{
    __ tstw($op1$$Register, $op2$$Register);
  %}
  ins_pipe(ialu_reg_reg);
%}
13936 
13937 
13938 // Conditional Far Branch
13939 // Conditional Far Branch Unsigned
13940 // TODO: fixme
13941 
// counted loop end branch near
// Conditional branch closing a counted loop; uses the flags produced by
// the loop's trip-count compare (signed condition codes).
instruct branchLoopEnd(cmpOp cmp, rFlagsReg cr, label lbl)
%{
  match(CountedLoopEnd cmp cr);

  effect(USE lbl);

  ins_cost(BRANCH_COST);
  // short variant.
  // ins_short_branch(1);
  format %{ "b$cmp $lbl \t// counted loop end" %}

  ins_encode(aarch64_enc_br_con(cmp, lbl));

  ins_pipe(pipe_branch);
%}

// counted loop end branch near Unsigned
// Same as branchLoopEnd but with unsigned condition codes (cmpOpU) and
// the matching unsigned flags operand.
instruct branchLoopEndU(cmpOpU cmp, rFlagsRegU cr, label lbl)
%{
  match(CountedLoopEnd cmp cr);

  effect(USE lbl);

  ins_cost(BRANCH_COST);
  // short variant.
  // ins_short_branch(1);
  format %{ "b$cmp $lbl \t// counted loop end unsigned" %}

  ins_encode(aarch64_enc_br_conU(cmp, lbl));

  ins_pipe(pipe_branch);
%}
13975 
13976 // counted loop end branch far
13977 // counted loop end branch far unsigned
13978 // TODO: fixme
13979 
13980 // ============================================================================
13981 // inlined locking and unlocking
13982 
13983 instruct cmpFastLock(rFlagsReg cr, iRegP object, iRegP box, iRegPNoSp tmp, iRegPNoSp tmp2)
13984 %{
13985   match(Set cr (FastLock object box));
13986   effect(TEMP tmp, TEMP tmp2);
13987 
13988   // TODO
13989   // identify correct cost
13990   ins_cost(5 * INSN_COST);
13991   format %{ "fastlock $object,$box\t! kills $tmp,$tmp2" %}
13992 
13993   ins_encode(aarch64_enc_fast_lock(object, box, tmp, tmp2));
13994 
13995   ins_pipe(pipe_serial);
13996 %}
13997 
13998 instruct cmpFastUnlock(rFlagsReg cr, iRegP object, iRegP box, iRegPNoSp tmp, iRegPNoSp tmp2)
13999 %{
14000   match(Set cr (FastUnlock object box));
14001   effect(TEMP tmp, TEMP tmp2);
14002 
14003   ins_cost(5 * INSN_COST);
14004   format %{ "fastunlock $object,$box\t! kills $tmp, $tmp2" %}
14005 
14006   ins_encode(aarch64_enc_fast_unlock(object, box, tmp, tmp2));
14007 
14008   ins_pipe(pipe_serial);
14009 %}
14010 
14011 
14012 // ============================================================================
14013 // Safepoint Instructions
14014 
14015 // TODO
14016 // provide a near and far version of this code
14017 
// Safepoint poll: a load from the polling page into zr.  The poll_type
// relocation lets the VM identify the faulting PC when the page is
// protected, diverting the thread to the safepoint handler.
instruct safePoint(rFlagsReg cr, iRegP poll)
%{
  match(SafePoint poll);
  effect(KILL cr);

  format %{
    "ldrw zr, [$poll]\t# Safepoint: poll for GC"
  %}
  ins_encode %{
    __ read_polling_page(as_Register($poll$$reg), relocInfo::poll_type);
  %}
  ins_pipe(pipe_serial); // ins_pipe(iload_reg_mem);
%}
14031 
14032 
14033 // ============================================================================
14034 // Procedure Call/Return Instructions
14035 
// Call Java Static Instruction

// Direct call to a statically-bound Java method; meth carries the
// target.  The call epilog encoding is emitted immediately after.
instruct CallStaticJavaDirect(method meth)
%{
  match(CallStaticJava);

  effect(USE meth);

  ins_cost(CALL_COST);

  format %{ "call,static $meth \t// ==> " %}

  ins_encode( aarch64_enc_java_static_call(meth),
              aarch64_enc_call_epilog );

  ins_pipe(pipe_class_call);
%}
14053 
// TO HERE

// Call Java Dynamic Instruction
// Dynamically-dispatched Java call (presumably via an inline cache --
// see aarch64_enc_java_dynamic_call for the dispatch details).
instruct CallDynamicJavaDirect(method meth)
%{
  match(CallDynamicJava);

  effect(USE meth);

  ins_cost(CALL_COST);

  format %{ "CALL,dynamic $meth \t// ==> " %}

  ins_encode( aarch64_enc_java_dynamic_call(meth),
               aarch64_enc_call_epilog );

  ins_pipe(pipe_class_call);
%}
14072 
// Call Runtime Instruction

// Call from compiled Java code into the VM runtime, using the shared
// aarch64_enc_java_to_runtime encoding.
instruct CallRuntimeDirect(method meth)
%{
  match(CallRuntime);

  effect(USE meth);

  ins_cost(CALL_COST);

  format %{ "CALL, runtime $meth" %}

  ins_encode( aarch64_enc_java_to_runtime(meth) );

  ins_pipe(pipe_class_call);
%}

// Call Runtime Leaf Instruction
// Leaf runtime call (no Java-visible side effects expected by the IR);
// same encoding as CallRuntimeDirect.
instruct CallLeafDirect(method meth)
%{
  match(CallLeaf);

  effect(USE meth);

  ins_cost(CALL_COST);

  format %{ "CALL, runtime leaf $meth" %}

  ins_encode( aarch64_enc_java_to_runtime(meth) );

  ins_pipe(pipe_class_call);
%}

// Call Runtime Leaf Instruction (no FP)
// Leaf runtime call that does not use floating point; same encoding.
instruct CallLeafNoFPDirect(method meth)
%{
  match(CallLeafNoFP);

  effect(USE meth);

  ins_cost(CALL_COST);

  format %{ "CALL, runtime leaf nofp $meth" %}

  ins_encode( aarch64_enc_java_to_runtime(meth) );

  ins_pipe(pipe_class_call);
%}
14123 
// Tail Call; Jump from runtime stub to Java code.
// Also known as an 'interprocedural jump'.
// Target of jump will eventually return to caller.
// TailJump below removes the return address.
instruct TailCalljmpInd(iRegPNoSp jump_target, inline_cache_RegP method_oop)
%{
  match(TailCall jump_target method_oop);

  ins_cost(CALL_COST);

  format %{ "br $jump_target\t# $method_oop holds method oop" %}

  ins_encode(aarch64_enc_tail_call(jump_target));

  ins_pipe(pipe_class_call);
%}

// Tail jump used for exception forwarding: ex_oop (pinned to r0)
// carries the exception oop to the jump target.
instruct TailjmpInd(iRegPNoSp jump_target, iRegP_R0 ex_oop)
%{
  match(TailJump jump_target ex_oop);

  ins_cost(CALL_COST);

  format %{ "br $jump_target\t# $ex_oop holds exception oop" %}

  ins_encode(aarch64_enc_tail_jmp(jump_target));

  ins_pipe(pipe_class_call);
%}
14153 
// Create exception oop: created by stack-crawling runtime code.
// Created exception is now available to this handler, and is setup
// just prior to jumping to this handler. No code emitted.
// TODO check
// should ex_oop be in r0? intel uses rax, ppc cannot use r0 so uses rarg1
instruct CreateException(iRegP_R0 ex_oop)
%{
  match(Set ex_oop (CreateEx));

  format %{ " -- \t// exception oop; no code emitted" %}

  // Zero-size placeholder: the oop is already in r0 when we get here.
  size(0);

  ins_encode( /*empty*/ );

  ins_pipe(pipe_class_empty);
%}

// Rethrow exception: The exception oop will come in the first
// argument position. Then JUMP (not call) to the rethrow stub code.
instruct RethrowException() %{
  match(Rethrow);
  ins_cost(CALL_COST);

  format %{ "b rethrow_stub" %}

  ins_encode( aarch64_enc_rethrow() );

  ins_pipe(pipe_class_call);
%}
14184 
14185 
// Return Instruction
// epilog node loads ret address into lr as part of frame pop
instruct Ret()
%{
  match(Return);

  format %{ "ret\t// return register" %}

  ins_encode( aarch64_enc_ret() );

  ins_pipe(pipe_branch);
%}

// Die now.
// Emits a trapping instruction for paths that must never execute.
instruct ShouldNotReachHere() %{
  match(Halt);

  ins_cost(CALL_COST);
  format %{ "ShouldNotReachHere" %}

  ins_encode %{
    // +1 so NativeInstruction::is_sigill_zombie_not_entrant() doesn't
    // return true
    __ dpcs1(0xdead + 1);
  %}

  ins_pipe(pipe_class_default);
%}
14214 
14215 // ============================================================================
14216 // Partial Subtype Check
14217 //
// Scan the subklass's secondary-supers (superklass) array for an
// occurrence of the superklass.  Set a hidden internal cache on a hit
// (cache is checked with exposed code in gen_subtype_check()).  Return
// NZ for a miss or zero for a hit.  The encoding ALSO sets flags.
14222 
// Result-producing form: result is zeroed on a hit (opcode 0x1 requests
// the zeroing), left non-zero on a miss.  Flags and temp are clobbered.
instruct partialSubtypeCheck(iRegP_R4 sub, iRegP_R0 super, iRegP_R2 temp, iRegP_R5 result, rFlagsReg cr)
%{
  match(Set result (PartialSubtypeCheck sub super));
  effect(KILL cr, KILL temp);

  ins_cost(1100);  // slightly larger than the next version
  format %{ "partialSubtypeCheck $result, $sub, $super" %}

  ins_encode(aarch64_enc_partial_subtype_check(sub, super, temp, result));

  opcode(0x1); // Force zero of result reg on hit
  ins_pipe(pipe_class_memory);
%}

// Flags-only form, matched when the check's result is compared directly
// against zero; result is not zeroed (opcode 0x0), only flags are used.
instruct partialSubtypeCheckVsZero(iRegP_R4 sub, iRegP_R0 super, iRegP_R2 temp, iRegP_R5 result, immP0 zero, rFlagsReg cr)
%{
  match(Set cr (CmpP (PartialSubtypeCheck sub super) zero));
  effect(KILL temp, KILL result);

  ins_cost(1100);  // slightly larger than the next version
  format %{ "partialSubtypeCheck $result, $sub, $super == 0" %}

  ins_encode(aarch64_enc_partial_subtype_check(sub, super, temp, result));

  opcode(0x0); // Don't zero result reg on hit
  ins_pipe(pipe_class_memory);
%}
14252 
// String compare, both strings UTF-16 (UU encoding).
instruct string_compareU(iRegP_R1 str1, iRegI_R2 cnt1, iRegP_R3 str2, iRegI_R4 cnt2,
                        iRegI_R0 result, iRegP_R10 tmp1, iRegL_R11 tmp2, rFlagsReg cr)
%{
  predicate(((StrCompNode*)n)->encoding() == StrIntrinsicNode::UU);
  match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
  effect(KILL tmp1, KILL tmp2, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);

  format %{ "String Compare $str1,$cnt1,$str2,$cnt2 -> $result   # KILL $tmp1" %}
  ins_encode %{
    // Count is in 8-bit bytes; non-Compact chars are 16 bits.
    // No vector temps are needed for the same-width case (fnoreg x3).
    __ string_compare($str1$$Register, $str2$$Register,
                      $cnt1$$Register, $cnt2$$Register, $result$$Register,
                      $tmp1$$Register, $tmp2$$Register,
                      fnoreg, fnoreg, fnoreg, StrIntrinsicNode::UU);
  %}
  ins_pipe(pipe_class_memory);
%}

// String compare, both strings Latin-1 (LL encoding).
instruct string_compareL(iRegP_R1 str1, iRegI_R2 cnt1, iRegP_R3 str2, iRegI_R4 cnt2,
                        iRegI_R0 result, iRegP_R10 tmp1, iRegL_R11 tmp2, rFlagsReg cr)
%{
  predicate(((StrCompNode*)n)->encoding() == StrIntrinsicNode::LL);
  match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
  effect(KILL tmp1, KILL tmp2, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);

  format %{ "String Compare $str1,$cnt1,$str2,$cnt2 -> $result   # KILL $tmp1" %}
  ins_encode %{
    __ string_compare($str1$$Register, $str2$$Register,
                      $cnt1$$Register, $cnt2$$Register, $result$$Register,
                      $tmp1$$Register, $tmp2$$Register,
                      fnoreg, fnoreg, fnoreg, StrIntrinsicNode::LL);
  %}
  ins_pipe(pipe_class_memory);
%}

// String compare, mixed encodings (str1 UTF-16, str2 Latin-1).  The
// width conversion uses three vector temporaries.
instruct string_compareUL(iRegP_R1 str1, iRegI_R2 cnt1, iRegP_R3 str2, iRegI_R4 cnt2,
                        iRegI_R0 result, iRegP_R10 tmp1, iRegL_R11 tmp2,
                        vRegD_V0 vtmp1, vRegD_V1 vtmp2, vRegD_V2 vtmp3, rFlagsReg cr)
%{
  predicate(((StrCompNode*)n)->encoding() == StrIntrinsicNode::UL);
  match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
  effect(KILL tmp1, KILL tmp2, KILL vtmp1, KILL vtmp2, KILL vtmp3,
         USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);

  format %{ "String Compare $str1,$cnt1,$str2,$cnt2 -> $result   # KILL $tmp1, $tmp2, $vtmp1, $vtmp2, $vtmp3" %}
  ins_encode %{
    __ string_compare($str1$$Register, $str2$$Register,
                      $cnt1$$Register, $cnt2$$Register, $result$$Register,
                      $tmp1$$Register, $tmp2$$Register,
                      $vtmp1$$FloatRegister, $vtmp2$$FloatRegister,
                      $vtmp3$$FloatRegister, StrIntrinsicNode::UL);
  %}
  ins_pipe(pipe_class_memory);
%}

// String compare, mixed encodings (str1 Latin-1, str2 UTF-16).
instruct string_compareLU(iRegP_R1 str1, iRegI_R2 cnt1, iRegP_R3 str2, iRegI_R4 cnt2,
                        iRegI_R0 result, iRegP_R10 tmp1, iRegL_R11 tmp2,
                        vRegD_V0 vtmp1, vRegD_V1 vtmp2, vRegD_V2 vtmp3, rFlagsReg cr)
%{
  predicate(((StrCompNode*)n)->encoding() == StrIntrinsicNode::LU);
  match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
  effect(KILL tmp1, KILL tmp2, KILL vtmp1, KILL vtmp2, KILL vtmp3,
         USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);

  format %{ "String Compare $str1,$cnt1,$str2,$cnt2 -> $result   # KILL $tmp1, $tmp2, $vtmp1, $vtmp2, $vtmp3" %}
  ins_encode %{
    __ string_compare($str1$$Register, $str2$$Register,
                      $cnt1$$Register, $cnt2$$Register, $result$$Register,
                      $tmp1$$Register, $tmp2$$Register,
                      $vtmp1$$FloatRegister, $vtmp2$$FloatRegister,
                      $vtmp3$$FloatRegister,StrIntrinsicNode::LU);
  %}
  ins_pipe(pipe_class_memory);
%}
14327 
// String.indexOf with a variable-length needle, both strings UTF-16.
// The -1 passed as icnt2 indicates the needle length is in a register.
instruct string_indexofUU(iRegP_R1 str1, iRegI_R4 cnt1, iRegP_R3 str2, iRegI_R2 cnt2,
       iRegI_R0 result, iRegINoSp tmp1, iRegINoSp tmp2, iRegINoSp tmp3,
       iRegINoSp tmp4, iRegINoSp tmp5, iRegINoSp tmp6, rFlagsReg cr)
%{
  predicate(((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UU);
  match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 cnt2)));
  effect(USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2,
         TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, TEMP tmp5, TEMP tmp6, KILL cr);
  format %{ "String IndexOf $str1,$cnt1,$str2,$cnt2 -> $result (UU)" %}

  ins_encode %{
    __ string_indexof($str1$$Register, $str2$$Register,
                      $cnt1$$Register, $cnt2$$Register,
                      $tmp1$$Register, $tmp2$$Register,
                      $tmp3$$Register, $tmp4$$Register,
                      $tmp5$$Register, $tmp6$$Register,
                      -1, $result$$Register, StrIntrinsicNode::UU);
  %}
  ins_pipe(pipe_class_memory);
%}

// As above, both strings Latin-1.
instruct string_indexofLL(iRegP_R1 str1, iRegI_R4 cnt1, iRegP_R3 str2, iRegI_R2 cnt2,
       iRegI_R0 result, iRegINoSp tmp1, iRegINoSp tmp2, iRegINoSp tmp3,
       iRegINoSp tmp4, iRegINoSp tmp5, iRegINoSp tmp6, rFlagsReg cr)
%{
  predicate(((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::LL);
  match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 cnt2)));
  effect(USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2,
         TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, TEMP tmp5, TEMP tmp6, KILL cr);
  format %{ "String IndexOf $str1,$cnt1,$str2,$cnt2 -> $result (LL)" %}

  ins_encode %{
    __ string_indexof($str1$$Register, $str2$$Register,
                      $cnt1$$Register, $cnt2$$Register,
                      $tmp1$$Register, $tmp2$$Register,
                      $tmp3$$Register, $tmp4$$Register,
                      $tmp5$$Register, $tmp6$$Register,
                      -1, $result$$Register, StrIntrinsicNode::LL);
  %}
  ins_pipe(pipe_class_memory);
%}

// As above, mixed encodings (UL).
instruct string_indexofUL(iRegP_R1 str1, iRegI_R4 cnt1, iRegP_R3 str2, iRegI_R2 cnt2,
       iRegI_R0 result, iRegINoSp tmp1, iRegINoSp tmp2, iRegINoSp tmp3,
       iRegINoSp tmp4, iRegINoSp tmp5, iRegINoSp tmp6, rFlagsReg cr)
%{
  predicate(((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UL);
  match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 cnt2)));
  effect(USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2,
         TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, TEMP tmp5, TEMP tmp6, KILL cr);
  format %{ "String IndexOf $str1,$cnt1,$str2,$cnt2 -> $result (UL)" %}

  ins_encode %{
    __ string_indexof($str1$$Register, $str2$$Register,
                      $cnt1$$Register, $cnt2$$Register,
                      $tmp1$$Register, $tmp2$$Register,
                      $tmp3$$Register, $tmp4$$Register,
                      $tmp5$$Register, $tmp6$$Register,
                      -1, $result$$Register, StrIntrinsicNode::UL);
  %}
  ins_pipe(pipe_class_memory);
%}
14390 
// String.indexOf with a small constant-length needle (immI_le_4: at
// most 4 chars), both strings UTF-16.  The constant length is passed as
// icnt2 and the needle-count register slots are zr.
instruct string_indexof_conUU(iRegP_R1 str1, iRegI_R4 cnt1, iRegP_R3 str2,
                 immI_le_4 int_cnt2, iRegI_R0 result, iRegINoSp tmp1, iRegINoSp tmp2,
                 iRegINoSp tmp3, iRegINoSp tmp4, rFlagsReg cr)
%{
  predicate(((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UU);
  match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 int_cnt2)));
  effect(USE_KILL str1, USE_KILL str2, USE_KILL cnt1,
         TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, KILL cr);
  format %{ "String IndexOf $str1,$cnt1,$str2,$int_cnt2 -> $result (UU)" %}

  ins_encode %{
    int icnt2 = (int)$int_cnt2$$constant;
    __ string_indexof($str1$$Register, $str2$$Register,
                      $cnt1$$Register, zr,
                      $tmp1$$Register, $tmp2$$Register,
                      $tmp3$$Register, $tmp4$$Register, zr, zr,
                      icnt2, $result$$Register, StrIntrinsicNode::UU);
  %}
  ins_pipe(pipe_class_memory);
%}

// As above, both strings Latin-1.
instruct string_indexof_conLL(iRegP_R1 str1, iRegI_R4 cnt1, iRegP_R3 str2,
                 immI_le_4 int_cnt2, iRegI_R0 result, iRegINoSp tmp1, iRegINoSp tmp2,
                 iRegINoSp tmp3, iRegINoSp tmp4, rFlagsReg cr)
%{
  predicate(((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::LL);
  match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 int_cnt2)));
  effect(USE_KILL str1, USE_KILL str2, USE_KILL cnt1,
         TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, KILL cr);
  format %{ "String IndexOf $str1,$cnt1,$str2,$int_cnt2 -> $result (LL)" %}

  ins_encode %{
    int icnt2 = (int)$int_cnt2$$constant;
    __ string_indexof($str1$$Register, $str2$$Register,
                      $cnt1$$Register, zr,
                      $tmp1$$Register, $tmp2$$Register,
                      $tmp3$$Register, $tmp4$$Register, zr, zr,
                      icnt2, $result$$Register, StrIntrinsicNode::LL);
  %}
  ins_pipe(pipe_class_memory);
%}

// Mixed-encoding constant case; note the needle is restricted to
// exactly one char (immI_1) for UL.
instruct string_indexof_conUL(iRegP_R1 str1, iRegI_R4 cnt1, iRegP_R3 str2,
                 immI_1 int_cnt2, iRegI_R0 result, iRegINoSp tmp1, iRegINoSp tmp2,
                 iRegINoSp tmp3, iRegINoSp tmp4, rFlagsReg cr)
%{
  predicate(((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UL);
  match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 int_cnt2)));
  effect(USE_KILL str1, USE_KILL str2, USE_KILL cnt1,
         TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, KILL cr);
  format %{ "String IndexOf $str1,$cnt1,$str2,$int_cnt2 -> $result (UL)" %}

  ins_encode %{
    int icnt2 = (int)$int_cnt2$$constant;
    __ string_indexof($str1$$Register, $str2$$Register,
                      $cnt1$$Register, zr,
                      $tmp1$$Register, $tmp2$$Register,
                      $tmp3$$Register, $tmp4$$Register, zr, zr,
                      icnt2, $result$$Register, StrIntrinsicNode::UL);
  %}
  ins_pipe(pipe_class_memory);
%}
14453 
// String.indexOf(char) over a UTF-16 string: find the first occurrence
// of the char in ch within str1[0..cnt1).
instruct string_indexofU_char(iRegP_R1 str1, iRegI_R2 cnt1, iRegI_R3 ch,
                              iRegI_R0 result, iRegINoSp tmp1, iRegINoSp tmp2,
                              iRegINoSp tmp3, rFlagsReg cr)
%{
  match(Set result (StrIndexOfChar (Binary str1 cnt1) ch));
  effect(USE_KILL str1, USE_KILL cnt1, USE_KILL ch,
         TEMP tmp1, TEMP tmp2, TEMP tmp3, KILL cr);

  format %{ "String IndexOf char[] $str1,$cnt1,$ch -> $result" %}

  ins_encode %{
    __ string_indexof_char($str1$$Register, $cnt1$$Register, $ch$$Register,
                           $result$$Register, $tmp1$$Register, $tmp2$$Register,
                           $tmp3$$Register);
  %}
  ins_pipe(pipe_class_memory);
%}
14471 
// String equality, Latin-1 strings (element size 1 byte).
instruct string_equalsL(iRegP_R1 str1, iRegP_R3 str2, iRegI_R4 cnt,
                        iRegI_R0 result, rFlagsReg cr)
%{
  predicate(((StrEqualsNode*)n)->encoding() == StrIntrinsicNode::LL);
  match(Set result (StrEquals (Binary str1 str2) cnt));
  effect(USE_KILL str1, USE_KILL str2, USE_KILL cnt, KILL cr);

  format %{ "String Equals $str1,$str2,$cnt -> $result" %}
  ins_encode %{
    // Count is in 8-bit bytes; non-Compact chars are 16 bits.
    __ string_equals($str1$$Register, $str2$$Register,
                     $result$$Register, $cnt$$Register, 1);
  %}
  ins_pipe(pipe_class_memory);
%}

// String equality, UTF-16 strings (element size 2 bytes).
instruct string_equalsU(iRegP_R1 str1, iRegP_R3 str2, iRegI_R4 cnt,
                        iRegI_R0 result, rFlagsReg cr)
%{
  predicate(((StrEqualsNode*)n)->encoding() == StrIntrinsicNode::UU);
  match(Set result (StrEquals (Binary str1 str2) cnt));
  effect(USE_KILL str1, USE_KILL str2, USE_KILL cnt, KILL cr);

  format %{ "String Equals $str1,$str2,$cnt -> $result" %}
  ins_encode %{
    // Count is in 8-bit bytes; non-Compact chars are 16 bits.
    __ string_equals($str1$$Register, $str2$$Register,
                     $result$$Register, $cnt$$Register, 2);
  %}
  ins_pipe(pipe_class_memory);
%}
14503 
// Array equality for byte[] (element size 1).
instruct array_equalsB(iRegP_R1 ary1, iRegP_R2 ary2, iRegI_R0 result,
                       iRegP_R3 tmp1, iRegP_R4 tmp2, iRegP_R5 tmp3,
                       iRegP_R10 tmp, rFlagsReg cr)
%{
  predicate(((AryEqNode*)n)->encoding() == StrIntrinsicNode::LL);
  match(Set result (AryEq ary1 ary2));
  effect(KILL tmp, USE_KILL ary1, USE_KILL ary2, TEMP tmp1, TEMP tmp2, TEMP tmp3, KILL cr);

  format %{ "Array Equals $ary1,ary2 -> $result    // KILL $tmp" %}
  ins_encode %{
    __ arrays_equals($ary1$$Register, $ary2$$Register,
                     $tmp1$$Register, $tmp2$$Register, $tmp3$$Register,
                     $result$$Register, $tmp$$Register, 1);
    %}
  ins_pipe(pipe_class_memory);
%}

// Array equality for char[] (element size 2).
instruct array_equalsC(iRegP_R1 ary1, iRegP_R2 ary2, iRegI_R0 result,
                       iRegP_R3 tmp1, iRegP_R4 tmp2, iRegP_R5 tmp3,
                       iRegP_R10 tmp, rFlagsReg cr)
%{
  predicate(((AryEqNode*)n)->encoding() == StrIntrinsicNode::UU);
  match(Set result (AryEq ary1 ary2));
  effect(KILL tmp, USE_KILL ary1, USE_KILL ary2, TEMP tmp1, TEMP tmp2, TEMP tmp3, KILL cr);

  format %{ "Array Equals $ary1,ary2 -> $result    // KILL $tmp" %}
  ins_encode %{
    __ arrays_equals($ary1$$Register, $ary2$$Register,
                     $tmp1$$Register, $tmp2$$Register, $tmp3$$Register,
                     $result$$Register, $tmp$$Register, 2);
  %}
  ins_pipe(pipe_class_memory);
%}
14537 
// Test whether a byte[] region contains any byte with the high bit set
// (used by the compact-strings intrinsics).
instruct has_negatives(iRegP_R1 ary1, iRegI_R2 len, iRegI_R0 result, rFlagsReg cr)
%{
  match(Set result (HasNegatives ary1 len));
  effect(USE_KILL ary1, USE_KILL len, KILL cr);
  format %{ "has negatives byte[] $ary1,$len -> $result" %}
  ins_encode %{
    __ has_negatives($ary1$$Register, $len$$Register, $result$$Register);
  %}
  ins_pipe( pipe_slow );
%}
14548 
// fast char[] to byte[] compression
// Uses four vector temporaries; result reports the outcome of the
// compression (see char_array_compress).
instruct string_compress(iRegP_R2 src, iRegP_R1 dst, iRegI_R3 len,
                         vRegD_V0 tmp1, vRegD_V1 tmp2,
                         vRegD_V2 tmp3, vRegD_V3 tmp4,
                         iRegI_R0 result, rFlagsReg cr)
%{
  match(Set result (StrCompressedCopy src (Binary dst len)));
  effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, USE_KILL src, USE_KILL dst, USE_KILL len, KILL cr);

  format %{ "String Compress $src,$dst -> $result    // KILL R1, R2, R3, R4" %}
  ins_encode %{
    __ char_array_compress($src$$Register, $dst$$Register, $len$$Register,
                           $tmp1$$FloatRegister, $tmp2$$FloatRegister,
                           $tmp3$$FloatRegister, $tmp4$$FloatRegister,
                           $result$$Register);
  %}
  ins_pipe( pipe_slow );
%}

// fast byte[] to char[] inflation
// Pure side-effecting copy: matches into a Universe dummy, produces no
// value.  Three vector temps plus one integer temp are clobbered.
instruct string_inflate(Universe dummy, iRegP_R0 src, iRegP_R1 dst, iRegI_R2 len,
                        vRegD_V0 tmp1, vRegD_V1 tmp2, vRegD_V2 tmp3, iRegP_R3 tmp4, rFlagsReg cr)
%{
  match(Set dummy (StrInflatedCopy src (Binary dst len)));
  effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, USE_KILL src, USE_KILL dst, USE_KILL len, KILL cr);

  format %{ "String Inflate $src,$dst    // KILL $tmp1, $tmp2" %}
  ins_encode %{
    __ byte_array_inflate($src$$Register, $dst$$Register, $len$$Register,
                          $tmp1$$FloatRegister, $tmp2$$FloatRegister, $tmp3$$FloatRegister, $tmp4$$Register);
  %}
  ins_pipe(pipe_class_memory);
%}
14582 
// encode char[] to byte[] in ISO_8859_1
// result receives the number of characters encoded (per
// encode_iso_array's contract -- TODO confirm against the stub).
instruct encode_iso_array(iRegP_R2 src, iRegP_R1 dst, iRegI_R3 len,
                          vRegD_V0 Vtmp1, vRegD_V1 Vtmp2,
                          vRegD_V2 Vtmp3, vRegD_V3 Vtmp4,
                          iRegI_R0 result, rFlagsReg cr)
%{
  match(Set result (EncodeISOArray src (Binary dst len)));
  effect(USE_KILL src, USE_KILL dst, USE_KILL len,
         KILL Vtmp1, KILL Vtmp2, KILL Vtmp3, KILL Vtmp4, KILL cr);

  format %{ "Encode array $src,$dst,$len -> $result" %}
  ins_encode %{
    __ encode_iso_array($src$$Register, $dst$$Register, $len$$Register,
         $result$$Register, $Vtmp1$$FloatRegister,  $Vtmp2$$FloatRegister,
         $Vtmp3$$FloatRegister,  $Vtmp4$$FloatRegister);
  %}
  ins_pipe( pipe_class_memory );
%}
14601 
14602 // ============================================================================
14603 // This name is KNOWN by the ADLC and cannot be changed.
14604 // The ADLC forces a 'TypeRawPtr::BOTTOM' output type
14605 // for this guy.
14606 instruct tlsLoadP(thread_RegP dst)
14607 %{
14608   match(Set dst (ThreadLocal));
14609 
14610   ins_cost(0);
14611 
14612   format %{ " -- \t// $dst=Thread::current(), empty" %}
14613 
14614   size(0);
14615 
14616   ins_encode( /*empty*/ );
14617 
14618   ins_pipe(pipe_class_empty);
14619 %}
14620 
// ====================VECTOR INSTRUCTIONS=====================================
//
// Each load/store is guarded by a predicate on the vector's memory size
// so exactly one width matches; the vmem4/8/16 operands constrain the
// addressing mode to the corresponding access size.

// Load vector (32 bits)
instruct loadV4(vecD dst, vmem4 mem)
%{
  predicate(n->as_LoadVector()->memory_size() == 4);
  match(Set dst (LoadVector mem));
  ins_cost(4 * INSN_COST);
  format %{ "ldrs   $dst,$mem\t# vector (32 bits)" %}
  ins_encode( aarch64_enc_ldrvS(dst, mem) );
  ins_pipe(vload_reg_mem64);
%}

// Load vector (64 bits)
instruct loadV8(vecD dst, vmem8 mem)
%{
  predicate(n->as_LoadVector()->memory_size() == 8);
  match(Set dst (LoadVector mem));
  ins_cost(4 * INSN_COST);
  format %{ "ldrd   $dst,$mem\t# vector (64 bits)" %}
  ins_encode( aarch64_enc_ldrvD(dst, mem) );
  ins_pipe(vload_reg_mem64);
%}

// Load Vector (128 bits)
instruct loadV16(vecX dst, vmem16 mem)
%{
  predicate(n->as_LoadVector()->memory_size() == 16);
  match(Set dst (LoadVector mem));
  ins_cost(4 * INSN_COST);
  format %{ "ldrq   $dst,$mem\t# vector (128 bits)" %}
  ins_encode( aarch64_enc_ldrvQ(dst, mem) );
  ins_pipe(vload_reg_mem128);
%}

// Store Vector (32 bits)
instruct storeV4(vecD src, vmem4 mem)
%{
  predicate(n->as_StoreVector()->memory_size() == 4);
  match(Set mem (StoreVector mem src));
  ins_cost(4 * INSN_COST);
  format %{ "strs   $mem,$src\t# vector (32 bits)" %}
  ins_encode( aarch64_enc_strvS(src, mem) );
  ins_pipe(vstore_reg_mem64);
%}

// Store Vector (64 bits)
instruct storeV8(vecD src, vmem8 mem)
%{
  predicate(n->as_StoreVector()->memory_size() == 8);
  match(Set mem (StoreVector mem src));
  ins_cost(4 * INSN_COST);
  format %{ "strd   $mem,$src\t# vector (64 bits)" %}
  ins_encode( aarch64_enc_strvD(src, mem) );
  ins_pipe(vstore_reg_mem64);
%}
14677 
14678 // Store Vector (128 bits)
14679 instruct storeV16(vecX src, vmem16 mem)
14680 %{
14681   predicate(n->as_StoreVector()->memory_size() == 16);
14682   match(Set mem (StoreVector mem src));
14683   ins_cost(4 * INSN_COST);
14684   format %{ "strq   $mem,$src\t# vector (128 bits)" %}
14685   ins_encode( aarch64_enc_strvQ(src, mem) );
14686   ins_pipe(vstore_reg_mem128);
14687 %}
14688 
// Replicate (broadcast) a scalar into all lanes of a vector register.
// The from-register forms emit DUP; the from-immediate forms emit MOVI
// (via MacroAssembler::mov) with the constant masked to the lane width.
// The smaller-than-64-bit vector lengths (4B, 2S) share the 64-bit vecD
// rules, hence the OR'd length predicates.

instruct replicate8B(vecD dst, iRegIorL2I src)
%{
  predicate(n->as_Vector()->length() == 4 ||
            n->as_Vector()->length() == 8);
  match(Set dst (ReplicateB src));
  ins_cost(INSN_COST);
  format %{ "dup  $dst, $src\t# vector (8B)" %}
  ins_encode %{
    __ dup(as_FloatRegister($dst$$reg), __ T8B, as_Register($src$$reg));
  %}
  ins_pipe(vdup_reg_reg64);
%}

instruct replicate16B(vecX dst, iRegIorL2I src)
%{
  predicate(n->as_Vector()->length() == 16);
  match(Set dst (ReplicateB src));
  ins_cost(INSN_COST);
  format %{ "dup  $dst, $src\t# vector (16B)" %}
  ins_encode %{
    __ dup(as_FloatRegister($dst$$reg), __ T16B, as_Register($src$$reg));
  %}
  ins_pipe(vdup_reg_reg128);
%}

instruct replicate8B_imm(vecD dst, immI con)
%{
  predicate(n->as_Vector()->length() == 4 ||
            n->as_Vector()->length() == 8);
  match(Set dst (ReplicateB con));
  ins_cost(INSN_COST);
  format %{ "movi  $dst, $con\t# vector(8B)" %}
  ins_encode %{
    // Mask to the 8-bit lane width before broadcasting.
    __ mov(as_FloatRegister($dst$$reg), __ T8B, $con$$constant & 0xff);
  %}
  ins_pipe(vmovi_reg_imm64);
%}

instruct replicate16B_imm(vecX dst, immI con)
%{
  predicate(n->as_Vector()->length() == 16);
  match(Set dst (ReplicateB con));
  ins_cost(INSN_COST);
  format %{ "movi  $dst, $con\t# vector(16B)" %}
  ins_encode %{
    __ mov(as_FloatRegister($dst$$reg), __ T16B, $con$$constant & 0xff);
  %}
  ins_pipe(vmovi_reg_imm128);
%}

instruct replicate4S(vecD dst, iRegIorL2I src)
%{
  predicate(n->as_Vector()->length() == 2 ||
            n->as_Vector()->length() == 4);
  match(Set dst (ReplicateS src));
  ins_cost(INSN_COST);
  format %{ "dup  $dst, $src\t# vector (4S)" %}
  ins_encode %{
    __ dup(as_FloatRegister($dst$$reg), __ T4H, as_Register($src$$reg));
  %}
  ins_pipe(vdup_reg_reg64);
%}

instruct replicate8S(vecX dst, iRegIorL2I src)
%{
  predicate(n->as_Vector()->length() == 8);
  match(Set dst (ReplicateS src));
  ins_cost(INSN_COST);
  format %{ "dup  $dst, $src\t# vector (8S)" %}
  ins_encode %{
    __ dup(as_FloatRegister($dst$$reg), __ T8H, as_Register($src$$reg));
  %}
  ins_pipe(vdup_reg_reg128);
%}

instruct replicate4S_imm(vecD dst, immI con)
%{
  predicate(n->as_Vector()->length() == 2 ||
            n->as_Vector()->length() == 4);
  match(Set dst (ReplicateS con));
  ins_cost(INSN_COST);
  format %{ "movi  $dst, $con\t# vector(4H)" %}
  ins_encode %{
    // Mask to the 16-bit lane width before broadcasting.
    __ mov(as_FloatRegister($dst$$reg), __ T4H, $con$$constant & 0xffff);
  %}
  ins_pipe(vmovi_reg_imm64);
%}

instruct replicate8S_imm(vecX dst, immI con)
%{
  predicate(n->as_Vector()->length() == 8);
  match(Set dst (ReplicateS con));
  ins_cost(INSN_COST);
  format %{ "movi  $dst, $con\t# vector(8H)" %}
  ins_encode %{
    __ mov(as_FloatRegister($dst$$reg), __ T8H, $con$$constant & 0xffff);
  %}
  ins_pipe(vmovi_reg_imm128);
%}

instruct replicate2I(vecD dst, iRegIorL2I src)
%{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (ReplicateI src));
  ins_cost(INSN_COST);
  format %{ "dup  $dst, $src\t# vector (2I)" %}
  ins_encode %{
    __ dup(as_FloatRegister($dst$$reg), __ T2S, as_Register($src$$reg));
  %}
  ins_pipe(vdup_reg_reg64);
%}

instruct replicate4I(vecX dst, iRegIorL2I src)
%{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (ReplicateI src));
  ins_cost(INSN_COST);
  format %{ "dup  $dst, $src\t# vector (4I)" %}
  ins_encode %{
    __ dup(as_FloatRegister($dst$$reg), __ T4S, as_Register($src$$reg));
  %}
  ins_pipe(vdup_reg_reg128);
%}

instruct replicate2I_imm(vecD dst, immI con)
%{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (ReplicateI con));
  ins_cost(INSN_COST);
  format %{ "movi  $dst, $con\t# vector(2I)" %}
  ins_encode %{
    // 32-bit lanes take the full int constant; no masking needed.
    __ mov(as_FloatRegister($dst$$reg), __ T2S, $con$$constant);
  %}
  ins_pipe(vmovi_reg_imm64);
%}

instruct replicate4I_imm(vecX dst, immI con)
%{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (ReplicateI con));
  ins_cost(INSN_COST);
  format %{ "movi  $dst, $con\t# vector(4I)" %}
  ins_encode %{
    __ mov(as_FloatRegister($dst$$reg), __ T4S, $con$$constant);
  %}
  ins_pipe(vmovi_reg_imm128);
%}

instruct replicate2L(vecX dst, iRegL src)
%{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (ReplicateL src));
  ins_cost(INSN_COST);
  format %{ "dup  $dst, $src\t# vector (2L)" %}
  ins_encode %{
    __ dup(as_FloatRegister($dst$$reg), __ T2D, as_Register($src$$reg));
  %}
  ins_pipe(vdup_reg_reg128);
%}

// Zero the whole 128-bit register via EOR of the register with itself;
// a zero vector is bit-identical regardless of lane size.
// NOTE(review): this rule matches (ReplicateI zero), not ReplicateL,
// despite the 2L name -- looks deliberate given the bit-pattern
// equivalence, but confirm against the ideal-graph shapes generated for
// long-vector zero broadcasts.
instruct replicate2L_zero(vecX dst, immI0 zero)
%{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (ReplicateI zero));
  ins_cost(INSN_COST);
  format %{ "movi  $dst, $zero\t# vector(4I)" %}
  ins_encode %{
    __ eor(as_FloatRegister($dst$$reg), __ T16B,
           as_FloatRegister($dst$$reg),
           as_FloatRegister($dst$$reg));
  %}
  ins_pipe(vmovi_reg_imm128);
%}

instruct replicate2F(vecD dst, vRegF src)
%{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (ReplicateF src));
  ins_cost(INSN_COST);
  format %{ "dup  $dst, $src\t# vector (2F)" %}
  ins_encode %{
    __ dup(as_FloatRegister($dst$$reg), __ T2S,
           as_FloatRegister($src$$reg));
  %}
  ins_pipe(vdup_reg_freg64);
%}

instruct replicate4F(vecX dst, vRegF src)
%{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (ReplicateF src));
  ins_cost(INSN_COST);
  format %{ "dup  $dst, $src\t# vector (4F)" %}
  ins_encode %{
    __ dup(as_FloatRegister($dst$$reg), __ T4S,
           as_FloatRegister($src$$reg));
  %}
  ins_pipe(vdup_reg_freg128);
%}

instruct replicate2D(vecX dst, vRegD src)
%{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (ReplicateD src));
  ins_cost(INSN_COST);
  format %{ "dup  $dst, $src\t# vector (2D)" %}
  ins_encode %{
    __ dup(as_FloatRegister($dst$$reg), __ T2D,
           as_FloatRegister($src$$reg));
  %}
  ins_pipe(vdup_reg_dreg128);
%}
14901 
// ====================REDUCTION ARITHMETIC====================================

// Integer add reduction, 2 lanes: extract both lanes with UMOV and sum
// them into $dst together with the scalar input $src1.  TEMP registers
// hold the extracted lanes.
instruct reduce_add2I(iRegINoSp dst, iRegIorL2I src1, vecD src2, iRegINoSp tmp, iRegINoSp tmp2)
%{
  match(Set dst (AddReductionVI src1 src2));
  ins_cost(INSN_COST);
  effect(TEMP tmp, TEMP tmp2);
  format %{ "umov  $tmp, $src2, S, 0\n\t"
            "umov  $tmp2, $src2, S, 1\n\t"
            "addw  $dst, $src1, $tmp\n\t"
            "addw  $dst, $dst, $tmp2\t add reduction2i"
  %}
  ins_encode %{
    __ umov($tmp$$Register, as_FloatRegister($src2$$reg), __ S, 0);
    __ umov($tmp2$$Register, as_FloatRegister($src2$$reg), __ S, 1);
    __ addw($dst$$Register, $src1$$Register, $tmp$$Register);
    __ addw($dst$$Register, $dst$$Register, $tmp2$$Register);
  %}
  ins_pipe(pipe_class_default);
%}

// Integer add reduction, 4 lanes: ADDV folds the vector in one
// instruction, then the scalar result is extracted and added to $src1.
instruct reduce_add4I(iRegINoSp dst, iRegIorL2I src1, vecX src2, vecX tmp, iRegINoSp tmp2)
%{
  match(Set dst (AddReductionVI src1 src2));
  ins_cost(INSN_COST);
  effect(TEMP tmp, TEMP tmp2);
  format %{ "addv  $tmp, T4S, $src2\n\t"
            "umov  $tmp2, $tmp, S, 0\n\t"
            "addw  $dst, $tmp2, $src1\t add reduction4i"
  %}
  ins_encode %{
    __ addv(as_FloatRegister($tmp$$reg), __ T4S,
            as_FloatRegister($src2$$reg));
    __ umov($tmp2$$Register, as_FloatRegister($tmp$$reg), __ S, 0);
    __ addw($dst$$Register, $tmp2$$Register, $src1$$Register);
  %}
  ins_pipe(pipe_class_default);
%}
14940 
// Integer mul reduction, 2 lanes: lanes are extracted one at a time and
// multiplied into $dst.  TEMP dst because $dst is written before all
// inputs are consumed.
instruct reduce_mul2I(iRegINoSp dst, iRegIorL2I src1, vecD src2, iRegINoSp tmp)
%{
  match(Set dst (MulReductionVI src1 src2));
  ins_cost(INSN_COST);
  effect(TEMP tmp, TEMP dst);
  format %{ "umov  $tmp, $src2, S, 0\n\t"
            "mul   $dst, $tmp, $src1\n\t"
            "umov  $tmp, $src2, S, 1\n\t"
            "mul   $dst, $tmp, $dst\t mul reduction2i\n\t"
  %}
  ins_encode %{
    __ umov($tmp$$Register, as_FloatRegister($src2$$reg), __ S, 0);
    __ mul($dst$$Register, $tmp$$Register, $src1$$Register);
    __ umov($tmp$$Register, as_FloatRegister($src2$$reg), __ S, 1);
    __ mul($dst$$Register, $tmp$$Register, $dst$$Register);
  %}
  ins_pipe(pipe_class_default);
%}

// Integer mul reduction, 4 lanes: first halve the problem by moving the
// upper D half onto the lower (ins) and doing a 2S vector multiply, then
// finish with two scalar multiplies as in the 2-lane case.
instruct reduce_mul4I(iRegINoSp dst, iRegIorL2I src1, vecX src2, vecX tmp, iRegINoSp tmp2)
%{
  match(Set dst (MulReductionVI src1 src2));
  ins_cost(INSN_COST);
  effect(TEMP tmp, TEMP tmp2, TEMP dst);
  format %{ "ins   $tmp, $src2, 0, 1\n\t"
            "mul   $tmp, $tmp, $src2\n\t"
            "umov  $tmp2, $tmp, S, 0\n\t"
            "mul   $dst, $tmp2, $src1\n\t"
            "umov  $tmp2, $tmp, S, 1\n\t"
            "mul   $dst, $tmp2, $dst\t mul reduction4i\n\t"
  %}
  ins_encode %{
    __ ins(as_FloatRegister($tmp$$reg), __ D,
           as_FloatRegister($src2$$reg), 0, 1);
    __ mulv(as_FloatRegister($tmp$$reg), __ T2S,
           as_FloatRegister($tmp$$reg), as_FloatRegister($src2$$reg));
    __ umov($tmp2$$Register, as_FloatRegister($tmp$$reg), __ S, 0);
    __ mul($dst$$Register, $tmp2$$Register, $src1$$Register);
    __ umov($tmp2$$Register, as_FloatRegister($tmp$$reg), __ S, 1);
    __ mul($dst$$Register, $tmp2$$Register, $dst$$Register);
  %}
  ins_pipe(pipe_class_default);
%}
14984 
// Float add reduction, 2 lanes.  Lanes are accumulated one scalar fadds
// at a time rather than with a vector reduce -- NOTE(review): presumably
// to preserve the strict left-to-right IEEE evaluation order Java
// requires for FP reductions; confirm against the superword/reduction
// node contract.
instruct reduce_add2F(vRegF dst, vRegF src1, vecD src2, vecD tmp)
%{
  match(Set dst (AddReductionVF src1 src2));
  ins_cost(INSN_COST);
  effect(TEMP tmp, TEMP dst);
  format %{ "fadds $dst, $src1, $src2\n\t"
            "ins   $tmp, S, $src2, 0, 1\n\t"
            "fadds $dst, $dst, $tmp\t add reduction2f"
  %}
  ins_encode %{
    __ fadds(as_FloatRegister($dst$$reg),
             as_FloatRegister($src1$$reg), as_FloatRegister($src2$$reg));
    __ ins(as_FloatRegister($tmp$$reg), __ S,
           as_FloatRegister($src2$$reg), 0, 1);
    __ fadds(as_FloatRegister($dst$$reg),
             as_FloatRegister($dst$$reg), as_FloatRegister($tmp$$reg));
  %}
  ins_pipe(pipe_class_default);
%}

// Float add reduction, 4 lanes: each upper lane is moved into $tmp's
// lane 0 (ins) and accumulated with a scalar fadds.
instruct reduce_add4F(vRegF dst, vRegF src1, vecX src2, vecX tmp)
%{
  match(Set dst (AddReductionVF src1 src2));
  ins_cost(INSN_COST);
  effect(TEMP tmp, TEMP dst);
  format %{ "fadds $dst, $src1, $src2\n\t"
            "ins   $tmp, S, $src2, 0, 1\n\t"
            "fadds $dst, $dst, $tmp\n\t"
            "ins   $tmp, S, $src2, 0, 2\n\t"
            "fadds $dst, $dst, $tmp\n\t"
            "ins   $tmp, S, $src2, 0, 3\n\t"
            "fadds $dst, $dst, $tmp\t add reduction4f"
  %}
  ins_encode %{
    __ fadds(as_FloatRegister($dst$$reg),
             as_FloatRegister($src1$$reg), as_FloatRegister($src2$$reg));
    __ ins(as_FloatRegister($tmp$$reg), __ S,
           as_FloatRegister($src2$$reg), 0, 1);
    __ fadds(as_FloatRegister($dst$$reg),
             as_FloatRegister($dst$$reg), as_FloatRegister($tmp$$reg));
    __ ins(as_FloatRegister($tmp$$reg), __ S,
           as_FloatRegister($src2$$reg), 0, 2);
    __ fadds(as_FloatRegister($dst$$reg),
             as_FloatRegister($dst$$reg), as_FloatRegister($tmp$$reg));
    __ ins(as_FloatRegister($tmp$$reg), __ S,
           as_FloatRegister($src2$$reg), 0, 3);
    __ fadds(as_FloatRegister($dst$$reg),
             as_FloatRegister($dst$$reg), as_FloatRegister($tmp$$reg));
  %}
  ins_pipe(pipe_class_default);
%}
15036 
// Float mul reduction, 2 lanes: dst = src1 * src2[0] * src2[1].
// Lane 1 is moved into $tmp (ins) so a scalar fmuls can consume it.
// Fixed the format trailer: this is a 2-lane MUL reduction; it used to
// read "add reduction4f", which mislabelled the rule in -XX:+PrintOptoAssembly
// output.
instruct reduce_mul2F(vRegF dst, vRegF src1, vecD src2, vecD tmp)
%{
  match(Set dst (MulReductionVF src1 src2));
  ins_cost(INSN_COST);
  effect(TEMP tmp, TEMP dst);
  format %{ "fmuls $dst, $src1, $src2\n\t"
            "ins   $tmp, S, $src2, 0, 1\n\t"
            "fmuls $dst, $dst, $tmp\t mul reduction2f"
  %}
  ins_encode %{
    __ fmuls(as_FloatRegister($dst$$reg),
             as_FloatRegister($src1$$reg), as_FloatRegister($src2$$reg));
    __ ins(as_FloatRegister($tmp$$reg), __ S,
           as_FloatRegister($src2$$reg), 0, 1);
    __ fmuls(as_FloatRegister($dst$$reg),
             as_FloatRegister($dst$$reg), as_FloatRegister($tmp$$reg));
  %}
  ins_pipe(pipe_class_default);
%}
15056 
// Float mul reduction, 4 lanes: each upper lane is moved into $tmp's
// lane 0 (ins) and folded in with a scalar fmuls.
// Fixed the format trailer: this is a MUL reduction; it used to read
// "add reduction4f", which mislabelled the rule in -XX:+PrintOptoAssembly
// output.
instruct reduce_mul4F(vRegF dst, vRegF src1, vecX src2, vecX tmp)
%{
  match(Set dst (MulReductionVF src1 src2));
  ins_cost(INSN_COST);
  effect(TEMP tmp, TEMP dst);
  format %{ "fmuls $dst, $src1, $src2\n\t"
            "ins   $tmp, S, $src2, 0, 1\n\t"
            "fmuls $dst, $dst, $tmp\n\t"
            "ins   $tmp, S, $src2, 0, 2\n\t"
            "fmuls $dst, $dst, $tmp\n\t"
            "ins   $tmp, S, $src2, 0, 3\n\t"
            "fmuls $dst, $dst, $tmp\t mul reduction4f"
  %}
  ins_encode %{
    __ fmuls(as_FloatRegister($dst$$reg),
             as_FloatRegister($src1$$reg), as_FloatRegister($src2$$reg));
    __ ins(as_FloatRegister($tmp$$reg), __ S,
           as_FloatRegister($src2$$reg), 0, 1);
    __ fmuls(as_FloatRegister($dst$$reg),
             as_FloatRegister($dst$$reg), as_FloatRegister($tmp$$reg));
    __ ins(as_FloatRegister($tmp$$reg), __ S,
           as_FloatRegister($src2$$reg), 0, 2);
    __ fmuls(as_FloatRegister($dst$$reg),
             as_FloatRegister($dst$$reg), as_FloatRegister($tmp$$reg));
    __ ins(as_FloatRegister($tmp$$reg), __ S,
           as_FloatRegister($src2$$reg), 0, 3);
    __ fmuls(as_FloatRegister($dst$$reg),
             as_FloatRegister($dst$$reg), as_FloatRegister($tmp$$reg));
  %}
  ins_pipe(pipe_class_default);
%}
15088 
// Double add reduction, 2 lanes: dst = src1 + src2[0] + src2[1].
// Lane 1 is moved into $tmp (ins) so a scalar faddd can consume it.
instruct reduce_add2D(vRegD dst, vRegD src1, vecX src2, vecX tmp)
%{
  match(Set dst (AddReductionVD src1 src2));
  ins_cost(INSN_COST);
  effect(TEMP tmp, TEMP dst);
  format %{ "faddd $dst, $src1, $src2\n\t"
            "ins   $tmp, D, $src2, 0, 1\n\t"
            "faddd $dst, $dst, $tmp\t add reduction2d"
  %}
  ins_encode %{
    __ faddd(as_FloatRegister($dst$$reg),
             as_FloatRegister($src1$$reg), as_FloatRegister($src2$$reg));
    __ ins(as_FloatRegister($tmp$$reg), __ D,
           as_FloatRegister($src2$$reg), 0, 1);
    __ faddd(as_FloatRegister($dst$$reg),
             as_FloatRegister($dst$$reg), as_FloatRegister($tmp$$reg));
  %}
  ins_pipe(pipe_class_default);
%}
15108 
// Double mul reduction, 2 lanes: dst = src1 * src2[0] * src2[1].
// Lane 1 is moved into $tmp (ins) so a scalar fmuld can consume it.
// Fixed the format trailer: this is a MUL reduction; it used to read
// "add reduction2d", which mislabelled the rule in -XX:+PrintOptoAssembly
// output.
instruct reduce_mul2D(vRegD dst, vRegD src1, vecX src2, vecX tmp)
%{
  match(Set dst (MulReductionVD src1 src2));
  ins_cost(INSN_COST);
  effect(TEMP tmp, TEMP dst);
  format %{ "fmuld $dst, $src1, $src2\n\t"
            "ins   $tmp, D, $src2, 0, 1\n\t"
            "fmuld $dst, $dst, $tmp\t mul reduction2d"
  %}
  ins_encode %{
    __ fmuld(as_FloatRegister($dst$$reg),
             as_FloatRegister($src1$$reg), as_FloatRegister($src2$$reg));
    __ ins(as_FloatRegister($tmp$$reg), __ D,
           as_FloatRegister($src2$$reg), 0, 1);
    __ fmuld(as_FloatRegister($dst$$reg),
             as_FloatRegister($dst$$reg), as_FloatRegister($tmp$$reg));
  %}
  ins_pipe(pipe_class_default);
%}
15128 
// ====================VECTOR ARITHMETIC=======================================

// --------------------------------- ADD --------------------------------------

// Lane-wise vector add.  Integer forms emit ADD (vector), FP forms emit
// FADD (vector).  Sub-64-bit lengths share the vecD rules via OR'd
// predicates, as in the replicate family above.

instruct vadd8B(vecD dst, vecD src1, vecD src2)
%{
  predicate(n->as_Vector()->length() == 4 ||
            n->as_Vector()->length() == 8);
  match(Set dst (AddVB src1 src2));
  ins_cost(INSN_COST);
  format %{ "addv  $dst,$src1,$src2\t# vector (8B)" %}
  ins_encode %{
    __ addv(as_FloatRegister($dst$$reg), __ T8B,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vdop64);
%}

instruct vadd16B(vecX dst, vecX src1, vecX src2)
%{
  predicate(n->as_Vector()->length() == 16);
  match(Set dst (AddVB src1 src2));
  ins_cost(INSN_COST);
  format %{ "addv  $dst,$src1,$src2\t# vector (16B)" %}
  ins_encode %{
    __ addv(as_FloatRegister($dst$$reg), __ T16B,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vdop128);
%}

instruct vadd4S(vecD dst, vecD src1, vecD src2)
%{
  predicate(n->as_Vector()->length() == 2 ||
            n->as_Vector()->length() == 4);
  match(Set dst (AddVS src1 src2));
  ins_cost(INSN_COST);
  format %{ "addv  $dst,$src1,$src2\t# vector (4H)" %}
  ins_encode %{
    __ addv(as_FloatRegister($dst$$reg), __ T4H,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vdop64);
%}

instruct vadd8S(vecX dst, vecX src1, vecX src2)
%{
  predicate(n->as_Vector()->length() == 8);
  match(Set dst (AddVS src1 src2));
  ins_cost(INSN_COST);
  format %{ "addv  $dst,$src1,$src2\t# vector (8H)" %}
  ins_encode %{
    __ addv(as_FloatRegister($dst$$reg), __ T8H,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vdop128);
%}

instruct vadd2I(vecD dst, vecD src1, vecD src2)
%{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (AddVI src1 src2));
  ins_cost(INSN_COST);
  format %{ "addv  $dst,$src1,$src2\t# vector (2S)" %}
  ins_encode %{
    __ addv(as_FloatRegister($dst$$reg), __ T2S,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vdop64);
%}

instruct vadd4I(vecX dst, vecX src1, vecX src2)
%{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (AddVI src1 src2));
  ins_cost(INSN_COST);
  format %{ "addv  $dst,$src1,$src2\t# vector (4S)" %}
  ins_encode %{
    __ addv(as_FloatRegister($dst$$reg), __ T4S,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vdop128);
%}

instruct vadd2L(vecX dst, vecX src1, vecX src2)
%{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (AddVL src1 src2));
  ins_cost(INSN_COST);
  format %{ "addv  $dst,$src1,$src2\t# vector (2L)" %}
  ins_encode %{
    __ addv(as_FloatRegister($dst$$reg), __ T2D,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vdop128);
%}

instruct vadd2F(vecD dst, vecD src1, vecD src2)
%{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (AddVF src1 src2));
  ins_cost(INSN_COST);
  format %{ "fadd  $dst,$src1,$src2\t# vector (2S)" %}
  ins_encode %{
    __ fadd(as_FloatRegister($dst$$reg), __ T2S,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vdop_fp64);
%}

instruct vadd4F(vecX dst, vecX src1, vecX src2)
%{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (AddVF src1 src2));
  ins_cost(INSN_COST);
  format %{ "fadd  $dst,$src1,$src2\t# vector (4S)" %}
  ins_encode %{
    __ fadd(as_FloatRegister($dst$$reg), __ T4S,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vdop_fp128);
%}
15260 
// Lane-wise vector add, packed doubles (2D).
// Added the length-2 predicate for consistency with the other 2D rules
// (vsub2D, vmul2D); on NEON a vecX AddVD can only have 2 lanes, so this
// does not change which nodes match, but it keeps the rule robust if
// other AddVD shapes ever appear.
instruct vadd2D(vecX dst, vecX src1, vecX src2)
%{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (AddVD src1 src2));
  ins_cost(INSN_COST);
  format %{ "fadd  $dst,$src1,$src2\t# vector (2D)" %}
  ins_encode %{
    __ fadd(as_FloatRegister($dst$$reg), __ T2D,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vdop_fp128);
%}
15273 
15274 // --------------------------------- SUB --------------------------------------
15275 
15276 instruct vsub8B(vecD dst, vecD src1, vecD src2)
15277 %{
15278   predicate(n->as_Vector()->length() == 4 ||
15279             n->as_Vector()->length() == 8);
15280   match(Set dst (SubVB src1 src2));
15281   ins_cost(INSN_COST);
15282   format %{ "subv  $dst,$src1,$src2\t# vector (8B)" %}
15283   ins_encode %{
15284     __ subv(as_FloatRegister($dst$$reg), __ T8B,
15285             as_FloatRegister($src1$$reg),
15286             as_FloatRegister($src2$$reg));
15287   %}
15288   ins_pipe(vdop64);
15289 %}
15290 
15291 instruct vsub16B(vecX dst, vecX src1, vecX src2)
15292 %{
15293   predicate(n->as_Vector()->length() == 16);
15294   match(Set dst (SubVB src1 src2));
15295   ins_cost(INSN_COST);
15296   format %{ "subv  $dst,$src1,$src2\t# vector (16B)" %}
15297   ins_encode %{
15298     __ subv(as_FloatRegister($dst$$reg), __ T16B,
15299             as_FloatRegister($src1$$reg),
15300             as_FloatRegister($src2$$reg));
15301   %}
15302   ins_pipe(vdop128);
15303 %}
15304 
15305 instruct vsub4S(vecD dst, vecD src1, vecD src2)
15306 %{
15307   predicate(n->as_Vector()->length() == 2 ||
15308             n->as_Vector()->length() == 4);
15309   match(Set dst (SubVS src1 src2));
15310   ins_cost(INSN_COST);
15311   format %{ "subv  $dst,$src1,$src2\t# vector (4H)" %}
15312   ins_encode %{
15313     __ subv(as_FloatRegister($dst$$reg), __ T4H,
15314             as_FloatRegister($src1$$reg),
15315             as_FloatRegister($src2$$reg));
15316   %}
15317   ins_pipe(vdop64);
15318 %}
15319 
15320 instruct vsub8S(vecX dst, vecX src1, vecX src2)
15321 %{
15322   predicate(n->as_Vector()->length() == 8);
15323   match(Set dst (SubVS src1 src2));
15324   ins_cost(INSN_COST);
15325   format %{ "subv  $dst,$src1,$src2\t# vector (8H)" %}
15326   ins_encode %{
15327     __ subv(as_FloatRegister($dst$$reg), __ T8H,
15328             as_FloatRegister($src1$$reg),
15329             as_FloatRegister($src2$$reg));
15330   %}
15331   ins_pipe(vdop128);
15332 %}
15333 
15334 instruct vsub2I(vecD dst, vecD src1, vecD src2)
15335 %{
15336   predicate(n->as_Vector()->length() == 2);
15337   match(Set dst (SubVI src1 src2));
15338   ins_cost(INSN_COST);
15339   format %{ "subv  $dst,$src1,$src2\t# vector (2S)" %}
15340   ins_encode %{
15341     __ subv(as_FloatRegister($dst$$reg), __ T2S,
15342             as_FloatRegister($src1$$reg),
15343             as_FloatRegister($src2$$reg));
15344   %}
15345   ins_pipe(vdop64);
15346 %}
15347 
15348 instruct vsub4I(vecX dst, vecX src1, vecX src2)
15349 %{
15350   predicate(n->as_Vector()->length() == 4);
15351   match(Set dst (SubVI src1 src2));
15352   ins_cost(INSN_COST);
15353   format %{ "subv  $dst,$src1,$src2\t# vector (4S)" %}
15354   ins_encode %{
15355     __ subv(as_FloatRegister($dst$$reg), __ T4S,
15356             as_FloatRegister($src1$$reg),
15357             as_FloatRegister($src2$$reg));
15358   %}
15359   ins_pipe(vdop128);
15360 %}
15361 
15362 instruct vsub2L(vecX dst, vecX src1, vecX src2)
15363 %{
15364   predicate(n->as_Vector()->length() == 2);
15365   match(Set dst (SubVL src1 src2));
15366   ins_cost(INSN_COST);
15367   format %{ "subv  $dst,$src1,$src2\t# vector (2L)" %}
15368   ins_encode %{
15369     __ subv(as_FloatRegister($dst$$reg), __ T2D,
15370             as_FloatRegister($src1$$reg),
15371             as_FloatRegister($src2$$reg));
15372   %}
15373   ins_pipe(vdop128);
15374 %}
15375 
15376 instruct vsub2F(vecD dst, vecD src1, vecD src2)
15377 %{
15378   predicate(n->as_Vector()->length() == 2);
15379   match(Set dst (SubVF src1 src2));
15380   ins_cost(INSN_COST);
15381   format %{ "fsub  $dst,$src1,$src2\t# vector (2S)" %}
15382   ins_encode %{
15383     __ fsub(as_FloatRegister($dst$$reg), __ T2S,
15384             as_FloatRegister($src1$$reg),
15385             as_FloatRegister($src2$$reg));
15386   %}
15387   ins_pipe(vdop_fp64);
15388 %}
15389 
15390 instruct vsub4F(vecX dst, vecX src1, vecX src2)
15391 %{
15392   predicate(n->as_Vector()->length() == 4);
15393   match(Set dst (SubVF src1 src2));
15394   ins_cost(INSN_COST);
15395   format %{ "fsub  $dst,$src1,$src2\t# vector (4S)" %}
15396   ins_encode %{
15397     __ fsub(as_FloatRegister($dst$$reg), __ T4S,
15398             as_FloatRegister($src1$$reg),
15399             as_FloatRegister($src2$$reg));
15400   %}
15401   ins_pipe(vdop_fp128);
15402 %}
15403 
15404 instruct vsub2D(vecX dst, vecX src1, vecX src2)
15405 %{
15406   predicate(n->as_Vector()->length() == 2);
15407   match(Set dst (SubVD src1 src2));
15408   ins_cost(INSN_COST);
15409   format %{ "fsub  $dst,$src1,$src2\t# vector (2D)" %}
15410   ins_encode %{
15411     __ fsub(as_FloatRegister($dst$$reg), __ T2D,
15412             as_FloatRegister($src1$$reg),
15413             as_FloatRegister($src2$$reg));
15414   %}
15415   ins_pipe(vdop_fp128);
15416 %}
15417 
15418 // --------------------------------- MUL --------------------------------------
15419 
15420 instruct vmul4S(vecD dst, vecD src1, vecD src2)
15421 %{
15422   predicate(n->as_Vector()->length() == 2 ||
15423             n->as_Vector()->length() == 4);
15424   match(Set dst (MulVS src1 src2));
15425   ins_cost(INSN_COST);
15426   format %{ "mulv  $dst,$src1,$src2\t# vector (4H)" %}
15427   ins_encode %{
15428     __ mulv(as_FloatRegister($dst$$reg), __ T4H,
15429             as_FloatRegister($src1$$reg),
15430             as_FloatRegister($src2$$reg));
15431   %}
15432   ins_pipe(vmul64);
15433 %}
15434 
15435 instruct vmul8S(vecX dst, vecX src1, vecX src2)
15436 %{
15437   predicate(n->as_Vector()->length() == 8);
15438   match(Set dst (MulVS src1 src2));
15439   ins_cost(INSN_COST);
15440   format %{ "mulv  $dst,$src1,$src2\t# vector (8H)" %}
15441   ins_encode %{
15442     __ mulv(as_FloatRegister($dst$$reg), __ T8H,
15443             as_FloatRegister($src1$$reg),
15444             as_FloatRegister($src2$$reg));
15445   %}
15446   ins_pipe(vmul128);
15447 %}
15448 
15449 instruct vmul2I(vecD dst, vecD src1, vecD src2)
15450 %{
15451   predicate(n->as_Vector()->length() == 2);
15452   match(Set dst (MulVI src1 src2));
15453   ins_cost(INSN_COST);
15454   format %{ "mulv  $dst,$src1,$src2\t# vector (2S)" %}
15455   ins_encode %{
15456     __ mulv(as_FloatRegister($dst$$reg), __ T2S,
15457             as_FloatRegister($src1$$reg),
15458             as_FloatRegister($src2$$reg));
15459   %}
15460   ins_pipe(vmul64);
15461 %}
15462 
15463 instruct vmul4I(vecX dst, vecX src1, vecX src2)
15464 %{
15465   predicate(n->as_Vector()->length() == 4);
15466   match(Set dst (MulVI src1 src2));
15467   ins_cost(INSN_COST);
15468   format %{ "mulv  $dst,$src1,$src2\t# vector (4S)" %}
15469   ins_encode %{
15470     __ mulv(as_FloatRegister($dst$$reg), __ T4S,
15471             as_FloatRegister($src1$$reg),
15472             as_FloatRegister($src2$$reg));
15473   %}
15474   ins_pipe(vmul128);
15475 %}
15476 
15477 instruct vmul2F(vecD dst, vecD src1, vecD src2)
15478 %{
15479   predicate(n->as_Vector()->length() == 2);
15480   match(Set dst (MulVF src1 src2));
15481   ins_cost(INSN_COST);
15482   format %{ "fmul  $dst,$src1,$src2\t# vector (2S)" %}
15483   ins_encode %{
15484     __ fmul(as_FloatRegister($dst$$reg), __ T2S,
15485             as_FloatRegister($src1$$reg),
15486             as_FloatRegister($src2$$reg));
15487   %}
15488   ins_pipe(vmuldiv_fp64);
15489 %}
15490 
15491 instruct vmul4F(vecX dst, vecX src1, vecX src2)
15492 %{
15493   predicate(n->as_Vector()->length() == 4);
15494   match(Set dst (MulVF src1 src2));
15495   ins_cost(INSN_COST);
15496   format %{ "fmul  $dst,$src1,$src2\t# vector (4S)" %}
15497   ins_encode %{
15498     __ fmul(as_FloatRegister($dst$$reg), __ T4S,
15499             as_FloatRegister($src1$$reg),
15500             as_FloatRegister($src2$$reg));
15501   %}
15502   ins_pipe(vmuldiv_fp128);
15503 %}
15504 
15505 instruct vmul2D(vecX dst, vecX src1, vecX src2)
15506 %{
15507   predicate(n->as_Vector()->length() == 2);
15508   match(Set dst (MulVD src1 src2));
15509   ins_cost(INSN_COST);
15510   format %{ "fmul  $dst,$src1,$src2\t# vector (2D)" %}
15511   ins_encode %{
15512     __ fmul(as_FloatRegister($dst$$reg), __ T2D,
15513             as_FloatRegister($src1$$reg),
15514             as_FloatRegister($src2$$reg));
15515   %}
15516   ins_pipe(vmuldiv_fp128);
15517 %}
15518 
15519 // --------------------------------- MLA --------------------------------------
15520 
// Vector multiply-accumulate of shorts: dst += src1 * src2 (16-bit lanes, 4H).
// The predicate accepts length 2 as well as 4 — both fit in a D register.
instruct vmla4S(vecD dst, vecD src1, vecD src2)
%{
  predicate(n->as_Vector()->length() == 2 ||
            n->as_Vector()->length() == 4);
  match(Set dst (AddVS dst (MulVS src1 src2)));
  ins_cost(INSN_COST);
  format %{ "mlav  $dst,$src1,$src2\t# vector (4H)" %}
  ins_encode %{
    __ mlav(as_FloatRegister($dst$$reg), __ T4H,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vmla64);
%}
15535 
// Vector multiply-accumulate of shorts: dst += src1 * src2 (8 x 16-bit lanes, 8H).
instruct vmla8S(vecX dst, vecX src1, vecX src2)
%{
  predicate(n->as_Vector()->length() == 8);
  match(Set dst (AddVS dst (MulVS src1 src2)));
  ins_cost(INSN_COST);
  format %{ "mlav  $dst,$src1,$src2\t# vector (8H)" %}
  ins_encode %{
    __ mlav(as_FloatRegister($dst$$reg), __ T8H,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vmla128);
%}
15549 
// Vector multiply-accumulate of ints: dst += src1 * src2 (2 x 32-bit lanes, 2S).
instruct vmla2I(vecD dst, vecD src1, vecD src2)
%{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (AddVI dst (MulVI src1 src2)));
  ins_cost(INSN_COST);
  format %{ "mlav  $dst,$src1,$src2\t# vector (2S)" %}
  ins_encode %{
    __ mlav(as_FloatRegister($dst$$reg), __ T2S,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vmla64);
%}
15563 
// Vector multiply-accumulate of ints: dst += src1 * src2 (4 x 32-bit lanes, 4S).
instruct vmla4I(vecX dst, vecX src1, vecX src2)
%{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (AddVI dst (MulVI src1 src2)));
  ins_cost(INSN_COST);
  format %{ "mlav  $dst,$src1,$src2\t# vector (4S)" %}
  ins_encode %{
    __ mlav(as_FloatRegister($dst$$reg), __ T4S,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vmla128);
%}
15577 
15578 // dst + src1 * src2
// Fused multiply-add of floats: dst = dst + src1 * src2 (2S).
// Only matched when UseFMA is enabled, since fmla fuses the rounding.
instruct vmla2F(vecD dst, vecD src1, vecD src2) %{
  predicate(UseFMA && n->as_Vector()->length() == 2);
  match(Set dst (FmaVF  dst (Binary src1 src2)));
  format %{ "fmla  $dst,$src1,$src2\t# vector (2S)" %}
  ins_cost(INSN_COST);
  ins_encode %{
    __ fmla(as_FloatRegister($dst$$reg), __ T2S,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vmuldiv_fp64);
%}
15591 
15592 // dst + src1 * src2
// Fused multiply-add of floats: dst = dst + src1 * src2 (4S); requires UseFMA.
instruct vmla4F(vecX dst, vecX src1, vecX src2) %{
  predicate(UseFMA && n->as_Vector()->length() == 4);
  match(Set dst (FmaVF  dst (Binary src1 src2)));
  format %{ "fmla  $dst,$src1,$src2\t# vector (4S)" %}
  ins_cost(INSN_COST);
  ins_encode %{
    __ fmla(as_FloatRegister($dst$$reg), __ T4S,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vmuldiv_fp128);
%}
15605 
15606 // dst + src1 * src2
// Fused multiply-add of doubles: dst = dst + src1 * src2 (2D); requires UseFMA.
instruct vmla2D(vecX dst, vecX src1, vecX src2) %{
  predicate(UseFMA && n->as_Vector()->length() == 2);
  match(Set dst (FmaVD  dst (Binary src1 src2)));
  format %{ "fmla  $dst,$src1,$src2\t# vector (2D)" %}
  ins_cost(INSN_COST);
  ins_encode %{
    __ fmla(as_FloatRegister($dst$$reg), __ T2D,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vmuldiv_fp128);
%}
15619 
15620 // --------------------------------- MLS --------------------------------------
15621 
// Vector multiply-subtract of shorts: dst -= src1 * src2 (16-bit lanes, 4H).
// The predicate accepts length 2 as well as 4 — both fit in a D register.
instruct vmls4S(vecD dst, vecD src1, vecD src2)
%{
  predicate(n->as_Vector()->length() == 2 ||
            n->as_Vector()->length() == 4);
  match(Set dst (SubVS dst (MulVS src1 src2)));
  ins_cost(INSN_COST);
  format %{ "mlsv  $dst,$src1,$src2\t# vector (4H)" %}
  ins_encode %{
    __ mlsv(as_FloatRegister($dst$$reg), __ T4H,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vmla64);
%}
15636 
// Vector multiply-subtract of shorts: dst -= src1 * src2 (8 x 16-bit lanes, 8H).
instruct vmls8S(vecX dst, vecX src1, vecX src2)
%{
  predicate(n->as_Vector()->length() == 8);
  match(Set dst (SubVS dst (MulVS src1 src2)));
  ins_cost(INSN_COST);
  format %{ "mlsv  $dst,$src1,$src2\t# vector (8H)" %}
  ins_encode %{
    __ mlsv(as_FloatRegister($dst$$reg), __ T8H,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vmla128);
%}
15650 
// Vector multiply-subtract of ints: dst -= src1 * src2 (2 x 32-bit lanes, 2S).
instruct vmls2I(vecD dst, vecD src1, vecD src2)
%{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (SubVI dst (MulVI src1 src2)));
  ins_cost(INSN_COST);
  format %{ "mlsv  $dst,$src1,$src2\t# vector (2S)" %}
  ins_encode %{
    __ mlsv(as_FloatRegister($dst$$reg), __ T2S,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vmla64);
%}
15664 
// Vector multiply-subtract of ints: dst -= src1 * src2 (4 x 32-bit lanes, 4S).
instruct vmls4I(vecX dst, vecX src1, vecX src2)
%{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (SubVI dst (MulVI src1 src2)));
  ins_cost(INSN_COST);
  format %{ "mlsv  $dst,$src1,$src2\t# vector (4S)" %}
  ins_encode %{
    __ mlsv(as_FloatRegister($dst$$reg), __ T4S,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vmla128);
%}
15678 
15679 // dst - src1 * src2
// Fused multiply-subtract of floats: dst = dst - src1 * src2 (2S); requires UseFMA.
// Two match rules: the FmaVF may carry the negation on either multiplicand.
instruct vmls2F(vecD dst, vecD src1, vecD src2) %{
  predicate(UseFMA && n->as_Vector()->length() == 2);
  match(Set dst (FmaVF  dst (Binary (NegVF src1) src2)));
  match(Set dst (FmaVF  dst (Binary src1 (NegVF src2))));
  format %{ "fmls  $dst,$src1,$src2\t# vector (2S)" %}
  ins_cost(INSN_COST);
  ins_encode %{
    __ fmls(as_FloatRegister($dst$$reg), __ T2S,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vmuldiv_fp64);
%}
15693 
15694 // dst - src1 * src2
// Fused multiply-subtract of floats: dst = dst - src1 * src2 (4S); requires UseFMA.
// Two match rules: the FmaVF may carry the negation on either multiplicand.
instruct vmls4F(vecX dst, vecX src1, vecX src2) %{
  predicate(UseFMA && n->as_Vector()->length() == 4);
  match(Set dst (FmaVF  dst (Binary (NegVF src1) src2)));
  match(Set dst (FmaVF  dst (Binary src1 (NegVF src2))));
  format %{ "fmls  $dst,$src1,$src2\t# vector (4S)" %}
  ins_cost(INSN_COST);
  ins_encode %{
    __ fmls(as_FloatRegister($dst$$reg), __ T4S,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vmuldiv_fp128);
%}
15708 
15709 // dst - src1 * src2
// Fused multiply-subtract of doubles: dst = dst - src1 * src2 (2D); requires UseFMA.
// Two match rules: the FmaVD may carry the negation on either multiplicand.
instruct vmls2D(vecX dst, vecX src1, vecX src2) %{
  predicate(UseFMA && n->as_Vector()->length() == 2);
  match(Set dst (FmaVD  dst (Binary (NegVD src1) src2)));
  match(Set dst (FmaVD  dst (Binary src1 (NegVD src2))));
  format %{ "fmls  $dst,$src1,$src2\t# vector (2D)" %}
  ins_cost(INSN_COST);
  ins_encode %{
    __ fmls(as_FloatRegister($dst$$reg), __ T2D,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vmuldiv_fp128);
%}
15723 
15724 // --------------------------------- DIV --------------------------------------
15725 
// Vector divide of floats: 2 x 32-bit lanes (2S) in a 64-bit D register.
instruct vdiv2F(vecD dst, vecD src1, vecD src2)
%{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (DivVF src1 src2));
  ins_cost(INSN_COST);
  format %{ "fdiv  $dst,$src1,$src2\t# vector (2S)" %}
  ins_encode %{
    __ fdiv(as_FloatRegister($dst$$reg), __ T2S,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vmuldiv_fp64);
%}
15739 
// Vector divide of floats: 4 x 32-bit lanes (4S) in a 128-bit Q register.
instruct vdiv4F(vecX dst, vecX src1, vecX src2)
%{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (DivVF src1 src2));
  ins_cost(INSN_COST);
  format %{ "fdiv  $dst,$src1,$src2\t# vector (4S)" %}
  ins_encode %{
    __ fdiv(as_FloatRegister($dst$$reg), __ T4S,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vmuldiv_fp128);
%}
15753 
// Vector divide of doubles: 2 x 64-bit lanes (2D) in a 128-bit Q register.
instruct vdiv2D(vecX dst, vecX src1, vecX src2)
%{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (DivVD src1 src2));
  ins_cost(INSN_COST);
  format %{ "fdiv  $dst,$src1,$src2\t# vector (2D)" %}
  ins_encode %{
    __ fdiv(as_FloatRegister($dst$$reg), __ T2D,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vmuldiv_fp128);
%}
15767 
15768 // --------------------------------- SQRT -------------------------------------
15769 
// Vector square root of doubles: 2 x 64-bit lanes (2D) in a 128-bit Q register.
instruct vsqrt2D(vecX dst, vecX src)
%{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (SqrtVD src));
  format %{ "fsqrt  $dst, $src\t# vector (2D)" %}
  ins_encode %{
    __ fsqrt(as_FloatRegister($dst$$reg), __ T2D,
             as_FloatRegister($src$$reg));
  %}
  ins_pipe(vsqrt_fp128);
%}
15781 
15782 // --------------------------------- ABS --------------------------------------
15783 
// Vector absolute value of floats: 2 x 32-bit lanes (2S), D register.
instruct vabs2F(vecD dst, vecD src)
%{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (AbsVF src));
  ins_cost(INSN_COST * 3);
  format %{ "fabs  $dst,$src\t# vector (2S)" %}
  ins_encode %{
    __ fabs(as_FloatRegister($dst$$reg), __ T2S,
            as_FloatRegister($src$$reg));
  %}
  ins_pipe(vunop_fp64);
%}
15796 
// Vector absolute value of floats: 4 x 32-bit lanes (4S), Q register.
instruct vabs4F(vecX dst, vecX src)
%{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (AbsVF src));
  ins_cost(INSN_COST * 3);
  format %{ "fabs  $dst,$src\t# vector (4S)" %}
  ins_encode %{
    __ fabs(as_FloatRegister($dst$$reg), __ T4S,
            as_FloatRegister($src$$reg));
  %}
  ins_pipe(vunop_fp128);
%}
15809 
// Vector absolute value of doubles: 2 x 64-bit lanes (2D), Q register.
instruct vabs2D(vecX dst, vecX src)
%{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (AbsVD src));
  ins_cost(INSN_COST * 3);
  format %{ "fabs  $dst,$src\t# vector (2D)" %}
  ins_encode %{
    __ fabs(as_FloatRegister($dst$$reg), __ T2D,
            as_FloatRegister($src$$reg));
  %}
  ins_pipe(vunop_fp128);
%}
15822 
15823 // --------------------------------- NEG --------------------------------------
15824 
// Vector negation of floats: 2 x 32-bit lanes (2S), D register.
instruct vneg2F(vecD dst, vecD src)
%{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (NegVF src));
  ins_cost(INSN_COST * 3);
  format %{ "fneg  $dst,$src\t# vector (2S)" %}
  ins_encode %{
    __ fneg(as_FloatRegister($dst$$reg), __ T2S,
            as_FloatRegister($src$$reg));
  %}
  ins_pipe(vunop_fp64);
%}
15837 
// Vector negation of floats: 4 x 32-bit lanes (4S), Q register.
instruct vneg4F(vecX dst, vecX src)
%{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (NegVF src));
  ins_cost(INSN_COST * 3);
  format %{ "fneg  $dst,$src\t# vector (4S)" %}
  ins_encode %{
    __ fneg(as_FloatRegister($dst$$reg), __ T4S,
            as_FloatRegister($src$$reg));
  %}
  ins_pipe(vunop_fp128);
%}
15850 
// Vector negation of doubles: 2 x 64-bit lanes (2D), Q register.
instruct vneg2D(vecX dst, vecX src)
%{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (NegVD src));
  ins_cost(INSN_COST * 3);
  format %{ "fneg  $dst,$src\t# vector (2D)" %}
  ins_encode %{
    __ fneg(as_FloatRegister($dst$$reg), __ T2D,
            as_FloatRegister($src$$reg));
  %}
  ins_pipe(vunop_fp128);
%}
15863 
15864 // --------------------------------- AND --------------------------------------
15865 
// Vector bitwise AND; matched by total size in bytes (4 or 8), so it covers
// any lane type held in a 64-bit D register.
instruct vand8B(vecD dst, vecD src1, vecD src2)
%{
  predicate(n->as_Vector()->length_in_bytes() == 4 ||
            n->as_Vector()->length_in_bytes() == 8);
  match(Set dst (AndV src1 src2));
  ins_cost(INSN_COST);
  format %{ "and  $dst,$src1,$src2\t# vector (8B)" %}
  ins_encode %{
    __ andr(as_FloatRegister($dst$$reg), __ T8B,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vlogical64);
%}
15880 
// Vector bitwise AND of a full 128-bit Q register (any lane type, 16 bytes).
instruct vand16B(vecX dst, vecX src1, vecX src2)
%{
  predicate(n->as_Vector()->length_in_bytes() == 16);
  match(Set dst (AndV src1 src2));
  ins_cost(INSN_COST);
  format %{ "and  $dst,$src1,$src2\t# vector (16B)" %}
  ins_encode %{
    __ andr(as_FloatRegister($dst$$reg), __ T16B,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vlogical128);
%}
15894 
15895 // --------------------------------- OR ---------------------------------------
15896 
// Vector bitwise OR; matched by total size in bytes (4 or 8), so it covers
// any lane type held in a 64-bit D register.
instruct vor8B(vecD dst, vecD src1, vecD src2)
%{
  predicate(n->as_Vector()->length_in_bytes() == 4 ||
            n->as_Vector()->length_in_bytes() == 8);
  match(Set dst (OrV src1 src2));
  ins_cost(INSN_COST);
  // Fix: format previously printed "and" although the emitted instruction is
  // orr; make the disassembly text match the encoding (cf. vor16B below).
  format %{ "orr  $dst,$src1,$src2\t# vector (8B)" %}
  ins_encode %{
    __ orr(as_FloatRegister($dst$$reg), __ T8B,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vlogical64);
%}
15911 
// Vector bitwise OR of a full 128-bit Q register (any lane type, 16 bytes).
instruct vor16B(vecX dst, vecX src1, vecX src2)
%{
  predicate(n->as_Vector()->length_in_bytes() == 16);
  match(Set dst (OrV src1 src2));
  ins_cost(INSN_COST);
  format %{ "orr  $dst,$src1,$src2\t# vector (16B)" %}
  ins_encode %{
    __ orr(as_FloatRegister($dst$$reg), __ T16B,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vlogical128);
%}
15925 
15926 // --------------------------------- XOR --------------------------------------
15927 
// Vector bitwise XOR (NEON eor); matched by total size in bytes (4 or 8),
// so it covers any lane type held in a 64-bit D register.
instruct vxor8B(vecD dst, vecD src1, vecD src2)
%{
  predicate(n->as_Vector()->length_in_bytes() == 4 ||
            n->as_Vector()->length_in_bytes() == 8);
  match(Set dst (XorV src1 src2));
  ins_cost(INSN_COST);
  format %{ "xor  $dst,$src1,$src2\t# vector (8B)" %}
  ins_encode %{
    __ eor(as_FloatRegister($dst$$reg), __ T8B,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vlogical64);
%}
15942 
// Vector bitwise XOR (NEON eor) of a full 128-bit Q register (16 bytes).
instruct vxor16B(vecX dst, vecX src1, vecX src2)
%{
  predicate(n->as_Vector()->length_in_bytes() == 16);
  match(Set dst (XorV src1 src2));
  ins_cost(INSN_COST);
  format %{ "xor  $dst,$src1,$src2\t# vector (16B)" %}
  ins_encode %{
    __ eor(as_FloatRegister($dst$$reg), __ T16B,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vlogical128);
%}
15956 
15957 // ------------------------------ Shift ---------------------------------------
// Broadcast a scalar shift count into every byte lane of a D register;
// feeds the variable-shift instructions (sshl/ushl) below.
instruct vshiftcnt8B(vecD dst, iRegIorL2I cnt) %{
  predicate(n->as_Vector()->length_in_bytes() == 8);
  match(Set dst (LShiftCntV cnt));
  match(Set dst (RShiftCntV cnt));
  format %{ "dup  $dst, $cnt\t# shift count vector (8B)" %}
  ins_encode %{
    __ dup(as_FloatRegister($dst$$reg), __ T8B, as_Register($cnt$$reg));
  %}
  ins_pipe(vdup_reg_reg64);
%}
15968 
// Broadcast a scalar shift count into every byte lane of a Q register;
// feeds the variable-shift instructions (sshl/ushl) below.
instruct vshiftcnt16B(vecX dst, iRegIorL2I cnt) %{
  predicate(n->as_Vector()->length_in_bytes() == 16);
  match(Set dst (LShiftCntV cnt));
  match(Set dst (RShiftCntV cnt));
  format %{ "dup  $dst, $cnt\t# shift count vector (16B)" %}
  ins_encode %{
    __ dup(as_FloatRegister($dst$$reg), __ T16B, as_Register($cnt$$reg));
  %}
  ins_pipe(vdup_reg_reg128);
%}
15979 
// Variable left shift of bytes (per-lane count); lengths 4 and 8 both use the
// 8B form in a D register.
instruct vsll8B(vecD dst, vecD src, vecD shift) %{
  predicate(n->as_Vector()->length() == 4 ||
            n->as_Vector()->length() == 8);
  match(Set dst (LShiftVB src shift));
  ins_cost(INSN_COST);
  format %{ "sshl  $dst,$src,$shift\t# vector (8B)" %}
  ins_encode %{
    __ sshl(as_FloatRegister($dst$$reg), __ T8B,
            as_FloatRegister($src$$reg),
            as_FloatRegister($shift$$reg));
  %}
  ins_pipe(vshift64);
%}
15993 
// Variable left shift of 16 bytes (per-lane count), Q register.
instruct vsll16B(vecX dst, vecX src, vecX shift) %{
  predicate(n->as_Vector()->length() == 16);
  match(Set dst (LShiftVB src shift));
  ins_cost(INSN_COST);
  format %{ "sshl  $dst,$src,$shift\t# vector (16B)" %}
  ins_encode %{
    __ sshl(as_FloatRegister($dst$$reg), __ T16B,
            as_FloatRegister($src$$reg),
            as_FloatRegister($shift$$reg));
  %}
  ins_pipe(vshift128);
%}
16006 
16007 // Right shifts with vector shift count on aarch64 SIMD are implemented
16008 // as left shift by negative shift count.
16009 // There are two cases for vector shift count.
16010 //
16011 // Case 1: The vector shift count is from replication.
16012 //        |            |
16013 //    LoadVector  RShiftCntV
16014 //        |       /
16015 //     RShiftVI
// Note: In an inner loop, multiple neg instructions are used; they can be
// hoisted to the outer loop and merged into a single neg instruction.
16018 //
16019 // Case 2: The vector shift count is from loading.
16020 // This case isn't supported by middle-end now. But it's supported by
16021 // panama/vectorIntrinsics(JEP 338: Vector API).
16022 //        |            |
16023 //    LoadVector  LoadVector
16024 //        |       /
16025 //     RShiftVI
16026 //
16027 
// Variable arithmetic right shift of bytes: NEON has no right shift by a
// register, so negate the count and use sshl (see comment block above).
instruct vsra8B(vecD dst, vecD src, vecD shift, vecD tmp) %{
  predicate(n->as_Vector()->length() == 4 ||
            n->as_Vector()->length() == 8);
  match(Set dst (RShiftVB src shift));
  ins_cost(INSN_COST);
  effect(TEMP tmp);
  format %{ "negr  $tmp,$shift\t"
            "sshl  $dst,$src,$tmp\t# vector (8B)" %}
  ins_encode %{
    __ negr(as_FloatRegister($tmp$$reg), __ T8B,
            as_FloatRegister($shift$$reg));
    __ sshl(as_FloatRegister($dst$$reg), __ T8B,
            as_FloatRegister($src$$reg),
            as_FloatRegister($tmp$$reg));
  %}
  ins_pipe(vshift64);
%}
16045 
// Variable arithmetic right shift of 16 bytes, via negated count + sshl.
instruct vsra16B(vecX dst, vecX src, vecX shift, vecX tmp) %{
  predicate(n->as_Vector()->length() == 16);
  match(Set dst (RShiftVB src shift));
  ins_cost(INSN_COST);
  effect(TEMP tmp);
  format %{ "negr  $tmp,$shift\t"
            "sshl  $dst,$src,$tmp\t# vector (16B)" %}
  ins_encode %{
    __ negr(as_FloatRegister($tmp$$reg), __ T16B,
            as_FloatRegister($shift$$reg));
    __ sshl(as_FloatRegister($dst$$reg), __ T16B,
            as_FloatRegister($src$$reg),
            as_FloatRegister($tmp$$reg));
  %}
  ins_pipe(vshift128);
%}
16062 
// Variable logical right shift of bytes, via negated count + ushl.
instruct vsrl8B(vecD dst, vecD src, vecD shift, vecD tmp) %{
  predicate(n->as_Vector()->length() == 4 ||
            n->as_Vector()->length() == 8);
  match(Set dst (URShiftVB src shift));
  ins_cost(INSN_COST);
  effect(TEMP tmp);
  format %{ "negr  $tmp,$shift\t"
            "ushl  $dst,$src,$tmp\t# vector (8B)" %}
  ins_encode %{
    __ negr(as_FloatRegister($tmp$$reg), __ T8B,
            as_FloatRegister($shift$$reg));
    __ ushl(as_FloatRegister($dst$$reg), __ T8B,
            as_FloatRegister($src$$reg),
            as_FloatRegister($tmp$$reg));
  %}
  ins_pipe(vshift64);
%}
16080 
// Variable logical right shift of 16 bytes, via negated count + ushl.
instruct vsrl16B(vecX dst, vecX src, vecX shift, vecX tmp) %{
  predicate(n->as_Vector()->length() == 16);
  match(Set dst (URShiftVB src shift));
  ins_cost(INSN_COST);
  effect(TEMP tmp);
  format %{ "negr  $tmp,$shift\t"
            "ushl  $dst,$src,$tmp\t# vector (16B)" %}
  ins_encode %{
    __ negr(as_FloatRegister($tmp$$reg), __ T16B,
            as_FloatRegister($shift$$reg));
    __ ushl(as_FloatRegister($dst$$reg), __ T16B,
            as_FloatRegister($src$$reg),
            as_FloatRegister($tmp$$reg));
  %}
  ins_pipe(vshift128);
%}
16097 
// Immediate left shift of bytes. A count >= 8 shifts every bit out, so the
// destination is zeroed with eor(src, src) instead of emitting an invalid
// shl immediate.
instruct vsll8B_imm(vecD dst, vecD src, immI shift) %{
  predicate(n->as_Vector()->length() == 4 ||
            n->as_Vector()->length() == 8);
  match(Set dst (LShiftVB src shift));
  ins_cost(INSN_COST);
  format %{ "shl    $dst, $src, $shift\t# vector (8B)" %}
  ins_encode %{
    int sh = (int)$shift$$constant;
    if (sh >= 8) {
      __ eor(as_FloatRegister($dst$$reg), __ T8B,
             as_FloatRegister($src$$reg),
             as_FloatRegister($src$$reg));
    } else {
      __ shl(as_FloatRegister($dst$$reg), __ T8B,
             as_FloatRegister($src$$reg), sh);
    }
  %}
  ins_pipe(vshift64_imm);
%}
16117 
// Immediate left shift of 16 bytes; counts >= 8 zero the destination.
instruct vsll16B_imm(vecX dst, vecX src, immI shift) %{
  predicate(n->as_Vector()->length() == 16);
  match(Set dst (LShiftVB src shift));
  ins_cost(INSN_COST);
  format %{ "shl    $dst, $src, $shift\t# vector (16B)" %}
  ins_encode %{
    int sh = (int)$shift$$constant;
    if (sh >= 8) {
      __ eor(as_FloatRegister($dst$$reg), __ T16B,
             as_FloatRegister($src$$reg),
             as_FloatRegister($src$$reg));
    } else {
      __ shl(as_FloatRegister($dst$$reg), __ T16B,
             as_FloatRegister($src$$reg), sh);
    }
  %}
  ins_pipe(vshift128_imm);
%}
16136 
// Immediate arithmetic right shift of bytes. Counts >= 8 are clamped to 7:
// an arithmetic shift by the element width just replicates the sign bit.
instruct vsra8B_imm(vecD dst, vecD src, immI shift) %{
  predicate(n->as_Vector()->length() == 4 ||
            n->as_Vector()->length() == 8);
  match(Set dst (RShiftVB src shift));
  ins_cost(INSN_COST);
  format %{ "sshr    $dst, $src, $shift\t# vector (8B)" %}
  ins_encode %{
    int sh = (int)$shift$$constant;
    if (sh >= 8) sh = 7;
    __ sshr(as_FloatRegister($dst$$reg), __ T8B,
           as_FloatRegister($src$$reg), sh);
  %}
  ins_pipe(vshift64_imm);
%}
16151 
// Immediate arithmetic right shift of 16 bytes; counts >= 8 clamped to 7.
instruct vsra16B_imm(vecX dst, vecX src, immI shift) %{
  predicate(n->as_Vector()->length() == 16);
  match(Set dst (RShiftVB src shift));
  ins_cost(INSN_COST);
  format %{ "sshr    $dst, $src, $shift\t# vector (16B)" %}
  ins_encode %{
    int sh = (int)$shift$$constant;
    if (sh >= 8) sh = 7;
    __ sshr(as_FloatRegister($dst$$reg), __ T16B,
           as_FloatRegister($src$$reg), sh);
  %}
  ins_pipe(vshift128_imm);
%}
16165 
// Immediate logical right shift of bytes; counts >= 8 zero the destination
// (eor src,src) since every bit is shifted out.
instruct vsrl8B_imm(vecD dst, vecD src, immI shift) %{
  predicate(n->as_Vector()->length() == 4 ||
            n->as_Vector()->length() == 8);
  match(Set dst (URShiftVB src shift));
  ins_cost(INSN_COST);
  format %{ "ushr    $dst, $src, $shift\t# vector (8B)" %}
  ins_encode %{
    int sh = (int)$shift$$constant;
    if (sh >= 8) {
      __ eor(as_FloatRegister($dst$$reg), __ T8B,
             as_FloatRegister($src$$reg),
             as_FloatRegister($src$$reg));
    } else {
      __ ushr(as_FloatRegister($dst$$reg), __ T8B,
             as_FloatRegister($src$$reg), sh);
    }
  %}
  ins_pipe(vshift64_imm);
%}
16185 
// Immediate logical right shift of 16 bytes; counts >= 8 zero the destination.
instruct vsrl16B_imm(vecX dst, vecX src, immI shift) %{
  predicate(n->as_Vector()->length() == 16);
  match(Set dst (URShiftVB src shift));
  ins_cost(INSN_COST);
  format %{ "ushr    $dst, $src, $shift\t# vector (16B)" %}
  ins_encode %{
    int sh = (int)$shift$$constant;
    if (sh >= 8) {
      __ eor(as_FloatRegister($dst$$reg), __ T16B,
             as_FloatRegister($src$$reg),
             as_FloatRegister($src$$reg));
    } else {
      __ ushr(as_FloatRegister($dst$$reg), __ T16B,
             as_FloatRegister($src$$reg), sh);
    }
  %}
  ins_pipe(vshift128_imm);
%}
16204 
// Variable left shift of shorts (4H); lengths 2 and 4 both fit a D register.
instruct vsll4S(vecD dst, vecD src, vecD shift) %{
  predicate(n->as_Vector()->length() == 2 ||
            n->as_Vector()->length() == 4);
  match(Set dst (LShiftVS src shift));
  ins_cost(INSN_COST);
  format %{ "sshl  $dst,$src,$shift\t# vector (4H)" %}
  ins_encode %{
    __ sshl(as_FloatRegister($dst$$reg), __ T4H,
            as_FloatRegister($src$$reg),
            as_FloatRegister($shift$$reg));
  %}
  ins_pipe(vshift64);
%}
16218 
// Variable left shift of 8 shorts (8H), Q register.
instruct vsll8S(vecX dst, vecX src, vecX shift) %{
  predicate(n->as_Vector()->length() == 8);
  match(Set dst (LShiftVS src shift));
  ins_cost(INSN_COST);
  format %{ "sshl  $dst,$src,$shift\t# vector (8H)" %}
  ins_encode %{
    __ sshl(as_FloatRegister($dst$$reg), __ T8H,
            as_FloatRegister($src$$reg),
            as_FloatRegister($shift$$reg));
  %}
  ins_pipe(vshift128);
%}
16231 
// Variable arithmetic right shift of shorts, via negated count + sshl.
// NOTE(review): negr uses the T8B (bytewise) arrangement on a 16-bit-lane
// count vector — appears to rely on sshl reading only the low byte of each
// lane for the shift amount; confirm against the Arm ARM.
instruct vsra4S(vecD dst, vecD src, vecD shift, vecD tmp) %{
  predicate(n->as_Vector()->length() == 2 ||
            n->as_Vector()->length() == 4);
  match(Set dst (RShiftVS src shift));
  ins_cost(INSN_COST);
  effect(TEMP tmp);
  format %{ "negr  $tmp,$shift\t"
            "sshl  $dst,$src,$tmp\t# vector (4H)" %}
  ins_encode %{
    __ negr(as_FloatRegister($tmp$$reg), __ T8B,
            as_FloatRegister($shift$$reg));
    __ sshl(as_FloatRegister($dst$$reg), __ T4H,
            as_FloatRegister($src$$reg),
            as_FloatRegister($tmp$$reg));
  %}
  ins_pipe(vshift64);
%}
16249 
// Variable arithmetic right shift of 8 shorts, via negated count + sshl.
instruct vsra8S(vecX dst, vecX src, vecX shift, vecX tmp) %{
  predicate(n->as_Vector()->length() == 8);
  match(Set dst (RShiftVS src shift));
  ins_cost(INSN_COST);
  effect(TEMP tmp);
  format %{ "negr  $tmp,$shift\t"
            "sshl  $dst,$src,$tmp\t# vector (8H)" %}
  ins_encode %{
    __ negr(as_FloatRegister($tmp$$reg), __ T16B,
            as_FloatRegister($shift$$reg));
    __ sshl(as_FloatRegister($dst$$reg), __ T8H,
            as_FloatRegister($src$$reg),
            as_FloatRegister($tmp$$reg));
  %}
  ins_pipe(vshift128);
%}
16266 
// Variable logical right shift of shorts, via negated count + ushl.
instruct vsrl4S(vecD dst, vecD src, vecD shift, vecD tmp) %{
  predicate(n->as_Vector()->length() == 2 ||
            n->as_Vector()->length() == 4);
  match(Set dst (URShiftVS src shift));
  ins_cost(INSN_COST);
  effect(TEMP tmp);
  format %{ "negr  $tmp,$shift\t"
            "ushl  $dst,$src,$tmp\t# vector (4H)" %}
  ins_encode %{
    __ negr(as_FloatRegister($tmp$$reg), __ T8B,
            as_FloatRegister($shift$$reg));
    __ ushl(as_FloatRegister($dst$$reg), __ T4H,
            as_FloatRegister($src$$reg),
            as_FloatRegister($tmp$$reg));
  %}
  ins_pipe(vshift64);
%}
16284 
// Variable logical right shift of 8 shorts, via negated count + ushl.
instruct vsrl8S(vecX dst, vecX src, vecX shift, vecX tmp) %{
  predicate(n->as_Vector()->length() == 8);
  match(Set dst (URShiftVS src shift));
  ins_cost(INSN_COST);
  effect(TEMP tmp);
  format %{ "negr  $tmp,$shift\t"
            "ushl  $dst,$src,$tmp\t# vector (8H)" %}
  ins_encode %{
    __ negr(as_FloatRegister($tmp$$reg), __ T16B,
            as_FloatRegister($shift$$reg));
    __ ushl(as_FloatRegister($dst$$reg), __ T8H,
            as_FloatRegister($src$$reg),
            as_FloatRegister($tmp$$reg));
  %}
  ins_pipe(vshift128);
%}
16301 
// Immediate left shift of shorts; counts >= 16 shift every bit out, so the
// destination is zeroed with eor(src, src).
instruct vsll4S_imm(vecD dst, vecD src, immI shift) %{
  predicate(n->as_Vector()->length() == 2 ||
            n->as_Vector()->length() == 4);
  match(Set dst (LShiftVS src shift));
  ins_cost(INSN_COST);
  format %{ "shl    $dst, $src, $shift\t# vector (4H)" %}
  ins_encode %{
    int sh = (int)$shift$$constant;
    if (sh >= 16) {
      __ eor(as_FloatRegister($dst$$reg), __ T8B,
             as_FloatRegister($src$$reg),
             as_FloatRegister($src$$reg));
    } else {
      __ shl(as_FloatRegister($dst$$reg), __ T4H,
             as_FloatRegister($src$$reg), sh);
    }
  %}
  ins_pipe(vshift64_imm);
%}
16321 
// Immediate left shift of 8 shorts; counts >= 16 zero the destination.
instruct vsll8S_imm(vecX dst, vecX src, immI shift) %{
  predicate(n->as_Vector()->length() == 8);
  match(Set dst (LShiftVS src shift));
  ins_cost(INSN_COST);
  format %{ "shl    $dst, $src, $shift\t# vector (8H)" %}
  ins_encode %{
    int sh = (int)$shift$$constant;
    if (sh >= 16) {
      __ eor(as_FloatRegister($dst$$reg), __ T16B,
             as_FloatRegister($src$$reg),
             as_FloatRegister($src$$reg));
    } else {
      __ shl(as_FloatRegister($dst$$reg), __ T8H,
             as_FloatRegister($src$$reg), sh);
    }
  %}
  ins_pipe(vshift128_imm);
%}
16340 
// Immediate arithmetic right shift of shorts; counts >= 16 are clamped to 15
// (shift by the element width just replicates the sign bit).
instruct vsra4S_imm(vecD dst, vecD src, immI shift) %{
  predicate(n->as_Vector()->length() == 2 ||
            n->as_Vector()->length() == 4);
  match(Set dst (RShiftVS src shift));
  ins_cost(INSN_COST);
  format %{ "sshr    $dst, $src, $shift\t# vector (4H)" %}
  ins_encode %{
    int sh = (int)$shift$$constant;
    if (sh >= 16) sh = 15;
    __ sshr(as_FloatRegister($dst$$reg), __ T4H,
           as_FloatRegister($src$$reg), sh);
  %}
  ins_pipe(vshift64_imm);
%}
16355 
// Immediate arithmetic right shift of 8 shorts; counts >= 16 clamped to 15.
instruct vsra8S_imm(vecX dst, vecX src, immI shift) %{
  predicate(n->as_Vector()->length() == 8);
  match(Set dst (RShiftVS src shift));
  ins_cost(INSN_COST);
  format %{ "sshr    $dst, $src, $shift\t# vector (8H)" %}
  ins_encode %{
    int sh = (int)$shift$$constant;
    if (sh >= 16) sh = 15;
    __ sshr(as_FloatRegister($dst$$reg), __ T8H,
           as_FloatRegister($src$$reg), sh);
  %}
  ins_pipe(vshift128_imm);
%}
16369 
// Immediate logical right shift of shorts; counts >= 16 zero the destination.
instruct vsrl4S_imm(vecD dst, vecD src, immI shift) %{
  predicate(n->as_Vector()->length() == 2 ||
            n->as_Vector()->length() == 4);
  match(Set dst (URShiftVS src shift));
  ins_cost(INSN_COST);
  format %{ "ushr    $dst, $src, $shift\t# vector (4H)" %}
  ins_encode %{
    int sh = (int)$shift$$constant;
    if (sh >= 16) {
      __ eor(as_FloatRegister($dst$$reg), __ T8B,
             as_FloatRegister($src$$reg),
             as_FloatRegister($src$$reg));
    } else {
      __ ushr(as_FloatRegister($dst$$reg), __ T4H,
             as_FloatRegister($src$$reg), sh);
    }
  %}
  ins_pipe(vshift64_imm);
%}
16389 
// Vector logical right shift, 8 short lanes (128-bit), immediate count.
// Counts >= 16 zero every lane, handled by eor dst,src,src; otherwise a
// plain USHR immediate is emitted.
instruct vsrl8S_imm(vecX dst, vecX src, immI shift) %{
  predicate(n->as_Vector()->length() == 8);
  match(Set dst (URShiftVS src shift));
  ins_cost(INSN_COST);
  format %{ "ushr    $dst, $src, $shift\t# vector (8H)" %}
  ins_encode %{
    int sh = (int)$shift$$constant;
    if (sh >= 16) {
      // Result is zero for any count >= lane width: xor the source with itself.
      __ eor(as_FloatRegister($dst$$reg), __ T16B,
             as_FloatRegister($src$$reg),
             as_FloatRegister($src$$reg));
    } else {
      __ ushr(as_FloatRegister($dst$$reg), __ T8H,
             as_FloatRegister($src$$reg), sh);
    }
  %}
  ins_pipe(vshift128_imm);
%}
16408 
// Vector left shift, 2 int lanes, per-lane variable counts from a vector
// register.  SSHL shifts each lane left by its (positive) count lane.
instruct vsll2I(vecD dst, vecD src, vecD shift) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (LShiftVI src shift));
  ins_cost(INSN_COST);
  format %{ "sshl  $dst,$src,$shift\t# vector (2S)" %}
  ins_encode %{
    __ sshl(as_FloatRegister($dst$$reg), __ T2S,
            as_FloatRegister($src$$reg),
            as_FloatRegister($shift$$reg));
  %}
  ins_pipe(vshift64);
%}
16421 
// Vector left shift, 4 int lanes (128-bit), per-lane variable counts.
instruct vsll4I(vecX dst, vecX src, vecX shift) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (LShiftVI src shift));
  ins_cost(INSN_COST);
  format %{ "sshl  $dst,$src,$shift\t# vector (4S)" %}
  ins_encode %{
    __ sshl(as_FloatRegister($dst$$reg), __ T4S,
            as_FloatRegister($src$$reg),
            as_FloatRegister($shift$$reg));
  %}
  ins_pipe(vshift128);
%}
16434 
// Vector arithmetic right shift, 2 int lanes, variable counts.
// SSHL shifts right when a lane's count is negative, so the count vector is
// first negated (byte-wise NEG is sufficient since only the low byte of each
// count lane is significant) into a temp register.
instruct vsra2I(vecD dst, vecD src, vecD shift, vecD tmp) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (RShiftVI src shift));
  ins_cost(INSN_COST);
  effect(TEMP tmp);
  format %{ "negr  $tmp,$shift\t"
            "sshl  $dst,$src,$tmp\t# vector (2S)" %}
  ins_encode %{
    __ negr(as_FloatRegister($tmp$$reg), __ T8B,
            as_FloatRegister($shift$$reg));
    __ sshl(as_FloatRegister($dst$$reg), __ T2S,
            as_FloatRegister($src$$reg),
            as_FloatRegister($tmp$$reg));
  %}
  ins_pipe(vshift64);
%}
16451 
// Vector arithmetic right shift, 4 int lanes (128-bit), variable counts.
// Negate the counts into tmp, then use SSHL (negative count => right shift).
instruct vsra4I(vecX dst, vecX src, vecX shift, vecX tmp) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (RShiftVI src shift));
  ins_cost(INSN_COST);
  effect(TEMP tmp);
  format %{ "negr  $tmp,$shift\t"
            "sshl  $dst,$src,$tmp\t# vector (4S)" %}
  ins_encode %{
    __ negr(as_FloatRegister($tmp$$reg), __ T16B,
            as_FloatRegister($shift$$reg));
    __ sshl(as_FloatRegister($dst$$reg), __ T4S,
            as_FloatRegister($src$$reg),
            as_FloatRegister($tmp$$reg));
  %}
  ins_pipe(vshift128);
%}
16468 
// Vector logical right shift, 2 int lanes, variable counts.
// Same negate-then-shift idiom as vsra2I, but with USHL (zero fill).
instruct vsrl2I(vecD dst, vecD src, vecD shift, vecD tmp) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (URShiftVI src shift));
  ins_cost(INSN_COST);
  effect(TEMP tmp);
  format %{ "negr  $tmp,$shift\t"
            "ushl  $dst,$src,$tmp\t# vector (2S)" %}
  ins_encode %{
    __ negr(as_FloatRegister($tmp$$reg), __ T8B,
            as_FloatRegister($shift$$reg));
    __ ushl(as_FloatRegister($dst$$reg), __ T2S,
            as_FloatRegister($src$$reg),
            as_FloatRegister($tmp$$reg));
  %}
  ins_pipe(vshift64);
%}
16485 
// Vector logical right shift, 4 int lanes (128-bit), variable counts.
// Negate the counts into tmp, then use USHL (negative count => right shift).
instruct vsrl4I(vecX dst, vecX src, vecX shift, vecX tmp) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (URShiftVI src shift));
  ins_cost(INSN_COST);
  effect(TEMP tmp);
  format %{ "negr  $tmp,$shift\t"
            "ushl  $dst,$src,$tmp\t# vector (4S)" %}
  ins_encode %{
    __ negr(as_FloatRegister($tmp$$reg), __ T16B,
            as_FloatRegister($shift$$reg));
    __ ushl(as_FloatRegister($dst$$reg), __ T4S,
            as_FloatRegister($src$$reg),
            as_FloatRegister($tmp$$reg));
  %}
  ins_pipe(vshift128);
%}
16502 
// Vector left shift, 2 int lanes, immediate count.
// No clamping: Java masks int shift counts to 0..31, which SHL on 32-bit
// lanes encodes directly — presumably guaranteed by the ideal graph
// (NOTE(review): relies on upstream masking of the shift constant).
instruct vsll2I_imm(vecD dst, vecD src, immI shift) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (LShiftVI src shift));
  ins_cost(INSN_COST);
  format %{ "shl    $dst, $src, $shift\t# vector (2S)" %}
  ins_encode %{
    __ shl(as_FloatRegister($dst$$reg), __ T2S,
           as_FloatRegister($src$$reg),
           (int)$shift$$constant);
  %}
  ins_pipe(vshift64_imm);
%}
16515 
// Vector left shift, 4 int lanes (128-bit), immediate count.
instruct vsll4I_imm(vecX dst, vecX src, immI shift) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (LShiftVI src shift));
  ins_cost(INSN_COST);
  format %{ "shl    $dst, $src, $shift\t# vector (4S)" %}
  ins_encode %{
    __ shl(as_FloatRegister($dst$$reg), __ T4S,
           as_FloatRegister($src$$reg),
           (int)$shift$$constant);
  %}
  ins_pipe(vshift128_imm);
%}
16528 
// Vector arithmetic right shift, 2 int lanes, immediate count.
// Unlike the short-lane forms, no clamp is needed: Java masks int shift
// counts to 0..31, within SSHR's encodable range for 32-bit lanes.
instruct vsra2I_imm(vecD dst, vecD src, immI shift) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (RShiftVI src shift));
  ins_cost(INSN_COST);
  format %{ "sshr    $dst, $src, $shift\t# vector (2S)" %}
  ins_encode %{
    __ sshr(as_FloatRegister($dst$$reg), __ T2S,
            as_FloatRegister($src$$reg),
            (int)$shift$$constant);
  %}
  ins_pipe(vshift64_imm);
%}
16541 
// Vector arithmetic right shift, 4 int lanes (128-bit), immediate count.
instruct vsra4I_imm(vecX dst, vecX src, immI shift) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (RShiftVI src shift));
  ins_cost(INSN_COST);
  format %{ "sshr    $dst, $src, $shift\t# vector (4S)" %}
  ins_encode %{
    __ sshr(as_FloatRegister($dst$$reg), __ T4S,
            as_FloatRegister($src$$reg),
            (int)$shift$$constant);
  %}
  ins_pipe(vshift128_imm);
%}
16554 
// Vector logical right shift, 2 int lanes, immediate count.
instruct vsrl2I_imm(vecD dst, vecD src, immI shift) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (URShiftVI src shift));
  ins_cost(INSN_COST);
  format %{ "ushr    $dst, $src, $shift\t# vector (2S)" %}
  ins_encode %{
    __ ushr(as_FloatRegister($dst$$reg), __ T2S,
            as_FloatRegister($src$$reg),
            (int)$shift$$constant);
  %}
  ins_pipe(vshift64_imm);
%}
16567 
// Vector logical right shift, 4 int lanes (128-bit), immediate count.
instruct vsrl4I_imm(vecX dst, vecX src, immI shift) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (URShiftVI src shift));
  ins_cost(INSN_COST);
  format %{ "ushr    $dst, $src, $shift\t# vector (4S)" %}
  ins_encode %{
    __ ushr(as_FloatRegister($dst$$reg), __ T4S,
            as_FloatRegister($src$$reg),
            (int)$shift$$constant);
  %}
  ins_pipe(vshift128_imm);
%}
16580 
// Vector left shift, 2 long lanes (128-bit), per-lane variable counts.
instruct vsll2L(vecX dst, vecX src, vecX shift) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (LShiftVL src shift));
  ins_cost(INSN_COST);
  format %{ "sshl  $dst,$src,$shift\t# vector (2D)" %}
  ins_encode %{
    __ sshl(as_FloatRegister($dst$$reg), __ T2D,
            as_FloatRegister($src$$reg),
            as_FloatRegister($shift$$reg));
  %}
  ins_pipe(vshift128);
%}
16593 
// Vector arithmetic right shift, 2 long lanes, variable counts.
// Negate the counts into tmp (byte-wise NEG; only the low byte of each count
// lane matters), then SSHL — a negative count shifts right with sign fill.
instruct vsra2L(vecX dst, vecX src, vecX shift, vecX tmp) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (RShiftVL src shift));
  ins_cost(INSN_COST);
  effect(TEMP tmp);
  format %{ "negr  $tmp,$shift\t"
            "sshl  $dst,$src,$tmp\t# vector (2D)" %}
  ins_encode %{
    __ negr(as_FloatRegister($tmp$$reg), __ T16B,
            as_FloatRegister($shift$$reg));
    __ sshl(as_FloatRegister($dst$$reg), __ T2D,
            as_FloatRegister($src$$reg),
            as_FloatRegister($tmp$$reg));
  %}
  ins_pipe(vshift128);
%}
16610 
// Vector logical right shift, 2 long lanes, variable counts.
// Same negate-then-shift idiom as vsra2L, but with USHL (zero fill).
instruct vsrl2L(vecX dst, vecX src, vecX shift, vecX tmp) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (URShiftVL src shift));
  ins_cost(INSN_COST);
  effect(TEMP tmp);
  format %{ "negr  $tmp,$shift\t"
            "ushl  $dst,$src,$tmp\t# vector (2D)" %}
  ins_encode %{
    __ negr(as_FloatRegister($tmp$$reg), __ T16B,
            as_FloatRegister($shift$$reg));
    __ ushl(as_FloatRegister($dst$$reg), __ T2D,
            as_FloatRegister($src$$reg),
            as_FloatRegister($tmp$$reg));
  %}
  ins_pipe(vshift128);
%}
16627 
// Vector left shift, 2 long lanes (128-bit), immediate count.
// Java masks long shift counts to 0..63, within SHL's encodable range for
// 64-bit lanes, so no clamping is required.
instruct vsll2L_imm(vecX dst, vecX src, immI shift) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (LShiftVL src shift));
  ins_cost(INSN_COST);
  format %{ "shl    $dst, $src, $shift\t# vector (2D)" %}
  ins_encode %{
    __ shl(as_FloatRegister($dst$$reg), __ T2D,
           as_FloatRegister($src$$reg),
           (int)$shift$$constant);
  %}
  ins_pipe(vshift128_imm);
%}
16640 
// Vector arithmetic right shift, 2 long lanes (128-bit), immediate count.
instruct vsra2L_imm(vecX dst, vecX src, immI shift) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (RShiftVL src shift));
  ins_cost(INSN_COST);
  format %{ "sshr    $dst, $src, $shift\t# vector (2D)" %}
  ins_encode %{
    __ sshr(as_FloatRegister($dst$$reg), __ T2D,
            as_FloatRegister($src$$reg),
            (int)$shift$$constant);
  %}
  ins_pipe(vshift128_imm);
%}
16653 
// Vector logical right shift, 2 long lanes (128-bit), immediate count.
instruct vsrl2L_imm(vecX dst, vecX src, immI shift) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (URShiftVL src shift));
  ins_cost(INSN_COST);
  format %{ "ushr    $dst, $src, $shift\t# vector (2D)" %}
  ins_encode %{
    __ ushr(as_FloatRegister($dst$$reg), __ T2D,
            as_FloatRegister($src$$reg),
            (int)$shift$$constant);
  %}
  ins_pipe(vshift128_imm);
%}
16666 
16667 //----------PEEPHOLE RULES-----------------------------------------------------
16668 // These must follow all instruction definitions as they use the names
16669 // defined in the instructions definitions.
16670 //
16671 // peepmatch ( root_instr_name [preceding_instruction]* );
16672 //
16673 // peepconstraint %{
16674 // (instruction_number.operand_name relational_op instruction_number.operand_name
16675 //  [, ...] );
16676 // // instruction numbers are zero-based using left to right order in peepmatch
16677 //
16678 // peepreplace ( instr_name  ( [instruction_number.operand_name]* ) );
16679 // // provide an instruction_number.operand_name for each operand that appears
16680 // // in the replacement instruction's match rule
16681 //
16682 // ---------VM FLAGS---------------------------------------------------------
16683 //
16684 // All peephole optimizations can be turned off using -XX:-OptoPeephole
16685 //
16686 // Each peephole rule is given an identifying number starting with zero and
16687 // increasing by one in the order seen by the parser.  An individual peephole
16688 // can be enabled, and all others disabled, by using -XX:OptoPeepholeAt=#
16689 // on the command-line.
16690 //
16691 // ---------CURRENT LIMITATIONS----------------------------------------------
16692 //
16693 // Only match adjacent instructions in same basic block
16694 // Only equality constraints
16695 // Only constraints between operands, not (0.dest_reg == RAX_enc)
16696 // Only one replacement instruction
16697 //
16698 // ---------EXAMPLE----------------------------------------------------------
16699 //
16700 // // pertinent parts of existing instructions in architecture description
16701 // instruct movI(iRegINoSp dst, iRegI src)
16702 // %{
16703 //   match(Set dst (CopyI src));
16704 // %}
16705 //
16706 // instruct incI_iReg(iRegINoSp dst, immI1 src, rFlagsReg cr)
16707 // %{
16708 //   match(Set dst (AddI dst src));
16709 //   effect(KILL cr);
16710 // %}
16711 //
16712 // // Change (inc mov) to lea
16713 // peephole %{
//   // increment preceded by register-register move
16715 //   peepmatch ( incI_iReg movI );
16716 //   // require that the destination register of the increment
16717 //   // match the destination register of the move
16718 //   peepconstraint ( 0.dst == 1.dst );
16719 //   // construct a replacement instruction that sets
16720 //   // the destination to ( move's source register + one )
16721 //   peepreplace ( leaI_iReg_immI( 0.dst 1.src 0.src ) );
16722 // %}
16723 //
16724 
16725 // Implementation no longer uses movX instructions since
16726 // machine-independent system no longer uses CopyX nodes.
16727 //
16728 // peephole
16729 // %{
16730 //   peepmatch (incI_iReg movI);
16731 //   peepconstraint (0.dst == 1.dst);
16732 //   peepreplace (leaI_iReg_immI(0.dst 1.src 0.src));
16733 // %}
16734 
16735 // peephole
16736 // %{
16737 //   peepmatch (decI_iReg movI);
16738 //   peepconstraint (0.dst == 1.dst);
16739 //   peepreplace (leaI_iReg_immI(0.dst 1.src 0.src));
16740 // %}
16741 
16742 // peephole
16743 // %{
16744 //   peepmatch (addI_iReg_imm movI);
16745 //   peepconstraint (0.dst == 1.dst);
16746 //   peepreplace (leaI_iReg_immI(0.dst 1.src 0.src));
16747 // %}
16748 
16749 // peephole
16750 // %{
16751 //   peepmatch (incL_iReg movL);
16752 //   peepconstraint (0.dst == 1.dst);
16753 //   peepreplace (leaL_iReg_immL(0.dst 1.src 0.src));
16754 // %}
16755 
16756 // peephole
16757 // %{
16758 //   peepmatch (decL_iReg movL);
16759 //   peepconstraint (0.dst == 1.dst);
16760 //   peepreplace (leaL_iReg_immL(0.dst 1.src 0.src));
16761 // %}
16762 
16763 // peephole
16764 // %{
16765 //   peepmatch (addL_iReg_imm movL);
16766 //   peepconstraint (0.dst == 1.dst);
16767 //   peepreplace (leaL_iReg_immL(0.dst 1.src 0.src));
16768 // %}
16769 
16770 // peephole
16771 // %{
16772 //   peepmatch (addP_iReg_imm movP);
16773 //   peepconstraint (0.dst == 1.dst);
16774 //   peepreplace (leaP_iReg_imm(0.dst 1.src 0.src));
16775 // %}
16776 
16777 // // Change load of spilled value to only a spill
16778 // instruct storeI(memory mem, iRegI src)
16779 // %{
16780 //   match(Set mem (StoreI mem src));
16781 // %}
16782 //
16783 // instruct loadI(iRegINoSp dst, memory mem)
16784 // %{
16785 //   match(Set dst (LoadI mem));
16786 // %}
16787 //
16788 
16789 //----------SMARTSPILL RULES---------------------------------------------------
16790 // These must follow all instruction definitions as they use the names
16791 // defined in the instructions definitions.
16792 
16793 // Local Variables:
16794 // mode: c++
16795 // End: